// Copyright (C) 2007 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SOLVE_QP3_USING_SMo_Hh_
#define DLIB_SOLVE_QP3_USING_SMo_Hh_
#include "optimization_solve_qp3_using_smo_abstract.h"
#include <cmath>
#include <limits>
#include <sstream>
#include "../matrix.h"
#include "../algs.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class invalid_qp3_error : public dlib::error
{
public:
invalid_qp3_error(
const std::string& msg,
double B_,
double Cp_,
double Cn_
) :
dlib::error(msg),
B(B_),
Cp(Cp_),
Cn(Cn_)
{};
const double B;
const double Cp;
const double Cn;
};
// ----------------------------------------------------------------------------------------
template <
typename matrix_type
>
class solve_qp3_using_smo
{
public:
typedef typename matrix_type::mem_manager_type mem_manager_type;
typedef typename matrix_type::type scalar_type;
typedef typename matrix_type::layout_type layout_type;
typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
template <
typename EXP1,
typename EXP2,
typename EXP3,
long NR
>
unsigned long operator() (
const matrix_exp<EXP1>& Q,
const matrix_exp<EXP2>& p,
const matrix_exp<EXP3>& y,
const scalar_type B,
const scalar_type Cp,
const scalar_type Cn,
matrix<scalar_type,NR,1,mem_manager_type, layout_type>& alpha,
scalar_type eps
)
{
DLIB_ASSERT(Q.nr() == Q.nc() && y.size() == Q.nr() && p.size() == y.size() &&
y.size() > 0 && is_col_vector(y) && is_col_vector(p) &&
sum((y == +1) + (y == -1)) == y.size() &&
Cp > 0 && Cn > 0 &&
eps > 0,
"\t void solve_qp3_using_smo::operator()"
<< "\n\t invalid arguments were given to this function"
<< "\n\t Q.nr(): " << Q.nr()
<< "\n\t Q.nc(): " << Q.nc()
<< "\n\t is_col_vector(p): " << is_col_vector(p)
<< "\n\t p.size(): " << p.size()
<< "\n\t is_col_vector(y): " << is_col_vector(y)
<< "\n\t y.size(): " << y.size()
<< "\n\t sum((y == +1) + (y == -1)): " << sum((y == +1) + (y == -1))
<< "\n\t Cp: " << Cp
<< "\n\t Cn: " << Cn
<< "\n\t eps: " << eps
);
set_initial_alpha(y, B, Cp, Cn, alpha);
const scalar_type tau = 1e-12;
typedef typename colm_exp<EXP1>::type col_type;
// initialize df. Compute df = Q*alpha + p
df = p;
for (long r = 0; r < df.nr(); ++r)
{
if (alpha(r) != 0)
{
df += alpha(r)*matrix_cast<scalar_type>(colm(Q,r));
}
}
unsigned long count = 0;
// now perform the actual optimization of alpha
long i=0, j=0;
while (find_working_group(y,alpha,Q,df,Cp,Cn,tau,eps,i,j))
{
++count;
const scalar_type old_alpha_i = alpha(i);
const scalar_type old_alpha_j = alpha(j);
optimize_working_pair(alpha,Q,y,df,tau,i,j, Cp, Cn );
// update the df vector now that we have modified alpha(i) and alpha(j)
scalar_type delta_alpha_i = alpha(i) - old_alpha_i;
scalar_type delta_alpha_j = alpha(j) - old_alpha_j;
col_type Q_i = colm(Q,i);
col_type Q_j = colm(Q,j);
for(long k = 0; k < df.nr(); ++k)
df(k) += Q_i(k)*delta_alpha_i + Q_j(k)*delta_alpha_j;
}
return count;
}
const column_matrix& get_gradient (
) const { return df; }
private:
// -------------------------------------------------------------------------------------
template <
typename scalar_type,
typename scalar_vector_type,
typename scalar_vector_type2
>
inline void set_initial_alpha (
const scalar_vector_type& y,
const scalar_type B,
const scalar_type Cp,
const scalar_type Cn,
scalar_vector_type2& alpha
) const
{
alpha.set_size(y.size());
set_all_elements(alpha,0);
// It's easy in the B == 0 case
if (B == 0)
return;
const scalar_type C = (B > 0)? Cp : Cn;
scalar_type temp = std::abs(B)/C;
long num = (long)std::floor(temp);
long num_total = (long)std::ceil(temp);
const scalar_type B_sign = (B > 0)? 1 : -1;
long count = 0;
for (long i = 0; i < alpha.nr(); ++i)
{
if (y(i) == B_sign)
{
if (count < num)
{
++count;
alpha(i) = C;
}
else
{
if (count < num_total)
{
++count;
alpha(i) = C*(temp - std::floor(temp));
}
break;
}
}
}
if (count != num_total)
{
std::ostringstream sout;
sout << "Invalid QP3 constraint parameters of B: " << B << ", Cp: " << Cp << ", Cn: "<< Cn;
throw invalid_qp3_error(sout.str(),B,Cp,Cn);
}
}
// ------------------------------------------------------------------------------------
template <
typename scalar_vector_type,
typename scalar_type,
typename EXP,
typename U, typename V
>
inline bool find_working_group (
const V& y,
const U& alpha,
const matrix_exp<EXP>& Q,
const scalar_vector_type& df,
const scalar_type Cp,
const scalar_type Cn,
const scalar_type tau,
const scalar_type eps,
long& i_out,
long& j_out
) const
{
using namespace std;
long ip = 0;
long jp = 0;
typedef typename colm_exp<EXP>::type col_type;
typedef typename diag_exp<EXP>::type diag_type;
scalar_type ip_val = -numeric_limits<scalar_type>::infinity();
scalar_type jp_val = numeric_limits<scalar_type>::infinity();
// loop over the alphas and find the maximum ip and in indices.
for (long i = 0; i < alpha.nr(); ++i)
{
if (y(i) == 1)
{
if (alpha(i) < Cp)
{
if (-df(i) > ip_val)
{
ip_val = -df(i);
ip = i;
}
}
}
else
{
if (alpha(i) > 0.0)
{
if (df(i) > ip_val)
{
ip_val = df(i);
ip = i;
}
}
}
}
scalar_type Mp = -numeric_limits<scalar_type>::infinity();
// Pick out the column and diagonal of Q we need below. Doing
// it this way is faster if Q is actually a symmetric_matrix_cache
// object.
col_type Q_ip = colm(Q,ip);
diag_type Q_diag = diag(Q);
// now we need to find the minimum jp indices
for (long j = 0; j < alpha.nr(); ++j)
{
if (y(j) == 1)
{
if (alpha(j) > 0.0)
{
scalar_type b = ip_val + df(j);
if (df(j) > Mp)
Mp = df(j);
if (b > 0)
{
scalar_type a = Q_ip(ip) + Q_diag(j) - 2*y(ip)*Q_ip(j);
if (a <= 0)
a = tau;
scalar_type temp = -b*b/a;
if (temp < jp_val)
{
jp_val = temp;
jp = j;
}
}
}
}
else
{
if (alpha(j) < Cn)
{
scalar_type b = ip_val - df(j);
if (-df(j) > Mp)
Mp = -df(j);
if (b > 0)
{
scalar_type a = Q_ip(ip) + Q_diag(j) + 2*y(ip)*Q_ip(j);
if (a <= 0)
a = tau;
scalar_type temp = -b*b/a;
if (temp < jp_val)
{
jp_val = temp;
jp = j;
}
}
}
}
}
// if we are at the optimal point then return false so the caller knows
// to stop optimizing
if (Mp + ip_val < eps)
return false;
i_out = ip;
j_out = jp;
return true;
}
// ------------------------------------------------------------------------------------
template <
typename EXP,
typename EXP2,
typename T, typename U
>
inline void optimize_working_pair (
T& alpha,
const matrix_exp<EXP>& Q,
const matrix_exp<EXP2>& y,
const U& df,
const scalar_type& tau,
const long i,
const long j,
const scalar_type& Cp,
const scalar_type& Cn
) const
{
const scalar_type Ci = (y(i) > 0 )? Cp : Cn;
const scalar_type Cj = (y(j) > 0 )? Cp : Cn;
if (y(i) != y(j))
{
scalar_type quad_coef = Q(i,i)+Q(j,j)+2*Q(j,i);
if (quad_coef <= 0)
quad_coef = tau;
scalar_type delta = (-df(i)-df(j))/quad_coef;
scalar_type diff = alpha(i) - alpha(j);
alpha(i) += delta;
alpha(j) += delta;
if (diff > 0)
{
if (alpha(j) < 0)
{
alpha(j) = 0;
alpha(i) = diff;
}
}
else
{
if (alpha(i) < 0)
{
alpha(i) = 0;
alpha(j) = -diff;
}
}
if (diff > Ci - Cj)
{
if (alpha(i) > Ci)
{
alpha(i) = Ci;
alpha(j) = Ci - diff;
}
}
else
{
if (alpha(j) > Cj)
{
alpha(j) = Cj;
alpha(i) = Cj + diff;
}
}
}
else
{
scalar_type quad_coef = Q(i,i)+Q(j,j)-2*Q(j,i);
if (quad_coef <= 0)
quad_coef = tau;
scalar_type delta = (df(i)-df(j))/quad_coef;
scalar_type sum = alpha(i) + alpha(j);
alpha(i) -= delta;
alpha(j) += delta;
if(sum > Ci)
{
if(alpha(i) > Ci)
{
alpha(i) = Ci;
alpha(j) = sum - Ci;
}
}
else
{
if(alpha(j) < 0)
{
alpha(j) = 0;
alpha(i) = sum;
}
}
if(sum > Cj)
{
if(alpha(j) > Cj)
{
alpha(j) = Cj;
alpha(i) = sum - Cj;
}
}
else
{
if(alpha(i) < 0)
{
alpha(i) = 0;
alpha(j) = sum;
}
}
}
}
// ------------------------------------------------------------------------------------
column_matrix df; // gradient of f(alpha)
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SOLVE_QP3_USING_SMo_Hh_