This source file includes the following definitions:
- log_ratio
- isClassifier
- clear
- startTraining
- normalizeWeights
- endTraining
- scaleTree
- calcValue
- train
- updateWeightsAndTrim
- predictTrees
- writeTrainingParams
- write
- readParams
- read
- CV_IMPL_PROPERTY
- train
- predict
- write
- read
- getVarCount
- isTrained
- isClassifier
- getRoots
- getNodes
- getSplits
- getSubsets
- create
#include "precomp.hpp"
namespace cv { namespace ml {
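// Clamped log-odds: returns log(val/(1 - val)) with val forced into
// [eps, 1 - eps] so the result stays finite as val approaches 0 or 1.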
static inline double
log_ratio( double val )
{
const double eps = 1e-5;
val = std::max( val, eps );
val = std::min( val, 1. - eps );
return log( val/(1. - val) );
}
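// Default parameters: Real AdaBoost, 100 weak trees, weight trimming at
// 0.95 (samples carrying the smallest 5% of the total weight are skipped
// when training the next tree).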
BoostTreeParams::BoostTreeParams()
{
boostType = Boost::REAL;
weakCount = 100;
weightTrimRate = 0.95;
}
BoostTreeParams::BoostTreeParams( int _boostType, int _weak_count,
double _weightTrimRate)
{
boostType = _boostType;
weakCount = _weak_count;
weightTrimRate = _weightTrimRate;
}
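// Decision-tree implementation specialized for boosting. The constructor
// selects depth-1 trees (stumps) without cross-validation pruning, and
// isClassifier() reports true even for the variants that are trained
// internally as regression.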
class DTreesImplForBoost : public DTreesImpl
{
public:
DTreesImplForBoost()
{
params.setCVFolds(0);
params.setMaxDepth(1);
}
virtual ~DTreesImplForBoost() {}
bool isClassifier() const { return true; }
void clear()
{
DTreesImpl::clear();
}
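// Prepares per-sample state before the first tree is trained: allocates
// the per-sample sum accumulator (used by LogitBoost) and, for every
// variant except Discrete AdaBoost, switches to regression mode with the
// class labels remapped to {-1, +1} ({-2, +2} for LogitBoost).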
void startTraining( const Ptr<TrainData>& trainData, int flags )
{
DTreesImpl::startTraining(trainData, flags);
sumResult.assign(w->sidx.size(), 0.);
if( bparams.boostType != Boost::DISCRETE )
{
_isClassifier = false;
int i, n = (int)w->cat_responses.size();
w->ord_responses.resize(n);
double a = -1, b = 1;
if( bparams.boostType == Boost::LOGIT )
{
a = -2; b = 2;
}
for( i = 0; i < n; i++ )
w->ord_responses[i] = w->cat_responses[i] > 0 ? b : a;
}
normalizeWeights();
}
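// Rescales the sample weights so they sum to 1; if the total weight has
// collapsed to (near) zero, every weight is reset to 1 instead.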
void normalizeWeights()
{
int i, n = (int)w->sidx.size();
double sumw = 0, a, b;
for( i = 0; i < n; i++ )
sumw += w->sample_weights[w->sidx[i]];
if( sumw > DBL_EPSILON )
{
a = 1./sumw;
b = 0;
}
else
{
a = 0;
b = 1;
}
for( i = 0; i < n; i++ )
{
double& wval = w->sample_weights[w->sidx[i]];
wval = wval*a + b;
}
}
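// Finishes training and releases the LogitBoost accumulator; swapping
// with an empty vector frees the memory, which vector::clear() would not.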
void endTraining()
{
DTreesImpl::endTraining();
vector<double> e;
std::swap(sumResult, e);
}
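// Multiplies the value of every node in the tree rooted at 'root' by
// 'scale', using an iterative depth-first traversal over the node array.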
void scaleTree( int root, double scale )
{
int nidx = root, pidx = 0;
Node *node = 0;
for(;;)
{
for(;;)
{
node = &nodes[nidx];
node->value *= scale;
if( node->left < 0 )
break;
nidx = node->left;
}
for( pidx = node->parent; pidx >= 0 && nodes[pidx].right == nidx;
nidx = pidx, pidx = nodes[pidx].parent )
;
if( pidx < 0 )
break;
nidx = nodes[pidx].right;
}
}
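// Computes the leaf value, then adjusts it for the boosting variant:
// Discrete AdaBoost leaves vote -1/+1 by class, Real AdaBoost leaves
// store half the log-odds of the weighted class probability; Gentle
// AdaBoost and LogitBoost keep the plain regression value.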
void calcValue( int nidx, const vector<int>& _sidx )
{
DTreesImpl::calcValue(nidx, _sidx);
WNode* node = &w->wnodes[nidx];
if( bparams.boostType == Boost::DISCRETE )
{
node->value = node->class_idx == 0 ? -1 : 1;
}
else if( bparams.boostType == Boost::REAL )
{
double p = (node->value+1)*0.5;
node->value = 0.5*log_ratio(p);
}
}
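// Training loop: adds one weak tree at a time and reweights (and
// possibly trims) the training set after each round. A negative
// weakCount means "up to 10000 trees".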
bool train( const Ptr<TrainData>& trainData, int flags )
{
startTraining(trainData, flags);
int treeidx, ntrees = bparams.weakCount >= 0 ? bparams.weakCount : 10000;
vector<int> sidx = w->sidx;
for( treeidx = 0; treeidx < ntrees; treeidx++ )
{
int root = addTree( sidx );
if( root < 0 )
return false;
updateWeightsAndTrim( treeidx, sidx );
}
endTraining();
return true;
}
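// Runs the freshly trained tree over all training samples and applies
// the boosting update:
//  - Discrete AdaBoost: misclassified samples are upweighted by
//    exp(C) with C = log((1-err)/err), and the tree is scaled by C;
//  - Real/Gentle AdaBoost: w *= exp(-y*f(x)) with y in {-1, +1};
//  - LogitBoost: Newton-step weights p*(1-p) and working responses
//    1/p (positive class) or -1/(1-p) (negative class), capped at
//    magnitude lb_z_max.
// The weights are then renormalized and, if weightTrimRate lies in
// (0, 1), 'sidx' is shrunk to the samples whose weights stay above the
// trimming threshold.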
void updateWeightsAndTrim( int treeidx, vector<int>& sidx )
{
int i, n = (int)w->sidx.size();
int nvars = (int)varIdx.size();
double sumw = 0., C = 1.;
cv::AutoBuffer<double> buf(n + nvars);
double* result = buf.data();
float* sbuf = (float*)(result + n);
Mat sample(1, nvars, CV_32F, sbuf);
int predictFlags = bparams.boostType == Boost::DISCRETE ? (PREDICT_MAX_VOTE | RAW_OUTPUT) : PREDICT_SUM;
predictFlags |= COMPRESSED_INPUT;
for( i = 0; i < n; i++ )
{
w->data->getSample(varIdx, w->sidx[i], sbuf );
result[i] = predictTrees(Range(treeidx, treeidx+1), sample, predictFlags);
}
if( bparams.boostType == Boost::DISCRETE )
{
double err = 0.;
for( i = 0; i < n; i++ )
{
int si = w->sidx[i];
double wval = w->sample_weights[si];
sumw += wval;
err += wval*(result[i] != w->cat_responses[si]);
}
if( sumw != 0 )
err /= sumw;
C = -log_ratio( err );
double scale = std::exp(C);
sumw = 0;
for( i = 0; i < n; i++ )
{
int si = w->sidx[i];
double wval = w->sample_weights[si];
if( result[i] != w->cat_responses[si] )
wval *= scale;
sumw += wval;
w->sample_weights[si] = wval;
}
scaleTree(roots[treeidx], C);
}
else if( bparams.boostType == Boost::REAL || bparams.boostType == Boost::GENTLE )
{
for( i = 0; i < n; i++ )
{
int si = w->sidx[i];
CV_Assert( std::abs(w->ord_responses[si]) == 1 );
double wval = w->sample_weights[si]*std::exp(-result[i]*w->ord_responses[si]);
sumw += wval;
w->sample_weights[si] = wval;
}
}
else if( bparams.boostType == Boost::LOGIT )
{
const double lb_weight_thresh = FLT_EPSILON; // lower bound for the Newton-step weights
const double lb_z_max = 10.; // cap on the magnitude of the working response
for( i = 0; i < n; i++ )
{
int si = w->sidx[i];
sumResult[i] += 0.5*result[i];
double p = 1./(1 + std::exp(-2*sumResult[i]));
double wval = std::max( p*(1 - p), lb_weight_thresh ), z;
w->sample_weights[si] = wval;
sumw += wval;
if( w->ord_responses[si] > 0 )
{
z = 1./p;
w->ord_responses[si] = std::min(z, lb_z_max);
}
else
{
z = 1./(1-p);
w->ord_responses[si] = -std::min(z, lb_z_max);
}
}
}
else
CV_Error(CV_StsNotImplemented, "Unknown boosting type");
if( sumw > FLT_EPSILON )
normalizeWeights();
if( bparams.weightTrimRate <= 0. || bparams.weightTrimRate >= 1. )
return;
for( i = 0; i < n; i++ )
result[i] = w->sample_weights[w->sidx[i]];
std::sort(result, result + n);
sumw = 1. - bparams.weightTrimRate;
for( i = 0; i < n; i++ )
{
double wval = result[i];
if( sumw <= 0 )
break;
sumw -= wval;
}
double threshold = i < n ? result[i] : DBL_MAX;
sidx.clear();
for( i = 0; i < n; i++ )
{
int si = w->sidx[i];
if( w->sample_weights[si] >= threshold )
sidx.push_back(si);
}
}
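// Sums the (scaled) responses of the trees in 'range'. If the caller
// asked for something other than the raw sum, the sign of the sum is
// thresholded and, unless RAW_OUTPUT is set, mapped back to a class
// label.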
float predictTrees( const Range& range, const Mat& sample, int flags0 ) const
{
int flags = (flags0 & ~PREDICT_MASK) | PREDICT_SUM;
float val = DTreesImpl::predictTrees(range, sample, flags);
if( flags != flags0 )
{
int ival = (int)(val > 0);
if( !(flags0 & RAW_OUTPUT) )
ival = classLabels[ival];
val = (float)ival;
}
return val;
}
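// Serializes the boosting-specific parameters alongside the generic
// decision-tree training parameters.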
void writeTrainingParams( FileStorage& fs ) const
{
fs << "boosting_type" <<
(bparams.boostType == Boost::DISCRETE ? "DiscreteAdaboost" :
bparams.boostType == Boost::REAL ? "RealAdaboost" :
bparams.boostType == Boost::LOGIT ? "LogitBoost" :
bparams.boostType == Boost::GENTLE ? "GentleAdaboost" : "Unknown");
DTreesImpl::writeTrainingParams(fs);
fs << "weight_trimming_rate" << bparams.weightTrimRate;
}
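// Writes the trained ensemble: parameters first, then the tree count and
// each tree as a separate map in the "trees" sequence.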
void write( FileStorage& fs ) const
{
if( roots.empty() )
CV_Error( CV_StsBadArg, "Boost model has not been trained" );
writeParams(fs);
int k, ntrees = (int)roots.size();
fs << "ntrees" << ntrees
<< "trees" << "[";
for( k = 0; k < ntrees; k++ )
{
fs << "{";
writeTree(fs, roots[k]);
fs << "}";
}
fs << "]";
}
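// Restores the boosting parameters, reading each field from the top
// level of the file node when present and falling back to the
// "training_params" node otherwise.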
void readParams( const FileNode& fn )
{
DTreesImpl::readParams(fn);
FileNode tparams_node = fn["training_params"];
String bts = (String)(fn["boosting_type"].empty() ?
tparams_node["boosting_type"] : fn["boosting_type"]);
bparams.boostType = (bts == "DiscreteAdaboost" ? Boost::DISCRETE :
bts == "RealAdaboost" ? Boost::REAL :
bts == "LogitBoost" ? Boost::LOGIT :
bts == "GentleAdaboost" ? Boost::GENTLE : -1);
_isClassifier = bparams.boostType == Boost::DISCRETE;
bparams.weightTrimRate = (double)(fn["weight_trimming_rate"].empty() ?
tparams_node["weight_trimming_rate"] : fn["weight_trimming_rate"]);
}
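// Loads a trained model: parameters first, then the node list of each of
// the "ntrees" stored trees.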
void read( const FileNode& fn )
{
clear();
int ntrees = (int)fn["ntrees"];
readParams(fn);
FileNode trees_node = fn["trees"];
FileNodeIterator it = trees_node.begin();
CV_Assert( ntrees == (int)trees_node.size() );
for( int treeidx = 0; treeidx < ntrees; treeidx++, ++it )
{
FileNode nfn = (*it)["nodes"];
readTree(nfn);
}
}
BoostTreeParams bparams;
vector<double> sumResult;
};
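// Public Boost facade: forwards training, prediction, and I/O to
// DTreesImplForBoost and exposes the parameters through the generated
// property accessors.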
class BoostImpl : public Boost
{
public:
BoostImpl() {}
virtual ~BoostImpl() {}
CV_IMPL_PROPERTY(int, BoostType, impl.bparams.boostType)
CV_IMPL_PROPERTY(int, WeakCount, impl.bparams.weakCount)
CV_IMPL_PROPERTY(double, WeightTrimRate, impl.bparams.weightTrimRate)
CV_WRAP_SAME_PROPERTY(int, MaxCategories, impl.params)
CV_WRAP_SAME_PROPERTY(int, MaxDepth, impl.params)
CV_WRAP_SAME_PROPERTY(int, MinSampleCount, impl.params)
CV_WRAP_SAME_PROPERTY(int, CVFolds, impl.params)
CV_WRAP_SAME_PROPERTY(bool, UseSurrogates, impl.params)
CV_WRAP_SAME_PROPERTY(bool, Use1SERule, impl.params)
CV_WRAP_SAME_PROPERTY(bool, TruncatePrunedTree, impl.params)
CV_WRAP_SAME_PROPERTY(float, RegressionAccuracy, impl.params)
CV_WRAP_SAME_PROPERTY_S(cv::Mat, Priors, impl.params)
String getDefaultName() const { return "opencv_ml_boost"; }
bool train( const Ptr<TrainData>& trainData, int flags )
{
return impl.train(trainData, flags);
}
float predict( InputArray samples, OutputArray results, int flags ) const
{
return impl.predict(samples, results, flags);
}
void write( FileStorage& fs ) const
{
impl.write(fs);
}
void read( const FileNode& fn )
{
impl.read(fn);
}
int getVarCount() const { return impl.getVarCount(); }
bool isTrained() const { return impl.isTrained(); }
bool isClassifier() const { return impl.isClassifier(); }
const vector<int>& getRoots() const { return impl.getRoots(); }
const vector<Node>& getNodes() const { return impl.getNodes(); }
const vector<Split>& getSplits() const { return impl.getSplits(); }
const vector<int>& getSubsets() const { return impl.getSubsets(); }
DTreesImplForBoost impl;
};
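// Factory for the public Boost interface. A minimal usage sketch follows
// ('samples' is a CV_32F matrix with one row per sample, 'responses'
// holds the class labels, and 'sampleRow' is a single row to classify;
// all three are caller-supplied placeholders):
//
//     Ptr<Boost> boost = Boost::create();
//     boost->setBoostType(Boost::GENTLE);
//     boost->setWeakCount(100);
//     boost->setWeightTrimRate(0.95);
//     boost->train(TrainData::create(samples, ROW_SAMPLE, responses));
//     float response = boost->predict(sampleRow);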
Ptr<Boost> Boost::create()
{
return makePtr<BoostImpl>();
}
}}