root/apps/traincascade/old_ml.hpp

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes following definitions.
  1. cvParamLattice
  2. cvDefaultParamLattice
  3. get_params
  4. get_decision_function
  5. get_var_count
  6. get_num_valid
  7. set_num_valid
  8. get_work_var_count
  9. get_length_subbuf
  10. get_var_count
  11. get_layer_count
  12. get_layer_sizes
  13. get_weights

/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_ML_HPP__
#define __OPENCV_ML_HPP__

#ifdef __cplusplus
#  include "opencv2/core.hpp"
#endif

#include "opencv2/core/core_c.h"
#include <limits.h>

#ifdef __cplusplus

#include <map>
#include <iostream>

// Apple defines a check() macro somewhere in the debug headers
// that interferes with a method definiton in this header
#undef check

/****************************************************************************************\
*                               Main struct definitions                                  *
\****************************************************************************************/

/* log(2*PI) */
#define CV_LOG2PI (1.8378770664093454835606594728112)

/* columns of <trainData> matrix are training samples */
#define CV_COL_SAMPLE 0

/* rows of <trainData> matrix are training samples */
#define CV_ROW_SAMPLE 1

#define CV_IS_ROW_SAMPLE(flags) ((flags) & CV_ROW_SAMPLE)

struct CvVectors
{
    int type;
    int dims, count;
    CvVectors* next;
    union
    {
        uchar** ptr;
        float** fl;
        double** db;
    } data;
};

#if 0
/* A structure, representing the lattice range of statmodel parameters.
   It is used for optimizing statmodel parameters by cross-validation method.
   The lattice is logarithmic, so <step> must be greater then 1. */
typedef struct CvParamLattice
{
    double min_val;
    double max_val;
    double step;
}
CvParamLattice;

CV_INLINE CvParamLattice cvParamLattice( double min_val, double max_val,
                                         double log_step )
{
    CvParamLattice pl;
    pl.min_val = MIN( min_val, max_val );
    pl.max_val = MAX( min_val, max_val );
    pl.step = MAX( log_step, 1. );
    return pl;
}

CV_INLINE CvParamLattice cvDefaultParamLattice( void )
{
    CvParamLattice pl = {0,0,0};
    return pl;
}
#endif

/* Variable type */
#define CV_VAR_NUMERICAL    0
#define CV_VAR_ORDERED      0
#define CV_VAR_CATEGORICAL  1

#define CV_TYPE_NAME_ML_SVM         "opencv-ml-svm"
#define CV_TYPE_NAME_ML_KNN         "opencv-ml-knn"
#define CV_TYPE_NAME_ML_NBAYES      "opencv-ml-bayesian"
#define CV_TYPE_NAME_ML_BOOSTING    "opencv-ml-boost-tree"
#define CV_TYPE_NAME_ML_TREE        "opencv-ml-tree"
#define CV_TYPE_NAME_ML_ANN_MLP     "opencv-ml-ann-mlp"
#define CV_TYPE_NAME_ML_CNN         "opencv-ml-cnn"
#define CV_TYPE_NAME_ML_RTREES      "opencv-ml-random-trees"
#define CV_TYPE_NAME_ML_ERTREES     "opencv-ml-extremely-randomized-trees"
#define CV_TYPE_NAME_ML_GBT         "opencv-ml-gradient-boosting-trees"

#define CV_TRAIN_ERROR  0
#define CV_TEST_ERROR   1

class CvStatModel
{
public:
    CvStatModel();
    virtual ~CvStatModel();

    virtual void clear();

    CV_WRAP virtual void save( const char* filename, const char* name=0 ) const;
    CV_WRAP virtual void load( const char* filename, const char* name=0 );

    virtual void write( CvFileStorage* storage, const char* name ) const;
    virtual void read( CvFileStorage* storage, CvFileNode* node );

protected:
    const char* default_model_name;
};

/****************************************************************************************\
*                                 Normal Bayes Classifier                                *
\****************************************************************************************/

/* The structure, representing the grid range of statmodel parameters.
   It is used for optimizing statmodel accuracy by varying model parameters,
   the accuracy estimate being computed by cross-validation.
   The grid is logarithmic, so <step> must be greater then 1. */

class CvMLData;

struct CvParamGrid
{
    // SVM params type
    enum { SVM_C=0, SVM_GAMMA=1, SVM_P=2, SVM_NU=3, SVM_COEF=4, SVM_DEGREE=5 };

    CvParamGrid()
    {
        min_val = max_val = step = 0;
    }

    CvParamGrid( double min_val, double max_val, double log_step );
    //CvParamGrid( int param_id );
    bool check() const;

    CV_PROP_RW double min_val;
    CV_PROP_RW double max_val;
    CV_PROP_RW double step;
};

inline CvParamGrid::CvParamGrid( double _min_val, double _max_val, double _log_step )
{
    min_val = _min_val;
    max_val = _max_val;
    step = _log_step;
}

class CvNormalBayesClassifier : public CvStatModel
{
public:
    CV_WRAP CvNormalBayesClassifier();
    virtual ~CvNormalBayesClassifier();

    CvNormalBayesClassifier( const CvMat* trainData, const CvMat* responses,
        const CvMat* varIdx=0, const CvMat* sampleIdx=0 );

    virtual bool train( const CvMat* trainData, const CvMat* responses,
        const CvMat* varIdx = 0, const CvMat* sampleIdx=0, bool update=false );

    virtual float predict( const CvMat* samples, CV_OUT CvMat* results=0, CV_OUT CvMat* results_prob=0 ) const;
    CV_WRAP virtual void clear();

    CV_WRAP CvNormalBayesClassifier( const cv::Mat& trainData, const cv::Mat& responses,
                            const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat() );
    CV_WRAP virtual bool train( const cv::Mat& trainData, const cv::Mat& responses,
                       const cv::Mat& varIdx = cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
                       bool update=false );
    CV_WRAP virtual float predict( const cv::Mat& samples, CV_OUT cv::Mat* results=0, CV_OUT cv::Mat* results_prob=0 ) const;

    virtual void write( CvFileStorage* storage, const char* name ) const;
    virtual void read( CvFileStorage* storage, CvFileNode* node );

protected:
    int     var_count, var_all;
    CvMat*  var_idx;
    CvMat*  cls_labels;
    CvMat** count;
    CvMat** sum;
    CvMat** productsum;
    CvMat** avg;
    CvMat** inv_eigen_values;
    CvMat** cov_rotate_mats;
    CvMat*  c;
};


/****************************************************************************************\
*                          K-Nearest Neighbour Classifier                                *
\****************************************************************************************/

// k Nearest Neighbors
class CvKNearest : public CvStatModel
{
public:

    CV_WRAP CvKNearest();
    virtual ~CvKNearest();

    CvKNearest( const CvMat* trainData, const CvMat* responses,
                const CvMat* sampleIdx=0, bool isRegression=false, int max_k=32 );

    virtual bool train( const CvMat* trainData, const CvMat* responses,
                        const CvMat* sampleIdx=0, bool is_regression=false,
                        int maxK=32, bool updateBase=false );

    virtual float find_nearest( const CvMat* samples, int k, CV_OUT CvMat* results=0,
        const float** neighbors=0, CV_OUT CvMat* neighborResponses=0, CV_OUT CvMat* dist=0 ) const;

    CV_WRAP CvKNearest( const cv::Mat& trainData, const cv::Mat& responses,
               const cv::Mat& sampleIdx=cv::Mat(), bool isRegression=false, int max_k=32 );

    CV_WRAP virtual bool train( const cv::Mat& trainData, const cv::Mat& responses,
                       const cv::Mat& sampleIdx=cv::Mat(), bool isRegression=false,
                       int maxK=32, bool updateBase=false );

    virtual float find_nearest( const cv::Mat& samples, int k, cv::Mat* results=0,
                                const float** neighbors=0, cv::Mat* neighborResponses=0,
                                cv::Mat* dist=0 ) const;
    CV_WRAP virtual float find_nearest( const cv::Mat& samples, int k, CV_OUT cv::Mat& results,
                                        CV_OUT cv::Mat& neighborResponses, CV_OUT cv::Mat& dists) const;

    virtual void clear();
    int get_max_k() const;
    int get_var_count() const;
    int get_sample_count() const;
    bool is_regression() const;

    virtual float write_results( int k, int k1, int start, int end,
        const float* neighbor_responses, const float* dist, CvMat* _results,
        CvMat* _neighbor_responses, CvMat* _dist, Cv32suf* sort_buf ) const;

    virtual void find_neighbors_direct( const CvMat* _samples, int k, int start, int end,
        float* neighbor_responses, const float** neighbors, float* dist ) const;

protected:

    int max_k, var_count;
    int total;
    bool regression;
    CvVectors* samples;
};

/****************************************************************************************\
*                                   Support Vector Machines                              *
\****************************************************************************************/

// SVM training parameters
struct CvSVMParams
{
    CvSVMParams();
    CvSVMParams( int svm_type, int kernel_type,
                 double degree, double gamma, double coef0,
                 double Cvalue, double nu, double p,
                 CvMat* class_weights, CvTermCriteria term_crit );

    CV_PROP_RW int         svm_type;
    CV_PROP_RW int         kernel_type;
    CV_PROP_RW double      degree; // for poly
    CV_PROP_RW double      gamma;  // for poly/rbf/sigmoid/chi2
    CV_PROP_RW double      coef0;  // for poly/sigmoid

    CV_PROP_RW double      C;  // for CV_SVM_C_SVC, CV_SVM_EPS_SVR and CV_SVM_NU_SVR
    CV_PROP_RW double      nu; // for CV_SVM_NU_SVC, CV_SVM_ONE_CLASS, and CV_SVM_NU_SVR
    CV_PROP_RW double      p; // for CV_SVM_EPS_SVR
    CvMat*      class_weights; // for CV_SVM_C_SVC
    CV_PROP_RW CvTermCriteria term_crit; // termination criteria
};


struct CvSVMKernel
{
    typedef void (CvSVMKernel::*Calc)( int vec_count, int vec_size, const float** vecs,
                                       const float* another, float* results );
    CvSVMKernel();
    CvSVMKernel( const CvSVMParams* params, Calc _calc_func );
    virtual bool create( const CvSVMParams* params, Calc _calc_func );
    virtual ~CvSVMKernel();

    virtual void clear();
    virtual void calc( int vcount, int n, const float** vecs, const float* another, float* results );

    const CvSVMParams* params;
    Calc calc_func;

    virtual void calc_non_rbf_base( int vec_count, int vec_size, const float** vecs,
                                    const float* another, float* results,
                                    double alpha, double beta );
    virtual void calc_intersec( int vcount, int var_count, const float** vecs,
                            const float* another, float* results );
    virtual void calc_chi2( int vec_count, int vec_size, const float** vecs,
                              const float* another, float* results );
    virtual void calc_linear( int vec_count, int vec_size, const float** vecs,
                              const float* another, float* results );
    virtual void calc_rbf( int vec_count, int vec_size, const float** vecs,
                           const float* another, float* results );
    virtual void calc_poly( int vec_count, int vec_size, const float** vecs,
                            const float* another, float* results );
    virtual void calc_sigmoid( int vec_count, int vec_size, const float** vecs,
                               const float* another, float* results );
};


struct CvSVMKernelRow
{
    CvSVMKernelRow* prev;
    CvSVMKernelRow* next;
    float* data;
};


struct CvSVMSolutionInfo
{
    double obj;
    double rho;
    double upper_bound_p;
    double upper_bound_n;
    double r;   // for Solver_NU
};

class CvSVMSolver
{
public:
    typedef bool (CvSVMSolver::*SelectWorkingSet)( int& i, int& j );
    typedef float* (CvSVMSolver::*GetRow)( int i, float* row, float* dst, bool existed );
    typedef void (CvSVMSolver::*CalcRho)( double& rho, double& r );

    CvSVMSolver();

    CvSVMSolver( int count, int var_count, const float** samples, schar* y,
                 int alpha_count, double* alpha, double Cp, double Cn,
                 CvMemStorage* storage, CvSVMKernel* kernel, GetRow get_row,
                 SelectWorkingSet select_working_set, CalcRho calc_rho );
    virtual bool create( int count, int var_count, const float** samples, schar* y,
                 int alpha_count, double* alpha, double Cp, double Cn,
                 CvMemStorage* storage, CvSVMKernel* kernel, GetRow get_row,
                 SelectWorkingSet select_working_set, CalcRho calc_rho );
    virtual ~CvSVMSolver();

    virtual void clear();
    virtual bool solve_generic( CvSVMSolutionInfo& si );

    virtual bool solve_c_svc( int count, int var_count, const float** samples, schar* y,
                              double Cp, double Cn, CvMemStorage* storage,
                              CvSVMKernel* kernel, double* alpha, CvSVMSolutionInfo& si );
    virtual bool solve_nu_svc( int count, int var_count, const float** samples, schar* y,
                               CvMemStorage* storage, CvSVMKernel* kernel,
                               double* alpha, CvSVMSolutionInfo& si );
    virtual bool solve_one_class( int count, int var_count, const float** samples,
                                  CvMemStorage* storage, CvSVMKernel* kernel,
                                  double* alpha, CvSVMSolutionInfo& si );

    virtual bool solve_eps_svr( int count, int var_count, const float** samples, const float* y,
                                CvMemStorage* storage, CvSVMKernel* kernel,
                                double* alpha, CvSVMSolutionInfo& si );

    virtual bool solve_nu_svr( int count, int var_count, const float** samples, const float* y,
                               CvMemStorage* storage, CvSVMKernel* kernel,
                               double* alpha, CvSVMSolutionInfo& si );

    virtual float* get_row_base( int i, bool* _existed );
    virtual float* get_row( int i, float* dst );

    int sample_count;
    int var_count;
    int cache_size;
    int cache_line_size;
    const float** samples;
    const CvSVMParams* params;
    CvMemStorage* storage;
    CvSVMKernelRow lru_list;
    CvSVMKernelRow* rows;

    int alpha_count;

    double* G;
    double* alpha;

    // -1 - lower bound, 0 - free, 1 - upper bound
    schar* alpha_status;

    schar* y;
    double* b;
    float* buf[2];
    double eps;
    int max_iter;
    double C[2];  // C[0] == Cn, C[1] == Cp
    CvSVMKernel* kernel;

    SelectWorkingSet select_working_set_func;
    CalcRho calc_rho_func;
    GetRow get_row_func;

    virtual bool select_working_set( int& i, int& j );
    virtual bool select_working_set_nu_svm( int& i, int& j );
    virtual void calc_rho( double& rho, double& r );
    virtual void calc_rho_nu_svm( double& rho, double& r );

    virtual float* get_row_svc( int i, float* row, float* dst, bool existed );
    virtual float* get_row_one_class( int i, float* row, float* dst, bool existed );
    virtual float* get_row_svr( int i, float* row, float* dst, bool existed );
};


struct CvSVMDecisionFunc
{
    double rho;
    int sv_count;
    double* alpha;
    int* sv_index;
};


// SVM model
class CvSVM : public CvStatModel
{
public:
    // SVM type
    enum { C_SVC=100, NU_SVC=101, ONE_CLASS=102, EPS_SVR=103, NU_SVR=104 };

    // SVM kernel type
    enum { LINEAR=0, POLY=1, RBF=2, SIGMOID=3, CHI2=4, INTER=5 };

    // SVM params type
    enum { C=0, GAMMA=1, P=2, NU=3, COEF=4, DEGREE=5 };

    CV_WRAP CvSVM();
    virtual ~CvSVM();

    CvSVM( const CvMat* trainData, const CvMat* responses,
           const CvMat* varIdx=0, const CvMat* sampleIdx=0,
           CvSVMParams params=CvSVMParams() );

    virtual bool train( const CvMat* trainData, const CvMat* responses,
                        const CvMat* varIdx=0, const CvMat* sampleIdx=0,
                        CvSVMParams params=CvSVMParams() );

    virtual bool train_auto( const CvMat* trainData, const CvMat* responses,
        const CvMat* varIdx, const CvMat* sampleIdx, CvSVMParams params,
        int kfold = 10,
        CvParamGrid Cgrid      = get_default_grid(CvSVM::C),
        CvParamGrid gammaGrid  = get_default_grid(CvSVM::GAMMA),
        CvParamGrid pGrid      = get_default_grid(CvSVM::P),
        CvParamGrid nuGrid     = get_default_grid(CvSVM::NU),
        CvParamGrid coeffGrid  = get_default_grid(CvSVM::COEF),
        CvParamGrid degreeGrid = get_default_grid(CvSVM::DEGREE),
        bool balanced=false );

    virtual float predict( const CvMat* sample, bool returnDFVal=false ) const;
    virtual float predict( const CvMat* samples, CV_OUT CvMat* results, bool returnDFVal=false ) const;

    CV_WRAP CvSVM( const cv::Mat& trainData, const cv::Mat& responses,
          const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
          CvSVMParams params=CvSVMParams() );

    CV_WRAP virtual bool train( const cv::Mat& trainData, const cv::Mat& responses,
                       const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
                       CvSVMParams params=CvSVMParams() );

    CV_WRAP virtual bool train_auto( const cv::Mat& trainData, const cv::Mat& responses,
                            const cv::Mat& varIdx, const cv::Mat& sampleIdx, CvSVMParams params,
                            int k_fold = 10,
                            CvParamGrid Cgrid      = CvSVM::get_default_grid(CvSVM::C),
                            CvParamGrid gammaGrid  = CvSVM::get_default_grid(CvSVM::GAMMA),
                            CvParamGrid pGrid      = CvSVM::get_default_grid(CvSVM::P),
                            CvParamGrid nuGrid     = CvSVM::get_default_grid(CvSVM::NU),
                            CvParamGrid coeffGrid  = CvSVM::get_default_grid(CvSVM::COEF),
                            CvParamGrid degreeGrid = CvSVM::get_default_grid(CvSVM::DEGREE),
                            bool balanced=false);
    CV_WRAP virtual float predict( const cv::Mat& sample, bool returnDFVal=false ) const;
    CV_WRAP_AS(predict_all) virtual void predict( cv::InputArray samples, cv::OutputArray results ) const;

    CV_WRAP virtual int get_support_vector_count() const;
    virtual const float* get_support_vector(int i) const;
    virtual CvSVMParams get_params() const { return params; }
    CV_WRAP virtual void clear();

    virtual const CvSVMDecisionFunc* get_decision_function() const { return decision_func; }

    static CvParamGrid get_default_grid( int param_id );

    virtual void write( CvFileStorage* storage, const char* name ) const;
    virtual void read( CvFileStorage* storage, CvFileNode* node );
    CV_WRAP int get_var_count() const { return var_idx ? var_idx->cols : var_all; }

protected:

    virtual bool set_params( const CvSVMParams& params );
    virtual bool train1( int sample_count, int var_count, const float** samples,
                    const void* responses, double Cp, double Cn,
                    CvMemStorage* _storage, double* alpha, double& rho );
    virtual bool do_train( int svm_type, int sample_count, int var_count, const float** samples,
                    const CvMat* responses, CvMemStorage* _storage, double* alpha );
    virtual void create_kernel();
    virtual void create_solver();

    virtual float predict( const float* row_sample, int row_len, bool returnDFVal=false ) const;

    virtual void write_params( CvFileStorage* fs ) const;
    virtual void read_params( CvFileStorage* fs, CvFileNode* node );

    void optimize_linear_svm();

    CvSVMParams params;
    CvMat* class_labels;
    int var_all;
    float** sv;
    int sv_total;
    CvMat* var_idx;
    CvMat* class_weights;
    CvSVMDecisionFunc* decision_func;
    CvMemStorage* storage;

    CvSVMSolver* solver;
    CvSVMKernel* kernel;

private:
    CvSVM(const CvSVM&);
    CvSVM& operator = (const CvSVM&);
};

/****************************************************************************************\
*                                      Decision Tree                                     *
\****************************************************************************************/\
struct CvPair16u32s
{
    unsigned short* u;
    int* i;
};


#define CV_DTREE_CAT_DIR(idx,subset) \
    (2*((subset[(idx)>>5]&(1 << ((idx) & 31)))==0)-1)

struct CvDTreeSplit
{
    int var_idx;
    int condensed_idx;
    int inversed;
    float quality;
    CvDTreeSplit* next;
    union
    {
        int subset[2];
        struct
        {
            float c;
            int split_point;
        }
        ord;
    };
};

struct CvDTreeNode
{
    int class_idx;
    int Tn;
    double value;

    CvDTreeNode* parent;
    CvDTreeNode* left;
    CvDTreeNode* right;

    CvDTreeSplit* split;

    int sample_count;
    int depth;
    int* num_valid;
    int offset;
    int buf_idx;
    double maxlr;

    // global pruning data
    int complexity;
    double alpha;
    double node_risk, tree_risk, tree_error;

    // cross-validation pruning data
    int* cv_Tn;
    double* cv_node_risk;
    double* cv_node_error;

    int get_num_valid(int vi) { return num_valid ? num_valid[vi] : sample_count; }
    void set_num_valid(int vi, int n) { if( num_valid ) num_valid[vi] = n; }
};


struct CvDTreeParams
{
    CV_PROP_RW int   max_categories;
    CV_PROP_RW int   max_depth;
    CV_PROP_RW int   min_sample_count;
    CV_PROP_RW int   cv_folds;
    CV_PROP_RW bool  use_surrogates;
    CV_PROP_RW bool  use_1se_rule;
    CV_PROP_RW bool  truncate_pruned_tree;
    CV_PROP_RW float regression_accuracy;
    const float* priors;

    CvDTreeParams();
    CvDTreeParams( int max_depth, int min_sample_count,
                   float regression_accuracy, bool use_surrogates,
                   int max_categories, int cv_folds,
                   bool use_1se_rule, bool truncate_pruned_tree,
                   const float* priors );
};


struct CvDTreeTrainData
{
    CvDTreeTrainData();
    CvDTreeTrainData( const CvMat* trainData, int tflag,
                      const CvMat* responses, const CvMat* varIdx=0,
                      const CvMat* sampleIdx=0, const CvMat* varType=0,
                      const CvMat* missingDataMask=0,
                      const CvDTreeParams& params=CvDTreeParams(),
                      bool _shared=false, bool _add_labels=false );
    virtual ~CvDTreeTrainData();

    virtual void set_data( const CvMat* trainData, int tflag,
                          const CvMat* responses, const CvMat* varIdx=0,
                          const CvMat* sampleIdx=0, const CvMat* varType=0,
                          const CvMat* missingDataMask=0,
                          const CvDTreeParams& params=CvDTreeParams(),
                          bool _shared=false, bool _add_labels=false,
                          bool _update_data=false );
    virtual void do_responses_copy();

    virtual void get_vectors( const CvMat* _subsample_idx,
         float* values, uchar* missing, float* responses, bool get_class_idx=false );

    virtual CvDTreeNode* subsample_data( const CvMat* _subsample_idx );

    virtual void write_params( CvFileStorage* fs ) const;
    virtual void read_params( CvFileStorage* fs, CvFileNode* node );

    // release all the data
    virtual void clear();

    int get_num_classes() const;
    int get_var_type(int vi) const;
    int get_work_var_count() const {return work_var_count;}

    virtual const float* get_ord_responses( CvDTreeNode* n, float* values_buf, int* sample_indices_buf );
    virtual const int* get_class_labels( CvDTreeNode* n, int* labels_buf );
    virtual const int* get_cv_labels( CvDTreeNode* n, int* labels_buf );
    virtual const int* get_sample_indices( CvDTreeNode* n, int* indices_buf );
    virtual const int* get_cat_var_data( CvDTreeNode* n, int vi, int* cat_values_buf );
    virtual void get_ord_var_data( CvDTreeNode* n, int vi, float* ord_values_buf, int* sorted_indices_buf,
                                   const float** ord_values, const int** sorted_indices, int* sample_indices_buf );
    virtual int get_child_buf_idx( CvDTreeNode* n );

    ////////////////////////////////////

    virtual bool set_params( const CvDTreeParams& params );
    virtual CvDTreeNode* new_node( CvDTreeNode* parent, int count,
                                   int storage_idx, int offset );

    virtual CvDTreeSplit* new_split_ord( int vi, float cmp_val,
                int split_point, int inversed, float quality );
    virtual CvDTreeSplit* new_split_cat( int vi, float quality );
    virtual void free_node_data( CvDTreeNode* node );
    virtual void free_train_data();
    virtual void free_node( CvDTreeNode* node );

    int sample_count, var_all, var_count, max_c_count;
    int ord_var_count, cat_var_count, work_var_count;
    bool have_labels, have_priors;
    bool is_classifier;
    int tflag;

    const CvMat* train_data;
    const CvMat* responses;
    CvMat* responses_copy; // used in Boosting

    int buf_count, buf_size; // buf_size is obsolete, please do not use it, use expression ((int64)buf->rows * (int64)buf->cols / buf_count) instead
    bool shared;
    int is_buf_16u;

    CvMat* cat_count;
    CvMat* cat_ofs;
    CvMat* cat_map;

    CvMat* counts;
    CvMat* buf;
    inline size_t get_length_subbuf() const
    {
        size_t res = (size_t)(work_var_count + 1) * (size_t)sample_count;
        return res;
    }

    CvMat* direction;
    CvMat* split_buf;

    CvMat* var_idx;
    CvMat* var_type; // i-th element =
                     //   k<0  - ordered
                     //   k>=0 - categorical, see k-th element of cat_* arrays
    CvMat* priors;
    CvMat* priors_mult;

    CvDTreeParams params;

    CvMemStorage* tree_storage;
    CvMemStorage* temp_storage;

    CvDTreeNode* data_root;

    CvSet* node_heap;
    CvSet* split_heap;
    CvSet* cv_heap;
    CvSet* nv_heap;

    cv::RNG* rng;
};

class CvDTree;
class CvForestTree;

namespace cv
{
    struct DTreeBestSplitFinder;
    struct ForestTreeBestSplitFinder;
}

class CvDTree : public CvStatModel
{
public:
    CV_WRAP CvDTree();
    virtual ~CvDTree();

    virtual bool train( const CvMat* trainData, int tflag,
                        const CvMat* responses, const CvMat* varIdx=0,
                        const CvMat* sampleIdx=0, const CvMat* varType=0,
                        const CvMat* missingDataMask=0,
                        CvDTreeParams params=CvDTreeParams() );

    virtual bool train( CvMLData* trainData, CvDTreeParams params=CvDTreeParams() );

    // type in {CV_TRAIN_ERROR, CV_TEST_ERROR}
    virtual float calc_error( CvMLData* trainData, int type, std::vector<float> *resp = 0 );

    virtual bool train( CvDTreeTrainData* trainData, const CvMat* subsampleIdx );

    virtual CvDTreeNode* predict( const CvMat* sample, const CvMat* missingDataMask=0,
                                  bool preprocessedInput=false ) const;

    CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
                       const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
                       const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
                       const cv::Mat& missingDataMask=cv::Mat(),
                       CvDTreeParams params=CvDTreeParams() );

    CV_WRAP virtual CvDTreeNode* predict( const cv::Mat& sample, const cv::Mat& missingDataMask=cv::Mat(),
                                  bool preprocessedInput=false ) const;
    CV_WRAP virtual cv::Mat getVarImportance();

    virtual const CvMat* get_var_importance();
    CV_WRAP virtual void clear();

    virtual void read( CvFileStorage* fs, CvFileNode* node );
    virtual void write( CvFileStorage* fs, const char* name ) const;

    // special read & write methods for trees in the tree ensembles
    virtual void read( CvFileStorage* fs, CvFileNode* node,
                       CvDTreeTrainData* data );
    virtual void write( CvFileStorage* fs ) const;

    const CvDTreeNode* get_root() const;
    int get_pruned_tree_idx() const;
    CvDTreeTrainData* get_data();

protected:
    friend struct cv::DTreeBestSplitFinder;

    virtual bool do_train( const CvMat* _subsample_idx );

    virtual void try_split_node( CvDTreeNode* n );
    virtual void split_node_data( CvDTreeNode* n );
    virtual CvDTreeSplit* find_best_split( CvDTreeNode* n );
    virtual CvDTreeSplit* find_split_ord_class( CvDTreeNode* n, int vi,
                            float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
    virtual CvDTreeSplit* find_split_cat_class( CvDTreeNode* n, int vi,
                            float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
    virtual CvDTreeSplit* find_split_ord_reg( CvDTreeNode* n, int vi,
                            float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
    virtual CvDTreeSplit* find_split_cat_reg( CvDTreeNode* n, int vi,
                            float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
    virtual CvDTreeSplit* find_surrogate_split_ord( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
    virtual CvDTreeSplit* find_surrogate_split_cat( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
    virtual double calc_node_dir( CvDTreeNode* node );
    virtual void complete_node_dir( CvDTreeNode* node );
    virtual void cluster_categories( const int* vectors, int vector_count,
        int var_count, int* sums, int k, int* cluster_labels );

    virtual void calc_node_value( CvDTreeNode* node );

    virtual void prune_cv();
    virtual double update_tree_rnc( int T, int fold );
    virtual int cut_tree( int T, int fold, double min_alpha );
    virtual void free_prune_data(bool cut_tree);
    virtual void free_tree();

    virtual void write_node( CvFileStorage* fs, CvDTreeNode* node ) const;
    virtual void write_split( CvFileStorage* fs, CvDTreeSplit* split ) const;
    virtual CvDTreeNode* read_node( CvFileStorage* fs, CvFileNode* node, CvDTreeNode* parent );
    virtual CvDTreeSplit* read_split( CvFileStorage* fs, CvFileNode* node );
    virtual void write_tree_nodes( CvFileStorage* fs ) const;
    virtual void read_tree_nodes( CvFileStorage* fs, CvFileNode* node );

    CvDTreeNode* root;
    CvMat* var_importance;
    CvDTreeTrainData* data;
    CvMat train_data_hdr, responses_hdr;
    cv::Mat train_data_mat, responses_mat;

public:
    int pruned_tree_idx;
};


/****************************************************************************************\
*                                   Random Trees Classifier                              *
\****************************************************************************************/

class CvRTrees;

class CvForestTree: public CvDTree
{
public:
    CvForestTree();
    virtual ~CvForestTree();

    virtual bool train( CvDTreeTrainData* trainData, const CvMat* _subsample_idx, CvRTrees* forest );

    virtual int get_var_count() const {return data ? data->var_count : 0;}
    virtual void read( CvFileStorage* fs, CvFileNode* node, CvRTrees* forest, CvDTreeTrainData* _data );

    /* dummy methods to avoid warnings: BEGIN */
    virtual bool train( const CvMat* trainData, int tflag,
                        const CvMat* responses, const CvMat* varIdx=0,
                        const CvMat* sampleIdx=0, const CvMat* varType=0,
                        const CvMat* missingDataMask=0,
                        CvDTreeParams params=CvDTreeParams() );

    virtual bool train( CvDTreeTrainData* trainData, const CvMat* _subsample_idx );
    virtual void read( CvFileStorage* fs, CvFileNode* node );
    virtual void read( CvFileStorage* fs, CvFileNode* node,
                       CvDTreeTrainData* data );
    /* dummy methods to avoid warnings: END */

protected:
    friend struct cv::ForestTreeBestSplitFinder;

    virtual CvDTreeSplit* find_best_split( CvDTreeNode* n );
    CvRTrees* forest;
};


struct CvRTParams : public CvDTreeParams
{
    //Parameters for the forest
    CV_PROP_RW bool calc_var_importance; // true <=> RF processes variable importance
    CV_PROP_RW int nactive_vars;
    CV_PROP_RW CvTermCriteria term_crit;

    CvRTParams();
    CvRTParams( int max_depth, int min_sample_count,
                float regression_accuracy, bool use_surrogates,
                int max_categories, const float* priors, bool calc_var_importance,
                int nactive_vars, int max_num_of_trees_in_the_forest,
                float forest_accuracy, int termcrit_type );
};


class CvRTrees : public CvStatModel
{
public:
    CV_WRAP CvRTrees();
    virtual ~CvRTrees();
    virtual bool train( const CvMat* trainData, int tflag,
                        const CvMat* responses, const CvMat* varIdx=0,
                        const CvMat* sampleIdx=0, const CvMat* varType=0,
                        const CvMat* missingDataMask=0,
                        CvRTParams params=CvRTParams() );

    virtual bool train( CvMLData* data, CvRTParams params=CvRTParams() );
    virtual float predict( const CvMat* sample, const CvMat* missing = 0 ) const;
    virtual float predict_prob( const CvMat* sample, const CvMat* missing = 0 ) const;

    CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
                       const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
                       const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
                       const cv::Mat& missingDataMask=cv::Mat(),
                       CvRTParams params=CvRTParams() );
    CV_WRAP virtual float predict( const cv::Mat& sample, const cv::Mat& missing = cv::Mat() ) const;
    CV_WRAP virtual float predict_prob( const cv::Mat& sample, const cv::Mat& missing = cv::Mat() ) const;
    CV_WRAP virtual cv::Mat getVarImportance();

    CV_WRAP virtual void clear();

    virtual const CvMat* get_var_importance();
    virtual float get_proximity( const CvMat* sample1, const CvMat* sample2,
        const CvMat* missing1 = 0, const CvMat* missing2 = 0 ) const;

    virtual float calc_error( CvMLData* data, int type , std::vector<float>* resp = 0 ); // type in {CV_TRAIN_ERROR, CV_TEST_ERROR}

    virtual float get_train_error();

    virtual void read( CvFileStorage* fs, CvFileNode* node );
    virtual void write( CvFileStorage* fs, const char* name ) const;

    CvMat* get_active_var_mask();
    CvRNG* get_rng();

    int get_tree_count() const;
    CvForestTree* get_tree(int i) const;

protected:
    virtual cv::String getName() const;

    virtual bool grow_forest( const CvTermCriteria term_crit );

    // array of the trees of the forest
    CvForestTree** trees;
    CvDTreeTrainData* data;
    CvMat train_data_hdr, responses_hdr;
    cv::Mat train_data_mat, responses_mat;
    int ntrees;
    int nclasses;
    double oob_error;
    CvMat* var_importance;
    int nsamples;

    cv::RNG* rng;
    CvMat* active_var_mask;
};

/****************************************************************************************\
*                           Extremely randomized trees Classifier                        *
\****************************************************************************************/
struct CvERTreeTrainData : public CvDTreeTrainData
{
    virtual void set_data( const CvMat* trainData, int tflag,
                          const CvMat* responses, const CvMat* varIdx=0,
                          const CvMat* sampleIdx=0, const CvMat* varType=0,
                          const CvMat* missingDataMask=0,
                          const CvDTreeParams& params=CvDTreeParams(),
                          bool _shared=false, bool _add_labels=false,
                          bool _update_data=false );
    virtual void get_ord_var_data( CvDTreeNode* n, int vi, float* ord_values_buf, int* missing_buf,
                                   const float** ord_values, const int** missing, int* sample_buf = 0 );
    virtual const int* get_sample_indices( CvDTreeNode* n, int* indices_buf );
    virtual const int* get_cv_labels( CvDTreeNode* n, int* labels_buf );
    virtual const int* get_cat_var_data( CvDTreeNode* n, int vi, int* cat_values_buf );
    virtual void get_vectors( const CvMat* _subsample_idx, float* values, uchar* missing,
                              float* responses, bool get_class_idx=false );
    virtual CvDTreeNode* subsample_data( const CvMat* _subsample_idx );
    const CvMat* missing_mask;
};

class CvForestERTree : public CvForestTree
{
protected:
    virtual double calc_node_dir( CvDTreeNode* node );
    virtual CvDTreeSplit* find_split_ord_class( CvDTreeNode* n, int vi,
        float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
    virtual CvDTreeSplit* find_split_cat_class( CvDTreeNode* n, int vi,
        float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
    virtual CvDTreeSplit* find_split_ord_reg( CvDTreeNode* n, int vi,
        float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
    virtual CvDTreeSplit* find_split_cat_reg( CvDTreeNode* n, int vi,
        float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
    virtual void split_node_data( CvDTreeNode* n );
};

class CvERTrees : public CvRTrees
{
public:
    CV_WRAP CvERTrees();
    virtual ~CvERTrees();
    virtual bool train( const CvMat* trainData, int tflag,
                        const CvMat* responses, const CvMat* varIdx=0,
                        const CvMat* sampleIdx=0, const CvMat* varType=0,
                        const CvMat* missingDataMask=0,
                        CvRTParams params=CvRTParams());
    CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
                       const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
                       const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
                       const cv::Mat& missingDataMask=cv::Mat(),
                       CvRTParams params=CvRTParams());
    virtual bool train( CvMLData* data, CvRTParams params=CvRTParams() );
protected:
    virtual cv::String getName() const;
    virtual bool grow_forest( const CvTermCriteria term_crit );
};


/****************************************************************************************\
*                                   Boosted tree classifier                              *
\****************************************************************************************/

struct CvBoostParams : public CvDTreeParams
{
    CV_PROP_RW int boost_type;
    CV_PROP_RW int weak_count;
    CV_PROP_RW int split_criteria;
    CV_PROP_RW double weight_trim_rate;

    CvBoostParams();
    CvBoostParams( int boost_type, int weak_count, double weight_trim_rate,
                   int max_depth, bool use_surrogates, const float* priors );
};


class CvBoost;

class CvBoostTree: public CvDTree
{
public:
    CvBoostTree();
    virtual ~CvBoostTree();

    virtual bool train( CvDTreeTrainData* trainData,
                        const CvMat* subsample_idx, CvBoost* ensemble );

    virtual void scale( double s );
    virtual void read( CvFileStorage* fs, CvFileNode* node,
                       CvBoost* ensemble, CvDTreeTrainData* _data );
    virtual void clear();

    /* dummy methods to avoid warnings: BEGIN */
    virtual bool train( const CvMat* trainData, int tflag,
                        const CvMat* responses, const CvMat* varIdx=0,
                        const CvMat* sampleIdx=0, const CvMat* varType=0,
                        const CvMat* missingDataMask=0,
                        CvDTreeParams params=CvDTreeParams() );
    virtual bool train( CvDTreeTrainData* trainData, const CvMat* _subsample_idx );

    virtual void read( CvFileStorage* fs, CvFileNode* node );
    virtual void read( CvFileStorage* fs, CvFileNode* node,
                       CvDTreeTrainData* data );
    /* dummy methods to avoid warnings: END */

protected:

    virtual void try_split_node( CvDTreeNode* n );
    virtual CvDTreeSplit* find_surrogate_split_ord( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
    virtual CvDTreeSplit* find_surrogate_split_cat( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
    virtual CvDTreeSplit* find_split_ord_class( CvDTreeNode* n, int vi,
        float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
    virtual CvDTreeSplit* find_split_cat_class( CvDTreeNode* n, int vi,
        float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
    virtual CvDTreeSplit* find_split_ord_reg( CvDTreeNode* n, int vi,
        float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
    virtual CvDTreeSplit* find_split_cat_reg( CvDTreeNode* n, int vi,
        float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
    virtual void calc_node_value( CvDTreeNode* n );
    virtual double calc_node_dir( CvDTreeNode* n );

    CvBoost* ensemble;
};


class CvBoost : public CvStatModel
{
public:
    // Boosting type
    enum { DISCRETE=0, REAL=1, LOGIT=2, GENTLE=3 };

    // Splitting criteria
    enum { DEFAULT=0, GINI=1, MISCLASS=3, SQERR=4 };

    CV_WRAP CvBoost();
    virtual ~CvBoost();

    CvBoost( const CvMat* trainData, int tflag,
             const CvMat* responses, const CvMat* varIdx=0,
             const CvMat* sampleIdx=0, const CvMat* varType=0,
             const CvMat* missingDataMask=0,
             CvBoostParams params=CvBoostParams() );

    virtual bool train( const CvMat* trainData, int tflag,
             const CvMat* responses, const CvMat* varIdx=0,
             const CvMat* sampleIdx=0, const CvMat* varType=0,
             const CvMat* missingDataMask=0,
             CvBoostParams params=CvBoostParams(),
             bool update=false );

    virtual bool train( CvMLData* data,
             CvBoostParams params=CvBoostParams(),
             bool update=false );

    virtual float predict( const CvMat* sample, const CvMat* missing=0,
                           CvMat* weak_responses=0, CvSlice slice=CV_WHOLE_SEQ,
                           bool raw_mode=false, bool return_sum=false ) const;

    CV_WRAP CvBoost( const cv::Mat& trainData, int tflag,
            const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
            const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
            const cv::Mat& missingDataMask=cv::Mat(),
            CvBoostParams params=CvBoostParams() );

    CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
                       const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
                       const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
                       const cv::Mat& missingDataMask=cv::Mat(),
                       CvBoostParams params=CvBoostParams(),
                       bool update=false );

    CV_WRAP virtual float predict( const cv::Mat& sample, const cv::Mat& missing=cv::Mat(),
                                   const cv::Range& slice=cv::Range::all(), bool rawMode=false,
                                   bool returnSum=false ) const;

    virtual float calc_error( CvMLData* _data, int type , std::vector<float> *resp = 0 ); // type in {CV_TRAIN_ERROR, CV_TEST_ERROR}

    CV_WRAP virtual void prune( CvSlice slice );

    CV_WRAP virtual void clear();

    virtual void write( CvFileStorage* storage, const char* name ) const;
    virtual void read( CvFileStorage* storage, CvFileNode* node );
    virtual const CvMat* get_active_vars(bool absolute_idx=true);

    CvSeq* get_weak_predictors();

    CvMat* get_weights();
    CvMat* get_subtree_weights();
    CvMat* get_weak_response();
    const CvBoostParams& get_params() const;
    const CvDTreeTrainData* get_data() const;

protected:

    virtual bool set_params( const CvBoostParams& params );
    virtual void update_weights( CvBoostTree* tree );
    virtual void trim_weights();
    virtual void write_params( CvFileStorage* fs ) const;
    virtual void read_params( CvFileStorage* fs, CvFileNode* node );

    virtual void initialize_weights(double (&p)[2]);

    CvDTreeTrainData* data;
    CvMat train_data_hdr, responses_hdr;
    cv::Mat train_data_mat, responses_mat;
    CvBoostParams params;
    CvSeq* weak;

    CvMat* active_vars;
    CvMat* active_vars_abs;
    bool have_active_cat_vars;

    CvMat* orig_response;
    CvMat* sum_response;
    CvMat* weak_eval;
    CvMat* subsample_mask;
    CvMat* weights;
    CvMat* subtree_weights;
    bool have_subsample;
};


/****************************************************************************************\
*                                   Gradient Boosted Trees                               *
\****************************************************************************************/

// DataType: STRUCT CvGBTreesParams
// Parameters of GBT (Gradient Boosted trees model), including single
// tree settings and ensemble parameters.
//
// weak_count          - count of trees in the ensemble
// loss_function_type  - loss function used for ensemble training
// subsample_portion   - portion of whole training set used for
//                       every single tree training.
//                       subsample_portion value is in (0.0, 1.0].
//                       subsample_portion == 1.0 when whole dataset is
//                       used on each step. Count of sample used on each
//                       step is computed as
//                       int(total_samples_count * subsample_portion).
// shrinkage           - regularization parameter.
//                       Each tree prediction is multiplied on shrinkage value.


struct CvGBTreesParams : public CvDTreeParams
{
    CV_PROP_RW int weak_count;
    CV_PROP_RW int loss_function_type;
    CV_PROP_RW float subsample_portion;
    CV_PROP_RW float shrinkage;

    CvGBTreesParams();
    CvGBTreesParams( int loss_function_type, int weak_count, float shrinkage,
        float subsample_portion, int max_depth, bool use_surrogates );
};

// DataType: CLASS CvGBTrees
// Gradient Boosting Trees (GBT) algorithm implementation.
//
// data             - training dataset
// params           - parameters of the CvGBTrees
// weak             - array[0..(class_count-1)] of CvSeq
//                    for storing tree ensembles
// orig_response    - original responses of the training set samples
// sum_response     - predicitons of the current model on the training dataset.
//                    this matrix is updated on every iteration.
// sum_response_tmp - predicitons of the model on the training set on the next
//                    step. On every iteration values of sum_responses_tmp are
//                    computed via sum_responses values. When the current
//                    step is complete sum_response values become equal to
//                    sum_responses_tmp.
// sampleIdx       - indices of samples used for training the ensemble.
//                    CvGBTrees training procedure takes a set of samples
//                    (train_data) and a set of responses (responses).
//                    Only pairs (train_data[i], responses[i]), where i is
//                    in sample_idx are used for training the ensemble.
// subsample_train  - indices of samples used for training a single decision
//                    tree on the current step. This indices are countered
//                    relatively to the sample_idx, so that pairs
//                    (train_data[sample_idx[i]], responses[sample_idx[i]])
//                    are used for training a decision tree.
//                    Training set is randomly splited
//                    in two parts (subsample_train and subsample_test)
//                    on every iteration accordingly to the portion parameter.
// subsample_test   - relative indices of samples from the training set,
//                    which are not used for training a tree on the current
//                    step.
// missing          - mask of the missing values in the training set. This
//                    matrix has the same size as train_data. 1 - missing
//                    value, 0 - not a missing value.
// class_labels     - output class labels map.
// rng              - random number generator. Used for spliting the
//                    training set.
// class_count      - count of output classes.
//                    class_count == 1 in the case of regression,
//                    and > 1 in the case of classification.
// delta            - Huber loss function parameter.
// base_value       - start point of the gradient descent procedure.
//                    model prediction is
//                    f(x) = f_0 + sum_{i=1..weak_count-1}(f_i(x)), where
//                    f_0 is the base value.



class CvGBTrees : public CvStatModel
{
public:

    /*
    // DataType: ENUM
    // Loss functions implemented in CvGBTrees.
    //
    // SQUARED_LOSS
    // problem: regression
    // loss = (x - x')^2
    //
    // ABSOLUTE_LOSS
    // problem: regression
    // loss = abs(x - x')
    //
    // HUBER_LOSS
    // problem: regression
    // loss = delta*( abs(x - x') - delta/2), if abs(x - x') > delta
    //           1/2*(x - x')^2, if abs(x - x') <= delta,
    //           where delta is the alpha-quantile of pseudo responses from
    //           the training set.
    //
    // DEVIANCE_LOSS
    // problem: classification
    //
    */
    enum {SQUARED_LOSS=0, ABSOLUTE_LOSS, HUBER_LOSS=3, DEVIANCE_LOSS};


    /*
    // Default constructor. Creates a model only (without training).
    // Should be followed by one form of the train(...) function.
    //
    // API
    // CvGBTrees();

    // INPUT
    // OUTPUT
    // RESULT
    */
    CV_WRAP CvGBTrees();


    /*
    // Full form constructor. Creates a gradient boosting model and does the
    // train.
    //
    // API
    // CvGBTrees( const CvMat* trainData, int tflag,
             const CvMat* responses, const CvMat* varIdx=0,
             const CvMat* sampleIdx=0, const CvMat* varType=0,
             const CvMat* missingDataMask=0,
             CvGBTreesParams params=CvGBTreesParams() );

    // INPUT
    // trainData    - a set of input feature vectors.
    //                  size of matrix is
    //                  <count of samples> x <variables count>
    //                  or <variables count> x <count of samples>
    //                  depending on the tflag parameter.
    //                  matrix values are float.
    // tflag         - a flag showing how do samples stored in the
    //                  trainData matrix row by row (tflag=CV_ROW_SAMPLE)
    //                  or column by column (tflag=CV_COL_SAMPLE).
    // responses     - a vector of responses corresponding to the samples
    //                  in trainData.
    // varIdx       - indices of used variables. zero value means that all
    //                  variables are active.
    // sampleIdx    - indices of used samples. zero value means that all
    //                  samples from trainData are in the training set.
    // varType      - vector of <variables count> length. gives every
    //                  variable type CV_VAR_CATEGORICAL or CV_VAR_ORDERED.
    //                  varType = 0 means all variables are numerical.
    // missingDataMask  - a mask of misiing values in trainData.
    //                  missingDataMask = 0 means that there are no missing
    //                  values.
    // params         - parameters of GTB algorithm.
    // OUTPUT
    // RESULT
    */
    CvGBTrees( const CvMat* trainData, int tflag,
             const CvMat* responses, const CvMat* varIdx=0,
             const CvMat* sampleIdx=0, const CvMat* varType=0,
             const CvMat* missingDataMask=0,
             CvGBTreesParams params=CvGBTreesParams() );


    /*
    // Destructor.
    */
    virtual ~CvGBTrees();


    /*
    // Gradient tree boosting model training
    //
    // API
    // virtual bool train( const CvMat* trainData, int tflag,
             const CvMat* responses, const CvMat* varIdx=0,
             const CvMat* sampleIdx=0, const CvMat* varType=0,
             const CvMat* missingDataMask=0,
             CvGBTreesParams params=CvGBTreesParams(),
             bool update=false );

    // INPUT
    // trainData    - a set of input feature vectors.
    //                  size of matrix is
    //                  <count of samples> x <variables count>
    //                  or <variables count> x <count of samples>
    //                  depending on the tflag parameter.
    //                  matrix values are float.
    // tflag         - a flag showing how do samples stored in the
    //                  trainData matrix row by row (tflag=CV_ROW_SAMPLE)
    //                  or column by column (tflag=CV_COL_SAMPLE).
    // responses     - a vector of responses corresponding to the samples
    //                  in trainData.
    // varIdx       - indices of used variables. zero value means that all
    //                  variables are active.
    // sampleIdx    - indices of used samples. zero value means that all
    //                  samples from trainData are in the training set.
    // varType      - vector of <variables count> length. gives every
    //                  variable type CV_VAR_CATEGORICAL or CV_VAR_ORDERED.
    //                  varType = 0 means all variables are numerical.
    // missingDataMask  - a mask of misiing values in trainData.
    //                  missingDataMask = 0 means that there are no missing
    //                  values.
    // params         - parameters of GTB algorithm.
    // update         - is not supported now. (!)
    // OUTPUT
    // RESULT
    // Error state.
    */
    virtual bool train( const CvMat* trainData, int tflag,
             const CvMat* responses, const CvMat* varIdx=0,
             const CvMat* sampleIdx=0, const CvMat* varType=0,
             const CvMat* missingDataMask=0,
             CvGBTreesParams params=CvGBTreesParams(),
             bool update=false );


    /*
    // Gradient tree boosting model training
    //
    // API
    // virtual bool train( CvMLData* data,
             CvGBTreesParams params=CvGBTreesParams(),
             bool update=false ) {return false;}

    // INPUT
    // data          - training set.
    // params        - parameters of GTB algorithm.
    // update        - is not supported now. (!)
    // OUTPUT
    // RESULT
    // Error state.
    */
    virtual bool train( CvMLData* data,
             CvGBTreesParams params=CvGBTreesParams(),
             bool update=false );


    /*
    // Response value prediction
    //
    // API
    // virtual float predict_serial( const CvMat* sample, const CvMat* missing=0,
             CvMat* weak_responses=0, CvSlice slice = CV_WHOLE_SEQ,
             int k=-1 ) const;

    // INPUT
    // sample         - input sample of the same type as in the training set.
    // missing        - missing values mask. missing=0 if there are no
    //                   missing values in sample vector.
    // weak_responses  - predictions of all of the trees.
    //                   not implemented (!)
    // slice           - part of the ensemble used for prediction.
    //                   slice = CV_WHOLE_SEQ when all trees are used.
    // k               - number of ensemble used.
    //                   k is in {-1,0,1,..,<count of output classes-1>}.
    //                   in the case of classification problem
    //                   <count of output classes-1> ensembles are built.
    //                   If k = -1 ordinary prediction is the result,
    //                   otherwise function gives the prediction of the
    //                   k-th ensemble only.
    // OUTPUT
    // RESULT
    // Predicted value.
    */
    virtual float predict_serial( const CvMat* sample, const CvMat* missing=0,
            CvMat* weakResponses=0, CvSlice slice = CV_WHOLE_SEQ,
            int k=-1 ) const;

    /*
    // Response value prediction.
    // Parallel version (in the case of TBB existence)
    //
    // API
    // virtual float predict( const CvMat* sample, const CvMat* missing=0,
             CvMat* weak_responses=0, CvSlice slice = CV_WHOLE_SEQ,
             int k=-1 ) const;

    // INPUT
    // sample         - input sample of the same type as in the training set.
    // missing        - missing values mask. missing=0 if there are no
    //                   missing values in sample vector.
    // weak_responses  - predictions of all of the trees.
    //                   not implemented (!)
    // slice           - part of the ensemble used for prediction.
    //                   slice = CV_WHOLE_SEQ when all trees are used.
    // k               - number of ensemble used.
    //                   k is in {-1,0,1,..,<count of output classes-1>}.
    //                   in the case of classification problem
    //                   <count of output classes-1> ensembles are built.
    //                   If k = -1 ordinary prediction is the result,
    //                   otherwise function gives the prediction of the
    //                   k-th ensemble only.
    // OUTPUT
    // RESULT
    // Predicted value.
    */
    virtual float predict( const CvMat* sample, const CvMat* missing=0,
            CvMat* weakResponses=0, CvSlice slice = CV_WHOLE_SEQ,
            int k=-1 ) const;

    /*
    // Deletes all the data.
    //
    // API
    // virtual void clear();

    // INPUT
    // OUTPUT
    // delete data, weak, orig_response, sum_response,
    //        weak_eval, subsample_train, subsample_test,
    //        sample_idx, missing, lass_labels
    // delta = 0.0
    // RESULT
    */
    CV_WRAP virtual void clear();

    /*
    // Compute error on the train/test set.
    //
    // API
    // virtual float calc_error( CvMLData* _data, int type,
    //        std::vector<float> *resp = 0 );
    //
    // INPUT
    // data  - dataset
    // type  - defines which error is to compute: train (CV_TRAIN_ERROR) or
    //         test (CV_TEST_ERROR).
    // OUTPUT
    // resp  - vector of predicitons
    // RESULT
    // Error value.
    */
    virtual float calc_error( CvMLData* _data, int type,
            std::vector<float> *resp = 0 );

    /*
    //
    // Write parameters of the gtb model and data. Write learned model.
    //
    // API
    // virtual void write( CvFileStorage* fs, const char* name ) const;
    //
    // INPUT
    // fs     - file storage to read parameters from.
    // name   - model name.
    // OUTPUT
    // RESULT
    */
    virtual void write( CvFileStorage* fs, const char* name ) const;


    /*
    //
    // Read parameters of the gtb model and data. Read learned model.
    //
    // API
    // virtual void read( CvFileStorage* fs, CvFileNode* node );
    //
    // INPUT
    // fs     - file storage to read parameters from.
    // node   - file node.
    // OUTPUT
    // RESULT
    */
    virtual void read( CvFileStorage* fs, CvFileNode* node );


    // new-style C++ interface
    CV_WRAP CvGBTrees( const cv::Mat& trainData, int tflag,
              const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
              const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
              const cv::Mat& missingDataMask=cv::Mat(),
              CvGBTreesParams params=CvGBTreesParams() );

    CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
                       const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
                       const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
                       const cv::Mat& missingDataMask=cv::Mat(),
                       CvGBTreesParams params=CvGBTreesParams(),
                       bool update=false );

    CV_WRAP virtual float predict( const cv::Mat& sample, const cv::Mat& missing=cv::Mat(),
                           const cv::Range& slice = cv::Range::all(),
                           int k=-1 ) const;

protected:

    /*
    // Compute the gradient vector components.
    //
    // API
    // virtual void find_gradient( const int k = 0);

    // INPUT
    // k        - used for classification problem, determining current
    //            tree ensemble.
    // OUTPUT
    // changes components of data->responses
    // which correspond to samples used for training
    // on the current step.
    // RESULT
    */
    virtual void find_gradient( const int k = 0);


    /*
    //
    // Change values in tree leaves according to the used loss function.
    //
    // API
    // virtual void change_values(CvDTree* tree, const int k = 0);
    //
    // INPUT
    // tree      - decision tree to change.
    // k         - used for classification problem, determining current
    //             tree ensemble.
    // OUTPUT
    // changes 'value' fields of the trees' leaves.
    // changes sum_response_tmp.
    // RESULT
    */
    virtual void change_values(CvDTree* tree, const int k = 0);


    /*
    //
    // Find optimal constant prediction value according to the used loss
    // function.
    // The goal is to find a constant which gives the minimal summary loss
    // on the _Idx samples.
    //
    // API
    // virtual float find_optimal_value( const CvMat* _Idx );
    //
    // INPUT
    // _Idx        - indices of the samples from the training set.
    // OUTPUT
    // RESULT
    // optimal constant value.
    */
    virtual float find_optimal_value( const CvMat* _Idx );


    /*
    //
    // Randomly split the whole training set in two parts according
    // to params.portion.
    //
    // API
    // virtual void do_subsample();
    //
    // INPUT
    // OUTPUT
    // subsample_train - indices of samples used for training
    // subsample_test  - indices of samples used for test
    // RESULT
    */
    virtual void do_subsample();


    /*
    //
    // Internal recursive function giving an array of subtree tree leaves.
    //
    // API
    // void leaves_get( CvDTreeNode** leaves, int& count, CvDTreeNode* node );
    //
    // INPUT
    // node         - current leaf.
    // OUTPUT
    // count        - count of leaves in the subtree.
    // leaves       - array of pointers to leaves.
    // RESULT
    */
    void leaves_get( CvDTreeNode** leaves, int& count, CvDTreeNode* node );


    /*
    //
    // Get leaves of the tree.
    //
    // API
    // CvDTreeNode** GetLeaves( const CvDTree* dtree, int& len );
    //
    // INPUT
    // dtree            - decision tree.
    // OUTPUT
    // len              - count of the leaves.
    // RESULT
    // CvDTreeNode**    - array of pointers to leaves.
    */
    CvDTreeNode** GetLeaves( const CvDTree* dtree, int& len );


    /*
    //
    // Is it a regression or a classification.
    //
    // API
    // bool problem_type();
    //
    // INPUT
    // OUTPUT
    // RESULT
    // false if it is a classification problem,
    // true - if regression.
    */
    virtual bool problem_type() const;


    /*
    //
    // Write parameters of the gtb model.
    //
    // API
    // virtual void write_params( CvFileStorage* fs ) const;
    //
    // INPUT
    // fs           - file storage to write parameters to.
    // OUTPUT
    // RESULT
    */
    virtual void write_params( CvFileStorage* fs ) const;


    /*
    //
    // Read parameters of the gtb model and data.
    //
    // API
    // virtual void read_params( CvFileStorage* fs );
    //
    // INPUT
    // fs           - file storage to read parameters from.
    // OUTPUT
    // params       - parameters of the gtb model.
    // data         - contains information about the structure
    //                of the data set (count of variables,
    //                their types, etc.).
    // class_labels - output class labels map.
    // RESULT
    */
    virtual void read_params( CvFileStorage* fs, CvFileNode* fnode );
    int get_len(const CvMat* mat) const;


    CvDTreeTrainData* data;
    CvGBTreesParams params;

    CvSeq** weak;
    CvMat* orig_response;
    CvMat* sum_response;
    CvMat* sum_response_tmp;
    CvMat* sample_idx;
    CvMat* subsample_train;
    CvMat* subsample_test;
    CvMat* missing;
    CvMat* class_labels;

    cv::RNG* rng;

    int class_count;
    float delta;
    float base_value;

};



/****************************************************************************************\
*                              Artificial Neural Networks (ANN)                          *
\****************************************************************************************/

/////////////////////////////////// Multi-Layer Perceptrons //////////////////////////////

struct CvANN_MLP_TrainParams
{
    CvANN_MLP_TrainParams();
    CvANN_MLP_TrainParams( CvTermCriteria term_crit, int train_method,
                           double param1, double param2=0 );
    ~CvANN_MLP_TrainParams();

    enum { BACKPROP=0, RPROP=1 };

    CV_PROP_RW CvTermCriteria term_crit;
    CV_PROP_RW int train_method;

    // backpropagation parameters
    CV_PROP_RW double bp_dw_scale, bp_moment_scale;

    // rprop parameters
    CV_PROP_RW double rp_dw0, rp_dw_plus, rp_dw_minus, rp_dw_min, rp_dw_max;
};


class CvANN_MLP : public CvStatModel
{
public:
    CV_WRAP CvANN_MLP();
    CvANN_MLP( const CvMat* layerSizes,
               int activateFunc=CvANN_MLP::SIGMOID_SYM,
               double fparam1=0, double fparam2=0 );

    virtual ~CvANN_MLP();

    virtual void create( const CvMat* layerSizes,
                         int activateFunc=CvANN_MLP::SIGMOID_SYM,
                         double fparam1=0, double fparam2=0 );

    virtual int train( const CvMat* inputs, const CvMat* outputs,
                       const CvMat* sampleWeights, const CvMat* sampleIdx=0,
                       CvANN_MLP_TrainParams params = CvANN_MLP_TrainParams(),
                       int flags=0 );
    virtual float predict( const CvMat* inputs, CV_OUT CvMat* outputs ) const;

    CV_WRAP CvANN_MLP( const cv::Mat& layerSizes,
              int activateFunc=CvANN_MLP::SIGMOID_SYM,
              double fparam1=0, double fparam2=0 );

    CV_WRAP virtual void create( const cv::Mat& layerSizes,
                        int activateFunc=CvANN_MLP::SIGMOID_SYM,
                        double fparam1=0, double fparam2=0 );

    CV_WRAP virtual int train( const cv::Mat& inputs, const cv::Mat& outputs,
                      const cv::Mat& sampleWeights, const cv::Mat& sampleIdx=cv::Mat(),
                      CvANN_MLP_TrainParams params = CvANN_MLP_TrainParams(),
                      int flags=0 );

    CV_WRAP virtual float predict( const cv::Mat& inputs, CV_OUT cv::Mat& outputs ) const;

    CV_WRAP virtual void clear();

    // possible activation functions
    enum { IDENTITY = 0, SIGMOID_SYM = 1, GAUSSIAN = 2 };

    // available training flags
    enum { UPDATE_WEIGHTS = 1, NO_INPUT_SCALE = 2, NO_OUTPUT_SCALE = 4 };

    virtual void read( CvFileStorage* fs, CvFileNode* node );
    virtual void write( CvFileStorage* storage, const char* name ) const;

    int get_layer_count() { return layer_sizes ? layer_sizes->cols : 0; }
    const CvMat* get_layer_sizes() { return layer_sizes; }
    double* get_weights(int layer)
    {
        return layer_sizes && weights &&
            (unsigned)layer <= (unsigned)layer_sizes->cols ? weights[layer] : 0;
    }

    virtual void calc_activ_func_deriv( CvMat* xf, CvMat* deriv, const double* bias ) const;

protected:

    virtual bool prepare_to_train( const CvMat* _inputs, const CvMat* _outputs,
            const CvMat* _sample_weights, const CvMat* sampleIdx,
            CvVectors* _ivecs, CvVectors* _ovecs, double** _sw, int _flags );

    // sequential random backpropagation
    virtual int train_backprop( CvVectors _ivecs, CvVectors _ovecs, const double* _sw );

    // RPROP algorithm
    virtual int train_rprop( CvVectors _ivecs, CvVectors _ovecs, const double* _sw );

    virtual void calc_activ_func( CvMat* xf, const double* bias ) const;
    virtual void set_activ_func( int _activ_func=SIGMOID_SYM,
                                 double _f_param1=0, double _f_param2=0 );
    virtual void init_weights();
    virtual void scale_input( const CvMat* _src, CvMat* _dst ) const;
    virtual void scale_output( const CvMat* _src, CvMat* _dst ) const;
    virtual void calc_input_scale( const CvVectors* vecs, int flags );
    virtual void calc_output_scale( const CvVectors* vecs, int flags );

    virtual void write_params( CvFileStorage* fs ) const;
    virtual void read_params( CvFileStorage* fs, CvFileNode* node );

    CvMat* layer_sizes;
    CvMat* wbuf;
    CvMat* sample_weights;
    double** weights;
    double f_param1, f_param2;
    double min_val, max_val, min_val1, max_val1;
    int activ_func;
    int max_count, max_buf_sz;
    CvANN_MLP_TrainParams params;
    cv::RNG* rng;
};

/****************************************************************************************\
*                           Auxilary functions declarations                              *
\****************************************************************************************/

/* Generates <sample> from multivariate normal distribution, where <mean> - is an
   average row vector, <cov> - symmetric covariation matrix */
CVAPI(void) cvRandMVNormal( CvMat* mean, CvMat* cov, CvMat* sample,
                           CvRNG* rng CV_DEFAULT(0) );

/* Generates sample from gaussian mixture distribution */
CVAPI(void) cvRandGaussMixture( CvMat* means[],
                               CvMat* covs[],
                               float weights[],
                               int clsnum,
                               CvMat* sample,
                               CvMat* sampClasses CV_DEFAULT(0) );

#define CV_TS_CONCENTRIC_SPHERES 0

/* creates test set */
CVAPI(void) cvCreateTestSet( int type, CvMat** samples,
                 int num_samples,
                 int num_features,
                 CvMat** responses,
                 int num_classes, ... );

/****************************************************************************************\
*                                      Data                                             *
\****************************************************************************************/

#define CV_COUNT     0
#define CV_PORTION   1

struct CvTrainTestSplit
{
    CvTrainTestSplit();
    CvTrainTestSplit( int train_sample_count, bool mix = true);
    CvTrainTestSplit( float train_sample_portion, bool mix = true);

    union
    {
        int count;
        float portion;
    } train_sample_part;
    int train_sample_part_mode;

    bool mix;
};

class CvMLData
{
public:
    CvMLData();
    virtual ~CvMLData();

    // returns:
    // 0 - OK
    // -1 - file can not be opened or is not correct
    int read_csv( const char* filename );

    const CvMat* get_values() const;
    const CvMat* get_responses();
    const CvMat* get_missing() const;

    void set_header_lines_number( int n );
    int get_header_lines_number() const;

    void set_response_idx( int idx ); // old response become predictors, new response_idx = idx
                                      // if idx < 0 there will be no response
    int get_response_idx() const;

    void set_train_test_split( const CvTrainTestSplit * spl );
    const CvMat* get_train_sample_idx() const;
    const CvMat* get_test_sample_idx() const;
    void mix_train_and_test_idx();

    const CvMat* get_var_idx();
    void chahge_var_idx( int vi, bool state ); // misspelled (saved for back compitability),
                                               // use change_var_idx
    void change_var_idx( int vi, bool state ); // state == true to set vi-variable as predictor

    const CvMat* get_var_types();
    int get_var_type( int var_idx ) const;
    // following 2 methods enable to change vars type
    // use these methods to assign CV_VAR_CATEGORICAL type for categorical variable
    // with numerical labels; in the other cases var types are correctly determined automatically
    void set_var_types( const char* str );  // str examples:
                                            // "ord[0-17],cat[18]", "ord[0,2,4,10-12], cat[1,3,5-9,13,14]",
                                            // "cat", "ord" (all vars are categorical/ordered)
    void change_var_type( int var_idx, int type); // type in { CV_VAR_ORDERED, CV_VAR_CATEGORICAL }

    void set_delimiter( char ch );
    char get_delimiter() const;

    void set_miss_ch( char ch );
    char get_miss_ch() const;

    const std::map<cv::String, int>& get_class_labels_map() const;

protected:
    virtual void clear();

    void str_to_flt_elem( const char* token, float& flt_elem, int& type);
    void free_train_test_idx();

    char delimiter;
    char miss_ch;
    //char flt_separator;

    CvMat* values;
    CvMat* missing;
    CvMat* var_types;
    CvMat* var_idx_mask;

    CvMat* response_out; // header
    CvMat* var_idx_out; // mat
    CvMat* var_types_out; // mat

    int header_lines_number;

    int response_idx;

    int train_sample_count;
    bool mix;

    int total_class_count;
    std::map<cv::String, int> class_map;

    CvMat* train_sample_idx;
    CvMat* test_sample_idx;
    int* sample_idx; // data of train_sample_idx and test_sample_idx

    cv::RNG* rng;
};


namespace cv
{

typedef CvStatModel StatModel;
typedef CvParamGrid ParamGrid;
typedef CvNormalBayesClassifier NormalBayesClassifier;
typedef CvKNearest KNearest;
typedef CvSVMParams SVMParams;
typedef CvSVMKernel SVMKernel;
typedef CvSVMSolver SVMSolver;
typedef CvSVM SVM;
typedef CvDTreeParams DTreeParams;
typedef CvMLData TrainData;
typedef CvDTree DecisionTree;
typedef CvForestTree ForestTree;
typedef CvRTParams RandomTreeParams;
typedef CvRTrees RandomTrees;
typedef CvERTreeTrainData ERTreeTRainData;
typedef CvForestERTree ERTree;
typedef CvERTrees ERTrees;
typedef CvBoostParams BoostParams;
typedef CvBoostTree BoostTree;
typedef CvBoost Boost;
typedef CvANN_MLP_TrainParams ANN_MLP_TrainParams;
typedef CvANN_MLP NeuralNet_MLP;
typedef CvGBTreesParams GradientBoostingTreeParams;
typedef CvGBTrees GradientBoostingTrees;

template<> void DefaultDeleter<CvDTreeSplit>::operator ()(CvDTreeSplit* obj) const;
}

#endif // __cplusplus
#endif // __OPENCV_ML_HPP__

/* End of file. */

/* [<][>][^][v][top][bottom][index][help] */