#ifndef OPENCV_FLANN_AUTOTUNED_INDEX_H_
#define OPENCV_FLANN_AUTOTUNED_INDEX_H_
#include "general.h"
#include "nn_index.h"
#include "ground_truth.h"
#include "index_testing.h"
#include "sampling.h"
#include "kdtree_index.h"
#include "kdtree_single_index.h"
#include "kmeans_index.h"
#include "composite_index.h"
#include "linear_index.h"
#include "logger.h"
namespace cvflann
{
template<typename Distance>
NNIndex<Distance>* create_index_by_type(const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance);
struct AutotunedIndexParams : public IndexParams
{
AutotunedIndexParams(float target_precision = 0.8, float build_weight = 0.01, float memory_weight = 0, float sample_fraction = 0.1)
{
(*this)["algorithm"] = FLANN_INDEX_AUTOTUNED;
(*this)["target_precision"] = target_precision;
(*this)["build_weight"] = build_weight;
(*this)["memory_weight"] = memory_weight;
(*this)["sample_fraction"] = sample_fraction;
}
};
template <typename Distance>
class AutotunedIndex : public NNIndex<Distance>
{
public:
typedef typename Distance::ElementType ElementType;
typedef typename Distance::ResultType DistanceType;
AutotunedIndex(const Matrix<ElementType>& inputData, const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
dataset_(inputData), distance_(d)
{
target_precision_ = get_param(params, "target_precision",0.8f);
build_weight_ = get_param(params,"build_weight", 0.01f);
memory_weight_ = get_param(params, "memory_weight", 0.0f);
sample_fraction_ = get_param(params,"sample_fraction", 0.1f);
bestIndex_ = NULL;
}
AutotunedIndex(const AutotunedIndex&);
AutotunedIndex& operator=(const AutotunedIndex&);
virtual ~AutotunedIndex()
{
if (bestIndex_ != NULL) {
delete bestIndex_;
bestIndex_ = NULL;
}
}
virtual void buildIndex()
{
std::ostringstream stream;
bestParams_ = estimateBuildParams();
print_params(bestParams_, stream);
Logger::info("----------------------------------------------------\n");
Logger::info("Autotuned parameters:\n");
Logger::info("%s", stream.str().c_str());
Logger::info("----------------------------------------------------\n");
bestIndex_ = create_index_by_type(dataset_, bestParams_, distance_);
bestIndex_->buildIndex();
speedup_ = estimateSearchParams(bestSearchParams_);
stream.str(std::string());
print_params(bestSearchParams_, stream);
Logger::info("----------------------------------------------------\n");
Logger::info("Search parameters:\n");
Logger::info("%s", stream.str().c_str());
Logger::info("----------------------------------------------------\n");
}
virtual void saveIndex(FILE* stream)
{
save_value(stream, (int)bestIndex_->getType());
bestIndex_->saveIndex(stream);
save_value(stream, get_param<int>(bestSearchParams_, "checks"));
}
virtual void loadIndex(FILE* stream)
{
int index_type;
load_value(stream, index_type);
IndexParams params;
params["algorithm"] = (flann_algorithm_t)index_type;
bestIndex_ = create_index_by_type<Distance>(dataset_, params, distance_);
bestIndex_->loadIndex(stream);
int checks;
load_value(stream, checks);
bestSearchParams_["checks"] = checks;
}
virtual void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
{
int checks = get_param<int>(searchParams,"checks",FLANN_CHECKS_AUTOTUNED);
if (checks == FLANN_CHECKS_AUTOTUNED) {
bestIndex_->findNeighbors(result, vec, bestSearchParams_);
}
else {
bestIndex_->findNeighbors(result, vec, searchParams);
}
}
IndexParams getParameters() const
{
return bestIndex_->getParameters();
}
SearchParams getSearchParameters() const
{
return bestSearchParams_;
}
float getSpeedup() const
{
return speedup_;
}
virtual size_t size() const
{
return bestIndex_->size();
}
virtual size_t veclen() const
{
return bestIndex_->veclen();
}
virtual int usedMemory() const
{
return bestIndex_->usedMemory();
}
virtual flann_algorithm_t getType() const
{
return FLANN_INDEX_AUTOTUNED;
}
private:
struct CostData
{
float searchTimeCost;
float buildTimeCost;
float memoryCost;
float totalCost;
IndexParams params;
};
void evaluate_kmeans(CostData& cost)
{
StartStopTimer t;
int checks;
const int nn = 1;
Logger::info("KMeansTree using params: max_iterations=%d, branching=%d\n",
get_param<int>(cost.params,"iterations"),
get_param<int>(cost.params,"branching"));
KMeansIndex<Distance> kmeans(sampledDataset_, cost.params, distance_);
t.start();
kmeans.buildIndex();
t.stop();
float buildTime = (float)t.value;
float searchTime = test_index_precision(kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
cost.memoryCost = (kmeans.usedMemory() + datasetMemory) / datasetMemory;
cost.searchTimeCost = searchTime;
cost.buildTimeCost = buildTime;
Logger::info("KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
}
void evaluate_kdtree(CostData& cost)
{
StartStopTimer t;
int checks;
const int nn = 1;
Logger::info("KDTree using params: trees=%d\n", get_param<int>(cost.params,"trees"));
KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_);
t.start();
kdtree.buildIndex();
t.stop();
float buildTime = (float)t.value;
float searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
cost.searchTimeCost = searchTime;
cost.buildTimeCost = buildTime;
Logger::info("KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
}
void optimizeKMeans(std::vector<CostData>& costs)
{
Logger::info("KMEANS, Step 1: Exploring parameter space\n");
int maxIterations[] = { 1, 5, 10, 15 };
int branchingFactors[] = { 16, 32, 64, 128, 256 };
int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors);
costs.reserve(costs.size() + kmeansParamSpaceSize);
for (size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) {
for (size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) {
CostData cost;
cost.params["algorithm"] = FLANN_INDEX_KMEANS;
cost.params["centers_init"] = FLANN_CENTERS_RANDOM;
cost.params["iterations"] = maxIterations[i];
cost.params["branching"] = branchingFactors[j];
evaluate_kmeans(cost);
costs.push_back(cost);
}
}
}
void optimizeKDTree(std::vector<CostData>& costs)
{
Logger::info("KD-TREE, Step 1: Exploring parameter space\n");
int testTrees[] = { 1, 4, 8, 16, 32 };
for (size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
CostData cost;
cost.params["trees"] = testTrees[i];
evaluate_kdtree(cost);
costs.push_back(cost);
}
}
IndexParams estimateBuildParams()
{
std::vector<CostData> costs;
int sampleSize = int(sample_fraction_ * dataset_.rows);
int testSampleSize = std::min(sampleSize / 10, 1000);
Logger::info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_);
if (testSampleSize < 10) {
Logger::info("Choosing linear, dataset too small\n");
return LinearIndexParams();
}
sampledDataset_ = random_sample(dataset_, sampleSize);
testDataset_ = random_sample(sampledDataset_, testSampleSize, true);
Logger::info("Computing ground truth... \n");
gt_matches_ = Matrix<int>(new int[testDataset_.rows], testDataset_.rows, 1);
StartStopTimer t;
t.start();
compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_);
t.stop();
CostData linear_cost;
linear_cost.searchTimeCost = (float)t.value;
linear_cost.buildTimeCost = 0;
linear_cost.memoryCost = 0;
linear_cost.params["algorithm"] = FLANN_INDEX_LINEAR;
costs.push_back(linear_cost);
Logger::info("Autotuning parameters...\n");
optimizeKMeans(costs);
optimizeKDTree(costs);
float bestTimeCost = costs[0].searchTimeCost;
for (size_t i = 0; i < costs.size(); ++i) {
float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
if (timeCost < bestTimeCost) {
bestTimeCost = timeCost;
}
}
float bestCost = costs[0].searchTimeCost / bestTimeCost;
IndexParams bestParams = costs[0].params;
if (bestTimeCost > 0) {
for (size_t i = 0; i < costs.size(); ++i) {
float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
memory_weight_ * costs[i].memoryCost;
if (crtCost < bestCost) {
bestCost = crtCost;
bestParams = costs[i].params;
}
}
}
delete[] gt_matches_.data;
delete[] testDataset_.data;
delete[] sampledDataset_.data;
return bestParams;
}
float estimateSearchParams(SearchParams& searchParams)
{
const int nn = 1;
const size_t SAMPLE_COUNT = 1000;
assert(bestIndex_ != NULL);
float speedup = 0;
int samples = (int)std::min(dataset_.rows / 10, SAMPLE_COUNT);
if (samples > 0) {
Matrix<ElementType> testDataset = random_sample(dataset_, samples);
Logger::info("Computing ground truth\n");
Matrix<int> gt_matches(new int[testDataset.rows], testDataset.rows, 1);
StartStopTimer t;
t.start();
compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_);
t.stop();
float linear = (float)t.value;
int checks;
Logger::info("Estimating number of checks\n");
float searchTime;
float cb_index;
if (bestIndex_->getType() == FLANN_INDEX_KMEANS) {
Logger::info("KMeans algorithm, estimating cluster border factor\n");
KMeansIndex<Distance>* kmeans = (KMeansIndex<Distance>*)bestIndex_;
float bestSearchTime = -1;
float best_cb_index = -1;
int best_checks = -1;
for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
kmeans->set_cb_index(cb_index);
searchTime = test_index_precision(*kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
bestSearchTime = searchTime;
best_cb_index = cb_index;
best_checks = checks;
}
}
searchTime = bestSearchTime;
cb_index = best_cb_index;
checks = best_checks;
kmeans->set_cb_index(best_cb_index);
Logger::info("Optimum cb_index: %g\n", cb_index);
bestParams_["cb_index"] = cb_index;
}
else {
searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
}
Logger::info("Required number of checks: %d \n", checks);
searchParams["checks"] = checks;
speedup = linear / searchTime;
delete[] gt_matches.data;
delete[] testDataset.data;
}
return speedup;
}
private:
NNIndex<Distance>* bestIndex_;
IndexParams bestParams_;
SearchParams bestSearchParams_;
Matrix<ElementType> sampledDataset_;
Matrix<ElementType> testDataset_;
Matrix<int> gt_matches_;
float speedup_;
const Matrix<ElementType> dataset_;
float target_precision_;
float build_weight_;
float memory_weight_;
float sample_fraction_;
Distance distance_;
};
}
#endif