This source file includes the following definitions:
- createBFMatcher
- makeGpuCollection
- isMaskSupported
- add
- getTrainDescriptors
- clear
- empty
- train
- match
- match
- matchAsync
- matchAsync
- matchConvert
- knnMatch
- knnMatch
- knnMatchAsync
- knnMatchAsync
- knnMatchConvert
- radiusMatch
- radiusMatch
- radiusMatchAsync
- radiusMatchAsync
- radiusMatchConvert
- createBFMatcher
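The file implements the CUDA brute-force descriptor matcher behind cv::cuda::DescriptorMatcher::createBFMatcher. A minimal usage sketch, not taken from this file (hypothetical descriptor inputs; assumes a CUDA-capable device and an OpenCV build with the cudafeatures2d module):

#include <opencv2/core/cuda.hpp>
#include <opencv2/cudafeatures2d.hpp>
#include <vector>

void exampleMatch(const cv::cuda::GpuMat& queryDesc, const cv::cuda::GpuMat& trainDesc)
{
    // NORM_HAMMING is the usual choice for binary descriptors such as ORB.
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
        cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_HAMMING);
    std::vector<cv::DMatch> matches;
    matcher->match(queryDesc, trainDesc, matches);  // blocking convenience wrapper
}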
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int) { throw_no_cuda(); return Ptr<cv::cuda::DescriptorMatcher>(); }
#else
namespace cv { namespace cuda { namespace device
{
namespace bf_match
{
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
cudaStream_t stream);
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
cudaStream_t stream);
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
cudaStream_t stream);
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
cudaStream_t stream);
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
cudaStream_t stream);
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
cudaStream_t stream);
}
namespace bf_knnmatch
{
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
cudaStream_t stream);
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
cudaStream_t stream);
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
cudaStream_t stream);
template <typename T> void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
cudaStream_t stream);
template <typename T> void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
cudaStream_t stream);
template <typename T> void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
cudaStream_t stream);
}
namespace bf_radius_match
{
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
cudaStream_t stream);
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
cudaStream_t stream);
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
cudaStream_t stream);
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
cudaStream_t stream);
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
cudaStream_t stream);
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
cudaStream_t stream);
}
}}}
namespace
{
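// makeGpuCollection packs the per-image train descriptors (and optional masks) into single device
// buffers: each element of the 1-row CPU staging Mat is a raw PtrStepSzb/PtrStepb header pointing at
// the corresponding GpuMat's device memory, and the header array is then uploaded so the collection
// kernels can iterate over all train images on the GPU.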
static void makeGpuCollection(const std::vector<GpuMat>& trainDescCollection,
const std::vector<GpuMat>& masks,
GpuMat& trainCollection,
GpuMat& maskCollection)
{
if (trainDescCollection.empty())
return;
if (masks.empty())
{
Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
*trainCollectionCPU_ptr = trainDescCollection[i];
trainCollection.upload(trainCollectionCPU);
maskCollection.release();
}
else
{
CV_Assert( masks.size() == trainDescCollection.size() );
Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb)));
PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr<PtrStepb>();
for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
{
const GpuMat& train = trainDescCollection[i];
const GpuMat& mask = masks[i];
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows) );
*trainCollectionCPU_ptr = train;
*maskCollectionCPU_ptr = mask;
}
trainCollection.upload(trainCollectionCPU);
maskCollection.upload(maskCollectionCPU);
}
}
class BFMatcher_Impl : public cv::cuda::DescriptorMatcher
{
public:
explicit BFMatcher_Impl(int norm) : norm_(norm)
{
CV_Assert( norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING );
}
virtual bool isMaskSupported() const { return true; }
virtual void add(const std::vector<GpuMat>& descriptors)
{
trainDescCollection_.insert(trainDescCollection_.end(), descriptors.begin(), descriptors.end());
}
virtual const std::vector<GpuMat>& getTrainDescriptors() const
{
return trainDescCollection_;
}
virtual void clear()
{
trainDescCollection_.clear();
}
virtual bool empty() const
{
return trainDescCollection_.empty();
}
virtual void train()
{
}
virtual void match(InputArray queryDescriptors, InputArray trainDescriptors,
std::vector<DMatch>& matches,
InputArray mask = noArray());
virtual void match(InputArray queryDescriptors,
std::vector<DMatch>& matches,
const std::vector<GpuMat>& masks = std::vector<GpuMat>());
virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
OutputArray matches,
InputArray mask = noArray(),
Stream& stream = Stream::Null());
virtual void matchAsync(InputArray queryDescriptors,
OutputArray matches,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
Stream& stream = Stream::Null());
virtual void matchConvert(InputArray gpu_matches,
std::vector<DMatch>& matches);
virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors,
std::vector<std::vector<DMatch> >& matches,
int k,
InputArray mask = noArray(),
bool compactResult = false);
virtual void knnMatch(InputArray queryDescriptors,
std::vector<std::vector<DMatch> >& matches,
int k,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
bool compactResult = false);
virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
OutputArray matches,
int k,
InputArray mask = noArray(),
Stream& stream = Stream::Null());
virtual void knnMatchAsync(InputArray queryDescriptors,
OutputArray matches,
int k,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
Stream& stream = Stream::Null());
virtual void knnMatchConvert(InputArray gpu_matches,
std::vector< std::vector<DMatch> >& matches,
bool compactResult = false);
virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors,
std::vector<std::vector<DMatch> >& matches,
float maxDistance,
InputArray mask = noArray(),
bool compactResult = false);
virtual void radiusMatch(InputArray queryDescriptors,
std::vector<std::vector<DMatch> >& matches,
float maxDistance,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
bool compactResult = false);
virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
OutputArray matches,
float maxDistance,
InputArray mask = noArray(),
Stream& stream = Stream::Null());
virtual void radiusMatchAsync(InputArray queryDescriptors,
OutputArray matches,
float maxDistance,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
Stream& stream = Stream::Null());
virtual void radiusMatchConvert(InputArray gpu_matches,
std::vector< std::vector<DMatch> >& matches,
bool compactResult = false);
private:
int norm_;
std::vector<GpuMat> trainDescCollection_;
};
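// BFMatcher_Impl follows one pattern throughout: each blocking match/knnMatch/radiusMatch call is a
// thin wrapper that runs the corresponding *Async variant into a temporary GpuMat and then decodes it
// on the host with matchConvert/knnMatchConvert/radiusMatchConvert. The GPU results are packed into a
// single 2-D matrix whose row layout encodes trainIdx/imgIdx/distance (see the comments before each
// *Async method below).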
void BFMatcher_Impl::match(InputArray _queryDescriptors, InputArray _trainDescriptors,
std::vector<DMatch>& matches,
InputArray _mask)
{
GpuMat d_matches;
matchAsync(_queryDescriptors, _trainDescriptors, d_matches, _mask);
matchConvert(d_matches, matches);
}
void BFMatcher_Impl::match(InputArray _queryDescriptors,
std::vector<DMatch>& matches,
const std::vector<GpuMat>& masks)
{
GpuMat d_matches;
matchAsync(_queryDescriptors, d_matches, masks);
matchConvert(d_matches, matches);
}
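// One-to-one matching against an explicit train set. The result is packed as a 2 x nQuery CV_32SC1
// matrix: row 0 holds the best trainIdx per query and row 1 holds the corresponding distances, stored
// bit-for-bit as 32-bit floats inside the integer matrix. A trainIdx of -1 marks a query without a
// match and is skipped by matchConvert().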
void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
OutputArray _matches,
InputArray _mask,
Stream& stream)
{
using namespace cv::cuda::device::bf_match;
const GpuMat query = _queryDescriptors.getGpuMat();
const GpuMat train = _trainDescriptors.getGpuMat();
const GpuMat mask = _mask.getGpuMat();
if (query.empty() || train.empty())
{
_matches.release();
return;
}
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
CV_Assert( train.cols == query.cols && train.type() == query.type() );
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
cudaStream_t stream);
static const caller_t callersL1[] =
{
matchL1_gpu<unsigned char>, 0,
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
matchL1_gpu<int>, matchL1_gpu<float>
};
static const caller_t callersL2[] =
{
0, 0,
0, 0,
0, matchL2_gpu<float>
};
static const caller_t callersHamming[] =
{
matchHamming_gpu<unsigned char>, 0,
matchHamming_gpu<unsigned short>, 0,
matchHamming_gpu<int>, 0
};
const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
const caller_t func = callers[query.depth()];
if (func == 0)
{
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
}
const int nQuery = query.rows;
_matches.create(2, nQuery, CV_32SC1);
GpuMat matches = _matches.getGpuMat();
GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(1));
func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream));
}
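// One-to-one matching against the whole stored train collection. The packed result grows to
// 3 x nQuery CV_32SC1: row 0 trainIdx, row 1 imgIdx (which train image the match came from),
// row 2 distance (float bits). matchConvert() recognises the extra row and fills DMatch::imgIdx.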
void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors,
OutputArray _matches,
const std::vector<GpuMat>& masks,
Stream& stream)
{
using namespace cv::cuda::device::bf_match;
const GpuMat query = _queryDescriptors.getGpuMat();
if (query.empty() || trainDescCollection_.empty())
{
_matches.release();
return;
}
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
GpuMat trainCollection, maskCollection;
makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
cudaStream_t stream);
static const caller_t callersL1[] =
{
matchL1_gpu<unsigned char>, 0,
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
matchL1_gpu<int>, matchL1_gpu<float>
};
static const caller_t callersL2[] =
{
0, 0,
0, 0,
0, matchL2_gpu<float>
};
static const caller_t callersHamming[] =
{
matchHamming_gpu<unsigned char>, 0,
matchHamming_gpu<unsigned short>, 0,
matchHamming_gpu<int>, 0
};
const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
const caller_t func = callers[query.depth()];
if (func == 0)
{
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
}
const int nQuery = query.rows;
_matches.create(3, nQuery, CV_32SC1);
GpuMat matches = _matches.getGpuMat();
GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
GpuMat imgIdx(1, nQuery, CV_32SC1, matches.ptr(1));
GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(2));
func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
}
void BFMatcher_Impl::matchConvert(InputArray _gpu_matches,
std::vector<DMatch>& matches)
{
Mat gpu_matches;
if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
{
_gpu_matches.getGpuMat().download(gpu_matches);
}
else
{
gpu_matches = _gpu_matches.getMat();
}
if (gpu_matches.empty())
{
matches.clear();
return;
}
CV_Assert( (gpu_matches.type() == CV_32SC1) && (gpu_matches.rows == 2 || gpu_matches.rows == 3) );
const int nQuery = gpu_matches.cols;
matches.clear();
matches.reserve(nQuery);
const int* trainIdxPtr = NULL;
const int* imgIdxPtr = NULL;
const float* distancePtr = NULL;
if (gpu_matches.rows == 2)
{
trainIdxPtr = gpu_matches.ptr<int>(0);
distancePtr = gpu_matches.ptr<float>(1);
}
else
{
trainIdxPtr = gpu_matches.ptr<int>(0);
imgIdxPtr = gpu_matches.ptr<int>(1);
distancePtr = gpu_matches.ptr<float>(2);
}
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
{
const int trainIdx = trainIdxPtr[queryIdx];
if (trainIdx == -1)
continue;
const int imgIdx = imgIdxPtr ? imgIdxPtr[queryIdx] : 0;
const float distance = distancePtr[queryIdx];
DMatch m(queryIdx, trainIdx, imgIdx, distance);
matches.push_back(m);
}
}
void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
std::vector<std::vector<DMatch> >& matches,
int k,
InputArray _mask,
bool compactResult)
{
GpuMat d_matches;
knnMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, k, _mask);
knnMatchConvert(d_matches, matches, compactResult);
}
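// k-NN matching against the stored collection. Only k == 2 has a dedicated GPU path (knnMatchAsync
// below); for any other k the implementation falls back to matching each train image separately on
// the GPU and merging the per-image k-NN lists on the CPU, keeping the k best candidates per query.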
void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors,
std::vector<std::vector<DMatch> >& matches,
int k,
const std::vector<GpuMat>& masks,
bool compactResult)
{
if (k == 2)
{
GpuMat d_matches;
knnMatchAsync(_queryDescriptors, d_matches, k, masks);
knnMatchConvert(d_matches, matches, compactResult);
}
else
{
const GpuMat query = _queryDescriptors.getGpuMat();
if (query.empty() || trainDescCollection_.empty())
{
matches.clear();
return;
}
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
std::vector< std::vector<DMatch> > curMatches;
std::vector<DMatch> temp;
temp.reserve(2 * k);
matches.resize(query.rows);
for (size_t i = 0; i < matches.size(); ++i)
matches[i].reserve(k);
for (size_t imgIdx = 0; imgIdx < trainDescCollection_.size(); ++imgIdx)
{
knnMatch(query, trainDescCollection_[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]);
for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
{
std::vector<DMatch>& localMatch = curMatches[queryIdx];
std::vector<DMatch>& globalMatch = matches[queryIdx];
for (size_t i = 0; i < localMatch.size(); ++i)
localMatch[i].imgIdx = imgIdx;
temp.clear();
std::merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), std::back_inserter(temp));
globalMatch.clear();
const size_t count = std::min(static_cast<size_t>(k), temp.size());
std::copy(temp.begin(), temp.begin() + count, std::back_inserter(globalMatch));
}
}
if (compactResult)
{
std::vector< std::vector<DMatch> >::iterator new_end = std::remove_if(matches.begin(), matches.end(),
[](const std::vector<DMatch>& e) { return e.empty(); });
matches.erase(new_end, matches.end());
}
}
}
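// k-NN matching against an explicit train set. For k == 2 the result is 2 x nQuery CV_32SC2
// (row 0: the two best trainIdx per query, row 1: the two distances as float bits). For other k it is
// a (2*nQuery) x k CV_32SC1 matrix: the top nQuery rows hold trainIdx, the bottom nQuery rows hold
// distances, and an nQuery x nTrain buffer from the stream's BufferPool serves as scratch space for
// the pairwise distances.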
void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
OutputArray _matches,
int k,
InputArray _mask,
Stream& stream)
{
using namespace cv::cuda::device::bf_knnmatch;
const GpuMat query = _queryDescriptors.getGpuMat();
const GpuMat train = _trainDescriptors.getGpuMat();
const GpuMat mask = _mask.getGpuMat();
if (query.empty() || train.empty())
{
_matches.release();
return;
}
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
CV_Assert( train.cols == query.cols && train.type() == query.type() );
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
cudaStream_t stream);
static const caller_t callersL1[] =
{
matchL1_gpu<unsigned char>, 0,
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
matchL1_gpu<int>, matchL1_gpu<float>
};
static const caller_t callersL2[] =
{
0, 0,
0, 0,
0, matchL2_gpu<float>
};
static const caller_t callersHamming[] =
{
matchHamming_gpu<unsigned char>, 0,
matchHamming_gpu<unsigned short>, 0,
matchHamming_gpu<int>, 0
};
const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
const caller_t func = callers[query.depth()];
if (func == 0)
{
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
}
const int nQuery = query.rows;
const int nTrain = train.rows;
GpuMat trainIdx, distance, allDist;
if (k == 2)
{
_matches.create(2, nQuery, CV_32SC2);
GpuMat matches = _matches.getGpuMat();
trainIdx = GpuMat(1, nQuery, CV_32SC2, matches.ptr(0));
distance = GpuMat(1, nQuery, CV_32FC2, matches.ptr(1));
}
else
{
_matches.create(2 * nQuery, k, CV_32SC1);
GpuMat matches = _matches.getGpuMat();
trainIdx = GpuMat(nQuery, k, CV_32SC1, matches.ptr(0), matches.step);
distance = GpuMat(nQuery, k, CV_32FC1, matches.ptr(nQuery), matches.step);
BufferPool pool(stream);
allDist = pool.getBuffer(nQuery, nTrain, CV_32FC1);
}
trainIdx.setTo(Scalar::all(-1), stream);
func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
}
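// k-NN matching against the stored collection. Only k == 2 is implemented on this path; the result is
// 3 x nQuery CV_32SC2 with rows trainIdx / imgIdx / distance (two values per query in each row).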
void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors,
OutputArray _matches,
int k,
const std::vector<GpuMat>& masks,
Stream& stream)
{
using namespace cv::cuda::device::bf_knnmatch;
if (k != 2)
{
CV_Error(Error::StsNotImplemented, "only k=2 mode is supported for now");
}
const GpuMat query = _queryDescriptors.getGpuMat();
if (query.empty() || trainDescCollection_.empty())
{
_matches.release();
return;
}
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
GpuMat trainCollection, maskCollection;
makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
cudaStream_t stream);
static const caller_t callersL1[] =
{
match2L1_gpu<unsigned char>, 0,
match2L1_gpu<unsigned short>, match2L1_gpu<short>,
match2L1_gpu<int>, match2L1_gpu<float>
};
static const caller_t callersL2[] =
{
0, 0,
0, 0,
0, match2L2_gpu<float>
};
static const caller_t callersHamming[] =
{
match2Hamming_gpu<unsigned char>, 0,
match2Hamming_gpu<unsigned short>, 0,
match2Hamming_gpu<int>, 0
};
const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
const caller_t func = callers[query.depth()];
if (func == 0)
{
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
}
const int nQuery = query.rows;
_matches.create(3, nQuery, CV_32SC2);
GpuMat matches = _matches.getGpuMat();
GpuMat trainIdx(1, nQuery, CV_32SC2, matches.ptr(0));
GpuMat imgIdx(1, nQuery, CV_32SC2, matches.ptr(1));
GpuMat distance(1, nQuery, CV_32FC2, matches.ptr(2));
trainIdx.setTo(Scalar::all(-1), stream);
func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
}
void BFMatcher_Impl::knnMatchConvert(InputArray _gpu_matches,
std::vector< std::vector<DMatch> >& matches,
bool compactResult)
{
Mat gpu_matches;
if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
{
_gpu_matches.getGpuMat().download(gpu_matches);
}
else
{
gpu_matches = _gpu_matches.getMat();
}
if (gpu_matches.empty())
{
matches.clear();
return;
}
CV_Assert( ((gpu_matches.type() == CV_32SC2) && (gpu_matches.rows == 2 || gpu_matches.rows == 3)) ||
(gpu_matches.type() == CV_32SC1) );
int nQuery = -1, k = -1;
const int* trainIdxPtr = NULL;
const int* imgIdxPtr = NULL;
const float* distancePtr = NULL;
if (gpu_matches.type() == CV_32SC2)
{
nQuery = gpu_matches.cols;
k = 2;
if (gpu_matches.rows == 2)
{
trainIdxPtr = gpu_matches.ptr<int>(0);
distancePtr = gpu_matches.ptr<float>(1);
}
else
{
trainIdxPtr = gpu_matches.ptr<int>(0);
imgIdxPtr = gpu_matches.ptr<int>(1);
distancePtr = gpu_matches.ptr<float>(2);
}
}
else
{
nQuery = gpu_matches.rows / 2;
k = gpu_matches.cols;
trainIdxPtr = gpu_matches.ptr<int>(0);
distancePtr = gpu_matches.ptr<float>(nQuery);
}
matches.clear();
matches.reserve(nQuery);
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
{
matches.push_back(std::vector<DMatch>());
std::vector<DMatch>& curMatches = matches.back();
curMatches.reserve(k);
for (int i = 0; i < k; ++i)
{
const int trainIdx = *trainIdxPtr;
if (trainIdx == -1)
continue;
const int imgIdx = imgIdxPtr ? *imgIdxPtr : 0;
const float distance = *distancePtr;
DMatch m(queryIdx, trainIdx, imgIdx, distance);
curMatches.push_back(m);
++trainIdxPtr;
++distancePtr;
if (imgIdxPtr)
++imgIdxPtr;
}
if (compactResult && curMatches.empty())
{
matches.pop_back();
}
}
}
void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
std::vector<std::vector<DMatch> >& matches,
float maxDistance,
InputArray _mask,
bool compactResult)
{
GpuMat d_matches;
radiusMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, maxDistance, _mask);
radiusMatchConvert(d_matches, matches, compactResult);
}
void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors,
std::vector<std::vector<DMatch> >& matches,
float maxDistance,
const std::vector<GpuMat>& masks,
bool compactResult)
{
GpuMat d_matches;
radiusMatchAsync(_queryDescriptors, d_matches, maxDistance, masks);
radiusMatchConvert(d_matches, matches, compactResult);
}
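// Radius matching against an explicit train set. The packed result has 2*nQuery + 1 rows: rows
// [0, nQuery) hold trainIdx, rows [nQuery, 2*nQuery) hold distances (float bits), and the final row
// holds the per-query match count. The column count max(nTrain / 100, nQuery) is only a capacity
// guess; radiusMatchConvert() clamps each count to the available columns.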
void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
OutputArray _matches,
float maxDistance,
InputArray _mask,
Stream& stream)
{
using namespace cv::cuda::device::bf_radius_match;
const GpuMat query = _queryDescriptors.getGpuMat();
const GpuMat train = _trainDescriptors.getGpuMat();
const GpuMat mask = _mask.getGpuMat();
if (query.empty() || train.empty())
{
_matches.release();
return;
}
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
CV_Assert( train.cols == query.cols && train.type() == query.type() );
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
cudaStream_t stream);
static const caller_t callersL1[] =
{
matchL1_gpu<unsigned char>, 0,
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
matchL1_gpu<int>, matchL1_gpu<float>
};
static const caller_t callersL2[] =
{
0, 0,
0, 0,
0, matchL2_gpu<float>
};
static const caller_t callersHamming[] =
{
matchHamming_gpu<unsigned char>, 0,
matchHamming_gpu<unsigned short>, 0,
matchHamming_gpu<int>, 0
};
const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
const caller_t func = callers[query.depth()];
if (func == 0)
{
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
}
const int nQuery = query.rows;
const int nTrain = train.rows;
const int cols = std::max((nTrain / 100), nQuery);
_matches.create(2 * nQuery + 1, cols, CV_32SC1);
GpuMat matches = _matches.getGpuMat();
GpuMat trainIdx(nQuery, cols, CV_32SC1, matches.ptr(0), matches.step);
GpuMat distance(nQuery, cols, CV_32FC1, matches.ptr(nQuery), matches.step);
GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(2 * nQuery));
nMatches.setTo(Scalar::all(0), stream);
func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream));
}
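// Radius matching against the stored collection. The result has 3*nQuery + 1 rows (trainIdx, imgIdx
// and distance blocks plus a trailing nMatches row) and is deliberately created as CV_32FC1 rather
// than CV_32SC1: radiusMatchConvert() uses the element type to tell this layout apart from the
// two-image layout above.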
void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors,
OutputArray _matches,
float maxDistance,
const std::vector<GpuMat>& masks,
Stream& stream)
{
using namespace cv::cuda::device::bf_radius_match;
const GpuMat query = _queryDescriptors.getGpuMat();
if (query.empty() || trainDescCollection_.empty())
{
_matches.release();
return;
}
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
GpuMat trainCollection, maskCollection;
makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
cudaStream_t stream);
static const caller_t callersL1[] =
{
matchL1_gpu<unsigned char>, 0,
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
matchL1_gpu<int>, matchL1_gpu<float>
};
static const caller_t callersL2[] =
{
0, 0,
0, 0,
0, matchL2_gpu<float>
};
static const caller_t callersHamming[] =
{
matchHamming_gpu<unsigned char>, 0,
matchHamming_gpu<unsigned short>, 0,
matchHamming_gpu<int>, 0
};
const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
const caller_t func = callers[query.depth()];
if (func == 0)
{
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
}
const int nQuery = query.rows;
_matches.create(3 * nQuery + 1, nQuery, CV_32FC1);
GpuMat matches = _matches.getGpuMat();
GpuMat trainIdx(nQuery, nQuery, CV_32SC1, matches.ptr(0), matches.step);
GpuMat imgIdx(nQuery, nQuery, CV_32SC1, matches.ptr(nQuery), matches.step);
GpuMat distance(nQuery, nQuery, CV_32FC1, matches.ptr(2 * nQuery), matches.step);
GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(3 * nQuery));
nMatches.setTo(Scalar::all(0), stream);
std::vector<PtrStepSzb> trains_(trainDescCollection_.begin(), trainDescCollection_.end());
std::vector<PtrStepSzb> masks_(masks.begin(), masks.end());
func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream));
}
void BFMatcher_Impl::radiusMatchConvert(InputArray _gpu_matches,
std::vector< std::vector<DMatch> >& matches,
bool compactResult)
{
Mat gpu_matches;
if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
{
_gpu_matches.getGpuMat().download(gpu_matches);
}
else
{
gpu_matches = _gpu_matches.getMat();
}
if (gpu_matches.empty())
{
matches.clear();
return;
}
CV_Assert( gpu_matches.type() == CV_32SC1 || gpu_matches.type() == CV_32FC1 );
int nQuery = -1;
const int* trainIdxPtr = NULL;
const int* imgIdxPtr = NULL;
const float* distancePtr = NULL;
const int* nMatchesPtr = NULL;
if (gpu_matches.type() == CV_32SC1)
{
nQuery = (gpu_matches.rows - 1) / 2;
trainIdxPtr = gpu_matches.ptr<int>(0);
distancePtr = gpu_matches.ptr<float>(nQuery);
nMatchesPtr = gpu_matches.ptr<int>(2 * nQuery);
}
else
{
nQuery = (gpu_matches.rows - 1) / 3;
trainIdxPtr = gpu_matches.ptr<int>(0);
imgIdxPtr = gpu_matches.ptr<int>(nQuery);
distancePtr = gpu_matches.ptr<float>(2 * nQuery);
nMatchesPtr = gpu_matches.ptr<int>(3 * nQuery);
}
matches.clear();
matches.reserve(nQuery);
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
{
const int nMatched = std::min(nMatchesPtr[queryIdx], gpu_matches.cols);
if (nMatched == 0)
{
if (!compactResult)
{
matches.push_back(std::vector<DMatch>());
}
}
else
{
matches.push_back(std::vector<DMatch>(nMatched));
std::vector<DMatch>& curMatches = matches.back();
for (int i = 0; i < nMatched; ++i)
{
const int trainIdx = trainIdxPtr[i];
const int imgIdx = imgIdxPtr ? imgIdxPtr[i] : 0;
const float distance = distancePtr[i];
DMatch m(queryIdx, trainIdx, imgIdx, distance);
curMatches[i] = m;
}
std::sort(curMatches.begin(), curMatches.end());
}
trainIdxPtr += gpu_matches.cols;
distancePtr += gpu_matches.cols;
if (imgIdxPtr)
imgIdxPtr += gpu_matches.cols;
}
}
}
Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int norm)
{
return makePtr<BFMatcher_Impl>(norm);
}
#endif
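A second common way to use this matcher is Lowe's ratio test on the k = 2 output of knnMatch. A hypothetical sketch, not part of this file (the 0.8 threshold and the descriptor inputs are illustrative):

#include <opencv2/core/cuda.hpp>
#include <opencv2/cudafeatures2d.hpp>
#include <vector>

std::vector<cv::DMatch> ratioTestMatch(const cv::cuda::GpuMat& queryDesc, const cv::cuda::GpuMat& trainDesc)
{
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
        cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_HAMMING);
    std::vector<std::vector<cv::DMatch> > knn;
    matcher->knnMatch(queryDesc, trainDesc, knn, 2);
    std::vector<cv::DMatch> good;
    for (size_t i = 0; i < knn.size(); ++i)
    {
        // Keep a match only if the best candidate is clearly better than the second best.
        if (knn[i].size() == 2 && knn[i][0].distance < 0.8f * knn[i][1].distance)
            good.push_back(knn[i][0]);
    }
    return good;
}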