This source file includes following definitions.
- add
- subtract
- multiply
- divide
- absdiff
- abs
- sqr
- sqrt
- exp
- log
- pow
- compare
- bitwise_not
- bitwise_or
- bitwise_and
- bitwise_xor
- rshift
- lshift
- min
- max
- addWeighted
- threshold
- magnitude
- magnitude
- magnitudeSqr
- magnitudeSqr
- phase
- cartToPolar
- polarToCart
- arithm_op
- add
- subtract
- multiply
- divide
- absdiff
- compare
- bitwise_or
- bitwise_and
- bitwise_xor
- call
- call
- rshift
- lshift
- min
- max
- npp_magnitude
- magnitude
- magnitudeSqr
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
void cv::cuda::add(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); }
void cv::cuda::subtract(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); }
void cv::cuda::multiply(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); }
void cv::cuda::divide(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); }
void cv::cuda::absdiff(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::abs(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::sqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::sqrt(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::exp(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::log(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::pow(InputArray, double, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::compare(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }
void cv::cuda::bitwise_not(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::bitwise_or(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::bitwise_and(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::bitwise_xor(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::rshift(InputArray, Scalar_<int>, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::lshift(InputArray, Scalar_<int>, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::min(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::max(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::addWeighted(InputArray, double, InputArray, double, double, OutputArray, int, Stream&) { throw_no_cuda(); }
double cv::cuda::threshold(InputArray, OutputArray, double, double, int, Stream&) {throw_no_cuda(); return 0.0;}
void cv::cuda::magnitude(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::magnitude(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::magnitudeSqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::magnitudeSqr(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::phase(InputArray, InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
void cv::cuda::cartToPolar(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
void cv::cuda::polarToCart(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
#else
namespace
{
typedef void (*mat_mat_func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int op);
typedef void (*mat_scalar_func_t)(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int op);
void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, double scale, int dtype, Stream& stream,
mat_mat_func_t mat_mat_func, mat_scalar_func_t mat_scalar_func, int op = 0)
{
const int kind1 = _src1.kind();
const int kind2 = _src2.kind();
const bool isScalar1 = (kind1 == _InputArray::MATX);
const bool isScalar2 = (kind2 == _InputArray::MATX);
CV_Assert( !isScalar1 || !isScalar2 );
GpuMat src1;
if (!isScalar1)
src1 = getInputMat(_src1, stream);
GpuMat src2;
if (!isScalar2)
src2 = getInputMat(_src2, stream);
Mat scalar;
if (isScalar1)
scalar = _src1.getMat();
else if (isScalar2)
scalar = _src2.getMat();
Scalar val;
if (!scalar.empty())
{
CV_Assert( scalar.total() <= 4 );
scalar.convertTo(Mat_<double>(scalar.rows, scalar.cols, &val[0]), CV_64F);
}
GpuMat mask = getInputMat(_mask, stream);
const int sdepth = src1.empty() ? src2.depth() : src1.depth();
const int cn = src1.empty() ? src2.channels() : src1.channels();
const Size size = src1.empty() ? src2.size() : src1.size();
if (dtype < 0)
dtype = sdepth;
const int ddepth = CV_MAT_DEPTH(dtype);
CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F );
CV_Assert( !scalar.empty() || (src2.type() == src1.type() && src2.size() == src1.size()) );
CV_Assert( mask.empty() || (cn == 1 && mask.size() == size && mask.type() == CV_8UC1) );
if (sdepth == CV_64F || ddepth == CV_64F)
{
if (!deviceSupports(NATIVE_DOUBLE))
CV_Error(Error::StsUnsupportedFormat, "The device doesn't support double");
}
GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(ddepth, cn), stream);
if (isScalar1)
mat_scalar_func(src2, val, true, dst, mask, scale, stream, op);
else if (isScalar2)
mat_scalar_func(src1, val, false, dst, mask, scale, stream, op);
else
mat_mat_func(src1, src2, dst, mask, scale, stream, op);
syncOutput(dst, _dst, stream);
}
}
void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int);
void addScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int);
void cv::cuda::add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, Stream& stream)
{
arithm_op(src1, src2, dst, mask, 1.0, dtype, stream, addMat, addScalar);
}
void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int);
void subScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int);
void cv::cuda::subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, Stream& stream)
{
arithm_op(src1, src2, dst, mask, 1.0, dtype, stream, subMat, subScalar);
}
void mulMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int);
void mulMat_8uc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
void mulMat_16sc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
void mulScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int);
void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream)
{
if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
{
GpuMat src1 = getInputMat(_src1, stream);
GpuMat src2 = getInputMat(_src2, stream);
CV_Assert( src1.size() == src2.size() );
GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
mulMat_8uc4_32f(src1, src2, dst, stream);
syncOutput(dst, _dst, stream);
}
else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1)
{
GpuMat src1 = getInputMat(_src1, stream);
GpuMat src2 = getInputMat(_src2, stream);
CV_Assert( src1.size() == src2.size() );
GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
mulMat_16sc4_32f(src1, src2, dst, stream);
syncOutput(dst, _dst, stream);
}
else
{
arithm_op(_src1, _src2, _dst, GpuMat(), scale, dtype, stream, mulMat, mulScalar);
}
}
void divMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int);
void divMat_8uc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
void divMat_16sc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
void divScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int);
void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream)
{
if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
{
GpuMat src1 = getInputMat(_src1, stream);
GpuMat src2 = getInputMat(_src2, stream);
CV_Assert( src1.size() == src2.size() );
GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
divMat_8uc4_32f(src1, src2, dst, stream);
syncOutput(dst, _dst, stream);
}
else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1)
{
GpuMat src1 = getInputMat(_src1, stream);
GpuMat src2 = getInputMat(_src2, stream);
CV_Assert( src1.size() == src2.size() );
GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
divMat_16sc4_32f(src1, src2, dst, stream);
syncOutput(dst, _dst, stream);
}
else
{
arithm_op(_src1, _src2, _dst, GpuMat(), scale, dtype, stream, divMat, divScalar);
}
}
void absDiffMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int);
void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int);
void cv::cuda::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
{
arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, absDiffMat, absDiffScalar);
}
void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);
void cmpScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);
void cv::cuda::compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream)
{
arithm_op(src1, src2, dst, noArray(), 1.0, CV_8U, stream, cmpMat, cmpScalar, cmpop);
}
namespace
{
enum
{
BIT_OP_AND,
BIT_OP_OR,
BIT_OP_XOR
};
}
void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);
void bitScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);
void cv::cuda::bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
{
arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_OR);
}
void cv::cuda::bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
{
arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_AND);
}
void cv::cuda::bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
{
arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_XOR);
}
namespace
{
template <int DEPTH, int cn> struct NppShiftFunc
{
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
typedef NppStatus (*func_t)(const npp_type* pSrc1, int nSrc1Step, const Npp32u* pConstants, npp_type* pDst, int nDstStep, NppiSize oSizeROI);
};
template <int DEPTH> struct NppShiftFunc<DEPTH, 1>
{
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
typedef NppStatus (*func_t)(const npp_type* pSrc1, int nSrc1Step, const Npp32u pConstants, npp_type* pDst, int nDstStep, NppiSize oSizeROI);
};
template <int DEPTH, int cn, typename NppShiftFunc<DEPTH, cn>::func_t func> struct NppShift
{
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
static void call(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream)
{
NppStreamHandler h(stream);
NppiSize oSizeROI;
oSizeROI.width = src.cols;
oSizeROI.height = src.rows;
nppSafeCall( func(src.ptr<npp_type>(), static_cast<int>(src.step), sc.val, dst.ptr<npp_type>(), static_cast<int>(dst.step), oSizeROI) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <int DEPTH, typename NppShiftFunc<DEPTH, 1>::func_t func> struct NppShift<DEPTH, 1, func>
{
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
static void call(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream)
{
NppStreamHandler h(stream);
NppiSize oSizeROI;
oSizeROI.width = src.cols;
oSizeROI.height = src.rows;
nppSafeCall( func(src.ptr<npp_type>(), static_cast<int>(src.step), sc.val[0], dst.ptr<npp_type>(), static_cast<int>(dst.step), oSizeROI) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}
void cv::cuda::rshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
{
typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[5][4] =
{
{NppShift<CV_8U , 1, nppiRShiftC_8u_C1R >::call, 0, NppShift<CV_8U , 3, nppiRShiftC_8u_C3R >::call, NppShift<CV_8U , 4, nppiRShiftC_8u_C4R>::call },
{NppShift<CV_8S , 1, nppiRShiftC_8s_C1R >::call, 0, NppShift<CV_8S , 3, nppiRShiftC_8s_C3R >::call, NppShift<CV_8S , 4, nppiRShiftC_8s_C4R>::call },
{NppShift<CV_16U, 1, nppiRShiftC_16u_C1R>::call, 0, NppShift<CV_16U, 3, nppiRShiftC_16u_C3R>::call, NppShift<CV_16U, 4, nppiRShiftC_16u_C4R>::call},
{NppShift<CV_16S, 1, nppiRShiftC_16s_C1R>::call, 0, NppShift<CV_16S, 3, nppiRShiftC_16s_C3R>::call, NppShift<CV_16S, 4, nppiRShiftC_16s_C4R>::call},
{NppShift<CV_32S, 1, nppiRShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiRShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiRShiftC_32s_C4R>::call},
};
GpuMat src = getInputMat(_src, stream);
CV_Assert( src.depth() < CV_32F );
CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));
syncOutput(dst, _dst, stream);
}
void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
{
typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[5][4] =
{
{NppShift<CV_8U , 1, nppiLShiftC_8u_C1R>::call , 0, NppShift<CV_8U , 3, nppiLShiftC_8u_C3R>::call , NppShift<CV_8U , 4, nppiLShiftC_8u_C4R>::call },
{0 , 0, 0 , 0 },
{NppShift<CV_16U, 1, nppiLShiftC_16u_C1R>::call, 0, NppShift<CV_16U, 3, nppiLShiftC_16u_C3R>::call, NppShift<CV_16U, 4, nppiLShiftC_16u_C4R>::call},
{0 , 0, 0 , 0 },
{NppShift<CV_32S, 1, nppiLShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiLShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiLShiftC_32s_C4R>::call},
};
GpuMat src = getInputMat(_src, stream);
CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S );
CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));
syncOutput(dst, _dst, stream);
}
namespace
{
enum
{
MIN_OP,
MAX_OP
};
}
void minMaxMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int op);
void minMaxScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int op);
void cv::cuda::min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
{
arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, minMaxMat, minMaxScalar, MIN_OP);
}
void cv::cuda::max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
{
arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, minMaxMat, minMaxScalar, MAX_OP);
}
namespace
{
typedef NppStatus (*nppMagnitude_t)(const Npp32fc* pSrc, int nSrcStep, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
void npp_magnitude(const GpuMat& src, GpuMat& dst, nppMagnitude_t func, cudaStream_t stream)
{
CV_Assert(src.type() == CV_32FC2);
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
NppStreamHandler h(stream);
nppSafeCall( func(src.ptr<Npp32fc>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
void cv::cuda::magnitude(InputArray _src, OutputArray _dst, Stream& stream)
{
GpuMat src = getInputMat(_src, stream);
GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream);
npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));
syncOutput(dst, _dst, stream);
}
void cv::cuda::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream)
{
GpuMat src = getInputMat(_src, stream);
GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream);
npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
syncOutput(dst, _dst, stream);
}
#endif