This source file includes the following definitions.
- reduceThread
- reduceWarp
- reduceBlock
- reduceThread
- reduceWarp
- reduceBlock
- reduceThread
- reduceWarp
- reduceBlock
- reduceThread
- reduceWarp
- reduceBlock
#pragma once
#ifndef __OPENCV_CUDEV_BLOCK_VEC_DISTANCE_HPP__
#define __OPENCV_CUDEV_BLOCK_VEC_DISTANCE_HPP__
#include "../common.hpp"
#include "../functional/functional.hpp"
#include "../warp/reduce.hpp"
#include "reduce.hpp"
namespace cv { namespace cudev {
// L1 (sum of absolute differences) accumulator, primary template.
//
// Usage pattern visible from the interface: every thread folds pairs of values
// into its own running sum with reduceThread(), then reduceWarp() or
// reduceBlock() hands that per-thread sum to the warpReduce / blockReduce
// helpers together with an addition functor. The final value is read through
// the conversion operator.
//
// T is not referenced inside the primary template: operands are always taken
// as int and the sum is always kept as uint.
template <typename T> struct NormL1
{
    typedef int  value_type;   // type of the two operands of reduceThread()
    typedef uint result_type;  // type of the accumulated distance

    result_type mySum;         // this thread's running sum

    // Starts from a zero sum.
    __device__ __forceinline__ NormL1()
        : mySum(0)
    {
    }

    // Adds |val1 - val2| to the running sum.
    // __sad(x, y, z) evaluates |x - y| + z, so the update is one intrinsic call.
    __device__ __forceinline__ void reduceThread(value_type val1, value_type val2)
    {
        const result_type accumulated = __sad(val1, val2, mySum);
        mySum = accumulated;
    }

    // Forwards smem, the running sum and tid to warpReduce() with
    // plus<result_type> as the combining operation.
    // NOTE(review): the size/layout expected of smem is defined by warpReduce
    // (warp/reduce.hpp) and cannot be seen from here.
    __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
    {
        plus<result_type> sumOp;
        warpReduce(smem, mySum, tid, sumOp);
    }

    // Same as reduceWarp(), but goes through blockReduce<THREAD_DIM>().
    // NOTE(review): THREAD_DIM is only forwarded as a template argument;
    // presumably it is the number of participating threads - confirm against
    // block/reduce.hpp.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
    {
        plus<result_type> sumOp;
        blockReduce<THREAD_DIM>(smem, mySum, tid, sumOp);
    }

    // Reads the accumulated sum; no post-processing is applied.
    __device__ __forceinline__ operator result_type() const
    {
        return mySum;
    }
};
// L1 (sum of absolute differences) accumulator, float specialization.
//
// Operands and the running sum are both single-precision floats. Each thread
// accumulates with reduceThread(); reduceWarp()/reduceBlock() pass the
// per-thread sum to the warpReduce / blockReduce helpers with an addition
// functor. The result is read through the conversion operator.
template <> struct NormL1<float>
{
    typedef float value_type;   // type of the two operands of reduceThread()
    typedef float result_type;  // type of the accumulated distance

    result_type mySum;          // this thread's running sum

    // Starts from a zero sum.
    __device__ __forceinline__ NormL1()
        : mySum(0.0f)
    {
    }

    // Adds |val1 - val2| to the running sum.
    __device__ __forceinline__ void reduceThread(value_type val1, value_type val2)
    {
        const value_type delta = val1 - val2;
        mySum += ::fabsf(delta);
    }

    // Forwards smem, the running sum and tid to warpReduce() with
    // plus<result_type> as the combining operation.
    // NOTE(review): the size/layout expected of smem is defined by warpReduce
    // (warp/reduce.hpp) and cannot be seen from here.
    __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
    {
        plus<result_type> sumOp;
        warpReduce(smem, mySum, tid, sumOp);
    }

    // Same as reduceWarp(), but goes through blockReduce<THREAD_DIM>().
    // NOTE(review): THREAD_DIM is only forwarded as a template argument;
    // presumably it is the number of participating threads - confirm against
    // block/reduce.hpp.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
    {
        plus<result_type> sumOp;
        blockReduce<THREAD_DIM>(smem, mySum, tid, sumOp);
    }

    // Reads the accumulated sum; no post-processing is applied.
    __device__ __forceinline__ operator result_type() const
    {
        return mySum;
    }
};
// L2 (Euclidean) distance accumulator for float operands.
//
// The running sum holds the sum of SQUARED differences; the square root is
// taken only when the object is converted to result_type. Consequently
// reduceWarp()/reduceBlock() combine squared sums, not distances.
struct NormL2
{
    typedef float value_type;   // type of the two operands of reduceThread()
    typedef float result_type;  // type of the accumulated value / final distance

    result_type mySum;          // this thread's running sum of squared differences

    // Starts from a zero sum.
    __device__ __forceinline__ NormL2()
        : mySum(0.0f)
    {
    }

    // Adds (val1 - val2)^2 to the running sum.
    __device__ __forceinline__ void reduceThread(value_type val1, value_type val2)
    {
        const float delta = val1 - val2;
        mySum += delta * delta;
    }

    // Forwards smem, the running sum and tid to warpReduce() with
    // plus<result_type> as the combining operation.
    // NOTE(review): the size/layout expected of smem is defined by warpReduce
    // (warp/reduce.hpp) and cannot be seen from here.
    __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
    {
        plus<result_type> sumOp;
        warpReduce(smem, mySum, tid, sumOp);
    }

    // Same as reduceWarp(), but goes through blockReduce<THREAD_DIM>().
    // NOTE(review): THREAD_DIM is only forwarded as a template argument;
    // presumably it is the number of participating threads - confirm against
    // block/reduce.hpp.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
    {
        plus<result_type> sumOp;
        blockReduce<THREAD_DIM>(smem, mySum, tid, sumOp);
    }

    // Returns the square root of the accumulated squared sum.
    __device__ __forceinline__ operator result_type() const
    {
        const result_type root = ::sqrtf(mySum);
        return root;
    }
};
// Hamming distance accumulator over int operands.
//
// Each reduceThread() call counts the bit positions in which its two operands
// differ and adds that count to the running sum. reduceWarp()/reduceBlock()
// pass the per-thread sum to the warpReduce / blockReduce helpers with an
// addition functor. The result is read through the conversion operator.
struct NormHamming
{
    typedef int value_type;   // type of the two operands of reduceThread()
    typedef int result_type;  // type of the accumulated bit count

    result_type mySum;        // this thread's running count of differing bits

    // Starts from a zero count.
    __device__ __forceinline__ NormHamming()
        : mySum(0)
    {
    }

    // Adds the number of set bits in (val1 XOR val2) to the running sum.
    __device__ __forceinline__ void reduceThread(value_type val1, value_type val2)
    {
        const value_type differingBits = val1 ^ val2;
        mySum += __popc(differingBits);
    }

    // Forwards smem, the running sum and tid to warpReduce() with
    // plus<result_type> as the combining operation.
    // NOTE(review): the size/layout expected of smem is defined by warpReduce
    // (warp/reduce.hpp) and cannot be seen from here.
    __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
    {
        plus<result_type> sumOp;
        warpReduce(smem, mySum, tid, sumOp);
    }

    // Same as reduceWarp(), but goes through blockReduce<THREAD_DIM>().
    // NOTE(review): THREAD_DIM is only forwarded as a template argument;
    // presumably it is the number of participating threads - confirm against
    // block/reduce.hpp.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
    {
        plus<result_type> sumOp;
        blockReduce<THREAD_DIM>(smem, mySum, tid, sumOp);
    }

    // Reads the accumulated bit count; no post-processing is applied.
    __device__ __forceinline__ operator result_type() const
    {
        return mySum;
    }
};
}}
#endif