smem 74 modules/core/include/opencv2/core/cuda/detail/reduce.hpp static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid) smem 76 modules/core/include/opencv2/core/cuda/detail/reduce.hpp thrust::get<I>(smem)[tid] = thrust::get<I>(val); smem 78 modules/core/include/opencv2/core/cuda/detail/reduce.hpp For<I + 1, N>::loadToSmem(smem, val, tid); smem 81 modules/core/include/opencv2/core/cuda/detail/reduce.hpp static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid) smem 83 modules/core/include/opencv2/core/cuda/detail/reduce.hpp thrust::get<I>(val) = thrust::get<I>(smem)[tid]; smem 85 modules/core/include/opencv2/core/cuda/detail/reduce.hpp For<I + 1, N>::loadFromSmem(smem, val, tid); smem 89 modules/core/include/opencv2/core/cuda/detail/reduce.hpp static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op) smem 91 modules/core/include/opencv2/core/cuda/detail/reduce.hpp typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta]; smem 92 modules/core/include/opencv2/core/cuda/detail/reduce.hpp thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg); smem 94 modules/core/include/opencv2/core/cuda/detail/reduce.hpp For<I + 1, N>::merge(smem, val, tid, delta, op); smem 128 modules/core/include/opencv2/core/cuda/detail/reduce.hpp __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid) smem 130 modules/core/include/opencv2/core/cuda/detail/reduce.hpp smem[tid] = val; smem 133 modules/core/include/opencv2/core/cuda/detail/reduce.hpp __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid) smem 135 modules/core/include/opencv2/core/cuda/detail/reduce.hpp val = smem[tid]; smem 139 modules/core/include/opencv2/core/cuda/detail/reduce.hpp __device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 143 modules/core/include/opencv2/core/cuda/detail/reduce.hpp For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid); smem 147 modules/core/include/opencv2/core/cuda/detail/reduce.hpp __device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 151 modules/core/include/opencv2/core/cuda/detail/reduce.hpp For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid); smem 155 modules/core/include/opencv2/core/cuda/detail/reduce.hpp __device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op) smem 157 modules/core/include/opencv2/core/cuda/detail/reduce.hpp T reg = smem[tid + delta]; smem 158 modules/core/include/opencv2/core/cuda/detail/reduce.hpp smem[tid] = val = op(val, reg); smem 169 modules/core/include/opencv2/core/cuda/detail/reduce.hpp __device__ __forceinline__ void merge(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 175 modules/core/include/opencv2/core/cuda/detail/reduce.hpp For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op); smem 190 modules/core/include/opencv2/core/cuda/detail/reduce.hpp static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op) smem 192 modules/core/include/opencv2/core/cuda/detail/reduce.hpp loadToSmem(smem, val, tid); smem 199 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 1024, op); smem 206 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 512, op); smem 213 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 256, op); smem 220 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 128, op); smem 227 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 64, op); smem 234 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 32, op); smem 239 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 16, op); smem 240 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 8, op); smem 241 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 4, op); smem 242 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 2, op); smem 243 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 1, op); smem 256 modules/core/include/opencv2/core/cuda/detail/reduce.hpp static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op) smem 258 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, I, op); smem 259 modules/core/include/opencv2/core/cuda/detail/reduce.hpp Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 276 modules/core/include/opencv2/core/cuda/detail/reduce.hpp static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op) smem 279 modules/core/include/opencv2/core/cuda/detail/reduce.hpp (void) smem; smem 284 modules/core/include/opencv2/core/cuda/detail/reduce.hpp loadToSmem(smem, val, tid); smem 287 modules/core/include/opencv2/core/cuda/detail/reduce.hpp Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 297 modules/core/include/opencv2/core/cuda/detail/reduce.hpp static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op) smem 305 modules/core/include/opencv2/core/cuda/detail/reduce.hpp loadToSmem(smem, val, tid / 32); smem 307 modules/core/include/opencv2/core/cuda/detail/reduce.hpp loadToSmem(smem, val, tid); smem 310 modules/core/include/opencv2/core/cuda/detail/reduce.hpp Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 315 modules/core/include/opencv2/core/cuda/detail/reduce.hpp loadToSmem(smem, val, tid / 32); smem 320 modules/core/include/opencv2/core/cuda/detail/reduce.hpp loadFromSmem(smem, val, tid); smem 327 modules/core/include/opencv2/core/cuda/detail/reduce.hpp Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 74 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid) smem 76 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp thrust::get<I>(smem)[tid] = thrust::get<I>(data); smem 78 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp For<I + 1, N>::loadToSmem(smem, data, tid); smem 81 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid) smem 83 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp thrust::get<I>(data) = thrust::get<I>(smem)[tid]; smem 85 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp For<I + 1, N>::loadFromSmem(smem, data, tid); smem 171 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid) smem 173 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp smem[tid] = data; smem 176 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid) smem 178 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp data = smem[tid]; smem 182 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp __device__ __forceinline__ void loadToSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem, smem 186 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid); smem 190 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp __device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem, smem 194 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid); smem 59 modules/core/include/opencv2/core/cuda/reduce.hpp __device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op) smem 61 modules/core/include/opencv2/core/cuda/reduce.hpp reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op); smem 67 modules/core/include/opencv2/core/cuda/reduce.hpp __device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 75 modules/core/include/opencv2/core/cuda/reduce.hpp const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op); smem 70 modules/core/include/opencv2/core/cuda/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) smem 72 modules/core/include/opencv2/core/cuda/vec_distance.hpp reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); smem 94 modules/core/include/opencv2/core/cuda/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) smem 96 modules/core/include/opencv2/core/cuda/vec_distance.hpp reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); smem 120 modules/core/include/opencv2/core/cuda/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) smem 122 modules/core/include/opencv2/core/cuda/vec_distance.hpp reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); smem 145 modules/core/include/opencv2/core/cuda/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) smem 147 modules/core/include/opencv2/core/cuda/vec_distance.hpp reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); smem 160 modules/core/include/opencv2/core/cuda/vec_distance.hpp __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) smem 173 modules/core/include/opencv2/core/cuda/vec_distance.hpp dist.reduceAll<THREAD_DIM>(smem, tid); smem 178 modules/core/include/opencv2/core/cuda/vec_distance.hpp __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid) smem 182 modules/core/include/opencv2/core/cuda/vec_distance.hpp dist.reduceAll<THREAD_DIM>(smem, tid); smem 194 modules/core/include/opencv2/core/cuda/vec_distance.hpp __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const smem 196 modules/core/include/opencv2/core/cuda/vec_distance.hpp calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid); smem 205 modules/core/include/opencv2/core/cuda/vec_distance.hpp template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid) smem 208 modules/core/include/opencv2/core/cuda/vec_distance.hpp smem[glob_tid] = vec1[glob_tid]; smem 215 modules/core/include/opencv2/core/cuda/vec_distance.hpp *vec1ValsPtr++ = smem[i]; smem 221 modules/core/include/opencv2/core/cuda/vec_distance.hpp __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const smem 223 modules/core/include/opencv2/core/cuda/vec_distance.hpp calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid); smem 74 modules/cudafilters/src/cuda/column_filter.hpp __shared__ sum_t smem[(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Y][BLOCK_DIM_X]; smem 90 modules/cudafilters/src/cuda/column_filter.hpp smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, x)); smem 97 modules/cudafilters/src/cuda/column_filter.hpp smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_low(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, src_col, src.step)); smem 105 modules/cudafilters/src/cuda/column_filter.hpp smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + j * BLOCK_DIM_Y, x)); smem 110 modules/cudafilters/src/cuda/column_filter.hpp smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, x)); smem 117 modules/cudafilters/src/cuda/column_filter.hpp smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + j * BLOCK_DIM_Y, src_col, src.step)); smem 122 modules/cudafilters/src/cuda/column_filter.hpp smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, src_col, src.step)); smem 138 modules/cudafilters/src/cuda/column_filter.hpp sum = sum + smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y - anchor + k][threadIdx.x] * c_kernel[k]; smem 74 modules/cudafilters/src/cuda/row_filter.hpp __shared__ sum_t smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X]; smem 90 modules/cudafilters/src/cuda/row_filter.hpp smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart - (HALO_SIZE - j) * BLOCK_DIM_X]); smem 97 modules/cudafilters/src/cuda/row_filter.hpp smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_low(xStart - (HALO_SIZE - j) * BLOCK_DIM_X, src_row)); smem 105 modules/cudafilters/src/cuda/row_filter.hpp smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + j * BLOCK_DIM_X]); smem 110 modules/cudafilters/src/cuda/row_filter.hpp smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X]); smem 117 modules/cudafilters/src/cuda/row_filter.hpp smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + j * BLOCK_DIM_X, src_row)); smem 122 modules/cudafilters/src/cuda/row_filter.hpp smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X, src_row)); smem 138 modules/cudafilters/src/cuda/row_filter.hpp sum = sum + smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X - anchor + k] * c_kernel[k]; smem 97 modules/cudaobjdetect/src/cuda/lbp.hpp Emulation::smem::atomicMin(labels + id, p); smem 101 modules/cudaobjdetect/src/cuda/lbp.hpp Emulation::smem::atomicMin(labels + tid, q); smem 83 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid) smem 85 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp get<I>(smem)[tid] = get<I>(val); smem 87 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp For<I + 1, N>::loadToSmem(smem, val, tid); smem 91 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ static void loadFromSmem(const PointerTuple& smem, const ValTuple& val, uint tid) smem 93 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp get<I>(val) = get<I>(smem)[tid]; smem 95 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp For<I + 1, N>::loadFromSmem(smem, val, tid); smem 99 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op) smem 101 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta]; smem 102 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg); smem 104 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp For<I + 1, N>::merge(smem, val, tid, delta, op); smem 146 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, uint tid) smem 148 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp smem[tid] = val; smem 152 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, uint tid) smem 154 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp val = smem[tid]; smem 159 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ __forceinline__ void loadToSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 163 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid); smem 168 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ __forceinline__ void loadFromSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 172 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid); smem 178 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ __forceinline__ void merge(volatile T* smem, T& val, uint tid, uint delta, const Op& op) smem 180 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp T reg = smem[tid + delta]; smem 181 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp smem[tid] = val = op(val, reg); smem 187 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ __forceinline__ void merge(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 193 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op); smem 222 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ static void reduce(Pointer smem, Reference val, uint tid, Op op) smem 224 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp loadToSmem(smem, val, tid); smem 231 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 1024, op); smem 238 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 512, op); smem 245 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 256, op); smem 252 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 128, op); smem 259 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 64, op); smem 266 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 32, op); smem 271 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 16, op); smem 272 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 8, op); smem 273 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 4, op); smem 274 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 2, op); smem 275 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 1, op); smem 284 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ static void loop(Pointer smem, Reference val, uint tid, Op op) smem 286 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, I, op); smem 287 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 317 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ static void reduce(Pointer smem, Reference val, uint tid, Op op) smem 320 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp (void) smem; smem 325 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp loadToSmem(smem, val, tid); smem 328 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 340 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ static void reduce(Pointer smem, Reference val, uint tid, Op op) smem 348 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp loadToSmem(smem, val, tid / 32); smem 350 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp loadToSmem(smem, val, tid); smem 353 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 358 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp loadToSmem(smem, val, tid / 32); smem 363 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp loadFromSmem(smem, val, tid); smem 370 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 82 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid) smem 84 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp get<I>(smem)[tid] = get<I>(data); smem 86 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp For<I + 1, N>::loadToSmem(smem, data, tid); smem 90 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp __device__ static void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid) smem 92 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp get<I>(data) = get<I>(smem)[tid]; smem 94 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp For<I + 1, N>::loadFromSmem(smem, data, tid); smem 152 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, uint tid) smem 154 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp smem[tid] = data; smem 158 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, uint tid) smem 160 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp data = smem[tid]; smem 165 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp __device__ __forceinline__ void loadToSmem(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem, smem 169 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid); smem 174 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp __device__ __forceinline__ void loadFromSmem(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem, smem 178 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid); smem 63 modules/cudev/include/opencv2/cudev/block/reduce.hpp __device__ __forceinline__ void blockReduce(volatile T* smem, T& val, uint tid, const Op& op) smem 65 modules/cudev/include/opencv2/cudev/block/reduce.hpp block_reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op); smem 72 modules/cudev/include/opencv2/cudev/block/reduce.hpp __device__ __forceinline__ void blockReduce(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 80 modules/cudev/include/opencv2/cudev/block/reduce.hpp const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op); smem 58 modules/cudev/include/opencv2/cudev/block/scan.hpp __device__ T blockScanInclusive(T data, volatile T* smem, uint tid) smem 63 modules/cudev/include/opencv2/cudev/block/scan.hpp T warpResult = warpScanInclusive(data, smem, tid); smem 71 modules/cudev/include/opencv2/cudev/block/scan.hpp smem[tid >> LOG_WARP_SIZE] = warpResult; smem 79 modules/cudev/include/opencv2/cudev/block/scan.hpp T val = smem[tid]; smem 82 modules/cudev/include/opencv2/cudev/block/scan.hpp smem[tid] = warpScanExclusive(val, smem, tid); smem 88 modules/cudev/include/opencv2/cudev/block/scan.hpp return warpResult + smem[tid >> LOG_WARP_SIZE]; smem 92 modules/cudev/include/opencv2/cudev/block/scan.hpp return warpScanInclusive(data, smem, tid); smem 97 modules/cudev/include/opencv2/cudev/block/scan.hpp __device__ __forceinline__ T blockScanExclusive(T data, volatile T* smem, uint tid) smem 99 modules/cudev/include/opencv2/cudev/block/scan.hpp return blockScanInclusive<THREADS_NUM>(data, smem, tid) - data; smem 75 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) smem 77 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp warpReduce(smem, mySum, tid, plus<result_type>()); smem 80 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) smem 82 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); smem 104 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) smem 106 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp warpReduce(smem, mySum, tid, plus<result_type>()); smem 109 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) smem 111 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); smem 137 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) smem 139 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp warpReduce(smem, mySum, tid, plus<result_type>()); smem 142 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) smem 144 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); smem 169 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) smem 171 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp warpReduce(smem, mySum, tid, plus<result_type>()); smem 174 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) smem 176 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); smem 60 modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp __shared__ ResType smem[BIN_COUNT]; smem 66 modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp smem[i] = 0; smem 77 modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp atomicAdd(&smem[data % BIN_COUNT], 1); smem 86 modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp const ResType histVal = smem[i]; smem 63 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp __shared__ D smem[NUM_SCAN_THREADS * 2]; smem 84 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x); smem 105 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp __shared__ D smem[NUM_SCAN_THREADS * 2]; smem 127 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x); smem 481 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp __shared__ T smem[32][32]; smem 484 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp volatile T* smem_row = &smem[0][0] + 64 * threadIdx.y; smem 505 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + 0][threadIdx.x] = 0.0f; smem 506 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + 8][threadIdx.x] = 0.0f; smem 507 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + 16][threadIdx.x] = 0.0f; smem 508 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + 24][threadIdx.x] = 0.0f; smem 515 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + i * 8][threadIdx.x] = integral(curRowOffs + i * 8, x); smem 523 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp curElems[0] = smem[threadIdx.x][threadIdx.y ]; smem 524 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp curElems[1] = smem[threadIdx.x][threadIdx.y + 8]; smem 525 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp curElems[2] = smem[threadIdx.x][threadIdx.y + 16]; smem 526 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp curElems[3] = smem[threadIdx.x][threadIdx.y + 24]; smem 550 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y ][threadIdx.x] = curElems[0]; smem 551 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + 8][threadIdx.x] = curElems[1]; smem 552 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + 16][threadIdx.x] = curElems[2]; smem 553 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + 24][threadIdx.x] = curElems[3]; smem 565 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp integral(curRowOffs + i * 8, x) = smem[threadIdx.x][threadIdx.y + i * 8]; smem 68 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp __shared__ work_type smem[256 + 4]; smem 86 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp smem[2 + threadIdx.x] = sum; smem 101 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp smem[threadIdx.x] = sum; smem 116 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp smem[4 + threadIdx.x] = sum; smem 130 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp smem[2 + threadIdx.x] = sum; smem 145 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp smem[threadIdx.x] = sum; smem 160 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp smem[4 + threadIdx.x] = sum; smem 172 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp sum = 0.0625f * smem[2 + tid2 - 2]; smem 173 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp sum = sum + 0.25f * smem[2 + tid2 - 1]; smem 174 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp sum = sum + 0.375f * smem[2 + tid2 ]; smem 175 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp sum = sum + 0.25f * smem[2 + tid2 + 1]; smem 176 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp sum = sum + 0.0625f * smem[2 + tid2 + 2]; smem 279 modules/cudev/include/opencv2/cudev/grid/detail/reduce.hpp __shared__ work_elem_type smem[BLOCK_SIZE * cn]; smem 281 modules/cudev/include/opencv2/cudev/grid/detail/reduce.hpp blockReduce<BLOCK_SIZE>(Unroll<cn>::template smem<BLOCK_SIZE>(smem), Unroll<cn>::res(sum), tid, Unroll<cn>::op(plus<work_elem_type>())); smem 339 modules/cudev/include/opencv2/cudev/grid/detail/reduce.hpp __shared__ work_type smem[BLOCK_SIZE]; smem 343 modules/cudev/include/opencv2/cudev/grid/detail/reduce.hpp blockReduce<BLOCK_SIZE>(smem, myval, tid, op); smem 61 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp __device__ __forceinline__ static void call(work_elem_type smem[1][BLOCK_SIZE], work_type& myVal) smem 64 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp blockReduce<BLOCK_SIZE>(smem[0], myVal, threadIdx.x, op); smem 70 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp __device__ __forceinline__ static void call(work_elem_type smem[2][BLOCK_SIZE], work_type& myVal) smem 73 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1]), tie(myVal.x, myVal.y), threadIdx.x, make_tuple(op, op)); smem 79 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp __device__ __forceinline__ static void call(work_elem_type smem[3][BLOCK_SIZE], work_type& myVal) smem 82 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2]), tie(myVal.x, myVal.y, myVal.z), threadIdx.x, make_tuple(op, op, op)); smem 88 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp __device__ __forceinline__ static void call(work_elem_type smem[4][BLOCK_SIZE], work_type& myVal) smem 91 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2], smem[3]), tie(myVal.x, myVal.y, myVal.z, myVal.w), threadIdx.x, make_tuple(op, op, op, op)); smem 102 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp __shared__ work_elem_type smem[cn][BLOCK_SIZE]; smem 118 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, cn>::call(smem, myVal); smem 62 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_row.hpp __shared__ work_type smem[BLOCK_SIZE_X * BLOCK_SIZE_Y]; smem 81 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_row.hpp smem[threadIdx.x * BLOCK_SIZE_Y + threadIdx.y] = myVal; smem 85 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_row.hpp volatile work_type* srow = smem + threadIdx.y * BLOCK_SIZE_X; smem 96 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_row.hpp dst[x] = saturate_cast<ResType>(Reductor::result(smem[threadIdx.x * BLOCK_SIZE_X], rows)); smem 81 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid) smem 83 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp get<I>(smem)[tid] = get<I>(val); smem 85 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp For<I + 1, N>::loadToSmem(smem, val, tid); smem 89 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op) smem 91 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta]; smem 92 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg); smem 94 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp For<I + 1, N>::merge(smem, val, tid, delta, op); smem 132 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, uint tid) smem 134 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp smem[tid] = val; smem 139 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp __device__ __forceinline__ void loadToSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 143 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid); smem 149 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp __device__ __forceinline__ void merge(volatile T* smem, T& val, uint tid, uint delta, const Op& op) smem 151 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp T reg = smem[tid + delta]; smem 152 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp smem[tid] = val = op(val, reg); smem 158 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp __device__ __forceinline__ void merge(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 164 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op); smem 193 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp __device__ static void reduce(Pointer smem, Reference val, uint tid, Op op) smem 196 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp (void) smem; smem 205 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp loadToSmem(smem, val, tid); smem 209 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp merge(smem, val, tid, 16, op); smem 210 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp merge(smem, val, tid, 8, op); smem 211 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp merge(smem, val, tid, 4, op); smem 212 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp merge(smem, val, tid, 2, op); smem 213 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp merge(smem, val, tid, 1, op); smem 80 modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid) smem 82 modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp get<I>(smem)[tid] = get<I>(data); smem 84 modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp For<I + 1, N>::loadToSmem(smem, data, tid); smem 137 modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, uint tid) smem 139 modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp smem[tid] = data; smem 144 modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp __device__ __forceinline__ void loadToSmem(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem, smem 148 modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid); smem 62 modules/cudev/include/opencv2/cudev/warp/reduce.hpp __device__ __forceinline__ void warpReduce(volatile T* smem, T& val, uint tid, const Op& op) smem 64 modules/cudev/include/opencv2/cudev/warp/reduce.hpp warp_reduce_detail::WarpReductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op); smem 70 modules/cudev/include/opencv2/cudev/warp/reduce.hpp __device__ __forceinline__ void warpReduce(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 78 modules/cudev/include/opencv2/cudev/warp/reduce.hpp const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op); smem 59 modules/cudev/include/opencv2/cudev/warp/scan.hpp __device__ T warpScanInclusive(T data, volatile T* smem, uint tid) smem 62 modules/cudev/include/opencv2/cudev/warp/scan.hpp (void) smem; smem 79 modules/cudev/include/opencv2/cudev/warp/scan.hpp smem[pos] = 0; smem 82 modules/cudev/include/opencv2/cudev/warp/scan.hpp smem[pos] = data; smem 84 modules/cudev/include/opencv2/cudev/warp/scan.hpp smem[pos] += smem[pos - 1]; smem 85 modules/cudev/include/opencv2/cudev/warp/scan.hpp smem[pos] += smem[pos - 2]; smem 86 modules/cudev/include/opencv2/cudev/warp/scan.hpp smem[pos] += smem[pos - 4]; smem 87 modules/cudev/include/opencv2/cudev/warp/scan.hpp smem[pos] += smem[pos - 8]; smem 88 modules/cudev/include/opencv2/cudev/warp/scan.hpp smem[pos] += smem[pos - 16]; smem 90 modules/cudev/include/opencv2/cudev/warp/scan.hpp return smem[pos]; smem 95 modules/cudev/include/opencv2/cudev/warp/scan.hpp __device__ __forceinline__ T warpScanExclusive(T data, volatile T* smem, uint tid) smem 97 modules/cudev/include/opencv2/cudev/warp/scan.hpp return warpScanInclusive(data, smem, tid) - data; smem 1161 modules/objdetect/src/hog.cpp int smem = (hists_size + final_hists_size) * blocks_in_group; smem 1177 modules/objdetect/src/hog.cpp idx = k.set(idx, (void*)NULL, (size_t)smem);