smem 74 modules/core/include/opencv2/core/cuda/detail/reduce.hpp static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid) smem 76 modules/core/include/opencv2/core/cuda/detail/reduce.hpp thrust::get<I>(smem)[tid] = thrust::get<I>(val); smem 78 modules/core/include/opencv2/core/cuda/detail/reduce.hpp For<I + 1, N>::loadToSmem(smem, val, tid); smem 81 modules/core/include/opencv2/core/cuda/detail/reduce.hpp static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid) smem 83 modules/core/include/opencv2/core/cuda/detail/reduce.hpp thrust::get<I>(val) = thrust::get<I>(smem)[tid]; smem 85 modules/core/include/opencv2/core/cuda/detail/reduce.hpp For<I + 1, N>::loadFromSmem(smem, val, tid); smem 89 modules/core/include/opencv2/core/cuda/detail/reduce.hpp static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op) smem 91 modules/core/include/opencv2/core/cuda/detail/reduce.hpp typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta]; smem 92 modules/core/include/opencv2/core/cuda/detail/reduce.hpp thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg); smem 94 modules/core/include/opencv2/core/cuda/detail/reduce.hpp For<I + 1, N>::merge(smem, val, tid, delta, op); smem 128 modules/core/include/opencv2/core/cuda/detail/reduce.hpp __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid) smem 130 modules/core/include/opencv2/core/cuda/detail/reduce.hpp smem[tid] = val; smem 133 modules/core/include/opencv2/core/cuda/detail/reduce.hpp __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid) smem 135 modules/core/include/opencv2/core/cuda/detail/reduce.hpp val = smem[tid]; smem 139 modules/core/include/opencv2/core/cuda/detail/reduce.hpp __device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 143 modules/core/include/opencv2/core/cuda/detail/reduce.hpp For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid); smem 147 modules/core/include/opencv2/core/cuda/detail/reduce.hpp __device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 151 modules/core/include/opencv2/core/cuda/detail/reduce.hpp For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid); smem 155 modules/core/include/opencv2/core/cuda/detail/reduce.hpp __device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op) smem 157 modules/core/include/opencv2/core/cuda/detail/reduce.hpp T reg = smem[tid + delta]; smem 158 modules/core/include/opencv2/core/cuda/detail/reduce.hpp smem[tid] = val = op(val, reg); smem 169 modules/core/include/opencv2/core/cuda/detail/reduce.hpp __device__ __forceinline__ void merge(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 175 modules/core/include/opencv2/core/cuda/detail/reduce.hpp For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op); smem 190 modules/core/include/opencv2/core/cuda/detail/reduce.hpp static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op) smem 192 modules/core/include/opencv2/core/cuda/detail/reduce.hpp loadToSmem(smem, val, tid); smem 199 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 1024, op); smem 206 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 512, op); smem 213 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 256, op); smem 220 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 128, op); smem 227 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 64, op); smem 234 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 32, op); smem 239 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 16, op); smem 240 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 8, op); smem 241 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 4, op); smem 242 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 2, op); smem 243 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, 1, op); smem 256 modules/core/include/opencv2/core/cuda/detail/reduce.hpp static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op) smem 258 modules/core/include/opencv2/core/cuda/detail/reduce.hpp merge(smem, val, tid, I, op); smem 259 modules/core/include/opencv2/core/cuda/detail/reduce.hpp Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 276 modules/core/include/opencv2/core/cuda/detail/reduce.hpp static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op) smem 279 modules/core/include/opencv2/core/cuda/detail/reduce.hpp (void) smem; smem 284 modules/core/include/opencv2/core/cuda/detail/reduce.hpp loadToSmem(smem, val, tid); smem 287 modules/core/include/opencv2/core/cuda/detail/reduce.hpp Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 297 modules/core/include/opencv2/core/cuda/detail/reduce.hpp static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op) smem 305 modules/core/include/opencv2/core/cuda/detail/reduce.hpp loadToSmem(smem, val, tid / 32); smem 307 modules/core/include/opencv2/core/cuda/detail/reduce.hpp loadToSmem(smem, val, tid); smem 310 modules/core/include/opencv2/core/cuda/detail/reduce.hpp Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 315 modules/core/include/opencv2/core/cuda/detail/reduce.hpp loadToSmem(smem, val, tid / 32); smem 320 modules/core/include/opencv2/core/cuda/detail/reduce.hpp loadFromSmem(smem, val, tid); smem 327 modules/core/include/opencv2/core/cuda/detail/reduce.hpp Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 74 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid) smem 76 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp thrust::get<I>(smem)[tid] = thrust::get<I>(data); smem 78 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp For<I + 1, N>::loadToSmem(smem, data, tid); smem 81 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid) smem 83 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp thrust::get<I>(data) = thrust::get<I>(smem)[tid]; smem 85 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp For<I + 1, N>::loadFromSmem(smem, data, tid); smem 171 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid) smem 173 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp smem[tid] = data; smem 176 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid) smem 178 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp data = smem[tid]; smem 182 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp __device__ __forceinline__ void loadToSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem, smem 186 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid); smem 190 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp __device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem, smem 194 modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid); smem 59 modules/core/include/opencv2/core/cuda/reduce.hpp __device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op) smem 61 modules/core/include/opencv2/core/cuda/reduce.hpp reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op); smem 67 modules/core/include/opencv2/core/cuda/reduce.hpp __device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 75 modules/core/include/opencv2/core/cuda/reduce.hpp const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op); smem 70 modules/core/include/opencv2/core/cuda/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) smem 72 modules/core/include/opencv2/core/cuda/vec_distance.hpp reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); smem 94 modules/core/include/opencv2/core/cuda/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) smem 96 modules/core/include/opencv2/core/cuda/vec_distance.hpp reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); smem 120 modules/core/include/opencv2/core/cuda/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) smem 122 modules/core/include/opencv2/core/cuda/vec_distance.hpp reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); smem 145 modules/core/include/opencv2/core/cuda/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) smem 147 modules/core/include/opencv2/core/cuda/vec_distance.hpp reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); smem 160 modules/core/include/opencv2/core/cuda/vec_distance.hpp __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) smem 173 modules/core/include/opencv2/core/cuda/vec_distance.hpp dist.reduceAll<THREAD_DIM>(smem, tid); smem 178 modules/core/include/opencv2/core/cuda/vec_distance.hpp __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid) smem 182 modules/core/include/opencv2/core/cuda/vec_distance.hpp dist.reduceAll<THREAD_DIM>(smem, tid); smem 194 modules/core/include/opencv2/core/cuda/vec_distance.hpp __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const smem 196 modules/core/include/opencv2/core/cuda/vec_distance.hpp calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid); smem 205 modules/core/include/opencv2/core/cuda/vec_distance.hpp template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid) smem 208 modules/core/include/opencv2/core/cuda/vec_distance.hpp smem[glob_tid] = vec1[glob_tid]; smem 215 modules/core/include/opencv2/core/cuda/vec_distance.hpp *vec1ValsPtr++ = smem[i]; smem 221 modules/core/include/opencv2/core/cuda/vec_distance.hpp __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const smem 223 modules/core/include/opencv2/core/cuda/vec_distance.hpp calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid); smem 34 modules/core/src/opencl/fft.cl void butterfly2(CT a0, CT a1, __local CT* smem, __global const CT* twiddles, smem 41 modules/core/src/opencl/fft.cl smem[dst_ind] = a0 + a1; smem 42 modules/core/src/opencl/fft.cl smem[dst_ind+block_size] = a0 - a1; smem 46 modules/core/src/opencl/fft.cl void butterfly4(CT a0, CT a1, CT a2, CT a3, __local CT* smem, __global const CT* twiddles, smem 61 modules/core/src/opencl/fft.cl smem[dst_ind] = b0 + b1; smem 62 modules/core/src/opencl/fft.cl smem[dst_ind + block_size] = a2 + a3; smem 63 modules/core/src/opencl/fft.cl smem[dst_ind + 2*block_size] = b0 - b1; smem 64 modules/core/src/opencl/fft.cl smem[dst_ind + 3*block_size] = a2 - a3; smem 68 modules/core/src/opencl/fft.cl void butterfly3(CT a0, CT a1, CT a2, __local CT* smem, __global const CT* twiddles, smem 80 modules/core/src/opencl/fft.cl smem[dst_ind] = a0 + b1; smem 81 modules/core/src/opencl/fft.cl smem[dst_ind + block_size] = b0 + a2; smem 82 modules/core/src/opencl/fft.cl smem[dst_ind + 2*block_size] = b0 - a2; smem 86 modules/core/src/opencl/fft.cl void butterfly5(CT a0, CT a1, CT a2, CT a3, CT a4, __local CT* smem, __global const CT* twiddles, smem 96 modules/core/src/opencl/fft.cl __local CT* dst = smem + dst_ind; smem 127 modules/core/src/opencl/fft.cl void fft_radix2(__local CT* smem, __global const CT* twiddles, const int x, const int block_size, const int t) smem 133 modules/core/src/opencl/fft.cl a0 = smem[x]; smem 134 modules/core/src/opencl/fft.cl a1 = smem[x+t]; smem 140 modules/core/src/opencl/fft.cl butterfly2(a0, a1, smem, twiddles, x, block_size); smem 146 modules/core/src/opencl/fft.cl void fft_radix2_B2(__local CT* smem, __global const CT* twiddles, const int x1, const int block_size, const int t) smem 153 modules/core/src/opencl/fft.cl a0 = smem[x1]; a1 = smem[x1+t]; smem 154 modules/core/src/opencl/fft.cl a2 = smem[x2]; a3 = smem[x2+t]; smem 161 modules/core/src/opencl/fft.cl butterfly2(a0, a1, smem, twiddles, x1, block_size); smem 162 modules/core/src/opencl/fft.cl butterfly2(a2, a3, smem, twiddles, x2, block_size); smem 169 modules/core/src/opencl/fft.cl void fft_radix2_B3(__local CT* smem, __global const CT* twiddles, const int x1, const int block_size, const int t) smem 177 modules/core/src/opencl/fft.cl a0 = smem[x1]; a1 = smem[x1+t]; smem 178 modules/core/src/opencl/fft.cl a2 = smem[x2]; a3 = smem[x2+t]; smem 179 modules/core/src/opencl/fft.cl a4 = smem[x3]; a5 = smem[x3+t]; smem 186 modules/core/src/opencl/fft.cl butterfly2(a0, a1, smem, twiddles, x1, block_size); smem 187 modules/core/src/opencl/fft.cl butterfly2(a2, a3, smem, twiddles, x2, block_size); smem 188 modules/core/src/opencl/fft.cl butterfly2(a4, a5, smem, twiddles, x3, block_size); smem 195 modules/core/src/opencl/fft.cl void fft_radix2_B4(__local CT* smem, __global const CT* twiddles, const int x1, const int block_size, const int t) smem 205 modules/core/src/opencl/fft.cl a0 = smem[x1]; a1 = smem[x1+t]; smem 206 modules/core/src/opencl/fft.cl a2 = smem[x2]; a3 = smem[x2+t]; smem 207 modules/core/src/opencl/fft.cl a4 = smem[x3]; a5 = smem[x3+t]; smem 208 modules/core/src/opencl/fft.cl a6 = smem[x4]; a7 = smem[x4+t]; smem 215 modules/core/src/opencl/fft.cl butterfly2(a0, a1, smem, twiddles, x1, block_size); smem 216 modules/core/src/opencl/fft.cl butterfly2(a2, a3, smem, twiddles, x2, block_size); smem 217 modules/core/src/opencl/fft.cl butterfly2(a4, a5, smem, twiddles, x3, block_size); smem 218 modules/core/src/opencl/fft.cl butterfly2(a6, a7, smem, twiddles, x4, block_size); smem 225 modules/core/src/opencl/fft.cl void fft_radix2_B5(__local CT* smem, __global const CT* twiddles, const int x1, const int block_size, const int t) smem 236 modules/core/src/opencl/fft.cl a0 = smem[x1]; a1 = smem[x1+t]; smem 237 modules/core/src/opencl/fft.cl a2 = smem[x2]; a3 = smem[x2+t]; smem 238 modules/core/src/opencl/fft.cl a4 = smem[x3]; a5 = smem[x3+t]; smem 239 modules/core/src/opencl/fft.cl a6 = smem[x4]; a7 = smem[x4+t]; smem 240 modules/core/src/opencl/fft.cl a8 = smem[x5]; a9 = smem[x5+t]; smem 247 modules/core/src/opencl/fft.cl butterfly2(a0, a1, smem, twiddles, x1, block_size); smem 248 modules/core/src/opencl/fft.cl butterfly2(a2, a3, smem, twiddles, x2, block_size); smem 249 modules/core/src/opencl/fft.cl butterfly2(a4, a5, smem, twiddles, x3, block_size); smem 250 modules/core/src/opencl/fft.cl butterfly2(a6, a7, smem, twiddles, x4, block_size); smem 251 modules/core/src/opencl/fft.cl butterfly2(a8, a9, smem, twiddles, x5, block_size); smem 258 modules/core/src/opencl/fft.cl void fft_radix4(__local CT* smem, __global const CT* twiddles, const int x, const int block_size, const int t) smem 264 modules/core/src/opencl/fft.cl a0 = smem[x]; a1 = smem[x+t]; a2 = smem[x+2*t]; a3 = smem[x+3*t]; smem 270 modules/core/src/opencl/fft.cl butterfly4(a0, a1, a2, a3, smem, twiddles, x, block_size); smem 276 modules/core/src/opencl/fft.cl void fft_radix4_B2(__local CT* smem, __global const CT* twiddles, const int x1, const int block_size, const int t) smem 283 modules/core/src/opencl/fft.cl a0 = smem[x1]; a1 = smem[x1+t]; a2 = smem[x1+2*t]; a3 = smem[x1+3*t]; smem 284 modules/core/src/opencl/fft.cl a4 = smem[x2]; a5 = smem[x2+t]; a6 = smem[x2+2*t]; a7 = smem[x2+3*t]; smem 291 modules/core/src/opencl/fft.cl butterfly4(a0, a1, a2, a3, smem, twiddles, x1, block_size); smem 292 modules/core/src/opencl/fft.cl butterfly4(a4, a5, a6, a7, smem, twiddles, x2, block_size); smem 299 modules/core/src/opencl/fft.cl void fft_radix4_B3(__local CT* smem, __global const CT* twiddles, const int x1, const int block_size, const int t) smem 307 modules/core/src/opencl/fft.cl a0 = smem[x1]; a1 = smem[x1+t]; a2 = smem[x1+2*t]; a3 = smem[x1+3*t]; smem 308 modules/core/src/opencl/fft.cl a4 = smem[x2]; a5 = smem[x2+t]; a6 = smem[x2+2*t]; a7 = smem[x2+3*t]; smem 309 modules/core/src/opencl/fft.cl a8 = smem[x3]; a9 = smem[x3+t]; a10 = smem[x3+2*t]; a11 = smem[x3+3*t]; smem 316 modules/core/src/opencl/fft.cl butterfly4(a0, a1, a2, a3, smem, twiddles, x1, block_size); smem 317 modules/core/src/opencl/fft.cl butterfly4(a4, a5, a6, a7, smem, twiddles, x2, block_size); smem 318 modules/core/src/opencl/fft.cl butterfly4(a8, a9, a10, a11, smem, twiddles, x3, block_size); smem 325 modules/core/src/opencl/fft.cl void fft_radix8(__local CT* smem, __global const CT* twiddles, const int x, const int block_size, const int t) smem 334 modules/core/src/opencl/fft.cl a0 = smem[x]; smem 335 modules/core/src/opencl/fft.cl a1 = mul_complex(twiddles[k], smem[x + t]); smem 336 modules/core/src/opencl/fft.cl a2 = mul_complex(twiddles[k + block_size],smem[x+2*t]); smem 337 modules/core/src/opencl/fft.cl a3 = mul_complex(twiddles[k+2*block_size],smem[x+3*t]); smem 338 modules/core/src/opencl/fft.cl a4 = mul_complex(twiddles[k+3*block_size],smem[x+4*t]); smem 339 modules/core/src/opencl/fft.cl a5 = mul_complex(twiddles[k+4*block_size],smem[x+5*t]); smem 340 modules/core/src/opencl/fft.cl a6 = mul_complex(twiddles[k+5*block_size],smem[x+6*t]); smem 341 modules/core/src/opencl/fft.cl a7 = mul_complex(twiddles[k+6*block_size],smem[x+7*t]); smem 372 modules/core/src/opencl/fft.cl __local CT* dst = smem + dst_ind; smem 388 modules/core/src/opencl/fft.cl void fft_radix3(__local CT* smem, __global const CT* twiddles, const int x, const int block_size, const int t) smem 394 modules/core/src/opencl/fft.cl a0 = smem[x]; a1 = smem[x+t]; a2 = smem[x+2*t]; smem 400 modules/core/src/opencl/fft.cl butterfly3(a0, a1, a2, smem, twiddles, x, block_size); smem 406 modules/core/src/opencl/fft.cl void fft_radix3_B2(__local CT* smem, __global const CT* twiddles, const int x1, const int block_size, const int t) smem 413 modules/core/src/opencl/fft.cl a0 = smem[x1]; a1 = smem[x1+t]; a2 = smem[x1+2*t]; smem 414 modules/core/src/opencl/fft.cl a3 = smem[x2]; a4 = smem[x2+t]; a5 = smem[x2+2*t]; smem 421 modules/core/src/opencl/fft.cl butterfly3(a0, a1, a2, smem, twiddles, x1, block_size); smem 422 modules/core/src/opencl/fft.cl butterfly3(a3, a4, a5, smem, twiddles, x2, block_size); smem 429 modules/core/src/opencl/fft.cl void fft_radix3_B3(__local CT* smem, __global const CT* twiddles, const int x1, const int block_size, const int t) smem 437 modules/core/src/opencl/fft.cl a0 = smem[x1]; a1 = smem[x1+t]; a2 = smem[x1+2*t]; smem 438 modules/core/src/opencl/fft.cl a3 = smem[x2]; a4 = smem[x2+t]; a5 = smem[x2+2*t]; smem 439 modules/core/src/opencl/fft.cl a6 = smem[x3]; a7 = smem[x3+t]; a8 = smem[x3+2*t]; smem 446 modules/core/src/opencl/fft.cl butterfly3(a0, a1, a2, smem, twiddles, x1, block_size); smem 447 modules/core/src/opencl/fft.cl butterfly3(a3, a4, a5, smem, twiddles, x2, block_size); smem 448 modules/core/src/opencl/fft.cl butterfly3(a6, a7, a8, smem, twiddles, x3, block_size); smem 455 modules/core/src/opencl/fft.cl void fft_radix3_B4(__local CT* smem, __global const CT* twiddles, const int x1, const int block_size, const int t) smem 465 modules/core/src/opencl/fft.cl a0 = smem[x1]; a1 = smem[x1+t]; a2 = smem[x1+2*t]; smem 466 modules/core/src/opencl/fft.cl a3 = smem[x2]; a4 = smem[x2+t]; a5 = smem[x2+2*t]; smem 467 modules/core/src/opencl/fft.cl a6 = smem[x3]; a7 = smem[x3+t]; a8 = smem[x3+2*t]; smem 468 modules/core/src/opencl/fft.cl a9 = smem[x4]; a10 = smem[x4+t]; a11 = smem[x4+2*t]; smem 475 modules/core/src/opencl/fft.cl butterfly3(a0, a1, a2, smem, twiddles, x1, block_size); smem 476 modules/core/src/opencl/fft.cl butterfly3(a3, a4, a5, smem, twiddles, x2, block_size); smem 477 modules/core/src/opencl/fft.cl butterfly3(a6, a7, a8, smem, twiddles, x3, block_size); smem 478 modules/core/src/opencl/fft.cl butterfly3(a9, a10, a11, smem, twiddles, x4, block_size); smem 485 modules/core/src/opencl/fft.cl void fft_radix5(__local CT* smem, __global const CT* twiddles, const int x, const int block_size, const int t) smem 492 modules/core/src/opencl/fft.cl a0 = smem[x]; a1 = smem[x + t]; a2 = smem[x+2*t]; a3 = smem[x+3*t]; a4 = smem[x+4*t]; smem 498 modules/core/src/opencl/fft.cl butterfly5(a0, a1, a2, a3, a4, smem, twiddles, x, block_size); smem 504 modules/core/src/opencl/fft.cl void fft_radix5_B2(__local CT* smem, __global const CT* twiddles, const int x1, const int block_size, const int t) smem 511 modules/core/src/opencl/fft.cl a0 = smem[x1]; a1 = smem[x1 + t]; a2 = smem[x1+2*t]; a3 = smem[x1+3*t]; a4 = smem[x1+4*t]; smem 512 modules/core/src/opencl/fft.cl a5 = smem[x2]; a6 = smem[x2 + t]; a7 = smem[x2+2*t]; a8 = smem[x2+3*t]; a9 = smem[x2+4*t]; smem 519 modules/core/src/opencl/fft.cl butterfly5(a0, a1, a2, a3, a4, smem, twiddles, x1, block_size); smem 520 modules/core/src/opencl/fft.cl butterfly5(a5, a6, a7, a8, a9, smem, twiddles, x2, block_size); smem 541 modules/core/src/opencl/fft.cl __local CT smem[LOCAL_SIZE]; smem 554 modules/core/src/opencl/fft.cl smem[x+i*block_size] = src[i*block_size]; smem 559 modules/core/src/opencl/fft.cl smem[x+i*block_size] = (CT)(src[i*block_size], 0.f); smem 576 modules/core/src/opencl/fft.cl dst[i] = SCALE_VAL(smem[i], scale); smem 582 modules/core/src/opencl/fft.cl dst[dst_cols-i] = (CT)(SCALE_VAL(smem[i].x, scale), SCALE_VAL(-smem[i].y, scale)); smem 589 modules/core/src/opencl/fft.cl __local FT* smem_1cn = (__local FT*) smem; smem 620 modules/core/src/opencl/fft.cl __local CT smem[LOCAL_SIZE]; smem 629 modules/core/src/opencl/fft.cl smem[y+i*block_size] = *((__global const CT*)(src + i*block_size*src_step)); smem 639 modules/core/src/opencl/fft.cl *((__global CT*)(dst + i*block_size*dst_step)) = SCALE_VAL(smem[y + i*block_size], scale); smem 644 modules/core/src/opencl/fft.cl __local FT* smem_1cn = (__local FT*) smem; smem 654 modules/core/src/opencl/fft.cl __local FT* smem_1cn = (__local FT*) smem; smem 666 modules/core/src/opencl/fft.cl vstore2(SCALE_VAL(smem[i], scale), 0, (__global FT*) dst); smem 687 modules/core/src/opencl/fft.cl __local CT smem[LOCAL_SIZE]; smem 696 modules/core/src/opencl/fft.cl smem[x+i*block_size].x = src[i*block_size].x; smem 697 modules/core/src/opencl/fft.cl smem[x+i*block_size].y = -src[i*block_size].y; smem 707 modules/core/src/opencl/fft.cl smem[i+1].x = src[i].x; smem 708 modules/core/src/opencl/fft.cl smem[i+1].y = -src[i].y; smem 709 modules/core/src/opencl/fft.cl smem[LOCAL_SIZE-i-1] = src[i]; smem 718 modules/core/src/opencl/fft.cl smem[i+1].x = src.x; smem 719 modules/core/src/opencl/fft.cl smem[i+1].y = -src.y; smem 720 modules/core/src/opencl/fft.cl smem[LOCAL_SIZE-i-1] = src; smem 727 modules/core/src/opencl/fft.cl smem[0].x = *(__global const FT*)(src_ptr + mad24(y, src_step, src_offset)); smem 728 modules/core/src/opencl/fft.cl smem[0].y = 0.f; smem 733 modules/core/src/opencl/fft.cl smem[LOCAL_SIZE/2].x = src[LOCAL_SIZE/2-1].x; smem 735 modules/core/src/opencl/fft.cl smem[LOCAL_SIZE/2].x = *(__global const FT*)(src_ptr + mad24(y, src_step, mad24(LOCAL_SIZE-1, (int)sizeof(FT), src_offset))); smem 737 modules/core/src/opencl/fft.cl smem[LOCAL_SIZE/2].y = 0.f; smem 752 modules/core/src/opencl/fft.cl dst[i*block_size].x = SCALE_VAL(smem[x + i*block_size].x, scale); smem 753 modules/core/src/opencl/fft.cl dst[i*block_size].y = SCALE_VAL(-smem[x + i*block_size].y, scale); smem 760 modules/core/src/opencl/fft.cl dst[i*block_size] = SCALE_VAL(smem[x + i*block_size].x, scale); smem 788 modules/core/src/opencl/fft.cl __local CT smem[LOCAL_SIZE]; smem 799 modules/core/src/opencl/fft.cl smem[y+i*block_size].x = temp.x; smem 800 modules/core/src/opencl/fft.cl smem[y+i*block_size].y = -temp.y; smem 812 modules/core/src/opencl/fft.cl res[0].x = smem[y + i*block_size].x; smem 813 modules/core/src/opencl/fft.cl res[0].y = -smem[y + i*block_size].y; smem 823 modules/core/src/opencl/fft.cl __local CT smem[LOCAL_SIZE]; smem 835 modules/core/src/opencl/fft.cl smem[y+i*block_size].x = temp.x; smem 836 modules/core/src/opencl/fft.cl smem[y+i*block_size].y = -temp.y; smem 848 modules/core/src/opencl/fft.cl smem[i+1].x = src[2*i*step]; smem 849 modules/core/src/opencl/fft.cl smem[i+1].y = -src[(2*i+1)*step]; smem 851 modules/core/src/opencl/fft.cl smem[LOCAL_SIZE-i-1].x = src[2*i*step];; smem 852 modules/core/src/opencl/fft.cl smem[LOCAL_SIZE-i-1].y = src[(2*i+1)*step]; smem 856 modules/core/src/opencl/fft.cl smem[0].x = *(__global const FT*)(src_ptr + mad24(ind, (int)sizeof(FT), src_offset)); smem 857 modules/core/src/opencl/fft.cl smem[0].y = 0.f; smem 861 modules/core/src/opencl/fft.cl smem[LOCAL_SIZE/2].x = src[(LOCAL_SIZE-2)*step]; smem 862 modules/core/src/opencl/fft.cl smem[LOCAL_SIZE/2].y = 0.f; smem 877 modules/core/src/opencl/fft.cl res[0].x = smem[y + i*block_size].x; smem 878 modules/core/src/opencl/fft.cl res[0].y = -smem[y + i*block_size].y; smem 74 modules/cudafilters/src/cuda/column_filter.hpp __shared__ sum_t smem[(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Y][BLOCK_DIM_X]; smem 90 modules/cudafilters/src/cuda/column_filter.hpp smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, x)); smem 97 modules/cudafilters/src/cuda/column_filter.hpp smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_low(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, src_col, src.step)); smem 105 modules/cudafilters/src/cuda/column_filter.hpp smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + j * BLOCK_DIM_Y, x)); smem 110 modules/cudafilters/src/cuda/column_filter.hpp smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, x)); smem 117 modules/cudafilters/src/cuda/column_filter.hpp smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + j * BLOCK_DIM_Y, src_col, src.step)); smem 122 modules/cudafilters/src/cuda/column_filter.hpp smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, src_col, src.step)); smem 138 modules/cudafilters/src/cuda/column_filter.hpp sum = sum + smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y - anchor + k][threadIdx.x] * c_kernel[k]; smem 74 modules/cudafilters/src/cuda/row_filter.hpp __shared__ sum_t smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X]; smem 90 modules/cudafilters/src/cuda/row_filter.hpp smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart - (HALO_SIZE - j) * BLOCK_DIM_X]); smem 97 modules/cudafilters/src/cuda/row_filter.hpp smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_low(xStart - (HALO_SIZE - j) * BLOCK_DIM_X, src_row)); smem 105 modules/cudafilters/src/cuda/row_filter.hpp smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + j * BLOCK_DIM_X]); smem 110 modules/cudafilters/src/cuda/row_filter.hpp smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X]); smem 117 modules/cudafilters/src/cuda/row_filter.hpp smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + j * BLOCK_DIM_X, src_row)); smem 122 modules/cudafilters/src/cuda/row_filter.hpp smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X, src_row)); smem 138 modules/cudafilters/src/cuda/row_filter.hpp sum = sum + smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X - anchor + k] * c_kernel[k]; smem 97 modules/cudaobjdetect/src/cuda/lbp.hpp Emulation::smem::atomicMin(labels + id, p); smem 101 modules/cudaobjdetect/src/cuda/lbp.hpp Emulation::smem::atomicMin(labels + tid, q); smem 83 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid) smem 85 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp get<I>(smem)[tid] = get<I>(val); smem 87 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp For<I + 1, N>::loadToSmem(smem, val, tid); smem 91 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ static void loadFromSmem(const PointerTuple& smem, const ValTuple& val, uint tid) smem 93 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp get<I>(val) = get<I>(smem)[tid]; smem 95 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp For<I + 1, N>::loadFromSmem(smem, val, tid); smem 99 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op) smem 101 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta]; smem 102 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg); smem 104 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp For<I + 1, N>::merge(smem, val, tid, delta, op); smem 146 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, uint tid) smem 148 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp smem[tid] = val; smem 152 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, uint tid) smem 154 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp val = smem[tid]; smem 159 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ __forceinline__ void loadToSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 163 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid); smem 168 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ __forceinline__ void loadFromSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 172 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid); smem 178 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ __forceinline__ void merge(volatile T* smem, T& val, uint tid, uint delta, const Op& op) smem 180 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp T reg = smem[tid + delta]; smem 181 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp smem[tid] = val = op(val, reg); smem 187 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ __forceinline__ void merge(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 193 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op); smem 222 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ static void reduce(Pointer smem, Reference val, uint tid, Op op) smem 224 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp loadToSmem(smem, val, tid); smem 231 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 1024, op); smem 238 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 512, op); smem 245 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 256, op); smem 252 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 128, op); smem 259 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 64, op); smem 266 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 32, op); smem 271 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 16, op); smem 272 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 8, op); smem 273 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 4, op); smem 274 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 2, op); smem 275 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, 1, op); smem 284 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ static void loop(Pointer smem, Reference val, uint tid, Op op) smem 286 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp merge(smem, val, tid, I, op); smem 287 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 317 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ static void reduce(Pointer smem, Reference val, uint tid, Op op) smem 320 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp (void) smem; smem 325 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp loadToSmem(smem, val, tid); smem 328 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 340 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp __device__ static void reduce(Pointer smem, Reference val, uint tid, Op op) smem 348 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp loadToSmem(smem, val, tid / 32); smem 350 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp loadToSmem(smem, val, tid); smem 353 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 358 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp loadToSmem(smem, val, tid / 32); smem 363 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp loadFromSmem(smem, val, tid); smem 370 modules/cudev/include/opencv2/cudev/block/detail/reduce.hpp Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op); smem 82 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid) smem 84 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp get<I>(smem)[tid] = get<I>(data); smem 86 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp For<I + 1, N>::loadToSmem(smem, data, tid); smem 90 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp __device__ static void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid) smem 92 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp get<I>(data) = get<I>(smem)[tid]; smem 94 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp For<I + 1, N>::loadFromSmem(smem, data, tid); smem 152 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, uint tid) smem 154 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp smem[tid] = data; smem 158 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, uint tid) smem 160 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp data = smem[tid]; smem 165 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp __device__ __forceinline__ void loadToSmem(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem, smem 169 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid); smem 174 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp __device__ __forceinline__ void loadFromSmem(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem, smem 178 modules/cudev/include/opencv2/cudev/block/detail/reduce_key_val.hpp For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid); smem 63 modules/cudev/include/opencv2/cudev/block/reduce.hpp __device__ __forceinline__ void blockReduce(volatile T* smem, T& val, uint tid, const Op& op) smem 65 modules/cudev/include/opencv2/cudev/block/reduce.hpp block_reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op); smem 72 modules/cudev/include/opencv2/cudev/block/reduce.hpp __device__ __forceinline__ void blockReduce(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 80 modules/cudev/include/opencv2/cudev/block/reduce.hpp const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op); smem 58 modules/cudev/include/opencv2/cudev/block/scan.hpp __device__ T blockScanInclusive(T data, volatile T* smem, uint tid) smem 63 modules/cudev/include/opencv2/cudev/block/scan.hpp T warpResult = warpScanInclusive(data, smem, tid); smem 71 modules/cudev/include/opencv2/cudev/block/scan.hpp smem[tid >> LOG_WARP_SIZE] = warpResult; smem 79 modules/cudev/include/opencv2/cudev/block/scan.hpp T val = smem[tid]; smem 82 modules/cudev/include/opencv2/cudev/block/scan.hpp smem[tid] = warpScanExclusive(val, smem, tid); smem 88 modules/cudev/include/opencv2/cudev/block/scan.hpp return warpResult + smem[tid >> LOG_WARP_SIZE]; smem 92 modules/cudev/include/opencv2/cudev/block/scan.hpp return warpScanInclusive(data, smem, tid); smem 97 modules/cudev/include/opencv2/cudev/block/scan.hpp __device__ __forceinline__ T blockScanExclusive(T data, volatile T* smem, uint tid) smem 99 modules/cudev/include/opencv2/cudev/block/scan.hpp return blockScanInclusive<THREADS_NUM>(data, smem, tid) - data; smem 75 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) smem 77 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp warpReduce(smem, mySum, tid, plus<result_type>()); smem 80 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) smem 82 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); smem 104 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) smem 106 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp warpReduce(smem, mySum, tid, plus<result_type>()); smem 109 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) smem 111 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); smem 137 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) smem 139 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp warpReduce(smem, mySum, tid, plus<result_type>()); smem 142 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) smem 144 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); smem 169 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) smem 171 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp warpReduce(smem, mySum, tid, plus<result_type>()); smem 174 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) smem 176 modules/cudev/include/opencv2/cudev/block/vec_distance.hpp blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); smem 60 modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp __shared__ ResType smem[BIN_COUNT]; smem 66 modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp smem[i] = 0; smem 77 modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp atomicAdd(&smem[data % BIN_COUNT], 1); smem 86 modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp const ResType histVal = smem[i]; smem 63 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp __shared__ D smem[NUM_SCAN_THREADS * 2]; smem 84 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x); smem 105 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp __shared__ D smem[NUM_SCAN_THREADS * 2]; smem 127 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x); smem 481 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp __shared__ T smem[32][32]; smem 484 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp volatile T* smem_row = &smem[0][0] + 64 * threadIdx.y; smem 505 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + 0][threadIdx.x] = 0.0f; smem 506 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + 8][threadIdx.x] = 0.0f; smem 507 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + 16][threadIdx.x] = 0.0f; smem 508 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + 24][threadIdx.x] = 0.0f; smem 515 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + i * 8][threadIdx.x] = integral(curRowOffs + i * 8, x); smem 523 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp curElems[0] = smem[threadIdx.x][threadIdx.y ]; smem 524 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp curElems[1] = smem[threadIdx.x][threadIdx.y + 8]; smem 525 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp curElems[2] = smem[threadIdx.x][threadIdx.y + 16]; smem 526 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp curElems[3] = smem[threadIdx.x][threadIdx.y + 24]; smem 550 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y ][threadIdx.x] = curElems[0]; smem 551 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + 8][threadIdx.x] = curElems[1]; smem 552 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + 16][threadIdx.x] = curElems[2]; smem 553 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp smem[threadIdx.y + 24][threadIdx.x] = curElems[3]; smem 565 modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp integral(curRowOffs + i * 8, x) = smem[threadIdx.x][threadIdx.y + i * 8]; smem 68 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp __shared__ work_type smem[256 + 4]; smem 86 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp smem[2 + threadIdx.x] = sum; smem 101 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp smem[threadIdx.x] = sum; smem 116 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp smem[4 + threadIdx.x] = sum; smem 130 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp smem[2 + threadIdx.x] = sum; smem 145 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp smem[threadIdx.x] = sum; smem 160 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp smem[4 + threadIdx.x] = sum; smem 172 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp sum = 0.0625f * smem[2 + tid2 - 2]; smem 173 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp sum = sum + 0.25f * smem[2 + tid2 - 1]; smem 174 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp sum = sum + 0.375f * smem[2 + tid2 ]; smem 175 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp sum = sum + 0.25f * smem[2 + tid2 + 1]; smem 176 modules/cudev/include/opencv2/cudev/grid/detail/pyr_down.hpp sum = sum + 0.0625f * smem[2 + tid2 + 2]; smem 279 modules/cudev/include/opencv2/cudev/grid/detail/reduce.hpp __shared__ work_elem_type smem[BLOCK_SIZE * cn]; smem 281 modules/cudev/include/opencv2/cudev/grid/detail/reduce.hpp blockReduce<BLOCK_SIZE>(Unroll<cn>::template smem<BLOCK_SIZE>(smem), Unroll<cn>::res(sum), tid, Unroll<cn>::op(plus<work_elem_type>())); smem 339 modules/cudev/include/opencv2/cudev/grid/detail/reduce.hpp __shared__ work_type smem[BLOCK_SIZE]; smem 343 modules/cudev/include/opencv2/cudev/grid/detail/reduce.hpp blockReduce<BLOCK_SIZE>(smem, myval, tid, op); smem 61 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp __device__ __forceinline__ static void call(work_elem_type smem[1][BLOCK_SIZE], work_type& myVal) smem 64 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp blockReduce<BLOCK_SIZE>(smem[0], myVal, threadIdx.x, op); smem 70 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp __device__ __forceinline__ static void call(work_elem_type smem[2][BLOCK_SIZE], work_type& myVal) smem 73 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1]), tie(myVal.x, myVal.y), threadIdx.x, make_tuple(op, op)); smem 79 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp __device__ __forceinline__ static void call(work_elem_type smem[3][BLOCK_SIZE], work_type& myVal) smem 82 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2]), tie(myVal.x, myVal.y, myVal.z), threadIdx.x, make_tuple(op, op, op)); smem 88 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp __device__ __forceinline__ static void call(work_elem_type smem[4][BLOCK_SIZE], work_type& myVal) smem 91 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2], smem[3]), tie(myVal.x, myVal.y, myVal.z, myVal.w), threadIdx.x, make_tuple(op, op, op, op)); smem 102 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp __shared__ work_elem_type smem[cn][BLOCK_SIZE]; smem 118 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, cn>::call(smem, myVal); smem 62 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_row.hpp __shared__ work_type smem[BLOCK_SIZE_X * BLOCK_SIZE_Y]; smem 81 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_row.hpp smem[threadIdx.x * BLOCK_SIZE_Y + threadIdx.y] = myVal; smem 85 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_row.hpp volatile work_type* srow = smem + threadIdx.y * BLOCK_SIZE_X; smem 96 modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_row.hpp dst[x] = saturate_cast<ResType>(Reductor::result(smem[threadIdx.x * BLOCK_SIZE_X], rows)); smem 81 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid) smem 83 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp get<I>(smem)[tid] = get<I>(val); smem 85 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp For<I + 1, N>::loadToSmem(smem, val, tid); smem 89 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op) smem 91 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta]; smem 92 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg); smem 94 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp For<I + 1, N>::merge(smem, val, tid, delta, op); smem 132 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, uint tid) smem 134 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp smem[tid] = val; smem 139 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp __device__ __forceinline__ void loadToSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 143 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid); smem 149 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp __device__ __forceinline__ void merge(volatile T* smem, T& val, uint tid, uint delta, const Op& op) smem 151 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp T reg = smem[tid + delta]; smem 152 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp smem[tid] = val = op(val, reg); smem 158 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp __device__ __forceinline__ void merge(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 164 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp For<0, tuple_size<tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op); smem 193 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp __device__ static void reduce(Pointer smem, Reference val, uint tid, Op op) smem 196 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp (void) smem; smem 205 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp loadToSmem(smem, val, tid); smem 209 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp merge(smem, val, tid, 16, op); smem 210 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp merge(smem, val, tid, 8, op); smem 211 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp merge(smem, val, tid, 4, op); smem 212 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp merge(smem, val, tid, 2, op); smem 213 modules/cudev/include/opencv2/cudev/warp/detail/reduce.hpp merge(smem, val, tid, 1, op); smem 80 modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid) smem 82 modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp get<I>(smem)[tid] = get<I>(data); smem 84 modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp For<I + 1, N>::loadToSmem(smem, data, tid); smem 137 modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, uint tid) smem 139 modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp smem[tid] = data; smem 144 modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp __device__ __forceinline__ void loadToSmem(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem, smem 148 modules/cudev/include/opencv2/cudev/warp/detail/reduce_key_val.hpp For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid); smem 62 modules/cudev/include/opencv2/cudev/warp/reduce.hpp __device__ __forceinline__ void warpReduce(volatile T* smem, T& val, uint tid, const Op& op) smem 64 modules/cudev/include/opencv2/cudev/warp/reduce.hpp warp_reduce_detail::WarpReductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op); smem 70 modules/cudev/include/opencv2/cudev/warp/reduce.hpp __device__ __forceinline__ void warpReduce(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, smem 78 modules/cudev/include/opencv2/cudev/warp/reduce.hpp const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op); smem 59 modules/cudev/include/opencv2/cudev/warp/scan.hpp __device__ T warpScanInclusive(T data, volatile T* smem, uint tid) smem 62 modules/cudev/include/opencv2/cudev/warp/scan.hpp (void) smem; smem 79 modules/cudev/include/opencv2/cudev/warp/scan.hpp smem[pos] = 0; smem 82 modules/cudev/include/opencv2/cudev/warp/scan.hpp smem[pos] = data; smem 84 modules/cudev/include/opencv2/cudev/warp/scan.hpp smem[pos] += smem[pos - 1]; smem 85 modules/cudev/include/opencv2/cudev/warp/scan.hpp smem[pos] += smem[pos - 2]; smem 86 modules/cudev/include/opencv2/cudev/warp/scan.hpp smem[pos] += smem[pos - 4]; smem 87 modules/cudev/include/opencv2/cudev/warp/scan.hpp smem[pos] += smem[pos - 8]; smem 88 modules/cudev/include/opencv2/cudev/warp/scan.hpp smem[pos] += smem[pos - 16]; smem 90 modules/cudev/include/opencv2/cudev/warp/scan.hpp return smem[pos]; smem 95 modules/cudev/include/opencv2/cudev/warp/scan.hpp __device__ __forceinline__ T warpScanExclusive(T data, volatile T* smem, uint tid) smem 97 modules/cudev/include/opencv2/cudev/warp/scan.hpp return warpScanInclusive(data, smem, tid) - data; smem 80 modules/imgproc/src/opencl/canny.cl inline float3 sobel(int idx, __local const floatN *smem) smem 85 modules/imgproc/src/opencl/canny.cl floatN dx = fma(2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4], smem 86 modules/imgproc/src/opencl/canny.cl smem[idx + 2] - smem[idx] + smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]); smem 88 modules/imgproc/src/opencl/canny.cl floatN dy = fma(2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9], smem 89 modules/imgproc/src/opencl/canny.cl smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10] + smem[idx] - smem[idx + 2 * GRP_SIZEX + 8]); smem 123 modules/imgproc/src/opencl/canny.cl __local floatN smem[(GRP_SIZEX + 4) * (GRP_SIZEY + 4)]; smem 136 modules/imgproc/src/opencl/canny.cl smem[j] = loadpix(src + mad24(y, src_step, mad24(x, cn * (int)sizeof(TYPE), src_offset))); smem 152 modules/imgproc/src/opencl/canny.cl mag[i] = (sobel(i, smem)).z; smem 153 modules/imgproc/src/opencl/canny.cl mag[i + grp_sizey * (GRP_SIZEX + 2)] = (sobel(i + grp_sizey * (GRP_SIZEX + 4), smem)).z; smem 158 modules/imgproc/src/opencl/canny.cl mag[i * (GRP_SIZEX + 2)] = (sobel(i * (GRP_SIZEX + 4), smem)).z; smem 159 modules/imgproc/src/opencl/canny.cl mag[i * (GRP_SIZEX + 2) + grp_sizex] = (sobel(i * (GRP_SIZEX + 4) + grp_sizex, smem)).z; smem 165 modules/imgproc/src/opencl/canny.cl float3 res = sobel(idx, smem); smem 50 modules/imgproc/src/opencl/clahe.cl inline int calc_lut(__local int* smem, int val, int tid) smem 52 modules/imgproc/src/opencl/clahe.cl smem[tid] = val; smem 57 modules/imgproc/src/opencl/clahe.cl smem[i] += smem[i - 1]; smem 60 modules/imgproc/src/opencl/clahe.cl return smem[tid]; smem 64 modules/imgproc/src/opencl/clahe.cl inline void reduce(volatile __local int* smem, int val, int tid) smem 66 modules/imgproc/src/opencl/clahe.cl smem[tid] = val; smem 70 modules/imgproc/src/opencl/clahe.cl smem[tid] = val += smem[tid + 128]; smem 74 modules/imgproc/src/opencl/clahe.cl smem[tid] = val += smem[tid + 64]; smem 78 modules/imgproc/src/opencl/clahe.cl smem[tid] += smem[tid + 32]; smem 82 modules/imgproc/src/opencl/clahe.cl smem[tid] += smem[tid + 16]; smem 86 modules/imgproc/src/opencl/clahe.cl smem[tid] += smem[tid + 8]; smem 90 modules/imgproc/src/opencl/clahe.cl smem[tid] += smem[tid + 4]; smem 94 modules/imgproc/src/opencl/clahe.cl smem[tid] += smem[tid + 2]; smem 98 modules/imgproc/src/opencl/clahe.cl smem[256] = smem[tid] + smem[tid + 1]; smem 104 modules/imgproc/src/opencl/clahe.cl inline void reduce(__local volatile int* smem, int val, int tid) smem 106 modules/imgproc/src/opencl/clahe.cl smem[tid] = val; smem 110 modules/imgproc/src/opencl/clahe.cl smem[tid] = val += smem[tid + 128]; smem 114 modules/imgproc/src/opencl/clahe.cl smem[tid] = val += smem[tid + 64]; smem 119 modules/imgproc/src/opencl/clahe.cl smem[tid] += smem[tid + 32]; smem 126 modules/imgproc/src/opencl/clahe.cl smem[tid] += smem[tid + 16]; smem 134 modules/imgproc/src/opencl/clahe.cl smem[tid] += smem[tid + 8]; smem 135 modules/imgproc/src/opencl/clahe.cl smem[tid] += smem[tid + 4]; smem 136 modules/imgproc/src/opencl/clahe.cl smem[tid] += smem[tid + 2]; smem 137 modules/imgproc/src/opencl/clahe.cl smem[tid] += smem[tid + 1]; smem 148 modules/imgproc/src/opencl/clahe.cl __local int smem[512]; smem 154 modules/imgproc/src/opencl/clahe.cl smem[tid] = 0; smem 163 modules/imgproc/src/opencl/clahe.cl atomic_inc(&smem[data]); smem 168 modules/imgproc/src/opencl/clahe.cl int tHistVal = smem[tid]; smem 182 modules/imgproc/src/opencl/clahe.cl reduce(smem, clipped, tid); smem 185 modules/imgproc/src/opencl/clahe.cl clipped = smem[256]; smem 187 modules/imgproc/src/opencl/clahe.cl clipped = smem[0]; smem 208 modules/imgproc/src/opencl/clahe.cl const int lutVal = calc_lut(smem, tHistVal, tid); smem 104 modules/imgproc/src/opencl/pyr_down.cl smem[0][col_lcl] = sum0; \ smem 107 modules/imgproc/src/opencl/pyr_down.cl smem[1][col_lcl] = sum1; smem 123 modules/imgproc/src/opencl/pyr_down.cl vstore4(sum40, col_lcl, (__local float*) &smem[0][2]); \ smem 126 modules/imgproc/src/opencl/pyr_down.cl vstore4(sum41, col_lcl, (__local float*) &smem[1][2]); smem 137 modules/imgproc/src/opencl/pyr_down.cl __local FT smem[2][LOCAL_SIZE + 4]; smem 229 modules/imgproc/src/opencl/pyr_down.cl FT sum = dot(vload4(0, (__local float*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (float4)(co3, co2, co1, co2)); smem 231 modules/imgproc/src/opencl/pyr_down.cl FT sum = dot(vload4(0, (__local double*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (double4)(co3, co2, co1, co2)); smem 234 modules/imgproc/src/opencl/pyr_down.cl FT sum = co3 * smem[yin - y][2 + tid2 - 2]; smem 235 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co2, smem[yin - y][2 + tid2 - 1], sum); smem 236 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co1, smem[yin - y][2 + tid2 ], sum); smem 237 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co2, smem[yin - y][2 + tid2 + 1], sum); smem 239 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co3, smem[yin - y][2 + tid2 + 2], sum); smem 252 modules/imgproc/src/opencl/pyr_down.cl FT sum = co3* smem[yin - y][2 + tid4 + 2]; smem 253 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co3, smem[yin - y][2 + tid4 - 2], sum); smem 254 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co2, smem[yin - y][2 + tid4 - 1], sum); smem 255 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co1, smem[yin - y][2 + tid4 ], sum); smem 256 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co2, smem[yin - y][2 + tid4 + 1], sum); smem 260 modules/imgproc/src/opencl/pyr_down.cl sum = co3* smem[yin - y][2 + tid4 + 4]; smem 261 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co3, smem[yin - y][2 + tid4 ], sum); smem 262 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co2, smem[yin - y][2 + tid4 + 1], sum); smem 263 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co1, smem[yin - y][2 + tid4 + 2], sum); smem 264 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co2, smem[yin - y][2 + tid4 + 3], sum); smem 274 modules/imgproc/src/opencl/pyr_down.cl FT sum = co3* smem[yin - y][2 + tid4 + 2]; smem 275 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co3, smem[yin - y][2 + tid4 - 2], sum); smem 276 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co2, smem[yin - y][2 + tid4 - 1], sum); smem 277 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co1, smem[yin - y][2 + tid4 ], sum); smem 278 modules/imgproc/src/opencl/pyr_down.cl sum = MAD(co2, smem[yin - y][2 + tid4 + 1], sum); smem 1161 modules/objdetect/src/hog.cpp int smem = (hists_size + final_hists_size) * blocks_in_group; smem 1177 modules/objdetect/src/hog.cpp idx = k.set(idx, (void*)NULL, (size_t)smem); smem 72 modules/objdetect/src/opencl/objdetect_hog.cl __global float* block_hists, __local float* smem) smem 87 modules/objdetect/src/opencl/objdetect_hog.cl __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X * smem 172 modules/objdetect/src/opencl/objdetect_hog.cl __local float* smem = squares + boffset; smem 173 modules/objdetect/src/opencl/objdetect_hog.cl float sum = smem[hid]; smem 175 modules/objdetect/src/opencl/objdetect_hog.cl smem[hid] = sum = sum + smem[hid + 18]; smem 178 modules/objdetect/src/opencl/objdetect_hog.cl smem[hid] = sum = sum + smem[hid + 9]; smem 181 modules/objdetect/src/opencl/objdetect_hog.cl smem[hid] = sum + smem[hid + 4]; smem 183 modules/objdetect/src/opencl/objdetect_hog.cl sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8]; smem 192 modules/objdetect/src/opencl/objdetect_hog.cl sum = smem[hid]; smem 194 modules/objdetect/src/opencl/objdetect_hog.cl smem[hid] = sum = sum + smem[hid + 18]; smem 197 modules/objdetect/src/opencl/objdetect_hog.cl smem[hid] = sum = sum + smem[hid + 9]; smem 200 modules/objdetect/src/opencl/objdetect_hog.cl smem[hid] = sum + smem[hid + 4]; smem 202 modules/objdetect/src/opencl/objdetect_hog.cl sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8]; smem 210 modules/objdetect/src/opencl/objdetect_hog.cl inline float reduce_smem(volatile __local float* smem, int size) smem 213 modules/objdetect/src/opencl/objdetect_hog.cl float sum = smem[tid]; smem 215 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 512) { if (tid < 256) smem[tid] = sum = sum + smem[tid + 256]; smem 217 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 256) { if (tid < 128) smem[tid] = sum = sum + smem[tid + 128]; smem 219 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 128) { if (tid < 64) smem[tid] = sum = sum + smem[tid + 64]; smem 222 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 64) { if (tid < 32) smem[tid] = sum = sum + smem[tid + 32]; smem 224 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 32) { if (tid < 16) smem[tid] = sum = sum + smem[tid + 16]; smem 226 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 16) { if (tid < 8) smem[tid] = sum = sum + smem[tid + 8]; smem 228 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 8) { if (tid < 4) smem[tid] = sum = sum + smem[tid + 4]; smem 230 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 4) { if (tid < 2) smem[tid] = sum = sum + smem[tid + 2]; smem 232 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 2) { if (tid < 1) smem[tid] = sum = sum + smem[tid + 1]; smem 237 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 64) smem[tid] = sum = sum + smem[tid + 32]; smem 242 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 32) smem[tid] = sum = sum + smem[tid + 16]; smem 243 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 16) smem[tid] = sum = sum + smem[tid + 8]; smem 244 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 8) smem[tid] = sum = sum + smem[tid + 4]; smem 245 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 4) smem[tid] = sum = sum + smem[tid + 2]; smem 246 modules/objdetect/src/opencl/objdetect_hog.cl if (size >= 2) smem[tid] = sum = sum + smem[tid + 1]; smem 325 modules/objdetect/src/opencl/objdetect_hog.cl volatile __local float* smem = products; smem 327 modules/objdetect/src/opencl/objdetect_hog.cl if (tid < 13) smem[tid] = product = product + smem[tid + 32]; smem 329 modules/objdetect/src/opencl/objdetect_hog.cl if (tid < 16) smem[tid] = product = product + smem[tid + 16]; smem 331 modules/objdetect/src/opencl/objdetect_hog.cl if(tid<8) smem[tid] = product = product + smem[tid + 8]; smem 333 modules/objdetect/src/opencl/objdetect_hog.cl if(tid<4) smem[tid] = product = product + smem[tid + 4]; smem 335 modules/objdetect/src/opencl/objdetect_hog.cl if(tid<2) smem[tid] = product = product + smem[tid + 2]; smem 340 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 32]; smem 347 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 16]; smem 348 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 8]; smem 349 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 4]; smem 350 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 2]; smem 355 modules/objdetect/src/opencl/objdetect_hog.cl product = product + smem[tid + 1]; smem 398 modules/objdetect/src/opencl/objdetect_hog.cl volatile __local float* smem = products; smem 400 modules/objdetect/src/opencl/objdetect_hog.cl if(tid<32) smem[tid] = product = product + smem[tid + 32]; smem 402 modules/objdetect/src/opencl/objdetect_hog.cl if(tid<16) smem[tid] = product = product + smem[tid + 16]; smem 404 modules/objdetect/src/opencl/objdetect_hog.cl if(tid<8) smem[tid] = product = product + smem[tid + 8]; smem 406 modules/objdetect/src/opencl/objdetect_hog.cl if(tid<4) smem[tid] = product = product + smem[tid + 4]; smem 408 modules/objdetect/src/opencl/objdetect_hog.cl if(tid<2) smem[tid] = product = product + smem[tid + 2]; smem 413 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 32]; smem 418 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 16]; smem 419 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 8]; smem 420 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 4]; smem 421 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 2]; smem 425 modules/objdetect/src/opencl/objdetect_hog.cl product = product + smem[tid + 1]; smem 468 modules/objdetect/src/opencl/objdetect_hog.cl volatile __local float* smem = products; smem 470 modules/objdetect/src/opencl/objdetect_hog.cl if(tid<32) smem[tid] = product = product + smem[tid + 32]; smem 472 modules/objdetect/src/opencl/objdetect_hog.cl if(tid<16) smem[tid] = product = product + smem[tid + 16]; smem 474 modules/objdetect/src/opencl/objdetect_hog.cl if(tid<8) smem[tid] = product = product + smem[tid + 8]; smem 476 modules/objdetect/src/opencl/objdetect_hog.cl if(tid<4) smem[tid] = product = product + smem[tid + 4]; smem 478 modules/objdetect/src/opencl/objdetect_hog.cl if(tid<2) smem[tid] = product = product + smem[tid + 2]; smem 483 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 32]; smem 488 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 16]; smem 489 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 8]; smem 490 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 4]; smem 491 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 2]; smem 495 modules/objdetect/src/opencl/objdetect_hog.cl smem[tid] = product = product + smem[tid + 1]; smem 78 modules/video/src/opencl/optical_flow_farneback.cl __local float * smem, smem 85 modules/video/src/opencl/optical_flow_farneback.cl __local float *row = smem + tx; smem 163 modules/video/src/opencl/optical_flow_farneback.cl __local float * smem) smem 168 modules/video/src/opencl/optical_flow_farneback.cl __local float *row = smem + ty * (bdx + 2*ksizeHalf); smem 202 modules/video/src/opencl/optical_flow_farneback.cl __local float * smem) smem 208 modules/video/src/opencl/optical_flow_farneback.cl __local volatile float *row = smem + 5 * ty * smw; smem 354 modules/video/src/opencl/optical_flow_farneback.cl __local float * smem) smem 361 modules/video/src/opencl/optical_flow_farneback.cl __local float *row = smem + 5 * ty * smw;