This source file includes the following definitions.
- resizeLN
- resizeLN
- resizeNN
- resizeAREA_FAST
- resizeAREA
// Enable double-precision arithmetic when DOUBLE_SUPPORT is defined, using the
// AMD extension if the compiler advertises it and the Khronos one otherwise.
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif

// Fixed-point value of a weight of 1.0. INTER_RESIZE_COEF_BITS is not defined
// in this file and has to be supplied from outside (e.g. as a build option).
#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
// Shift that removes the scale of a product of two fixed-point weights.
#define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)
// Index following x, clamped to the last valid index (l - 1) of a dimension of
// length l. Arguments are parenthesized so that expressions such as
// INC(a << 1, n) or INC(i, rows - pad) expand with the intended precedence.
#define INC(x,l) min((x)+1,(l)-1)
#define noconvert

// Pixel access helpers. T, T1 and cn are expected to be provided from outside
// this file. Three-channel pixels are moved with vload3/vstore3 on the element
// type T1; every other channel count accesses the pixel type T directly.
// TSIZE is the size of one pixel in bytes; it is fully parenthesized so it can
// be used safely inside any larger expression.
#if cn != 3
#define loadpix(addr) *(__global const T *)(addr)
#define storepix(val, addr) *(__global T *)(addr) = val
#define TSIZE ((int)sizeof(T))
#else
#define loadpix(addr) vload3(0, (__global const T1 *)(addr))
#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
#define TSIZE ((int)sizeof(T1)*cn)
#endif
#if defined USE_SAMPLER
// Sampler-path helpers.
// READ_IMAGE wraps read_imagef and keeps only the first cn components of the
// returned value; INTERMEDIATE_TYPE is the float type of matching width.
// No definition is provided for channel counts other than 1..4.
#if cn == 1
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).x
#define INTERMEDIATE_TYPE float
#elif cn == 2
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xy
#define INTERMEDIATE_TYPE float2
#elif cn == 3
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xyz
#define INTERMEDIATE_TYPE float3
#elif cn == 4
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z)
#define INTERMEDIATE_TYPE float4
#endif
// Two-level token pasting so that macro arguments are expanded before being
// concatenated.
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
// Lets a pasted name such as "float" + "1" resolve to the scalar type.
#define float1 float
// Factor applied to the sampled value before conversion to the destination
// type, selected by the externally supplied `depth` code.
// NOTE(review): the constants match the maxima of 8-bit unsigned, 8-bit signed,
// 16-bit unsigned and 16-bit signed integers for depth 0..3 respectively;
// presumably `depth` follows the host library's depth enumeration - confirm.
#if depth == 0
#define RESULT_SCALE 255.0f
#elif depth == 1
#define RESULT_SCALE 127.0f
#elif depth == 2
#define RESULT_SCALE 65535.0f
#elif depth == 3
#define RESULT_SCALE 32767.0f
#else
#define RESULT_SCALE 1.0f
#endif
// Resizes an image by sampling the source through an image sampler with
// linear filtering, unnormalized coordinates and clamp-to-edge addressing.
//
// srcImage             source image object
// dstptr/dststep/dstoffset  destination buffer, row pitch in bytes and byte
//                      offset of the first pixel
// dstrows, dstcols     destination size; work-items outside it store nothing
// ifx, ify             factors mapping a destination pixel centre to a source
//                      coordinate
//
// The sampled value is multiplied by RESULT_SCALE and, when depth <= 4,
// rounded before being converted with convertToDT.
__kernel void resizeSampler(__read_only image2d_t srcImage,
                            __global uchar* dstptr, int dststep, int dstoffset,
                            int dstrows, int dstcols,
                            float ifx, float ify)
{
    const sampler_t linearClampSampler = CLK_FILTER_LINEAR |
                                         CLK_ADDRESS_CLAMP_TO_EDGE |
                                         CLK_NORMALIZED_COORDS_FALSE;

    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Source position of the centre of destination pixel (col, row).
    const float srcX = ((col+0.5f) * ifx);
    const float srcY = ((row+0.5f) * ify);

    const INTERMEDIATE_TYPE sample = READ_IMAGE(srcImage, linearClampSampler, (float2)(srcX, srcY));

#if depth <= 4
    const T result = convertToDT(round(sample * RESULT_SCALE));
#else
    const T result = convertToDT(sample * RESULT_SCALE);
#endif

    // Only work-items that fall inside the destination write a pixel.
    if (col >= dstcols || row >= dstrows)
        return;

    storepix(result, dstptr + mad24(row, dststep, dstoffset + col*TSIZE));
}
#elif defined INTER_LINEAR_INTEGER
// Bilinear resize driven by integer offset and weight tables passed in `buffer`.
//
// srcptr/src_step/src_offset  source buffer, row pitch in bytes, byte offset
// src_rows, src_cols          source size, used to clamp the sampled indices
// dstptr/dst_step/dst_offset  destination buffer, row pitch in bytes, byte offset
// dst_rows, dst_cols          destination size; work-items outside it do nothing
// buffer                      packed tables, laid out as read below:
//                               int   xofs[dst_cols]       left source column per dst column
//                               int   yofs[dst_rows]       top source row per dst row
//                               short ialpha[2 * dst_cols] horizontal weight pair per dst column
//                               short ibeta[2 * dst_rows]  vertical weight pair per dst row
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       __global const uchar * buffer)
{
    const int dx = get_global_id(0);
    const int dy = get_global_id(1);

    if (dx >= dst_cols || dy >= dst_rows)
        return;

    __global const int * xofs = (__global const int *)(buffer);
    __global const int * yofs = xofs + dst_cols;
    __global const short * ialpha = (__global const short *)(yofs + dst_rows);
    __global const short * ibeta = ialpha + ((dst_cols + dy) << 1);
    ialpha += dx << 1;

    const int sx0 = xofs[dx];
    // The right-hand neighbour used to be read unconditionally at sx0 + 1,
    // which is one pixel past the end of the row (and of the buffer on the
    // last row) whenever sx0 is the last column. Clamp it instead; src_cols
    // was previously unused here.
    // NOTE(review): this assumes the table gives the neighbour a zero weight
    // in that case, so the result is unchanged - verify against the code that
    // fills `buffer`.
    const int sx1 = min(sx0 + 1, src_cols - 1);
    const int sy0 = clamp(yofs[dy], 0, src_rows - 1);
    const int sy1 = clamp(yofs[dy] + 1, 0, src_rows - 1);

    const short a0 = ialpha[0], a1 = ialpha[1];
    const short b0 = ibeta[0], b1 = ibeta[1];

    const int col_ofs0 = mad24(sx0, TSIZE, src_offset);
    const int col_ofs1 = mad24(sx1, TSIZE, src_offset);

    WT data0 = convertToWT(loadpix(srcptr + mad24(sy0, src_step, col_ofs0)));
    WT data1 = convertToWT(loadpix(srcptr + mad24(sy0, src_step, col_ofs1)));
    WT data2 = convertToWT(loadpix(srcptr + mad24(sy1, src_step, col_ofs0)));
    WT data3 = convertToWT(loadpix(srcptr + mad24(sy1, src_step, col_ofs1)));

    // Horizontal blend of each row (>> 4), then vertical weighting (>> 16);
    // the remaining two fractional bits are rounded away at the store.
    WT val = ( (((data0 * a0 + data1 * a1) >> 4) * b0) >> 16) +
             ( (((data2 * a0 + data3 * a1) >> 4) * b1) >> 16);

    storepix(convertToDT((val + 2) >> 2),
             dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
}
#elif defined INTER_LINEAR
// Bilinear resize that derives the source position and the interpolation
// weights on the fly from the scale factors.
//
// srcptr/src_step/src_offset  source buffer, row pitch in bytes, byte offset
// src_rows, src_cols          source size, used to clamp the sampled indices
// dstptr/dst_step/dst_offset  destination buffer, row pitch in bytes, byte offset
// dst_rows, dst_cols          destination size; work-items outside it do nothing
// ifx, ify                    factors mapping a destination pixel centre to a
//                             source coordinate
//
// For depth <= 4 the weights are converted to fixed point
// (INTER_RESIZE_COEF_SCALE) and the result is rounded and shifted back by
// CAST_BITS; otherwise the blend is evaluated in floating point.
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       float ifx, float ify)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (col >= dst_cols || row >= dst_rows)
        return;

    // Source coordinate corresponding to the centre of the destination pixel.
    const float fx = ((col+0.5f) * ifx - 0.5f);
    const float fy = ((row+0.5f) * ify - 0.5f);

    int x0 = floor(fx);
    int y0 = floor(fy);
    float wx = fx - x0;
    float wy = fy - y0;

    // Outside the source the index is pinned to the border and the fractional
    // weight is dropped.
    if (x0 < 0)
    {
        x0 = 0;
        wx = 0;
    }
    if (x0 >= src_cols)
    {
        x0 = src_cols - 1;
        wx = 0;
    }
    if (y0 < 0)
    {
        y0 = 0;
        wy = 0;
    }
    if (y0 >= src_rows)
    {
        y0 = src_rows - 1;
        wy = 0;
    }

    // Neighbouring row/column, clamped to the last valid index.
    const int y1 = min(y0 + 1, src_rows - 1);
    const int x1 = min(x0 + 1, src_cols - 1);

    // The four surrounding source pixels.
    const WT topLeft     = convertToWT(loadpix(srcptr + mad24(y0, src_step, mad24(x0, TSIZE, src_offset))));
    const WT topRight    = convertToWT(loadpix(srcptr + mad24(y0, src_step, mad24(x1, TSIZE, src_offset))));
    const WT bottomLeft  = convertToWT(loadpix(srcptr + mad24(y1, src_step, mad24(x0, TSIZE, src_offset))));
    const WT bottomRight = convertToWT(loadpix(srcptr + mad24(y1, src_step, mad24(x1, TSIZE, src_offset))));

#if depth <= 4
    const float scaledU = wx * INTER_RESIZE_COEF_SCALE;
    const float scaledV = wy * INTER_RESIZE_COEF_SCALE;

    const int U = rint(scaledU);
    const int V = rint(scaledV);
    const int U1 = rint(INTER_RESIZE_COEF_SCALE - scaledU);
    const int V1 = rint(INTER_RESIZE_COEF_SCALE - scaledV);

    const WT acc = mul24((WT)mul24(U1, V1), topLeft) + mul24((WT)mul24(U, V1), topRight) +
                   mul24((WT)mul24(U1, V), bottomLeft) + mul24((WT)mul24(U, V), bottomRight);

    // Add half of the scale before shifting so the result is rounded.
    const T result = convertToDT((acc + (1<<(CAST_BITS-1)))>>CAST_BITS);
#else
    const float u1 = 1.f - wx;
    const float v1 = 1.f - wy;

    const T result = u1 * v1 * topLeft + wx * v1 * topRight + u1 * wy * bottomLeft + wx * wy * bottomRight;
#endif

    storepix(result, dstptr + mad24(row, dst_step, mad24(col, TSIZE, dst_offset)));
}
#elif defined INTER_NEAREST
// Nearest-neighbour resize: every destination pixel is a copy of one source
// pixel.
//
// srcptr/src_step/src_offset  source buffer, row pitch in bytes, byte offset
// src_rows, src_cols          source size, used to clamp the sampled indices
// dstptr/dst_step/dst_offset  destination buffer, row pitch in bytes, byte offset
// dst_rows, dst_cols          destination size; work-items outside it do nothing
// ifx, ify                    factors mapping a destination index to a source
//                             coordinate
__kernel void resizeNN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       float ifx, float ify)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (col >= dst_cols || row >= dst_rows)
        return;

    const float srcXf = col * ifx;
    const float srcYf = row * ify;

    // Truncate towards zero and keep the index inside the source image.
    const int srcCol = min(convert_int_rtz(srcXf), src_cols - 1);
    const int srcRow = min(convert_int_rtz(srcYf), src_rows - 1);

    const int srcIndex = mad24(srcRow, src_step, mad24(srcCol, TSIZE, src_offset));
    const int dstIndex = mad24(row, dst_step, mad24(col, TSIZE, dst_offset));

    storepix(loadpix(srcptr + srcIndex), dstptr + dstIndex);
}
#elif defined INTER_AREA
#ifdef INTER_AREA_FAST
// Area resize for whole-number shrink factors: each destination pixel is the
// sum of an XSCALE x YSCALE block of source pixels multiplied by SCALE.
// XSCALE, YSCALE and SCALE are not defined in this file and must be supplied
// from outside.
//
// src/src_step/src_offset  source buffer, row pitch in bytes, byte offset
// src_rows, src_cols       source size; block coordinates are clamped to it
// dst/dst_step/dst_offset  destination buffer, row pitch in bytes, byte offset
// dst_rows, dst_cols       destination size; work-items outside it do nothing
__kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
                              __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (col >= dst_cols || row >= dst_rows)
        return;

    // Top-left corner of the source block that feeds this destination pixel.
    const int blockX = XSCALE * col;
    const int blockY = YSCALE * row;

    WTV total = (WTV)(0);

    #pragma unroll
    for (int j = 0; j < YSCALE; ++j)
    {
        const int srcRow = min(blockY + j, src_rows - 1);
        __global const uchar * rowPtr = src + mad24(srcRow, src_step, src_offset);

        #pragma unroll
        for (int i = 0; i < XSCALE; ++i)
        {
            const int srcCol = min(blockX + i, src_cols - 1);
            total += convertToWTV(loadpix(rowPtr + srcCol*TSIZE));
        }
    }

    const int dstRowStart = mad24(row, dst_step, dst_offset);
    storepix(convertToT(convertToWT2V(total) * (WT2V)(SCALE)), dst + mad24(col, TSIZE, dstRowStart));
}
#else
// General area resize: each destination pixel is a weighted sum of a
// rectangle of source pixels, with the rectangle and the weights taken from
// lookup tables.
//
// src/src_step/src_offset  source buffer, row pitch in bytes, byte offset
// src_rows, src_cols       source size (src_cols also locates the y tables)
// dst/dst_step/dst_offset  destination buffer, row pitch in bytes, byte offset
// dst_rows, dst_cols       destination size; work-items outside it do nothing
// ifx, ify                 not read by this kernel
// ofs_tab                  x entries first, y entries start at index dst_cols + 1;
//                          entries [d] and [d + 1] bound the table range of
//                          destination index d
// map_tab                  source index per table entry; y part starts at
//                          index 2 * src_cols
// alpha_tab                weight per table entry; y part starts at
//                          index 2 * src_cols
__kernel void resizeAREA(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
                         __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                         float ifx, float ify, __global const int * ofs_tab,
                         __global const int * map_tab, __global const float * alpha_tab)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (col >= dst_cols || row >= dst_rows)
        return;

    // Split each packed table into its x and y halves.
    __global const int * xMap = map_tab;
    __global const int * yMap = (__global const int *)(map_tab + (src_cols << 1));
    __global const float * xAlpha = alpha_tab;
    __global const float * yAlpha = (__global const float *)(alpha_tab + (src_cols << 1));
    __global const int * xOfs = ofs_tab;
    __global const int * yOfs = (__global const int *)(ofs_tab + dst_cols + 1);

    // Table ranges [begin, end) contributing to this destination pixel.
    const int xkBegin = xOfs[col];
    const int xkEnd = xOfs[col + 1];
    const int ykBegin = yOfs[row];
    const int ykEnd = yOfs[row + 1];

    // First and last source row/column covered by those ranges.
    const int firstRow = yMap[ykBegin];
    const int lastRow = yMap[ykEnd - 1];
    const int firstCol = xMap[xkBegin];
    const int lastCol = xMap[xkEnd - 1];

    WTV total = (WTV)(0);
    WTV rowSum;

    int rowOffset = mad24(firstRow, src_step, src_offset);
    int yk = ykBegin;

    for (int sy = firstRow; sy <= lastRow; ++sy)
    {
        const WTV rowWeight = (WTV)(yAlpha[yk]);
        rowSum = (WTV)(0);

        int xk = xkBegin;
        for (int sx = firstCol; sx <= lastCol; ++sx)
        {
            const WTV colWeight = (WTV)(xAlpha[xk]);
            rowSum += convertToWTV(loadpix(src + mad24(sx, TSIZE, rowOffset))) * colWeight;
            ++xk;
        }

        total += rowSum * rowWeight;

        rowOffset += src_step;
        ++yk;
    }

    const int dstRowStart = mad24(row, dst_step, dst_offset);
    storepix(convertToT(total), dst + mad24(col, TSIZE, dstRowStart));
}
#endif
#endif