This source file includes the following definitions.
- write_to_1d_arrf
- write_to_1d_arrul8
- write_to_1d_arrvec
- write_to_1d_arrf2
- read_from_1d_arrul8
- read_from_1d_arrf2
- pixel_grayscale
- convolve
- sum_deriv_prod
- sum_deriv_pow
- draw_box
- grayscale
- harris_response
- get_rect_sub_pix
- corner_sub_pix
- refine_features
- brief_descriptors
- match_descriptors
- transformed_point
- transform
- cropped_point
- crop_upscale
- draw_debug_info
/* Harris responses below this value are zeroed out (treated as non-corners). */
#define HARRIS_THRESHOLD 3.0f
/* Radius (in pixels) of the neighborhood over which derivative sums are taken. */
#define HARRIS_RADIUS 2
/* Maximum Hamming distance between two BRIEF descriptors to count as a match. */
#define DISTANCE_THRESHOLD 80
/* Half-extents and full extents of the subpixel refinement window (11x11). */
#define REFINE_WIN_HALF_W 5
#define REFINE_WIN_HALF_H 5
#define REFINE_WIN_W 11
#define REFINE_WIN_H 11
/* Non-maximum-suppression window size — not referenced in this chunk;
 * presumably used by host code or kernels outside this view. */
#define NONMAX_WIN 30
#define NONMAX_WIN_HALF 15
/* A pair of corresponding point locations (e.g. the same feature seen in
 * two different frames). */
typedef struct PointPair {
/* Location in the first (previous) context. */
float2 p1;
/* Location in the second (current) context. */
float2 p2;
} PointPair;
/* A point pair where the first point is an integer pixel location and the
 * second is a (smoothed) subpixel location. Not used in this chunk. */
typedef struct SmoothedPointPair {
/* Integer pixel location. */
int2 p1;
/* Corresponding subpixel location. */
float2 p2;
} SmoothedPointPair;
/* A candidate motion vector: a matched point pair plus a validity flag. */
typedef struct MotionVector {
/* Matched locations (previous frame -> current frame). */
PointPair p;
/* Nonzero if this vector is a valid match that should be used downstream. */
int should_consider;
} MotionVector;
/* Nearest-neighbor sampling with unnormalized coordinates, clamped at the
 * image edges. */
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP_TO_EDGE |
CLK_FILTER_NEAREST;
/* Bilinear sampling with unnormalized coordinates, clamped at the edges. */
const sampler_t sampler_linear = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP_TO_EDGE |
CLK_FILTER_LINEAR;
/* Bilinear sampling with normalized coordinates; out-of-range coordinates
 * mirror back into the image (used when transforming whole frames). */
const sampler_t sampler_linear_mirror = CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_MIRRORED_REPEAT |
CLK_FILTER_LINEAR;
// Stores a float into a flat buffer laid out row-major with the global
// NDRange width as the row stride.
static void write_to_1d_arrf(__global float *buf, int2 loc, float val) {
    size_t idx = loc.x + (size_t)loc.y * get_global_size(0);
    buf[idx] = val;
}
// Stores a 512-bit descriptor into a flat buffer laid out row-major with
// the global NDRange width as the row stride.
static void write_to_1d_arrul8(__global ulong8 *buf, int2 loc, ulong8 val) {
    size_t idx = loc.x + (size_t)loc.y * get_global_size(0);
    buf[idx] = val;
}
// Stores a MotionVector into a flat buffer laid out row-major with the
// global NDRange width as the row stride.
static void write_to_1d_arrvec(__global MotionVector *buf, int2 loc, MotionVector val) {
    size_t idx = loc.x + (size_t)loc.y * get_global_size(0);
    buf[idx] = val;
}
// Stores a float2 into a flat buffer laid out row-major with the global
// NDRange width as the row stride.
static void write_to_1d_arrf2(__global float2 *buf, int2 loc, float2 val) {
    size_t idx = loc.x + (size_t)loc.y * get_global_size(0);
    buf[idx] = val;
}
// Loads a 512-bit descriptor from a flat buffer laid out row-major with
// the global NDRange width as the row stride.
static ulong8 read_from_1d_arrul8(__global const ulong8 *buf, int2 loc) {
    size_t idx = loc.x + (size_t)loc.y * get_global_size(0);
    return buf[idx];
}
// Loads a float2 from a flat buffer laid out row-major with the global
// NDRange width as the row stride.
static float2 read_from_1d_arrf2(__global const float2 *buf, int2 loc) {
    size_t idx = loc.x + (size_t)loc.y * get_global_size(0);
    return buf[idx];
}
// Grayscale value of the pixel at loc: the plain average of the first
// three channels (no luma weighting).
static float pixel_grayscale(__read_only image2d_t src, int2 loc) {
    float4 rgba = read_imagef(src, sampler, loc);
    float channel_sum = rgba.x + rgba.y + rgba.z;
    return channel_sum / 3.0f;
}
// Applies a 3x3 convolution mask to the local grayscale tile around
// (local_idx_x, local_idx_y). The tile is 14 floats wide and carries a
// 3-pixel apron, hence the +3 offsets. Mask row 0 is applied one row
// down in image space (the original iterated the row offset from +1 to -1).
static float convolve(
__local const float *grayscale,
int local_idx_x,
int local_idx_y,
float mask[3][3]
) {
    float acc = 0.0f;
    for (int row = 0; row < 3; ++row) {
        int dy = 1 - row;
        for (int col = 0; col < 3; ++col) {
            int dx = col - 1;
            acc += mask[row][col] *
                   grayscale[(local_idx_x + 3 + dx) + (local_idx_y + 3 + dy) * 14];
        }
    }
    return acc;
}
// Sums, over the (2*HARRIS_RADIUS+1)^2 neighborhood of this work-item,
// the product of the two masked derivative responses (the off-diagonal
// term of the Harris structure tensor).
static float sum_deriv_prod(
__local const float *grayscale,
float mask_x[3][3],
float mask_y[3][3]
) {
    float total = 0.0f;
    int lx = get_local_id(0);
    int ly = get_local_id(1);
    for (int dy = HARRIS_RADIUS; dy >= -HARRIS_RADIUS; --dy) {
        for (int dx = -HARRIS_RADIUS; dx <= HARRIS_RADIUS; ++dx) {
            float gx = convolve(grayscale, lx + dx, ly + dy, mask_x);
            float gy = convolve(grayscale, lx + dx, ly + dy, mask_y);
            total += gx * gy;
        }
    }
    return total;
}
// Sums, over the (2*HARRIS_RADIUS+1)^2 neighborhood of this work-item,
// the squared masked derivative response (a diagonal term of the Harris
// structure tensor).
static float sum_deriv_pow(__local const float *grayscale, float mask[3][3])
{
    float total = 0.0f;
    int lx = get_local_id(0);
    int ly = get_local_id(1);
    for (int dy = HARRIS_RADIUS; dy >= -HARRIS_RADIUS; --dy) {
        for (int dx = -HARRIS_RADIUS; dx <= HARRIS_RADIUS; ++dx) {
            float deriv = convolve(grayscale, lx + dx, ly + dy, mask);
            total += deriv * deriv;
        }
    }
    return total;
}
// Fills a (2*radius+1)^2 square centered on loc with the given color,
// clamping every write to the destination image bounds.
static void draw_box(__write_only image2d_t dst, int2 loc, float4 pixel, int radius)
{
    int2 dim = get_image_dim(dst);
    for (int dx = -radius; dx <= radius; ++dx) {
        for (int dy = -radius; dy <= radius; ++dy) {
            int2 p = (int2)(
                clamp(loc.x + dx, 0, dim.x - 1),
                clamp(loc.y + dy, 0, dim.y - 1)
            );
            write_imagef(dst, p, pixel);
        }
    }
}
// Converts src to grayscale, storing the luminance in the red channel of
// the grayscale image (alpha set to 1).
__kernel void grayscale(
__read_only image2d_t src,
__write_only image2d_t grayscale
) {
    int2 loc = (int2)(get_global_id(0), get_global_id(1));
    float luma = pixel_grayscale(src, loc);
    write_imagef(grayscale, loc, (float4)(luma, 0.0f, 0.0f, 1.0f));
}
/*
 * Computes the Harris corner response for every pixel of the grayscale
 * image into harris_buf, which is indexed with the image width as the row
 * stride (this matches the reader in refine_features). Responses below
 * HARRIS_THRESHOLD are zeroed via step().
 */
__kernel void harris_response(
__read_only image2d_t grayscale,
__global float *harris_buf
) {
    int2 loc = (int2)(get_global_id(0), get_global_id(1));

    // Derivative normalization: 1 / (2^2 * HARRIS_RADIUS * 255).
    float scale = 1.0f / ((1 << 2) * HARRIS_RADIUS * 255.0f);

    float sobel_mask_x[3][3] = {
        {-1, 0, 1},
        {-2, 0, 2},
        {-1, 0, 1}
    };

    float sobel_mask_y[3][3] = {
        { 1, 2, 1},
        { 0, 0, 0},
        {-1, -2, -1}
    };

    // 14x14 local tile: the work-group's pixels plus a 3-pixel apron on
    // every side (assumes an 8x8 work-group: 8 + 2*3 = 14 — TODO confirm
    // against the host-side local size).
    __local float grayscale_data[196];

    int idx = get_group_id(0) * get_local_size(0);
    int idy = get_group_id(1) * get_local_size(1);

    // Every work-item (redundantly) loads the whole tile; the nearest
    // sampler clamps reads that fall outside the image.
    for (int i = idy - 3, it = 0; i < idy + (int)get_local_size(1) + 3; i++, it++) {
        for (int j = idx - 3, jt = 0; j < idx + (int)get_local_size(0) + 3; j++, jt++) {
            grayscale_data[jt + it * 14] = read_imagef(grayscale, sampler, (int2)(j, i)).x;
        }
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    // Bounds check only AFTER the barrier: the previous version returned
    // before it, which is undefined behavior in OpenCL when other
    // work-items of the same group do reach the barrier (this happens in
    // edge groups whenever the global size is padded past the image).
    // That path also wrote a zero using get_global_size(0) as the stride,
    // which can alias — and clobber — valid entries of the width-strided
    // layout used everywhere else. In-bounds work-items already cover
    // every slot the reader touches, so out-of-image items write nothing.
    if (loc.x > get_image_width(grayscale) - 1 || loc.y > get_image_height(grayscale) - 1) {
        return;
    }

    float sumdxdy = sum_deriv_prod(grayscale_data, sobel_mask_x, sobel_mask_y);
    float sumdx2 = sum_deriv_pow(grayscale_data, sobel_mask_x);
    float sumdy2 = sum_deriv_pow(grayscale_data, sobel_mask_y);

    float trace = sumdx2 + sumdy2;

    // Harris measure: det(M) - 0.04 * trace(M)^2, with the scale
    // normalization folded in via pown(scale, 4).
    float r = (sumdx2 * sumdy2 - sumdxdy * sumdxdy) - 0.04f * (trace * trace) * pown(scale, 4);

    // Keep the response only if it clears the threshold.
    harris_buf[loc.x + loc.y * get_image_width(grayscale)] = r * step(HARRIS_THRESHOLD, r);
}
// Copies a size_x * size_y patch centered (to subpixel precision) on
// 'center' into buffer, bilinearly interpolating and rescaling the
// normalized intensities to 0..255.
static void get_rect_sub_pix(
__read_only image2d_t grayscale,
float *buffer,
int size_x,
int size_y,
float2 center
) {
    float2 half_extent = ((float2)(size_x, size_y) - 1.0f) * 0.5f;
    for (int row = 0; row < size_y; row++) {
        for (int col = 0; col < size_x; col++) {
            float2 pos = (float2)(col, row) + center - half_extent;
            float sample = read_imagef(grayscale, sampler_linear, pos).x;
            buffer[row * size_x + col] = sample * 255.0f;
        }
    }
}
/*
 * Iteratively refines a feature location to subpixel precision using a
 * gradient-based scheme (structurally similar to OpenCV's cornerSubPix):
 * each iteration samples an (11+2)x(11+2) patch around the current
 * estimate, accumulates a mask-weighted 2x2 gradient covariance system,
 * and solves it for the position update. Stops on convergence (step
 * smaller than eps), iteration limit, a singular system, or leaving the
 * image. If the final estimate drifted more than half a window from the
 * starting point, the original location is returned instead.
 *
 * mask is a REFINE_WIN_H x REFINE_WIN_W array of per-pixel weights.
 */
static float2 corner_sub_pix(
__read_only image2d_t grayscale,
float2 feature,
float *mask
) {
float2 init = feature;
// NOTE(review): the global NDRange size is used as the image bounds here —
// assumes the kernel is launched with one work-item per pixel; confirm.
int src_width = get_global_size(0);
int src_height = get_global_size(1);
const int max_iters = 40;
// Convergence threshold on the SQUARED step length (0.001^2).
const float eps = 0.001f * 0.001f;
int i, j, k;
int iter = 0;
float err = 0;
// Refinement window plus a 1-pixel border so central differences stay
// inside the sampled patch.
float subpix[(REFINE_WIN_W + 2) * (REFINE_WIN_H + 2)];
// Single-precision machine epsilon (2^-23), used to detect singularity.
const float flt_epsilon = 0x1.0p-23f;
do {
float2 feature_tmp;
// 2x2 system: covariance (a b; b c) and right-hand side (bb1, bb2).
float a = 0, b = 0, c = 0, bb1 = 0, bb2 = 0;
get_rect_sub_pix(grayscale, subpix, REFINE_WIN_W + 2, REFINE_WIN_H + 2, feature);
float *subpix_ptr = subpix;
// Skip the border row and column of the sampled patch.
subpix_ptr += REFINE_WIN_W + 2 + 1;
for (i = 0, k = 0; i < REFINE_WIN_H; i++, subpix_ptr += REFINE_WIN_W + 2) {
// py/px: pixel offset from the window center.
float py = i - REFINE_WIN_HALF_H;
for (j = 0; j < REFINE_WIN_W; j++, k++) {
float m = mask[k];
// Central differences in x and y (row stride is REFINE_WIN_W + 2).
float tgx = subpix_ptr[j + 1] - subpix_ptr[j - 1];
float tgy = subpix_ptr[j + REFINE_WIN_W + 2] - subpix_ptr[j - REFINE_WIN_W - 2];
float gxx = tgx * tgx * m;
float gxy = tgx * tgy * m;
float gyy = tgy * tgy * m;
float px = j - REFINE_WIN_HALF_W;
a += gxx;
b += gxy;
c += gyy;
bb1 += gxx * px + gxy * py;
bb2 += gxy * px + gyy * py;
}
}
float det = a * c - b * b;
// Nearly singular system: keep the current estimate.
if (fabs(det) <= flt_epsilon * flt_epsilon) {
break;
}
// Solve the 2x2 system via the explicit inverse.
float scale = 1.0f / det;
feature_tmp.x = (float)(feature.x + (c * scale * bb1) - (b * scale * bb2));
feature_tmp.y = (float)(feature.y - (b * scale * bb1) + (a * scale * bb2));
// Squared length of this iteration's step.
err = dot(feature_tmp - feature, feature_tmp - feature);
feature = feature_tmp;
// Stop if the estimate wandered outside the image.
if (feature.x < 0 || feature.x >= src_width || feature.y < 0 || feature.y >= src_height) {
break;
}
} while (++iter < max_iters && err > eps);
// Reject refinements that drifted more than half a window away.
if (fabs(feature.x - init.x) > REFINE_WIN_HALF_W || fabs(feature.y - init.y) > REFINE_WIN_HALF_H) {
feature = init;
}
return feature;
}
// One work-item per 32x32 tile: finds the strongest Harris response in
// its tile and (optionally) refines it to subpixel precision. Tiles with
// no response above zero emit (-1, -1) as a "no feature" sentinel.
__kernel void refine_features(
__read_only image2d_t grayscale,
__global const float *harris_buf,
__global float2 *refined_features,
int subpixel_refine
) {
    int2 loc = (int2)(get_global_id(0), get_global_id(1));
    int2 tile_origin = (int2)(loc.x * 32, loc.y * 32);
    int2 dim = get_image_dim(grayscale);
    int end_x = min(tile_origin.x + 32, dim.x - 1);
    int end_y = min(tile_origin.y + 32, dim.y - 1);

    float best = 0;
    float2 best_loc = (float2)(-1, -1);
    for (int x = tile_origin.x; x < end_x; ++x) {
        for (int y = tile_origin.y; y < end_y; ++y) {
            float response = harris_buf[x + y * dim.x];
            if (response > best) {
                best = response;
                best_loc = (float2)(x, y);
            }
        }
    }

    if (best != 0 && subpixel_refine) {
        // Separable Gaussian-like weight mask over the refinement window.
        float mask[REFINE_WIN_H * REFINE_WIN_W];
        for (int i = 0; i < REFINE_WIN_H; i++) {
            float y = (float)(i - REFINE_WIN_HALF_H) / REFINE_WIN_HALF_H;
            float vy = exp(-y * y);
            for (int j = 0; j < REFINE_WIN_W; j++) {
                float x = (float)(j - REFINE_WIN_HALF_W) / REFINE_WIN_HALF_W;
                mask[i * REFINE_WIN_W + j] = vy * exp(-x * x);
            }
        }
        best_loc = corner_sub_pix(grayscale, best_loc, mask);
    }

    write_to_1d_arrf2(refined_features, loc, best_loc);
}
/*
 * Computes a 512-bit BRIEF-style descriptor for each refined feature by
 * comparing bilinearly sampled intensities at point-pair offsets from
 * brief_pattern. Features marked (-1, ...) get an all-zero descriptor.
 */
__kernel void brief_descriptors(
__read_only image2d_t grayscale,
__global const float2 *refined_features,
__global ulong8 *desc_buf,
__global const PointPair *brief_pattern
) {
int2 loc = (int2)(get_global_id(0), get_global_id(1));
float2 feature = read_from_1d_arrf2(refined_features, loc);
// x == -1 is the "no feature in this tile" sentinel written by
// refine_features; propagate it as a zero descriptor.
if (feature.x == -1) {
write_to_1d_arrul8(desc_buf, loc, (ulong8)(0));
return;
}
ulong8 desc = 0;
// Treat the ulong8 as an array of 8 64-bit words, one per outer loop.
ulong *p = &desc;
for (int i = 0; i < 8; ++i) {
for (int j = 0; j < 64; ++j) {
// NOTE(review): the stride j * (i + 1) revisits many pattern entries
// across words (e.g. entry 0 for every i) instead of walking 512
// distinct pairs — possibly intended as i * 64 + j. Confirm against
// the host-side pattern generator before changing; altering it would
// invalidate descriptors matched against previously generated ones.
PointPair pair = brief_pattern[j * (i + 1)];
float l1 = read_imagef(grayscale, sampler_linear, feature + pair.p1).x;
float l2 = read_imagef(grayscale, sampler_linear, feature + pair.p2).x;
// Set bit j of word i when the first sample is darker.
if (l1 < l2) {
p[i] |= 1UL << j;
}
}
}
write_to_1d_arrul8(desc_buf, loc, desc);
}
// For each current-frame descriptor, searches a small neighborhood of the
// previous frame's descriptor grid and emits the first candidate whose
// Hamming distance is under DISTANCE_THRESHOLD, or an invalid vector if
// none qualifies.
__kernel void match_descriptors(
__global const float2 *prev_refined_features,
__global const float2 *refined_features,
__global const ulong8 *desc_buf,
__global const ulong8 *prev_desc_buf,
__global MotionVector *matches_buf
) {
    int2 loc = (int2)(get_global_id(0), get_global_id(1));
    ulong8 desc = read_from_1d_arrul8(desc_buf, loc);
    const int search_radius = 3;

    MotionVector no_match = (MotionVector) {
        (PointPair) {
            (float2)(-1, -1),
            (float2)(-1, -1)
        },
        0
    };

    // A descriptor whose first two words are zero marks "no feature here".
    if (desc.s0 == 0 && desc.s1 == 0) {
        write_to_1d_arrvec(matches_buf, loc, no_match);
        return;
    }

    int2 start = max(loc - search_radius, 0);
    int2 end = min(loc + search_radius, (int2)(get_global_size(0) - 1, get_global_size(1) - 1));

    for (int x = start.x; x < end.x; ++x) {
        for (int y = start.y; y < end.y; ++y) {
            int2 candidate = (int2)(x, y);
            ulong8 prev_desc = read_from_1d_arrul8(prev_desc_buf, candidate);

            if (prev_desc.s0 == 0 && prev_desc.s1 == 0) {
                continue;
            }

            // Hamming distance between the two 512-bit descriptors,
            // word by word.
            ulong *a = &desc;
            ulong *b = &prev_desc;
            int dist = 0;
            for (int w = 0; w < 8; w++) {
                dist += popcount(a[w] ^ b[w]);
            }

            if (dist < DISTANCE_THRESHOLD) {
                write_to_1d_arrvec(
                    matches_buf,
                    loc,
                    (MotionVector) {
                        (PointPair) {
                            read_from_1d_arrf2(prev_refined_features, candidate),
                            read_from_1d_arrf2(refined_features, loc)
                        },
                        1
                    }
                );
                return;
            }
        }
    }

    // No candidate in the search window was close enough.
    write_to_1d_arrvec(matches_buf, loc, no_match);
}
// Applies the 2x3 affine matrix [t0 t1 t2; t3 t4 t5] (row-major in
// 'transform') to the point p.
static float2 transformed_point(float2 p, __global const float *transform) {
    float2 result;
    result.x = transform[0] * p.x + transform[1] * p.y + transform[2];
    result.y = transform[3] * p.x + transform[4] * p.y + transform[5];
    return result;
}
// Warps src into dst by the given affine transform, sampling bilinearly
// with mirrored edges.
__kernel void transform(
__read_only image2d_t src,
__write_only image2d_t dst,
__global const float *transform
) {
    int2 loc = (int2)(get_global_id(0), get_global_id(1));
    // The mirrored sampler takes normalized coordinates, so divide the
    // transformed position by the source dimensions.
    float2 src_dim = convert_float2(get_image_dim(src));
    float2 src_pos = transformed_point((float2)(loc.x, loc.y), transform) / src_dim;
    float4 pixel = read_imagef(src, sampler_linear_mirror, src_pos);
    write_imagef(dst, loc, pixel);
}
/*
 * Maps a point in the (upscaled) destination image back into the crop
 * region of the source: normalize by orig_dim, scale by the crop size,
 * then offset to the crop's position. The result is in unnormalized
 * source pixel coordinates.
 */
static float2 cropped_point(
float2 p,
float2 top_left,
float2 bottom_right,
int2 orig_dim
) {
float2 ret;
float crop_width = bottom_right.x - top_left.x;
float crop_height = bottom_right.y - top_left.y;
float width_norm = p.x / (float)orig_dim.x;
float height_norm = p.y / (float)orig_dim.y;
ret.x = (width_norm * crop_width) + top_left.x;
// NOTE(review): y offsets from the bottom margin (orig_dim.y -
// bottom_right.y) while x offsets from top_left.x; these agree only when
// the crop is vertically centered — confirm the asymmetry is intended.
ret.y = (height_norm * crop_height) + ((float)orig_dim.y - bottom_right.y);
return ret;
}
// Upscales the crop region [top_left, bottom_right] of src to fill dst,
// sampling the source bilinearly.
__kernel void crop_upscale(
__read_only image2d_t src,
__write_only image2d_t dst,
float2 top_left,
float2 bottom_right
) {
    int2 loc = (int2)(get_global_id(0), get_global_id(1));
    float2 src_pos = cropped_point(
        (float2)(loc.x, loc.y),
        top_left,
        bottom_right,
        get_image_dim(dst)
    );
    float4 pixel = read_imagef(src, sampler_linear, src_pos);
    write_imagef(dst, loc, pixel);
}
// Draws debug boxes for each motion vector onto dst: a large box at the
// transformed destination point (gray = ignored, green = considered,
// orange = part of the model's matches) and a small blue box at the
// transformed source point. One work-item per vector.
__kernel void draw_debug_info(
__write_only image2d_t dst,
__global const MotionVector *matches,
__global const MotionVector *model_matches,
int num_model_matches,
__global const float *transform
) {
    int loc = get_global_id(0);
    MotionVector vec = matches[loc];

    float4 big_rect_color = (float4)(0.1f, 0.1f, 0.1f, 1.0f);
    if (vec.should_consider) {
        big_rect_color = (float4)(0.0f, 1.0f, 0.0f, 1.0f);
    }
    for (int i = 0; i < num_model_matches; i++) {
        int same_x = vec.p.p2.x == model_matches[i].p.p2.x;
        int same_y = vec.p.p2.y == model_matches[i].p.p2.y;
        if (same_x && same_y) {
            big_rect_color = (float4)(1.0f, 0.5f, 0.0f, 1.0f);
        }
    }

    float2 p1 = transformed_point(vec.p.p1, transform);
    float2 p2 = transformed_point(vec.p.p2, transform);
    draw_box(dst, (int2)(p2.x, p2.y), big_rect_color, 5);
    draw_box(dst, (int2)(p1.x, p1.y), (float4)(0.0f, 0.3f, 0.7f, 1.0f), 3);
}