1/*M/////////////////////////////////////////////////////////////////////////////////////// 2// 3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4// 5// By downloading, copying, installing or using the software you agree to this license. 6// If you do not agree to this license, do not download, install, 7// copy or use the software. 8// 9// 10// License Agreement 11// For Open Source Computer Vision Library 12// 13// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. 14// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. 15// Third party copyrights are property of their respective owners. 16// 17// @Authors 18// Peng Xiao, pengxiao@multicorewareinc.com 19// 20// Redistribution and use in source and binary forms, with or without modification, 21// are permitted provided that the following conditions are met: 22// 23// * Redistribution's of source code must retain the above copyright notice, 24// this list of conditions and the following disclaimer. 25// 26// * Redistribution's in binary form must reproduce the above copyright notice, 27// this list of conditions and the following disclaimer in the documentation 28// and/or other materials provided with the distribution. 29// 30// * The name of the copyright holders may not be used to endorse or promote products 31// derived from this software without specific prior written permission. 32// 33// This software is provided by the copyright holders and contributors as is and 34// any express or implied warranties, including, but not limited to, the implied 35// warranties of merchantability and fitness for a particular purpose are disclaimed. 36// In no event shall the Intel Corporation or contributors be liable for any direct, 37// indirect, incidental, special, exemplary, or consequential damages 38// (including, but not limited to, procurement of substitute goods or services; 39// loss of use, data, or profits; or business interruption) however caused 40// and on any theory of liability, whether in contract, strict liability, 41// or tort (including negligence or otherwise) arising in any way out of 42// the use of this software, even if advised of the possibility of such damage. 43// 44//M*/ 45 46#define TG22 0.4142135623730950488016887242097f 47#define TG67 2.4142135623730950488016887242097f 48 49#ifdef WITH_SOBEL 50 51#if cn == 1 52#define loadpix(addr) convert_floatN(*(__global const TYPE *)(addr)) 53#else 54#define loadpix(addr) convert_floatN(vload3(0, (__global const TYPE *)(addr))) 55#endif 56#define storepix(value, addr) *(__global int *)(addr) = (int)(value) 57 58/* 59 stage1_with_sobel: 60 Sobel operator 61 Calc magnitudes 62 Non maxima suppression 63 Double thresholding 64*/ 65 66__constant int prev[4][2] = { 67 { 0, -1 }, 68 { -1, 1 }, 69 { -1, 0 }, 70 { -1, -1 } 71}; 72 73__constant int next[4][2] = { 74 { 0, 1 }, 75 { 1, -1 }, 76 { 1, 0 }, 77 { 1, 1 } 78}; 79 80inline float3 sobel(int idx, __local const floatN *smem) 81{ 82 // result: x, y, mag 83 float3 res; 84 85 floatN dx = fma(2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4], 86 smem[idx + 2] - smem[idx] + smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]); 87 88 floatN dy = fma(2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9], 89 smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10] + smem[idx] - smem[idx + 2 * GRP_SIZEX + 8]); 90 91#ifdef L2GRAD 92 floatN magN = fma(dx, dx, dy * dy); 93#else 94 floatN magN = fabs(dx) + fabs(dy); 95#endif 96#if cn == 1 97 res.z = magN; 98 res.x = dx; 99 res.y = dy; 100#else 101 res.z = max(magN.x, max(magN.y, magN.z)); 102 if (res.z == magN.y) 103 { 104 dx.x = dx.y; 105 dy.x = dy.y; 106 } 107 else if (res.z == magN.z) 108 { 109 dx.x = dx.z; 110 dy.x = dy.z; 111 } 112 res.x = dx.x; 113 res.y = dy.x; 114#endif 115 116 return res; 117} 118 119__kernel void stage1_with_sobel(__global const uchar *src, int src_step, int src_offset, int rows, int cols, 120 __global uchar *map, int map_step, int map_offset, 121 float low_thr, float high_thr) 122{ 123 __local floatN smem[(GRP_SIZEX + 4) * (GRP_SIZEY + 4)]; 124 125 int lidx = get_local_id(0); 126 int lidy = get_local_id(1); 127 128 int start_x = GRP_SIZEX * get_group_id(0); 129 int start_y = GRP_SIZEY * get_group_id(1); 130 131 int i = lidx + lidy * GRP_SIZEX; 132 for (int j = i; j < (GRP_SIZEX + 4) * (GRP_SIZEY + 4); j += GRP_SIZEX * GRP_SIZEY) 133 { 134 int x = clamp(start_x - 2 + (j % (GRP_SIZEX + 4)), 0, cols - 1); 135 int y = clamp(start_y - 2 + (j / (GRP_SIZEX + 4)), 0, rows - 1); 136 smem[j] = loadpix(src + mad24(y, src_step, mad24(x, cn * (int)sizeof(TYPE), src_offset))); 137 } 138 139 barrier(CLK_LOCAL_MEM_FENCE); 140 141 //// Sobel, Magnitude 142 // 143 144 __local float mag[(GRP_SIZEX + 2) * (GRP_SIZEY + 2)]; 145 146 lidx++; 147 lidy++; 148 149 if (i < GRP_SIZEX + 2) 150 { 151 int grp_sizey = min(GRP_SIZEY + 1, rows - start_y); 152 mag[i] = (sobel(i, smem)).z; 153 mag[i + grp_sizey * (GRP_SIZEX + 2)] = (sobel(i + grp_sizey * (GRP_SIZEX + 4), smem)).z; 154 } 155 if (i < GRP_SIZEY + 2) 156 { 157 int grp_sizex = min(GRP_SIZEX + 1, cols - start_x); 158 mag[i * (GRP_SIZEX + 2)] = (sobel(i * (GRP_SIZEX + 4), smem)).z; 159 mag[i * (GRP_SIZEX + 2) + grp_sizex] = (sobel(i * (GRP_SIZEX + 4) + grp_sizex, smem)).z; 160 } 161 162 int idx = lidx + lidy * (GRP_SIZEX + 4); 163 i = lidx + lidy * (GRP_SIZEX + 2); 164 165 float3 res = sobel(idx, smem); 166 mag[i] = res.z; 167 barrier(CLK_LOCAL_MEM_FENCE); 168 169 int x = (int) res.x; 170 int y = (int) res.y; 171 172 //// Threshold + Non maxima suppression 173 // 174 175 /* 176 Sector numbers 177 178 3 2 1 179 * * * 180 * * * 181 0*******0 182 * * * 183 * * * 184 1 2 3 185 186 We need to determine arctg(dy / dx) to one of the four directions: 0, 45, 90 or 135 degrees. 187 Therefore if abs(dy / dx) belongs to the interval 188 [0, tg(22.5)] -> 0 direction 189 [tg(22.5), tg(67.5)] -> 1 or 3 190 [tg(67,5), +oo) -> 2 191 192 Since tg(67.5) = 1 / tg(22.5), if we take 193 a = abs(dy / dx) * tg(22.5) and b = abs(dy / dx) * tg(67.5) 194 we can get another intervals 195 196 in case a: 197 [0, tg(22.5)^2] -> 0 198 [tg(22.5)^2, 1] -> 1, 3 199 [1, +oo) -> 2 200 201 in case b: 202 [0, 1] -> 0 203 [1, tg(67.5)^2] -> 1,3 204 [tg(67.5)^2, +oo) -> 2 205 206 that can help to find direction without conditions. 207 208 0 - might belong to an edge 209 1 - pixel doesn't belong to an edge 210 2 - belong to an edge 211 */ 212 213 int gidx = get_global_id(0); 214 int gidy = get_global_id(1); 215 216 if (gidx >= cols || gidy >= rows) 217 return; 218 219 float mag0 = mag[i]; 220 221 int value = 1; 222 if (mag0 > low_thr) 223 { 224 int a = (y / (float)x) * TG22; 225 int b = (y / (float)x) * TG67; 226 227 a = min((int)abs(a), 1) + 1; 228 b = min((int)abs(b), 1); 229 230 // a = { 1, 2 } 231 // b = { 0, 1 } 232 // a * b = { 0, 1, 2 } - directions that we need ( + 3 if x ^ y < 0) 233 234 int dir3 = (a * b) & (((x ^ y) & 0x80000000) >> 31); // if a = 1, b = 1, dy ^ dx < 0 235 int dir = a * b + 2 * dir3; 236 float prev_mag = mag[(lidy + prev[dir][0]) * (GRP_SIZEX + 2) + lidx + prev[dir][1]]; 237 float next_mag = mag[(lidy + next[dir][0]) * (GRP_SIZEX + 2) + lidx + next[dir][1]] + (dir & 1); 238 239 if (mag0 > prev_mag && mag0 >= next_mag) 240 { 241 value = (mag0 > high_thr) ? 2 : 0; 242 } 243 } 244 245 storepix(value, map + mad24(gidy, map_step, mad24(gidx, (int)sizeof(int), map_offset))); 246} 247 248#elif defined WITHOUT_SOBEL 249 250/* 251 stage1_without_sobel: 252 Calc magnitudes 253 Non maxima suppression 254 Double thresholding 255*/ 256 257#define loadpix(addr) (__global short *)(addr) 258#define storepix(val, addr) *(__global int *)(addr) = (int)(val) 259 260#ifdef L2GRAD 261#define dist(x, y) ((int)(x) * (x) + (int)(y) * (y)) 262#else 263#define dist(x, y) (abs(x) + abs(y)) 264#endif 265 266__constant int prev[4][2] = { 267 { 0, -1 }, 268 { -1, -1 }, 269 { -1, 0 }, 270 { -1, 1 } 271}; 272 273__constant int next[4][2] = { 274 { 0, 1 }, 275 { 1, 1 }, 276 { 1, 0 }, 277 { 1, -1 } 278}; 279 280__kernel void stage1_without_sobel(__global const uchar *dxptr, int dx_step, int dx_offset, 281 __global const uchar *dyptr, int dy_step, int dy_offset, 282 __global uchar *map, int map_step, int map_offset, int rows, int cols, 283 int low_thr, int high_thr) 284{ 285 int start_x = get_group_id(0) * GRP_SIZEX; 286 int start_y = get_group_id(1) * GRP_SIZEY; 287 288 int lidx = get_local_id(0); 289 int lidy = get_local_id(1); 290 291 __local int mag[(GRP_SIZEX + 2) * (GRP_SIZEY + 2)]; 292 __local short2 sigma[(GRP_SIZEX + 2) * (GRP_SIZEY + 2)]; 293 294#pragma unroll 295 for (int i = lidx + lidy * GRP_SIZEX; i < (GRP_SIZEX + 2) * (GRP_SIZEY + 2); i += GRP_SIZEX * GRP_SIZEY) 296 { 297 int x = clamp(start_x - 1 + i % (GRP_SIZEX + 2), 0, cols - 1); 298 int y = clamp(start_y - 1 + i / (GRP_SIZEX + 2), 0, rows - 1); 299 300 int dx_index = mad24(y, dx_step, mad24(x, cn * (int)sizeof(short), dx_offset)); 301 int dy_index = mad24(y, dy_step, mad24(x, cn * (int)sizeof(short), dy_offset)); 302 303 __global short *dx = loadpix(dxptr + dx_index); 304 __global short *dy = loadpix(dyptr + dy_index); 305 306 int mag0 = dist(dx[0], dy[0]); 307#if cn > 1 308 short cdx = dx[0], cdy = dy[0]; 309#pragma unroll 310 for (int j = 1; j < cn; ++j) 311 { 312 int mag1 = dist(dx[j], dy[j]); 313 if (mag1 > mag0) 314 { 315 mag0 = mag1; 316 cdx = dx[j]; 317 cdy = dy[j]; 318 } 319 } 320 dx[0] = cdx; 321 dy[0] = cdy; 322#endif 323 mag[i] = mag0; 324 sigma[i] = (short2)(dx[0], dy[0]); 325 } 326 327 barrier(CLK_LOCAL_MEM_FENCE); 328 329 int gidx = get_global_id(0); 330 int gidy = get_global_id(1); 331 332 if (gidx >= cols || gidy >= rows) 333 return; 334 335 lidx++; 336 lidy++; 337 338 int mag0 = mag[lidx + lidy * (GRP_SIZEX + 2)]; 339 short x = (sigma[lidx + lidy * (GRP_SIZEX + 2)]).x; 340 short y = (sigma[lidx + lidy * (GRP_SIZEX + 2)]).y; 341 342 int value = 1; 343 if (mag0 > low_thr) 344 { 345 int a = (y / (float)x) * TG22; 346 int b = (y / (float)x) * TG67; 347 348 a = min((int)abs(a), 1) + 1; 349 b = min((int)abs(b), 1); 350 351 int dir3 = (a * b) & (((x ^ y) & 0x80000000) >> 31); 352 int dir = a * b + 2 * dir3; 353 int prev_mag = mag[(lidy + prev[dir][0]) * (GRP_SIZEX + 2) + lidx + prev[dir][1]]; 354 int next_mag = mag[(lidy + next[dir][0]) * (GRP_SIZEX + 2) + lidx + next[dir][1]] + (dir & 1); 355 356 if (mag0 > prev_mag && mag0 >= next_mag) 357 { 358 value = (mag0 > high_thr) ? 2 : 0; 359 } 360 } 361 362 storepix(value, map + mad24(gidy, map_step, mad24(gidx, (int)sizeof(int), map_offset))); 363} 364 365#undef TG22 366#undef CANNY_SHIFT 367 368#elif defined STAGE2 369/* 370 stage2: 371 hysteresis (add edges labeled 0 if they are connected with an edge labeled 2) 372*/ 373 374#define loadpix(addr) *(__global int *)(addr) 375#define storepix(val, addr) *(__global int *)(addr) = (int)(val) 376#define LOCAL_TOTAL (LOCAL_X*LOCAL_Y) 377#define l_stack_size (4*LOCAL_TOTAL) 378#define p_stack_size 8 379 380__constant short move_dir[2][8] = { 381 { -1, -1, -1, 0, 0, 1, 1, 1 }, 382 { -1, 0, 1, -1, 1, -1, 0, 1 } 383}; 384 385__kernel void stage2_hysteresis(__global uchar *map_ptr, int map_step, int map_offset, int rows, int cols) 386{ 387 map_ptr += map_offset; 388 389 int x = get_global_id(0); 390 int y = get_global_id(1) * PIX_PER_WI; 391 392 int lid = get_local_id(0) + get_local_id(1) * LOCAL_X; 393 394 __local ushort2 l_stack[l_stack_size]; 395 __local int l_counter; 396 397 if (lid == 0) 398 l_counter = 0; 399 barrier(CLK_LOCAL_MEM_FENCE); 400 401 if (x < cols) 402 { 403 __global uchar* map = map_ptr + mad24(y, map_step, x * (int)sizeof(int)); 404 405 #pragma unroll 406 for (int cy = 0; cy < PIX_PER_WI; ++cy) 407 { 408 if (y < rows) 409 { 410 int type = loadpix(map); 411 if (type == 2) 412 { 413 l_stack[atomic_inc(&l_counter)] = (ushort2)(x, y); 414 } 415 416 y++; 417 map += map_step; 418 } 419 } 420 } 421 barrier(CLK_LOCAL_MEM_FENCE); 422 423 ushort2 p_stack[p_stack_size]; 424 int p_counter = 0; 425 426 while(l_counter != 0) 427 { 428 int mod = l_counter % LOCAL_TOTAL; 429 int pix_per_thr = l_counter / LOCAL_TOTAL + ((lid < mod) ? 1 : 0); 430 431 for (int i = 0; i < pix_per_thr; ++i) 432 { 433 int index = atomic_dec(&l_counter) - 1; 434 if (index < 0) 435 continue; 436 ushort2 pos = l_stack[ index ]; 437 438 #pragma unroll 439 for (int j = 0; j < 8; ++j) 440 { 441 ushort posx = pos.x + move_dir[0][j]; 442 ushort posy = pos.y + move_dir[1][j]; 443 if (posx < 0 || posy < 0 || posx >= cols || posy >= rows) 444 continue; 445 __global uchar *addr = map_ptr + mad24(posy, map_step, posx * (int)sizeof(int)); 446 int type = loadpix(addr); 447 if (type == 0) 448 { 449 p_stack[p_counter++] = (ushort2)(posx, posy); 450 storepix(2, addr); 451 } 452 } 453 } 454 barrier(CLK_LOCAL_MEM_FENCE); 455 if (l_counter < 0) 456 l_counter = 0; 457 barrier(CLK_LOCAL_MEM_FENCE); 458 459 while (p_counter > 0) 460 { 461 l_stack[ atomic_inc(&l_counter) ] = p_stack[--p_counter]; 462 } 463 barrier(CLK_LOCAL_MEM_FENCE); 464 } 465} 466 467#elif defined GET_EDGES 468 469// Get the edge result. egde type of value 2 will be marked as an edge point and set to 255. Otherwise 0. 470// map edge type mappings 471// dst edge output 472 473__kernel void getEdges(__global const uchar *mapptr, int map_step, int map_offset, int rows, int cols, 474 __global uchar *dst, int dst_step, int dst_offset) 475{ 476 int x = get_global_id(0); 477 int y = get_global_id(1) * PIX_PER_WI; 478 479 if (x < cols) 480 { 481 int map_index = mad24(map_step, y, mad24(x, (int)sizeof(int), map_offset)); 482 int dst_index = mad24(dst_step, y, x + dst_offset); 483 484 #pragma unroll 485 for (int cy = 0; cy < PIX_PER_WI; ++cy) 486 { 487 if (y < rows) 488 { 489 __global const int * map = (__global const int *)(mapptr + map_index); 490 dst[dst_index] = (uchar)(-(map[0] >> 1)); 491 492 y++; 493 map_index += map_step; 494 dst_index += dst_step; 495 } 496 } 497 } 498} 499 500#endif 501