// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.

///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro for border type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////

// EXTRAPOLATE(x, maxV) rewrites the integer lvalue x in place so that it can be used
// as an index along an axis of length maxV. The border mode is chosen at build time:
// exactly one BORDER_* macro must be defined, otherwise compilation stops with #error.
// Every non-constant mode is guaranteed to leave x inside [0, maxV-1] (for maxV >= 1),
// including for work-items of a tile that overhangs the image.

#ifdef BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
// Coordinates are left untouched; ELEM() substitutes the constant for out-of-range ones.
#define EXTRAPOLATE(x, maxV)
#elif defined BORDER_REPLICATE
//aaaaaa|abcdefgh|hhhhhhh
#define EXTRAPOLATE(x, maxV) \
    { \
        (x) = clamp((x), 0, (maxV)-1); \
    }
#elif defined BORDER_WRAP
//cdefgh|abcdefgh|abcdefg
// Reduce first, then shift into the non-negative range. The single-step form
// ((x)+(maxV)) % (maxV) becomes negative as soon as x < -maxV, i.e. when the
// axis is shorter than the kernel radius.
#define EXTRAPOLATE(x, maxV) \
    { \
        (x) = ( (x) % (maxV) + (maxV) ) % (maxV); \
    }
#elif defined BORDER_REFLECT
//fedcba|abcdefgh|hgfedcb
// The min/max expression mirrors correctly for -maxV <= x <= 2*maxV-1. Outside that
// range it would produce an index below 0 or above maxV-1, so the result is clamped;
// inside the range the clamp is the identity.
#define EXTRAPOLATE(x, maxV) \
    { \
        (x) = clamp( min( mad24((maxV)-1,2,-(x))+1 , max((x),-(x)-1) ), 0, (maxV)-1 ); \
    }
#elif defined BORDER_REFLECT_101 || defined BORDER_REFLECT101
//gfedcb|abcdefgh|gfedcba
// The min/max expression mirrors correctly for -(maxV-1) <= x <= 2*maxV-2. Outside that
// range it would produce an out-of-range index, so the result is clamped; inside the
// range the clamp is the identity.
#define EXTRAPOLATE(x, maxV) \
    { \
        (x) = clamp( min( mad24((maxV)-1,2,-(x)), max((x),-(x)) ), 0, (maxV)-1 ); \
    }
#else
#error No extrapolation method
#endif

// SRC(_x,_y): element _x of source row _y, converted to float. Rows are src_step bytes
// apart (Src is a byte pointer) and elements have the build-time type SRCTYPE.
#define SRC(_x,_y) convert_float(((global SRCTYPE*)(Src+(_y)*src_step))[(_x)])

// ELEM(_x,_y,r_edge,t_edge,const_v): source value to store in the local tile.
// The whole expansion is parenthesized so it is safe inside larger expressions.
#ifdef BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
// Bitwise '|' is used on the comparison results so all four tests are evaluated
// without short-circuit branches; out-of-range coordinates yield const_v.
#define ELEM(_x,_y,r_edge,t_edge,const_v) \
    ( ((_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge)) ? (const_v) : SRC((_x),(_y)) )
#else
#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
#endif

// DSTX/DSTY(_x,_y): float lvalue for output element (_x,_y). The offset and pitch are
// applied to the byte pointer before the cast, so both are expressed in bytes.
#define DSTX(_x,_y) (((global float*)(DstX+DstXOffset+(_y)*DstXPitch))[(_x)])
#define DSTY(_x,_y) (((global float*)(DstY+DstYOffset+(_y)*DstYPitch))[(_x)])

// INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border)
//
// Expands inside a kernel body and fills the local tile lsmem with the
// (BLK_Y + 2*kernel_border) x (BLK_X + 2*kernel_border) source window needed by the
// work-group. It expects x, y, lix, liy, lsmem, Src, src_step, srcOffsetX and
// srcOffsetY to be in scope, and declares srcX, srcY, xb and yb in the enclosing scope.
//
// Each work-item loads its own cell. Work-items with lix (resp. liy) below
// 2*kernel_border additionally load the cell BLK_X to the right (resp. BLK_Y below),
// and those satisfying both conditions load the diagonal corner cell.
// NOTE(review): this only covers the whole tile if the work-group is BLK_X x BLK_Y and
// BLK_X, BLK_Y >= 2*kernel_border - confirm against the host launch code.
// The caller must issue barrier(CLK_LOCAL_MEM_FENCE) before reading lsmem.
#define INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border) \
    int srcX = x + srcOffsetX - (kernel_border); \
    int srcY = y + srcOffsetY - (kernel_border); \
    int xb = srcX; \
    int yb = srcY; \
    \
    EXTRAPOLATE(xb, (width)); \
    EXTRAPOLATE(yb, (height)); \
    lsmem[liy][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
    \
    if(lix < ((kernel_border)*2)) \
    { \
        int xb = srcX+BLK_X; \
        EXTRAPOLATE(xb,(width)); \
        lsmem[liy][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
    } \
    if(liy< ((kernel_border)*2)) \
    { \
        int yb = srcY+BLK_Y; \
        EXTRAPOLATE(yb, (height)); \
        lsmem[liy+BLK_Y][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
    } \
    if(lix<((kernel_border)*2) && liy<((kernel_border)*2)) \
    { \
        int xb = srcX+BLK_X; \
        int yb = srcY+BLK_Y; \
        EXTRAPOLATE(xb,(width)); \
        EXTRAPOLATE(yb,(height)); \
        lsmem[liy+BLK_Y][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
    }

// Parameters shared by sobel3, sobel5 and sobel7:
//   Src, src_step            source bytes and row stride in bytes (elements are SRCTYPE)
//   srcOffsetX, srcOffsetY   added to the global id to obtain source coordinates
//   DstX, DstXPitch, DstXOffset / DstY, DstYPitch, DstYOffset
//                            float outputs for the x and y derivatives; pitch and
//                            offset are in bytes
//   dstHeight, dstWidth      work-items at or beyond these extents write nothing
//   height, width            source extents used for border extrapolation
//   scale                    factor applied to both derivatives before storing
//
// In every kernel all work-items take part in the tile load and reach the barrier
// before the bounds check, so the early return cannot leave the barrier unbalanced.

// 3x3 derivatives: Sobel ([1 2 1] x [-1 0 1]) by default, Scharr ([3 10 3] x [-1 0 1])
// when SCHARR is defined.
__kernel void sobel3(__global const uchar * Src, int src_step, int srcOffsetX, int srcOffsetY,
                     __global uchar * DstX, int DstXPitch, int DstXOffset,
                     __global uchar * DstY, int DstYPitch, int DstYOffset, int dstHeight, int dstWidth,
                     int height, int width, float scale)
{
    __local float lsmem[BLK_Y+2][BLK_X+2];

    int lix = get_local_id(0);
    int liy = get_local_id(1);

    int x = (int)get_global_id(0);
    int y = (int)get_global_id(1);

    INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 1)
    barrier(CLK_LOCAL_MEM_FENCE);

    if( x >= dstWidth || y >=dstHeight ) return;

    // 3x3 neighbourhood: u = upper row, m = middle row, b = bottom row.
    // The centre and the middle-row centre column are not needed by either filter.
    float u1 = lsmem[liy][lix];
    float u2 = lsmem[liy][lix+1];
    float u3 = lsmem[liy][lix+2];

    float m1 = lsmem[liy+1][lix];
    float m3 = lsmem[liy+1][lix+2];

    float b1 = lsmem[liy+2][lix];
    float b2 = lsmem[liy+2][lix+1];
    float b3 = lsmem[liy+2][lix+2];

    // calc and store dx and dy
#ifdef SCHARR
    DSTX(x,y) = mad(10.0f, m3 - m1, 3.0f * (u3 - u1 + b3 - b1)) * scale;
    DSTY(x,y) = mad(10.0f, b2 - u2, 3.0f * (b1 - u1 + b3 - u3)) * scale;
#else
    DSTX(x,y) = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1) * scale;
    DSTY(x,y) = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3) * scale;
#endif
}

// 5x5 Sobel derivatives: smoothing [1 4 6 4 1] combined with derivative [-1 -2 0 2 1].
__kernel void sobel5(__global const uchar * Src, int src_step, int srcOffsetX, int srcOffsetY,
                     __global uchar * DstX, int DstXPitch, int DstXOffset,
                     __global uchar * DstY, int DstYPitch, int DstYOffset, int dstHeight, int dstWidth,
                     int height, int width, float scale)
{
    __local float lsmem[BLK_Y+4][BLK_X+4];

    int lix = get_local_id(0);
    int liy = get_local_id(1);

    int x = (int)get_global_id(0);
    int y = (int)get_global_id(1);

    INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 2)
    barrier(CLK_LOCAL_MEM_FENCE);

    if( x >= dstWidth || y >=dstHeight ) return;

    // 5x5 neighbourhood, rows from top to bottom: t, u, m, l, b.
    // m3 (the centre) has zero weight in both filters and is not loaded.
    float t1 = lsmem[liy][lix];
    float t2 = lsmem[liy][lix+1];
    float t3 = lsmem[liy][lix+2];
    float t4 = lsmem[liy][lix+3];
    float t5 = lsmem[liy][lix+4];

    float u1 = lsmem[liy+1][lix];
    float u2 = lsmem[liy+1][lix+1];
    float u3 = lsmem[liy+1][lix+2];
    float u4 = lsmem[liy+1][lix+3];
    float u5 = lsmem[liy+1][lix+4];

    float m1 = lsmem[liy+2][lix];
    float m2 = lsmem[liy+2][lix+1];
    float m4 = lsmem[liy+2][lix+3];
    float m5 = lsmem[liy+2][lix+4];

    float l1 = lsmem[liy+3][lix];
    float l2 = lsmem[liy+3][lix+1];
    float l3 = lsmem[liy+3][lix+2];
    float l4 = lsmem[liy+3][lix+3];
    float l5 = lsmem[liy+3][lix+4];

    float b1 = lsmem[liy+4][lix];
    float b2 = lsmem[liy+4][lix+1];
    float b3 = lsmem[liy+4][lix+2];
    float b4 = lsmem[liy+4][lix+3];
    float b5 = lsmem[liy+4][lix+4];

    // calc and store dx and dy
    DSTX(x,y) = scale *
        mad(12.0f, m4 - m2,
            mad(6.0f, m5 - m1,
                mad(8.0f, u4 - u2 + l4 - l2,
                    mad(4.0f, u5 - u1 + l5 - l1,
                        mad(2.0f, t4 - t2 + b4 - b2, t5 - t1 + b5 - b1 )
                        )
                    )
                )
            );

    DSTY(x,y) = scale *
        mad(12.0f, l3 - u3,
            mad(6.0f, b3 - t3,
                mad(8.0f, l2 - u2 + l4 - u4,
                    mad(4.0f, b2 - t2 + b4 - t4,
                        mad(2.0f, l1 - u1 + l5 - u5, b1 - t1 + b5 - t5 )
                        )
                    )
                )
            );
}

// 7x7 Sobel derivatives: smoothing [1 6 15 20 15 6 1] combined with
// derivative [-1 -4 -5 0 5 4 1].
__kernel void sobel7(__global const uchar * Src, int src_step, int srcOffsetX, int srcOffsetY,
                     __global uchar * DstX, int DstXPitch, int DstXOffset,
                     __global uchar * DstY, int DstYPitch, int DstYOffset, int dstHeight, int dstWidth,
                     int height, int width, float scale)
{
    __local float lsmem[BLK_Y+6][BLK_X+6];

    int lix = get_local_id(0);
    int liy = get_local_id(1);

    int x = (int)get_global_id(0);
    int y = (int)get_global_id(1);

    INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 3)
    barrier(CLK_LOCAL_MEM_FENCE);

    if( x >= dstWidth || y >=dstHeight ) return;

    // 7x7 neighbourhood, rows from top to bottom: tt, t, u, m, l, b, bb.
    // m4 (the centre) has zero weight in both filters and is not loaded.
    float tt1 = lsmem[liy][lix];
    float tt2 = lsmem[liy][lix+1];
    float tt3 = lsmem[liy][lix+2];
    float tt4 = lsmem[liy][lix+3];
    float tt5 = lsmem[liy][lix+4];
    float tt6 = lsmem[liy][lix+5];
    float tt7 = lsmem[liy][lix+6];

    float t1 = lsmem[liy+1][lix];
    float t2 = lsmem[liy+1][lix+1];
    float t3 = lsmem[liy+1][lix+2];
    float t4 = lsmem[liy+1][lix+3];
    float t5 = lsmem[liy+1][lix+4];
    float t6 = lsmem[liy+1][lix+5];
    float t7 = lsmem[liy+1][lix+6];

    float u1 = lsmem[liy+2][lix];
    float u2 = lsmem[liy+2][lix+1];
    float u3 = lsmem[liy+2][lix+2];
    float u4 = lsmem[liy+2][lix+3];
    float u5 = lsmem[liy+2][lix+4];
    float u6 = lsmem[liy+2][lix+5];
    float u7 = lsmem[liy+2][lix+6];

    float m1 = lsmem[liy+3][lix];
    float m2 = lsmem[liy+3][lix+1];
    float m3 = lsmem[liy+3][lix+2];
    float m5 = lsmem[liy+3][lix+4];
    float m6 = lsmem[liy+3][lix+5];
    float m7 = lsmem[liy+3][lix+6];

    float l1 = lsmem[liy+4][lix];
    float l2 = lsmem[liy+4][lix+1];
    float l3 = lsmem[liy+4][lix+2];
    float l4 = lsmem[liy+4][lix+3];
    float l5 = lsmem[liy+4][lix+4];
    float l6 = lsmem[liy+4][lix+5];
    float l7 = lsmem[liy+4][lix+6];

    float b1 = lsmem[liy+5][lix];
    float b2 = lsmem[liy+5][lix+1];
    float b3 = lsmem[liy+5][lix+2];
    float b4 = lsmem[liy+5][lix+3];
    float b5 = lsmem[liy+5][lix+4];
    float b6 = lsmem[liy+5][lix+5];
    float b7 = lsmem[liy+5][lix+6];

    float bb1 = lsmem[liy+6][lix];
    float bb2 = lsmem[liy+6][lix+1];
    float bb3 = lsmem[liy+6][lix+2];
    float bb4 = lsmem[liy+6][lix+3];
    float bb5 = lsmem[liy+6][lix+4];
    float bb6 = lsmem[liy+6][lix+5];
    float bb7 = lsmem[liy+6][lix+6];

    // calc and store dx and dy
    DSTX(x,y) = scale *
        mad(100.0f, m5 - m3,
            mad(80.0f, m6 - m2,
                mad(20.0f, m7 - m1,
                    mad(75.0f, u5 - u3 + l5 - l3,
                        mad(60.0f, u6 - u2 + l6 - l2,
                            mad(15.0f, u7 - u1 + l7 - l1,
                                mad(30.0f, t5 - t3 + b5 - b3,
                                    mad(24.0f, t6 - t2 + b6 - b2,
                                        mad(6.0f, t7 - t1 + b7 - b1,
                                            mad(5.0f, tt5 - tt3 + bb5 - bb3,
                                                mad(4.0f, tt6 - tt2 + bb6 - bb2, tt7 - tt1 + bb7 - bb1 )
                                                )
                                            )
                                        )
                                    )
                                )
                            )
                        )
                    )
                )
            );

    DSTY(x,y) = scale *
        mad(100.0f, l4 - u4,
            mad(80.0f, b4 - t4,
                mad(20.0f, bb4 - tt4,
                    mad(75.0f, l5 - u5 + l3 - u3,
                        mad(60.0f, b5 - t5 + b3 - t3,
                            mad(15.0f, bb5 - tt5 + bb3 - tt3,
                                mad(30.0f, l6 - u6 + l2 - u2,
                                    mad(24.0f, b6 - t6 + b2 - t2,
                                        mad(6.0f, bb6 - tt6 + bb2 - tt2,
                                            mad(5.0f, l7 - u7 + l1 - u1,
                                                mad(4.0f, b7 - t7 + b1 - t1, bb7 - tt7 + bb1 - tt1 )
                                                )
                                            )
                                        )
                                    )
                                )
                            )
                        )
                    )
                )
            );
}