1// This file is part of OpenCV project.
2// It is subject to the license terms in the LICENSE file found in the top-level directory
3// of this distribution and at http://opencv.org/license.html.
4
5// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
6// Third party copyrights are property of their respective owners.
7
8///////////////////////////////////////////////////////////////////////////////////////////////////
9/////////////////////////////////Macro for border type////////////////////////////////////////////
10/////////////////////////////////////////////////////////////////////////////////////////////////
11
// EXTRAPOLATE(x, maxV): rewrites the coordinate variable `x` in place so that it
// addresses a pixel inside [0, maxV), following the border mode chosen at build
// time. `x` must be a modifiable int lvalue and `maxV` the extent of that axis.
// Every non-empty variant expands to exactly one statement; terminate it with ';'.
#ifdef BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
// Nothing to remap: the coordinate is left untouched and ELEM() substitutes the
// constant for out-of-range positions.
#define EXTRAPOLATE(x, maxV)
#elif defined BORDER_REPLICATE
//aaaaaa|abcdefgh|hhhhhhh
#define EXTRAPOLATE(x, maxV) \
    do { \
        (x) = clamp((x), 0, (maxV)-1); \
    } while (0)
#elif defined BORDER_WRAP
//cdefgh|abcdefgh|abcdefg
// '%' keeps the sign of the dividend, so the remainder is taken first and then
// shifted into [0, maxV). This stays correct for x < -maxV as well.
#define EXTRAPOLATE(x, maxV) \
    do { \
        (x) = ( (x) % (maxV) + (maxV) ) % (maxV); \
    } while (0)
#elif defined BORDER_REFLECT
//fedcba|abcdefgh|hgfedcb
// The min/max expression mirrors once, which is exact for x in [-maxV, 2*maxV-1].
// The kernels fill the local tile before their bounds check, so coordinates
// further out can reach this macro; the final clamp keeps those inside the row.
#define EXTRAPOLATE(x, maxV) \
    do { \
        (x) = clamp( min( mad24((maxV)-1,2,-(x))+1 , max((x),-(x)-1) ), 0, (maxV)-1 ); \
    } while (0)
#elif defined BORDER_REFLECT_101 || defined BORDER_REFLECT101
//gfedcb|abcdefgh|gfedcba
// Mirrors once without repeating the edge pixel, exact for x in
// [-(maxV-1), 2*maxV-2]; the final clamp keeps coordinates further out in range.
#define EXTRAPOLATE(x, maxV) \
    do { \
        (x) = clamp( min( mad24((maxV)-1,2,-(x)), max((x),-(x)) ), 0, (maxV)-1 ); \
    } while (0)
#else
#error No extrapolation method
#endif
42
// SRC(_x,_y): reads element _x of source row _y and converts it to float.
// The row start is Src advanced by _y*src_step (in units of Src's own element
// type, before the cast); the column index then counts SRCTYPE elements.
#define SRC(_x,_y) convert_float(*((global SRCTYPE*)(Src+(_y)*src_step) + (_x)))
44
// ELEM(_x,_y,r_edge,t_edge,const_v): value of the source pixel at (_x,_y) as seen
// through the selected border mode. The expansion is fully parenthesized so it can
// be used as an operand inside a larger expression.
#ifdef BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
// Positions outside [0,r_edge) x [0,t_edge) yield const_v and perform no memory
// read. The four range tests are joined with bitwise '|', so all of them are
// evaluated (no short-circuit).
#define ELEM(_x,_y,r_edge,t_edge,const_v) \
    ( ( ((_x) < 0) | ((_x) >= (r_edge)) | ((_y) < 0) | ((_y) >= (t_edge)) ) ? (const_v) : SRC((_x),(_y)) )
#else
// Other border modes: the edge and constant arguments are ignored and the pixel is
// read directly, so the caller is expected to have remapped (_x,_y) with
// EXTRAPOLATE beforehand.
#define ELEM(_x,_y,r_edge,t_edge,const_v) (SRC((_x),(_y)))
#endif
51
// DSTX(_x,_y) / DSTY(_x,_y): float lvalues for element _x of row _y in the two
// output images. Offset and pitch are added to the base pointer before the cast
// to float*, i.e. in units of the base pointer's element type; _x counts floats.
#define DSTX(_x,_y) (*((global float*)(DstX+DstXOffset+(_y)*DstXPitch) + (_x)))
#define DSTY(_x,_y) (*((global float*)(DstY+DstYOffset+(_y)*DstYPitch) + (_x)))
54
// INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border)
// Copies this work-item's share of the source neighbourhood into the tile `lsmem`.
// Expects x, y, lix, liy, lsmem, srcOffsetX and srcOffsetY to be in scope, and
// declares srcX, srcY, xb and yb in the enclosing scope.
//   - every work-item stores the element at (srcX, srcY) into lsmem[liy][lix];
//   - work-items with lix < 2*kernel_border also store the element BLK_X columns
//     to the right, those with liy < 2*kernel_border the element BLK_Y rows below,
//     and those meeting both conditions the diagonal element as well.
// Coordinates go through EXTRAPOLATE/ELEM, so the border mode decides what is read.
// No barrier is issued here; the caller synchronizes before reading the tile.
#define INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border) \
    int srcX = x + srcOffsetX - (kernel_border); \
    int srcY = y + srcOffsetY - (kernel_border); \
    int xb = srcX; \
    int yb = srcY; \
    \
    EXTRAPOLATE(xb, (width)); \
    EXTRAPOLATE(yb, (height)); \
    lsmem[liy][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
    \
    { \
        const int haloSpan = (kernel_border)*2; \
        if(lix < haloSpan) \
        { \
            int xr = srcX+BLK_X; \
            EXTRAPOLATE(xr, (width)); \
            lsmem[liy][lix+BLK_X] = ELEM(xr, yb, (width), (height), (fill_const) ); \
            if(liy < haloSpan) \
            { \
                int yr = srcY+BLK_Y; \
                EXTRAPOLATE(yr, (height)); \
                lsmem[liy+BLK_Y][lix+BLK_X] = ELEM(xr, yr, (width), (height), (fill_const) ); \
            } \
        } \
        if(liy < haloSpan) \
        { \
            int yr = srcY+BLK_Y; \
            EXTRAPOLATE(yr, (height)); \
            lsmem[liy+BLK_Y][lix] = ELEM(xb, yr, (width), (height), (fill_const) ); \
        } \
    }
85
// sobel3: 3x3 horizontal and vertical derivative of the source image.
// Each work-item first helps fill a (BLK_Y+2) x (BLK_X+2) float tile of source
// pixels, then - if (x, y) lies inside dstWidth x dstHeight - writes one float to
// DstX and one to DstY, both multiplied by `scale`.
// Building with SCHARR defined selects the 3/10/3 weights instead of 1/2/1.
// NOTE(review): the tile size suggests work-groups of BLK_X x BLK_Y - confirm
// against the host-side launch code.
__kernel void sobel3(__global const uchar * Src, int src_step, int srcOffsetX, int srcOffsetY,
                     __global uchar * DstX, int DstXPitch, int DstXOffset,
                     __global uchar * DstY, int DstYPitch, int DstYOffset, int dstHeight, int dstWidth,
                     int height, int width, float scale)
{
    __local float lsmem[BLK_Y+2][BLK_X+2];

    int lix = get_local_id(0);
    int liy = get_local_id(1);

    int x = (int)get_global_id(0);
    int y = (int)get_global_id(1);

    // All work-items take part in the tile load and the barrier, including those
    // that fall outside the destination and write nothing afterwards.
    INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 1)
    barrier(CLK_LOCAL_MEM_FENCE);

    if( x < dstWidth && y < dstHeight )
    {
        // The 3x3 window has its top-left corner at lsmem[liy][lix]; the centre
        // tap is not needed by either derivative.
        const float nw = lsmem[liy][lix];
        const float n  = lsmem[liy][lix+1];
        const float ne = lsmem[liy][lix+2];

        const float w  = lsmem[liy+1][lix];
        const float e  = lsmem[liy+1][lix+2];

        const float sw = lsmem[liy+2][lix];
        const float s  = lsmem[liy+2][lix+1];
        const float se = lsmem[liy+2][lix+2];

#ifdef SCHARR
        DSTX(x,y) = mad(10.0f, e - w, 3.0f * (ne - nw + se - sw)) * scale;
        DSTY(x,y) = mad(10.0f, s - n, 3.0f * (sw - nw + se - ne)) * scale;
#else
        DSTX(x,y) = mad(2.0f, e - w, ne - nw + se - sw) * scale;
        DSTY(x,y) = mad(2.0f, s - n, sw - nw + se - ne) * scale;
#endif
    }
}
124
// sobel5: 5x5 horizontal and vertical derivative of the source image.
// Each work-item first helps fill a (BLK_Y+4) x (BLK_X+4) float tile of source
// pixels, then - if (x, y) lies inside dstWidth x dstHeight - writes one float to
// DstX and one to DstY, both multiplied by `scale`.
// The weights used are the products of (1 4 6 4 1) and (-1 -2 0 2 1).
__kernel void sobel5(__global const uchar * Src, int src_step, int srcOffsetX, int srcOffsetY,
                     __global uchar * DstX, int DstXPitch, int DstXOffset,
                     __global uchar * DstY, int DstYPitch, int DstYOffset, int dstHeight, int dstWidth,
                     int height, int width, float scale)
{
    __local float lsmem[BLK_Y+4][BLK_X+4];

    int lix = get_local_id(0);
    int liy = get_local_id(1);

    int x = (int)get_global_id(0);
    int y = (int)get_global_id(1);

    // All work-items take part in the tile load and the barrier, including those
    // that fall outside the destination and write nothing afterwards.
    INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 2)
    barrier(CLK_LOCAL_MEM_FENCE);

    if( x < dstWidth && y < dstHeight )
    {
        // rK[c] is the tap in row K, column c of the 5x5 window whose top-left
        // corner sits at lsmem[liy][lix].
        __local const float * r0 = lsmem[liy]   + lix;
        __local const float * r1 = lsmem[liy+1] + lix;
        __local const float * r2 = lsmem[liy+2] + lix;
        __local const float * r3 = lsmem[liy+3] + lix;
        __local const float * r4 = lsmem[liy+4] + lix;

        // Horizontal derivative: start from the weight-1 taps and fold in the
        // heavier ones one mad() at a time.
        float gx = r0[4] - r0[0] + r4[4] - r4[0];
        gx = mad(2.0f,  r0[3] - r0[1] + r4[3] - r4[1], gx);
        gx = mad(4.0f,  r1[4] - r1[0] + r3[4] - r3[0], gx);
        gx = mad(8.0f,  r1[3] - r1[1] + r3[3] - r3[1], gx);
        gx = mad(6.0f,  r2[4] - r2[0], gx);
        gx = mad(12.0f, r2[3] - r2[1], gx);

        // Vertical derivative: same scheme with rows and columns exchanged.
        float gy = r4[0] - r0[0] + r4[4] - r0[4];
        gy = mad(2.0f,  r3[0] - r1[0] + r3[4] - r1[4], gy);
        gy = mad(4.0f,  r4[1] - r0[1] + r4[3] - r0[3], gy);
        gy = mad(8.0f,  r3[1] - r1[1] + r3[3] - r1[3], gy);
        gy = mad(6.0f,  r4[2] - r0[2], gy);
        gy = mad(12.0f, r3[2] - r1[2], gy);

        DSTX(x,y) = scale * gx;
        DSTY(x,y) = scale * gy;
    }
}
195
// sobel7: 7x7 horizontal and vertical derivative of the source image.
// Each work-item first helps fill a (BLK_Y+6) x (BLK_X+6) float tile of source
// pixels, then - if (x, y) lies inside dstWidth x dstHeight - writes one float to
// DstX and one to DstY, both multiplied by `scale`.
// The weights used are the products of (1 6 15 20 15 6 1) and (-1 -4 -5 0 5 4 1).
__kernel void sobel7(__global const uchar * Src, int src_step, int srcOffsetX, int srcOffsetY,
                     __global uchar * DstX, int DstXPitch, int DstXOffset,
                     __global uchar * DstY, int DstYPitch, int DstYOffset, int dstHeight, int dstWidth,
                     int height, int width, float scale)
{
    __local float lsmem[BLK_Y+6][BLK_X+6];

    int lix = get_local_id(0);
    int liy = get_local_id(1);

    int x = (int)get_global_id(0);
    int y = (int)get_global_id(1);

    // All work-items take part in the tile load and the barrier, including those
    // that fall outside the destination and write nothing afterwards.
    INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 3)
    barrier(CLK_LOCAL_MEM_FENCE);

    if( x < dstWidth && y < dstHeight )
    {
        // rK[c] is the tap in row K, column c of the 7x7 window whose top-left
        // corner sits at lsmem[liy][lix].
        __local const float * r0 = lsmem[liy]   + lix;
        __local const float * r1 = lsmem[liy+1] + lix;
        __local const float * r2 = lsmem[liy+2] + lix;
        __local const float * r3 = lsmem[liy+3] + lix;
        __local const float * r4 = lsmem[liy+4] + lix;
        __local const float * r5 = lsmem[liy+5] + lix;
        __local const float * r6 = lsmem[liy+6] + lix;

        // Horizontal derivative: start from the weight-1 corner taps and fold in
        // the remaining terms one mad() at a time, outermost rows first.
        float gx = r0[6] - r0[0] + r6[6] - r6[0];
        gx = mad(4.0f,   r0[5] - r0[1] + r6[5] - r6[1], gx);
        gx = mad(5.0f,   r0[4] - r0[2] + r6[4] - r6[2], gx);
        gx = mad(6.0f,   r1[6] - r1[0] + r5[6] - r5[0], gx);
        gx = mad(24.0f,  r1[5] - r1[1] + r5[5] - r5[1], gx);
        gx = mad(30.0f,  r1[4] - r1[2] + r5[4] - r5[2], gx);
        gx = mad(15.0f,  r2[6] - r2[0] + r4[6] - r4[0], gx);
        gx = mad(60.0f,  r2[5] - r2[1] + r4[5] - r4[1], gx);
        gx = mad(75.0f,  r2[4] - r2[2] + r4[4] - r4[2], gx);
        gx = mad(20.0f,  r3[6] - r3[0], gx);
        gx = mad(80.0f,  r3[5] - r3[1], gx);
        gx = mad(100.0f, r3[4] - r3[2], gx);

        // Vertical derivative: same scheme, outermost columns first.
        float gy = r6[6] - r0[6] + r6[0] - r0[0];
        gy = mad(4.0f,   r5[6] - r1[6] + r5[0] - r1[0], gy);
        gy = mad(5.0f,   r4[6] - r2[6] + r4[0] - r2[0], gy);
        gy = mad(6.0f,   r6[5] - r0[5] + r6[1] - r0[1], gy);
        gy = mad(24.0f,  r5[5] - r1[5] + r5[1] - r1[1], gy);
        gy = mad(30.0f,  r4[5] - r2[5] + r4[1] - r2[1], gy);
        gy = mad(15.0f,  r6[4] - r0[4] + r6[2] - r0[2], gy);
        gy = mad(60.0f,  r5[4] - r1[4] + r5[2] - r1[2], gy);
        gy = mad(75.0f,  r4[4] - r2[4] + r4[2] - r2[2], gy);
        gy = mad(20.0f,  r6[3] - r0[3], gy);
        gy = mad(80.0f,  r5[3] - r1[3], gy);
        gy = mad(100.0f, r4[3] - r2[3], gy);

        DSTX(x,y) = scale * gx;
        DSTY(x,y) = scale * gy;
    }
}
316