1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #if !defined CUDA_DISABLER
44 
45 #include "opencv2/core/cuda/common.hpp"
46 #include "opencv2/core/cuda/emulation.hpp"
47 #include "opencv2/core/cuda/dynamic_smem.hpp"
48 
49 #include "opencv2/opencv_modules.hpp"
50 
51 #ifdef HAVE_OPENCV_CUDAFILTERS
52 
53 namespace cv { namespace cuda { namespace device
54 {
55     namespace hough_circles
56     {
57         __device__ int g_counter;
58 
59         ////////////////////////////////////////////////////////////////////////
60         // circlesAccumCenters
61 
circlesAccumCenters(const unsigned int * list,const int count,const PtrStepi dx,const PtrStepi dy,PtrStepi accum,const int width,const int height,const int minRadius,const int maxRadius,const float idp)62         __global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy,
63                                             PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp)
64         {
65             const int SHIFT = 10;
66             const int ONE = 1 << SHIFT;
67 
68             const int tid = blockIdx.x * blockDim.x + threadIdx.x;
69 
70             if (tid >= count)
71                 return;
72 
73             const unsigned int val = list[tid];
74 
75             const int x = (val & 0xFFFF);
76             const int y = (val >> 16) & 0xFFFF;
77 
78             const int vx = dx(y, x);
79             const int vy = dy(y, x);
80 
81             if (vx == 0 && vy == 0)
82                 return;
83 
84             const float mag = ::sqrtf(vx * vx + vy * vy);
85 
86             const int x0 = __float2int_rn((x * idp) * ONE);
87             const int y0 = __float2int_rn((y * idp) * ONE);
88 
89             int sx = __float2int_rn((vx * idp) * ONE / mag);
90             int sy = __float2int_rn((vy * idp) * ONE / mag);
91 
92             // Step from minRadius to maxRadius in both directions of the gradient
93             for (int k1 = 0; k1 < 2; ++k1)
94             {
95                 int x1 = x0 + minRadius * sx;
96                 int y1 = y0 + minRadius * sy;
97 
98                 for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r)
99                 {
100                     const int x2 = x1 >> SHIFT;
101                     const int y2 = y1 >> SHIFT;
102 
103                     if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height)
104                         break;
105 
106                     ::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1);
107                 }
108 
109                 sx = -sx;
110                 sy = -sy;
111             }
112         }
113 
circlesAccumCenters_gpu(const unsigned int * list,int count,PtrStepi dx,PtrStepi dy,PtrStepSzi accum,int minRadius,int maxRadius,float idp)114         void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp)
115         {
116             const dim3 block(256);
117             const dim3 grid(divUp(count, block.x));
118 
119             cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
120 
121             circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp);
122             cudaSafeCall( cudaGetLastError() );
123 
124             cudaSafeCall( cudaDeviceSynchronize() );
125         }
126 
127         ////////////////////////////////////////////////////////////////////////
128         // buildCentersList
129 
buildCentersList(const PtrStepSzi accum,unsigned int * centers,const int threshold)130         __global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold)
131         {
132             const int x = blockIdx.x * blockDim.x + threadIdx.x;
133             const int y = blockIdx.y * blockDim.y + threadIdx.y;
134 
135             if (x < accum.cols - 2 && y < accum.rows - 2)
136             {
137                 const int top = accum(y, x + 1);
138 
139                 const int left = accum(y + 1, x);
140                 const int cur = accum(y + 1, x + 1);
141                 const int right = accum(y + 1, x + 2);
142 
143                 const int bottom = accum(y + 2, x + 1);
144 
145                 if (cur > threshold && cur > top && cur >= bottom && cur >  left && cur >= right)
146                 {
147                     const unsigned int val = (y << 16) | x;
148                     const int idx = ::atomicAdd(&g_counter, 1);
149                     centers[idx] = val;
150                 }
151             }
152         }
153 
buildCentersList_gpu(PtrStepSzi accum,unsigned int * centers,int threshold)154         int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold)
155         {
156             void* counterPtr;
157             cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
158 
159             cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
160 
161             const dim3 block(32, 8);
162             const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
163 
164             cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
165 
166             buildCentersList<<<grid, block>>>(accum, centers, threshold);
167             cudaSafeCall( cudaGetLastError() );
168 
169             cudaSafeCall( cudaDeviceSynchronize() );
170 
171             int totalCount;
172             cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
173 
174             return totalCount;
175         }
176 
177         ////////////////////////////////////////////////////////////////////////
178         // circlesAccumRadius
179 
circlesAccumRadius(const unsigned int * centers,const unsigned int * list,const int count,float3 * circles,const int maxCircles,const float dp,const int minRadius,const int maxRadius,const int histSize,const int threshold)180         __global__ void circlesAccumRadius(const unsigned int* centers, const unsigned int* list, const int count,
181                                            float3* circles, const int maxCircles, const float dp,
182                                            const int minRadius, const int maxRadius, const int histSize, const int threshold)
183         {
184             int* smem = DynamicSharedMem<int>();
185 
186             for (int i = threadIdx.x; i < histSize + 2; i += blockDim.x)
187                 smem[i] = 0;
188             __syncthreads();
189 
190             unsigned int val = centers[blockIdx.x];
191 
192             float cx = (val & 0xFFFF);
193             float cy = (val >> 16) & 0xFFFF;
194 
195             cx = (cx + 0.5f) * dp;
196             cy = (cy + 0.5f) * dp;
197 
198             for (int i = threadIdx.x; i < count; i += blockDim.x)
199             {
200                 val = list[i];
201 
202                 const int x = (val & 0xFFFF);
203                 const int y = (val >> 16) & 0xFFFF;
204 
205                 const float rad = ::sqrtf((cx - x) * (cx - x) + (cy - y) * (cy - y));
206                 if (rad >= minRadius && rad <= maxRadius)
207                 {
208                     const int r = __float2int_rn(rad - minRadius);
209 
210                     Emulation::smem::atomicAdd(&smem[r + 1], 1);
211                 }
212             }
213 
214             __syncthreads();
215 
216             for (int i = threadIdx.x; i < histSize; i += blockDim.x)
217             {
218                 const int curVotes = smem[i + 1];
219 
220                 if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2])
221                 {
222                     const int ind = ::atomicAdd(&g_counter, 1);
223                     if (ind < maxCircles)
224                         circles[ind] = make_float3(cx, cy, i + minRadius);
225                 }
226             }
227         }
228 
circlesAccumRadius_gpu(const unsigned int * centers,int centersCount,const unsigned int * list,int count,float3 * circles,int maxCircles,float dp,int minRadius,int maxRadius,int threshold,bool has20)229         int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
230                                    float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20)
231         {
232             void* counterPtr;
233             cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
234 
235             cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
236 
237             const dim3 block(has20 ? 1024 : 512);
238             const dim3 grid(centersCount);
239 
240             const int histSize = maxRadius - minRadius + 1;
241             size_t smemSize = (histSize + 2) * sizeof(int);
242 
243             circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
244             cudaSafeCall( cudaGetLastError() );
245 
246             cudaSafeCall( cudaDeviceSynchronize() );
247 
248             int totalCount;
249             cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
250 
251             totalCount = ::min(totalCount, maxCircles);
252 
253             return totalCount;
254         }
255     }
256 }}}
257 
258 #endif // HAVE_OPENCV_CUDAFILTERS
259 
260 #endif /* CUDA_DISABLER */
261