1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #if !defined CUDA_DISABLER 44 45 #include "opencv2/core/cuda/common.hpp" 46 #include "opencv2/core/cuda/emulation.hpp" 47 #include "opencv2/core/cuda/dynamic_smem.hpp" 48 49 #include "opencv2/opencv_modules.hpp" 50 51 #ifdef HAVE_OPENCV_CUDAFILTERS 52 53 namespace cv { namespace cuda { namespace device 54 { 55 namespace hough_circles 56 { 57 __device__ int g_counter; 58 59 //////////////////////////////////////////////////////////////////////// 60 // circlesAccumCenters 61 circlesAccumCenters(const unsigned int * list,const int count,const PtrStepi dx,const PtrStepi dy,PtrStepi accum,const int width,const int height,const int minRadius,const int maxRadius,const float idp)62 __global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy, 63 PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp) 64 { 65 const int SHIFT = 10; 66 const int ONE = 1 << SHIFT; 67 68 const int tid = blockIdx.x * blockDim.x + threadIdx.x; 69 70 if (tid >= count) 71 return; 72 73 const unsigned int val = list[tid]; 74 75 const int x = (val & 0xFFFF); 76 const int y = (val >> 16) & 0xFFFF; 77 78 const int vx = dx(y, x); 79 const int vy = dy(y, x); 80 81 if (vx == 0 && vy == 0) 82 return; 83 84 const float mag = ::sqrtf(vx * vx + vy * vy); 85 86 const int x0 = __float2int_rn((x * idp) * ONE); 87 const int y0 = __float2int_rn((y * idp) * ONE); 88 89 int sx = __float2int_rn((vx * idp) * ONE / mag); 90 int sy = __float2int_rn((vy * idp) * ONE / mag); 91 92 // Step from minRadius to maxRadius in both directions of the gradient 93 for (int k1 = 0; k1 < 2; ++k1) 94 { 95 int x1 = x0 + minRadius * sx; 96 int y1 = y0 + minRadius * sy; 97 98 for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r) 99 { 100 const int x2 = x1 >> SHIFT; 101 const int y2 = y1 >> SHIFT; 102 103 if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height) 104 break; 105 106 ::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1); 107 } 108 109 sx = -sx; 110 sy = -sy; 111 } 112 } 113 circlesAccumCenters_gpu(const unsigned int * list,int count,PtrStepi dx,PtrStepi dy,PtrStepSzi accum,int minRadius,int maxRadius,float idp)114 void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp) 115 { 116 const dim3 block(256); 117 const dim3 grid(divUp(count, block.x)); 118 119 cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) ); 120 121 circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp); 122 cudaSafeCall( cudaGetLastError() ); 123 124 cudaSafeCall( cudaDeviceSynchronize() ); 125 } 126 127 //////////////////////////////////////////////////////////////////////// 128 // buildCentersList 129 buildCentersList(const PtrStepSzi accum,unsigned int * centers,const int threshold)130 __global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold) 131 { 132 const int x = blockIdx.x * blockDim.x + threadIdx.x; 133 const int y = blockIdx.y * blockDim.y + threadIdx.y; 134 135 if (x < accum.cols - 2 && y < accum.rows - 2) 136 { 137 const int top = accum(y, x + 1); 138 139 const int left = accum(y + 1, x); 140 const int cur = accum(y + 1, x + 1); 141 const int right = accum(y + 1, x + 2); 142 143 const int bottom = accum(y + 2, x + 1); 144 145 if (cur > threshold && cur > top && cur >= bottom && cur > left && cur >= right) 146 { 147 const unsigned int val = (y << 16) | x; 148 const int idx = ::atomicAdd(&g_counter, 1); 149 centers[idx] = val; 150 } 151 } 152 } 153 buildCentersList_gpu(PtrStepSzi accum,unsigned int * centers,int threshold)154 int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold) 155 { 156 void* counterPtr; 157 cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); 158 159 cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); 160 161 const dim3 block(32, 8); 162 const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y)); 163 164 cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) ); 165 166 buildCentersList<<<grid, block>>>(accum, centers, threshold); 167 cudaSafeCall( cudaGetLastError() ); 168 169 cudaSafeCall( cudaDeviceSynchronize() ); 170 171 int totalCount; 172 cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); 173 174 return totalCount; 175 } 176 177 //////////////////////////////////////////////////////////////////////// 178 // circlesAccumRadius 179 circlesAccumRadius(const unsigned int * centers,const unsigned int * list,const int count,float3 * circles,const int maxCircles,const float dp,const int minRadius,const int maxRadius,const int histSize,const int threshold)180 __global__ void circlesAccumRadius(const unsigned int* centers, const unsigned int* list, const int count, 181 float3* circles, const int maxCircles, const float dp, 182 const int minRadius, const int maxRadius, const int histSize, const int threshold) 183 { 184 int* smem = DynamicSharedMem<int>(); 185 186 for (int i = threadIdx.x; i < histSize + 2; i += blockDim.x) 187 smem[i] = 0; 188 __syncthreads(); 189 190 unsigned int val = centers[blockIdx.x]; 191 192 float cx = (val & 0xFFFF); 193 float cy = (val >> 16) & 0xFFFF; 194 195 cx = (cx + 0.5f) * dp; 196 cy = (cy + 0.5f) * dp; 197 198 for (int i = threadIdx.x; i < count; i += blockDim.x) 199 { 200 val = list[i]; 201 202 const int x = (val & 0xFFFF); 203 const int y = (val >> 16) & 0xFFFF; 204 205 const float rad = ::sqrtf((cx - x) * (cx - x) + (cy - y) * (cy - y)); 206 if (rad >= minRadius && rad <= maxRadius) 207 { 208 const int r = __float2int_rn(rad - minRadius); 209 210 Emulation::smem::atomicAdd(&smem[r + 1], 1); 211 } 212 } 213 214 __syncthreads(); 215 216 for (int i = threadIdx.x; i < histSize; i += blockDim.x) 217 { 218 const int curVotes = smem[i + 1]; 219 220 if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2]) 221 { 222 const int ind = ::atomicAdd(&g_counter, 1); 223 if (ind < maxCircles) 224 circles[ind] = make_float3(cx, cy, i + minRadius); 225 } 226 } 227 } 228 circlesAccumRadius_gpu(const unsigned int * centers,int centersCount,const unsigned int * list,int count,float3 * circles,int maxCircles,float dp,int minRadius,int maxRadius,int threshold,bool has20)229 int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count, 230 float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20) 231 { 232 void* counterPtr; 233 cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); 234 235 cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); 236 237 const dim3 block(has20 ? 1024 : 512); 238 const dim3 grid(centersCount); 239 240 const int histSize = maxRadius - minRadius + 1; 241 size_t smemSize = (histSize + 2) * sizeof(int); 242 243 circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold); 244 cudaSafeCall( cudaGetLastError() ); 245 246 cudaSafeCall( cudaDeviceSynchronize() ); 247 248 int totalCount; 249 cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); 250 251 totalCount = ::min(totalCount, maxCircles); 252 253 return totalCount; 254 } 255 } 256 }}} 257 258 #endif // HAVE_OPENCV_CUDAFILTERS 259 260 #endif /* CUDA_DISABLER */ 261