/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/limits.hpp"

namespace cv { namespace cuda { namespace device
{
    namespace mog2
    {
        ///////////////////////////////////////////////////////////////
        // Utility

        __device__ __forceinline__ float cvt(uchar val)
        {
            return val;
        }
        __device__ __forceinline__ float3 cvt(const uchar3& val)
        {
            return make_float3(val.x, val.y, val.z);
        }
        __device__ __forceinline__ float4 cvt(const uchar4& val)
        {
            return make_float4(val.x, val.y, val.z, val.w);
        }

        __device__ __forceinline__ float sqr(float val)
        {
            return val * val;
        }
        __device__ __forceinline__ float sqr(const float3& val)
        {
            return val.x * val.x + val.y * val.y + val.z * val.z;
        }
        __device__ __forceinline__ float sqr(const float4& val)
        {
            return val.x * val.x + val.y * val.y + val.z * val.z;
        }

        __device__ __forceinline__ float sum(float val)
        {
            return val;
        }
        __device__ __forceinline__ float sum(const float3& val)
        {
            return val.x + val.y + val.z;
        }
        __device__ __forceinline__ float sum(const float4& val)
        {
            return val.x + val.y + val.z;
        }

        template <class Ptr2D>
        __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows)
        {
            typename Ptr2D::elem_type val = ptr(k * rows + y, x);
            ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);
            ptr((k + 1) * rows + y, x) = val;
        }

        ///////////////////////////////////////////////////////////////
        // MOG2

        __constant__ int           c_nmixtures;
        __constant__ float         c_Tb;
        __constant__ float         c_TB;
        __constant__ float         c_Tg;
        __constant__ float         c_varInit;
        __constant__ float         c_varMin;
        __constant__ float         c_varMax;
        __constant__ float         c_tau;
        __constant__ unsigned char c_shadowVal;

        void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
        {
            varMin = ::fminf(varMin, varMax);
            varMax = ::fmaxf(varMin, varMax);

            cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
        }

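        // Per-pixel GMM update kernel. The mixture state is stored plane-wise:
        // for an image with `rows` rows, the k-th mode of pixel (x, y) lives at
        // row (k * rows + y), column x of gmm_weight / gmm_variance / gmm_mean,
        // and modes are kept sorted by descending weight. Each thread handles one
        // pixel: it matches the pixel against the existing modes (dist2 < c_Tg * var),
        // updates the matched mode's weight, mean and variance with learning rate
        // alphaT, drops modes whose updated weight falls below -prune, and marks the
        // pixel as background if it matches a mode while the accumulated weight of
        // the stronger modes is still below c_TB.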
        template <bool detectShadows, typename SrcT, typename WorkT>
        __global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
                             PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
                             const float alphaT, const float alpha1, const float prune)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x >= frame.cols || y >= frame.rows)
                return;

            WorkT pix = cvt(frame(y, x));

            //calculate distances to the modes (+ sort)
            //here we need to go in descending order!!!

            bool background = false; // true - the pixel classified as background

            //internal:

            bool fitsPDF = false; //if it remains zero a new GMM mode will be added

            int nmodes = modesUsed(y, x);
            int nNewModes = nmodes; //current number of modes in GMM

            float totalWeight = 0.0f;

            //go through all modes

            for (int mode = 0; mode < nmodes; ++mode)
            {
                //need only weight if fit is found
                float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;
                int swap_count = 0;

                //fit not found yet
                if (!fitsPDF)
                {
                    //check if it belongs to some of the remaining modes
                    float var = gmm_variance(mode * frame.rows + y, x);

                    WorkT mean = gmm_mean(mode * frame.rows + y, x);

                    //calculate difference and distance
                    WorkT diff = mean - pix;
                    float dist2 = sqr(diff);

                    //background? - Tb - usually larger than Tg
                    if (totalWeight < c_TB && dist2 < c_Tb * var)
                        background = true;

                    //check fit
                    if (dist2 < c_Tg * var)
                    {
                        //belongs to the mode
                        fitsPDF = true;

                        //update distribution

                        //update weight
                        weight += alphaT;
                        float k = alphaT / weight;

                        //update mean
                        gmm_mean(mode * frame.rows + y, x) = mean - k * diff;

                        //update variance
                        float varnew = var + k * (dist2 - var);

                        //limit the variance
                        varnew = ::fmaxf(varnew, c_varMin);
                        varnew = ::fminf(varnew, c_varMax);

                        gmm_variance(mode * frame.rows + y, x) = varnew;

                        //sort
                        //all other weights are at the same place and
                        //only the matched (iModes) is higher -> just find the new place for it

                        for (int i = mode; i > 0; --i)
                        {
                            //check one up
                            if (weight < gmm_weight((i - 1) * frame.rows + y, x))
                                break;

                            swap_count++;

                            //swap one up
                            swap(gmm_weight, x, y, i - 1, frame.rows);
                            swap(gmm_variance, x, y, i - 1, frame.rows);
                            swap(gmm_mean, x, y, i - 1, frame.rows);
                        }

                        //belongs to the mode - bFitsPDF becomes 1
                    }
                } // !fitsPDF

                //check prune
                if (weight < -prune)
                {
                    weight = 0.0f;
                    nmodes--;
                }

                gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value
                totalWeight += weight;
            }

            //renormalize weights

            totalWeight = 1.f / totalWeight;
            for (int mode = 0; mode < nmodes; ++mode)
                gmm_weight(mode * frame.rows + y, x) *= totalWeight;

            nmodes = nNewModes;

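            // If no existing mode matched the pixel, the block below starts a new
            // mode with weight alphaT, mean equal to the pixel value and variance
            // c_varInit; when all c_nmixtures slots are in use, the weakest (last)
            // mode is overwritten instead, and the new mode is then bubbled up into
            // its weight-sorted position.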
            //make new mode if needed and exit

            if (!fitsPDF)
            {
                // replace the weakest or add a new one
                int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++;

                if (nmodes == 1)
                    gmm_weight(mode * frame.rows + y, x) = 1.f;
                else
                {
                    gmm_weight(mode * frame.rows + y, x) = alphaT;

                    // renormalize all other weights

                    for (int i = 0; i < nmodes - 1; ++i)
                        gmm_weight(i * frame.rows + y, x) *= alpha1;
                }

                // init

                gmm_mean(mode * frame.rows + y, x) = pix;
                gmm_variance(mode * frame.rows + y, x) = c_varInit;

                //sort
                //find the new place for it

                for (int i = nmodes - 1; i > 0; --i)
                {
                    // check one up
                    if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
                        break;

                    //swap one up
                    swap(gmm_weight, x, y, i - 1, frame.rows);
                    swap(gmm_variance, x, y, i - 1, frame.rows);
                    swap(gmm_mean, x, y, i - 1, frame.rows);
                }
            }

            //set the number of modes
            modesUsed(y, x) = nmodes;

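            // Shadow detection (only when detectShadows is enabled and the pixel was
            // not matched as background): project the pixel onto each background mode
            // mean. With a = <pix, mean> / <mean, mean>, the pixel is treated as a
            // shadow candidate when c_tau <= a <= 1 (darker than the mode but not too
            // dark) and the color distortion |a * mean - pix|^2 stays within
            // c_Tb * variance * a^2. Only the leading modes are examined, stopping
            // once their cumulative weight exceeds c_TB.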
            bool isShadow = false;
            if (detectShadows && !background)
            {
                float tWeight = 0.0f;

                // check all the components marked as background:
                for (int mode = 0; mode < nmodes; ++mode)
                {
                    WorkT mean = gmm_mean(mode * frame.rows + y, x);

                    WorkT pix_mean = pix * mean;

                    float numerator = sum(pix_mean);
                    float denominator = sqr(mean);

                    // no division by zero allowed
                    if (denominator == 0)
                        break;

                    // if tau < a < 1 then also check the color distortion
                    if (numerator <= denominator && numerator >= c_tau * denominator)
                    {
                        float a = numerator / denominator;

                        WorkT dD = a * mean - pix;

                        if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a)
                        {
                            isShadow = true;
                            break;
                        }
                    }

                    tWeight += gmm_weight(mode * frame.rows + y, x);
                    if (tWeight > c_TB)
                        break;
                }
            }

            fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
        }

        template <typename SrcT, typename WorkT>
        void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
                         float alphaT, float prune, bool detectShadows, cudaStream_t stream)
        {
            dim3 block(32, 8);
            dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));

            const float alpha1 = 1.0f - alphaT;

            if (detectShadows)
            {
                cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );

                mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
                                                                    weight, variance, (PtrStepSz<WorkT>) mean,
                                                                    alphaT, alpha1, prune);
            }
            else
            {
                cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );

                mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
                                                                     weight, variance, (PtrStepSz<WorkT>) mean,
                                                                     alphaT, alpha1, prune);
            }

            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
                      float alphaT, float prune, bool detectShadows, cudaStream_t stream)
        {
            typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);

            static const func_t funcs[] =
            {
                0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>
            };

            funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream);
        }

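        // Background image reconstruction: for every pixel, average the means of its
        // strongest modes weighted by their (already normalized) weights, stopping
        // once the cumulative weight exceeds c_TB, and write the saturate_cast'ed
        // result to dst.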
        template <typename WorkT, typename OutT>
        __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x >= modesUsed.cols || y >= modesUsed.rows)
                return;

            int nmodes = modesUsed(y, x);

            WorkT meanVal = VecTraits<WorkT>::all(0.0f);
            float totalWeight = 0.0f;

            for (int mode = 0; mode < nmodes; ++mode)
            {
                float weight = gmm_weight(mode * modesUsed.rows + y, x);

                WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
                meanVal = meanVal + weight * mean;

                totalWeight += weight;

                if (totalWeight > c_TB)
                    break;
            }

            meanVal = meanVal * (1.f / totalWeight);

            dst(y, x) = saturate_cast<OutT>(meanVal);
        }

        template <typename WorkT, typename OutT>
        void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
        {
            dim3 block(32, 8);
            dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));

            cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );

            getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
        {
            typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);

            static const func_t funcs[] =
            {
                0, getBackgroundImage2_caller<float, uchar>, 0, getBackgroundImage2_caller<float3, uchar3>, getBackgroundImage2_caller<float4, uchar4>
            };

            funcs[cn](modesUsed, weight, mean, dst, stream);
        }
    }
}}}

#endif /* CUDA_DISABLER */