1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #if !defined CUDA_DISABLER 44 45 #include "opencv2/core/cuda/common.hpp" 46 #include "opencv2/core/cuda/vec_traits.hpp" 47 #include "opencv2/core/cuda/vec_math.hpp" 48 #include "opencv2/core/cuda/limits.hpp" 49 50 namespace cv { namespace cuda { namespace device 51 { 52 namespace mog 53 { 54 /////////////////////////////////////////////////////////////// 55 // Utility 56 cvt(uchar val)57 __device__ __forceinline__ float cvt(uchar val) 58 { 59 return val; 60 } cvt(const uchar3 & val)61 __device__ __forceinline__ float3 cvt(const uchar3& val) 62 { 63 return make_float3(val.x, val.y, val.z); 64 } cvt(const uchar4 & val)65 __device__ __forceinline__ float4 cvt(const uchar4& val) 66 { 67 return make_float4(val.x, val.y, val.z, val.w); 68 } 69 sqr(float val)70 __device__ __forceinline__ float sqr(float val) 71 { 72 return val * val; 73 } sqr(const float3 & val)74 __device__ __forceinline__ float sqr(const float3& val) 75 { 76 return val.x * val.x + val.y * val.y + val.z * val.z; 77 } sqr(const float4 & val)78 __device__ __forceinline__ float sqr(const float4& val) 79 { 80 return val.x * val.x + val.y * val.y + val.z * val.z; 81 } 82 sum(float val)83 __device__ __forceinline__ float sum(float val) 84 { 85 return val; 86 } sum(const float3 & val)87 __device__ __forceinline__ float sum(const float3& val) 88 { 89 return val.x + val.y + val.z; 90 } sum(const float4 & val)91 __device__ __forceinline__ float sum(const float4& val) 92 { 93 return val.x + val.y + val.z; 94 } 95 clamp(float var,float learningRate,float diff,float minVar)96 __device__ __forceinline__ float clamp(float var, float learningRate, float diff, float minVar) 97 { 98 return ::fmaxf(var + learningRate * (diff * diff - var), minVar); 99 } clamp(const float3 & var,float learningRate,const float3 & diff,float minVar)100 __device__ __forceinline__ float3 clamp(const float3& var, float learningRate, const float3& diff, float minVar) 101 { 102 return make_float3(::fmaxf(var.x + learningRate * (diff.x * diff.x - var.x), minVar), 103 ::fmaxf(var.y + learningRate * (diff.y * diff.y - var.y), minVar), 104 ::fmaxf(var.z + learningRate * (diff.z * diff.z - var.z), minVar)); 105 } clamp(const float4 & var,float learningRate,const float4 & diff,float minVar)106 __device__ __forceinline__ float4 clamp(const float4& var, float learningRate, const float4& diff, float minVar) 107 { 108 return make_float4(::fmaxf(var.x + learningRate * (diff.x * diff.x - var.x), minVar), 109 ::fmaxf(var.y + learningRate * (diff.y * diff.y - var.y), minVar), 110 ::fmaxf(var.z + learningRate * (diff.z * diff.z - var.z), minVar), 111 0.0f); 112 } 113 114 /////////////////////////////////////////////////////////////// 115 // MOG without learning 116 117 template <typename SrcT, typename WorkT> mog_withoutLearning(const PtrStepSz<SrcT> frame,PtrStepb fgmask,const PtrStepf gmm_weight,const PtrStep<WorkT> gmm_mean,const PtrStep<WorkT> gmm_var,const int nmixtures,const float varThreshold,const float backgroundRatio)118 __global__ void mog_withoutLearning(const PtrStepSz<SrcT> frame, PtrStepb fgmask, 119 const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, const PtrStep<WorkT> gmm_var, 120 const int nmixtures, const float varThreshold, const float backgroundRatio) 121 { 122 const int x = blockIdx.x * blockDim.x + threadIdx.x; 123 const int y = blockIdx.y * blockDim.y + threadIdx.y; 124 125 if (x >= frame.cols || y >= frame.rows) 126 return; 127 128 WorkT pix = cvt(frame(y, x)); 129 130 int kHit = -1; 131 int kForeground = -1; 132 133 for (int k = 0; k < nmixtures; ++k) 134 { 135 if (gmm_weight(k * frame.rows + y, x) < numeric_limits<float>::epsilon()) 136 break; 137 138 WorkT mu = gmm_mean(k * frame.rows + y, x); 139 WorkT var = gmm_var(k * frame.rows + y, x); 140 141 WorkT diff = pix - mu; 142 143 if (sqr(diff) < varThreshold * sum(var)) 144 { 145 kHit = k; 146 break; 147 } 148 } 149 150 if (kHit >= 0) 151 { 152 float wsum = 0.0f; 153 for (int k = 0; k < nmixtures; ++k) 154 { 155 wsum += gmm_weight(k * frame.rows + y, x); 156 157 if (wsum > backgroundRatio) 158 { 159 kForeground = k + 1; 160 break; 161 } 162 } 163 } 164 165 fgmask(y, x) = (uchar) (-(kHit < 0 || kHit >= kForeground)); 166 } 167 168 template <typename SrcT, typename WorkT> mog_withoutLearning_caller(PtrStepSzb frame,PtrStepSzb fgmask,PtrStepSzf weight,PtrStepSzb mean,PtrStepSzb var,int nmixtures,float varThreshold,float backgroundRatio,cudaStream_t stream)169 void mog_withoutLearning_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb var, 170 int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream) 171 { 172 dim3 block(32, 8); 173 dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); 174 175 cudaSafeCall( cudaFuncSetCacheConfig(mog_withoutLearning<SrcT, WorkT>, cudaFuncCachePreferL1) ); 176 177 mog_withoutLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, 178 weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<WorkT>) var, 179 nmixtures, varThreshold, backgroundRatio); 180 181 cudaSafeCall( cudaGetLastError() ); 182 183 if (stream == 0) 184 cudaSafeCall( cudaDeviceSynchronize() ); 185 } 186 187 /////////////////////////////////////////////////////////////// 188 // MOG with learning 189 190 template <typename SrcT, typename WorkT> mog_withLearning(const PtrStepSz<SrcT> frame,PtrStepb fgmask,PtrStepf gmm_weight,PtrStepf gmm_sortKey,PtrStep<WorkT> gmm_mean,PtrStep<WorkT> gmm_var,const int nmixtures,const float varThreshold,const float backgroundRatio,const float learningRate,const float minVar)191 __global__ void mog_withLearning(const PtrStepSz<SrcT> frame, PtrStepb fgmask, 192 PtrStepf gmm_weight, PtrStepf gmm_sortKey, PtrStep<WorkT> gmm_mean, PtrStep<WorkT> gmm_var, 193 const int nmixtures, const float varThreshold, const float backgroundRatio, const float learningRate, const float minVar) 194 { 195 const float w0 = 0.05f; 196 const float sk0 = w0 / (30.0f * 0.5f * 2.0f); 197 const float var0 = 30.0f * 0.5f * 30.0f * 0.5f * 4.0f; 198 199 const int x = blockIdx.x * blockDim.x + threadIdx.x; 200 const int y = blockIdx.y * blockDim.y + threadIdx.y; 201 202 if (x >= frame.cols || y >= frame.rows) 203 return; 204 205 WorkT pix = cvt(frame(y, x)); 206 207 float wsum = 0.0f; 208 int kHit = -1; 209 int kForeground = -1; 210 211 int k = 0; 212 for (; k < nmixtures; ++k) 213 { 214 float w = gmm_weight(k * frame.rows + y, x); 215 wsum += w; 216 217 if (w < numeric_limits<float>::epsilon()) 218 break; 219 220 WorkT mu = gmm_mean(k * frame.rows + y, x); 221 WorkT var = gmm_var(k * frame.rows + y, x); 222 223 WorkT diff = pix - mu; 224 225 if (sqr(diff) < varThreshold * sum(var)) 226 { 227 wsum -= w; 228 float dw = learningRate * (1.0f - w); 229 230 var = clamp(var, learningRate, diff, minVar); 231 232 float sortKey_prev = w / ::sqrtf(sum(var)); 233 gmm_sortKey(k * frame.rows + y, x) = sortKey_prev; 234 235 float weight_prev = w + dw; 236 gmm_weight(k * frame.rows + y, x) = weight_prev; 237 238 WorkT mean_prev = mu + learningRate * diff; 239 gmm_mean(k * frame.rows + y, x) = mean_prev; 240 241 WorkT var_prev = var; 242 gmm_var(k * frame.rows + y, x) = var_prev; 243 244 int k1 = k - 1; 245 246 if (k1 >= 0) 247 { 248 float sortKey_next = gmm_sortKey(k1 * frame.rows + y, x); 249 float weight_next = gmm_weight(k1 * frame.rows + y, x); 250 WorkT mean_next = gmm_mean(k1 * frame.rows + y, x); 251 WorkT var_next = gmm_var(k1 * frame.rows + y, x); 252 253 for (; sortKey_next < sortKey_prev && k1 >= 0; --k1) 254 { 255 gmm_sortKey(k1 * frame.rows + y, x) = sortKey_prev; 256 gmm_sortKey((k1 + 1) * frame.rows + y, x) = sortKey_next; 257 258 gmm_weight(k1 * frame.rows + y, x) = weight_prev; 259 gmm_weight((k1 + 1) * frame.rows + y, x) = weight_next; 260 261 gmm_mean(k1 * frame.rows + y, x) = mean_prev; 262 gmm_mean((k1 + 1) * frame.rows + y, x) = mean_next; 263 264 gmm_var(k1 * frame.rows + y, x) = var_prev; 265 gmm_var((k1 + 1) * frame.rows + y, x) = var_next; 266 267 sortKey_prev = sortKey_next; 268 sortKey_next = k1 > 0 ? gmm_sortKey((k1 - 1) * frame.rows + y, x) : 0.0f; 269 270 weight_prev = weight_next; 271 weight_next = k1 > 0 ? gmm_weight((k1 - 1) * frame.rows + y, x) : 0.0f; 272 273 mean_prev = mean_next; 274 mean_next = k1 > 0 ? gmm_mean((k1 - 1) * frame.rows + y, x) : VecTraits<WorkT>::all(0.0f); 275 276 var_prev = var_next; 277 var_next = k1 > 0 ? gmm_var((k1 - 1) * frame.rows + y, x) : VecTraits<WorkT>::all(0.0f); 278 } 279 } 280 281 kHit = k1 + 1; 282 break; 283 } 284 } 285 286 if (kHit < 0) 287 { 288 // no appropriate gaussian mixture found at all, remove the weakest mixture and create a new one 289 kHit = k = ::min(k, nmixtures - 1); 290 wsum += w0 - gmm_weight(k * frame.rows + y, x); 291 292 gmm_weight(k * frame.rows + y, x) = w0; 293 gmm_mean(k * frame.rows + y, x) = pix; 294 gmm_var(k * frame.rows + y, x) = VecTraits<WorkT>::all(var0); 295 gmm_sortKey(k * frame.rows + y, x) = sk0; 296 } 297 else 298 { 299 for( ; k < nmixtures; k++) 300 wsum += gmm_weight(k * frame.rows + y, x); 301 } 302 303 float wscale = 1.0f / wsum; 304 wsum = 0; 305 for (k = 0; k < nmixtures; ++k) 306 { 307 float w = gmm_weight(k * frame.rows + y, x); 308 wsum += w *= wscale; 309 310 gmm_weight(k * frame.rows + y, x) = w; 311 gmm_sortKey(k * frame.rows + y, x) *= wscale; 312 313 if (wsum > backgroundRatio && kForeground < 0) 314 kForeground = k + 1; 315 } 316 317 fgmask(y, x) = (uchar)(-(kHit >= kForeground)); 318 } 319 320 template <typename SrcT, typename WorkT> mog_withLearning_caller(PtrStepSzb frame,PtrStepSzb fgmask,PtrStepSzf weight,PtrStepSzf sortKey,PtrStepSzb mean,PtrStepSzb var,int nmixtures,float varThreshold,float backgroundRatio,float learningRate,float minVar,cudaStream_t stream)321 void mog_withLearning_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, 322 int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar, 323 cudaStream_t stream) 324 { 325 dim3 block(32, 8); 326 dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); 327 328 cudaSafeCall( cudaFuncSetCacheConfig(mog_withLearning<SrcT, WorkT>, cudaFuncCachePreferL1) ); 329 330 mog_withLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, 331 weight, sortKey, (PtrStepSz<WorkT>) mean, (PtrStepSz<WorkT>) var, 332 nmixtures, varThreshold, backgroundRatio, learningRate, minVar); 333 334 cudaSafeCall( cudaGetLastError() ); 335 336 if (stream == 0) 337 cudaSafeCall( cudaDeviceSynchronize() ); 338 } 339 340 /////////////////////////////////////////////////////////////// 341 // MOG 342 mog_gpu(PtrStepSzb frame,int cn,PtrStepSzb fgmask,PtrStepSzf weight,PtrStepSzf sortKey,PtrStepSzb mean,PtrStepSzb var,int nmixtures,float varThreshold,float learningRate,float backgroundRatio,float noiseSigma,cudaStream_t stream)343 void mog_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma, cudaStream_t stream) 344 { 345 typedef void (*withoutLearning_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream); 346 typedef void (*withLearning_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar, cudaStream_t stream); 347 348 static const withoutLearning_t withoutLearning[] = 349 { 350 0, mog_withoutLearning_caller<uchar, float>, 0, mog_withoutLearning_caller<uchar3, float3>, mog_withoutLearning_caller<uchar4, float4> 351 }; 352 static const withLearning_t withLearning[] = 353 { 354 0, mog_withLearning_caller<uchar, float>, 0, mog_withLearning_caller<uchar3, float3>, mog_withLearning_caller<uchar4, float4> 355 }; 356 357 const float minVar = noiseSigma * noiseSigma; 358 359 if (learningRate > 0.0f) 360 withLearning[cn](frame, fgmask, weight, sortKey, mean, var, nmixtures, varThreshold, backgroundRatio, learningRate, minVar, stream); 361 else 362 withoutLearning[cn](frame, fgmask, weight, mean, var, nmixtures, varThreshold, backgroundRatio, stream); 363 } 364 365 template <typename WorkT, typename OutT> getBackgroundImage(const PtrStepf gmm_weight,const PtrStep<WorkT> gmm_mean,PtrStepSz<OutT> dst,const int nmixtures,const float backgroundRatio)366 __global__ void getBackgroundImage(const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStepSz<OutT> dst, const int nmixtures, const float backgroundRatio) 367 { 368 const int x = blockIdx.x * blockDim.x + threadIdx.x; 369 const int y = blockIdx.y * blockDim.y + threadIdx.y; 370 371 if (x >= dst.cols || y >= dst.rows) 372 return; 373 374 WorkT meanVal = VecTraits<WorkT>::all(0.0f); 375 float totalWeight = 0.0f; 376 377 for (int mode = 0; mode < nmixtures; ++mode) 378 { 379 float weight = gmm_weight(mode * dst.rows + y, x); 380 381 WorkT mean = gmm_mean(mode * dst.rows + y, x); 382 meanVal = meanVal + weight * mean; 383 384 totalWeight += weight; 385 386 if(totalWeight > backgroundRatio) 387 break; 388 } 389 390 meanVal = meanVal * (1.f / totalWeight); 391 392 dst(y, x) = saturate_cast<OutT>(meanVal); 393 } 394 395 template <typename WorkT, typename OutT> getBackgroundImage_caller(PtrStepSzf weight,PtrStepSzb mean,PtrStepSzb dst,int nmixtures,float backgroundRatio,cudaStream_t stream)396 void getBackgroundImage_caller(PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream) 397 { 398 dim3 block(32, 8); 399 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); 400 401 cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage<WorkT, OutT>, cudaFuncCachePreferL1) ); 402 403 getBackgroundImage<WorkT, OutT><<<grid, block, 0, stream>>>(weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst, nmixtures, backgroundRatio); 404 cudaSafeCall( cudaGetLastError() ); 405 406 if (stream == 0) 407 cudaSafeCall( cudaDeviceSynchronize() ); 408 } 409 getBackgroundImage_gpu(int cn,PtrStepSzf weight,PtrStepSzb mean,PtrStepSzb dst,int nmixtures,float backgroundRatio,cudaStream_t stream)410 void getBackgroundImage_gpu(int cn, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream) 411 { 412 typedef void (*func_t)(PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream); 413 414 static const func_t funcs[] = 415 { 416 0, getBackgroundImage_caller<float, uchar>, 0, getBackgroundImage_caller<float3, uchar3>, getBackgroundImage_caller<float4, uchar4> 417 }; 418 419 funcs[cn](weight, mean, dst, nmixtures, backgroundRatio, stream); 420 } 421 } 422 }}} 423 424 425 #endif /* CUDA_DISABLER */ 426