1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #if !defined CUDA_DISABLER
44 
45 #include "opencv2/core/cuda/common.hpp"
46 #include "opencv2/core/cuda/vec_traits.hpp"
47 #include "opencv2/core/cuda/vec_math.hpp"
48 #include "opencv2/core/cuda/limits.hpp"
49 
50 namespace cv { namespace cuda { namespace device
51 {
52     namespace mog
53     {
54         ///////////////////////////////////////////////////////////////
55         // Utility
56 
cvt(uchar val)57         __device__ __forceinline__ float cvt(uchar val)
58         {
59             return val;
60         }
cvt(const uchar3 & val)61         __device__ __forceinline__ float3 cvt(const uchar3& val)
62         {
63             return make_float3(val.x, val.y, val.z);
64         }
cvt(const uchar4 & val)65         __device__ __forceinline__ float4 cvt(const uchar4& val)
66         {
67             return make_float4(val.x, val.y, val.z, val.w);
68         }
69 
sqr(float val)70         __device__ __forceinline__ float sqr(float val)
71         {
72             return val * val;
73         }
sqr(const float3 & val)74         __device__ __forceinline__ float sqr(const float3& val)
75         {
76             return val.x * val.x + val.y * val.y + val.z * val.z;
77         }
sqr(const float4 & val)78         __device__ __forceinline__ float sqr(const float4& val)
79         {
80             return val.x * val.x + val.y * val.y + val.z * val.z;
81         }
82 
sum(float val)83         __device__ __forceinline__ float sum(float val)
84         {
85             return val;
86         }
sum(const float3 & val)87         __device__ __forceinline__ float sum(const float3& val)
88         {
89             return val.x + val.y + val.z;
90         }
sum(const float4 & val)91         __device__ __forceinline__ float sum(const float4& val)
92         {
93             return val.x + val.y + val.z;
94         }
95 
clamp(float var,float learningRate,float diff,float minVar)96         __device__ __forceinline__ float clamp(float var, float learningRate, float diff, float minVar)
97         {
98              return ::fmaxf(var + learningRate * (diff * diff - var), minVar);
99         }
clamp(const float3 & var,float learningRate,const float3 & diff,float minVar)100         __device__ __forceinline__ float3 clamp(const float3& var, float learningRate, const float3& diff, float minVar)
101         {
102              return make_float3(::fmaxf(var.x + learningRate * (diff.x * diff.x - var.x), minVar),
103                                 ::fmaxf(var.y + learningRate * (diff.y * diff.y - var.y), minVar),
104                                 ::fmaxf(var.z + learningRate * (diff.z * diff.z - var.z), minVar));
105         }
clamp(const float4 & var,float learningRate,const float4 & diff,float minVar)106         __device__ __forceinline__ float4 clamp(const float4& var, float learningRate, const float4& diff, float minVar)
107         {
108              return make_float4(::fmaxf(var.x + learningRate * (diff.x * diff.x - var.x), minVar),
109                                 ::fmaxf(var.y + learningRate * (diff.y * diff.y - var.y), minVar),
110                                 ::fmaxf(var.z + learningRate * (diff.z * diff.z - var.z), minVar),
111                                 0.0f);
112         }
113 
114         ///////////////////////////////////////////////////////////////
115         // MOG without learning
116 
117         template <typename SrcT, typename WorkT>
mog_withoutLearning(const PtrStepSz<SrcT> frame,PtrStepb fgmask,const PtrStepf gmm_weight,const PtrStep<WorkT> gmm_mean,const PtrStep<WorkT> gmm_var,const int nmixtures,const float varThreshold,const float backgroundRatio)118         __global__ void mog_withoutLearning(const PtrStepSz<SrcT> frame, PtrStepb fgmask,
119                                             const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, const PtrStep<WorkT> gmm_var,
120                                             const int nmixtures, const float varThreshold, const float backgroundRatio)
121         {
122             const int x = blockIdx.x * blockDim.x + threadIdx.x;
123             const int y = blockIdx.y * blockDim.y + threadIdx.y;
124 
125             if (x >= frame.cols || y >= frame.rows)
126                 return;
127 
128             WorkT pix = cvt(frame(y, x));
129 
130             int kHit = -1;
131             int kForeground = -1;
132 
133             for (int k = 0; k < nmixtures; ++k)
134             {
135                 if (gmm_weight(k * frame.rows + y, x) < numeric_limits<float>::epsilon())
136                     break;
137 
138                 WorkT mu = gmm_mean(k * frame.rows + y, x);
139                 WorkT var = gmm_var(k * frame.rows + y, x);
140 
141                 WorkT diff = pix - mu;
142 
143                 if (sqr(diff) < varThreshold * sum(var))
144                 {
145                     kHit = k;
146                     break;
147                 }
148             }
149 
150             if (kHit >= 0)
151             {
152                 float wsum = 0.0f;
153                 for (int k = 0; k < nmixtures; ++k)
154                 {
155                     wsum += gmm_weight(k * frame.rows + y, x);
156 
157                     if (wsum > backgroundRatio)
158                     {
159                         kForeground = k + 1;
160                         break;
161                     }
162                 }
163             }
164 
165             fgmask(y, x) = (uchar) (-(kHit < 0 || kHit >= kForeground));
166         }
167 
168         template <typename SrcT, typename WorkT>
mog_withoutLearning_caller(PtrStepSzb frame,PtrStepSzb fgmask,PtrStepSzf weight,PtrStepSzb mean,PtrStepSzb var,int nmixtures,float varThreshold,float backgroundRatio,cudaStream_t stream)169         void mog_withoutLearning_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb var,
170                                         int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream)
171         {
172             dim3 block(32, 8);
173             dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
174 
175             cudaSafeCall( cudaFuncSetCacheConfig(mog_withoutLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
176 
177             mog_withoutLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask,
178                                                                          weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<WorkT>) var,
179                                                                          nmixtures, varThreshold, backgroundRatio);
180 
181             cudaSafeCall( cudaGetLastError() );
182 
183             if (stream == 0)
184                 cudaSafeCall( cudaDeviceSynchronize() );
185         }
186 
187         ///////////////////////////////////////////////////////////////
188         // MOG with learning
189 
190         template <typename SrcT, typename WorkT>
mog_withLearning(const PtrStepSz<SrcT> frame,PtrStepb fgmask,PtrStepf gmm_weight,PtrStepf gmm_sortKey,PtrStep<WorkT> gmm_mean,PtrStep<WorkT> gmm_var,const int nmixtures,const float varThreshold,const float backgroundRatio,const float learningRate,const float minVar)191         __global__ void mog_withLearning(const PtrStepSz<SrcT> frame, PtrStepb fgmask,
192                                          PtrStepf gmm_weight, PtrStepf gmm_sortKey, PtrStep<WorkT> gmm_mean, PtrStep<WorkT> gmm_var,
193                                          const int nmixtures, const float varThreshold, const float backgroundRatio, const float learningRate, const float minVar)
194         {
195             const float w0 = 0.05f;
196             const float sk0 = w0 / (30.0f * 0.5f * 2.0f);
197             const float var0 = 30.0f * 0.5f * 30.0f * 0.5f * 4.0f;
198 
199             const int x = blockIdx.x * blockDim.x + threadIdx.x;
200             const int y = blockIdx.y * blockDim.y + threadIdx.y;
201 
202             if (x >= frame.cols || y >= frame.rows)
203                 return;
204 
205             WorkT pix = cvt(frame(y, x));
206 
207             float wsum = 0.0f;
208             int kHit = -1;
209             int kForeground = -1;
210 
211             int k = 0;
212             for (; k < nmixtures; ++k)
213             {
214                 float w = gmm_weight(k * frame.rows + y, x);
215                 wsum += w;
216 
217                 if (w < numeric_limits<float>::epsilon())
218                     break;
219 
220                 WorkT mu = gmm_mean(k * frame.rows + y, x);
221                 WorkT var = gmm_var(k * frame.rows + y, x);
222 
223                 WorkT diff = pix - mu;
224 
225                 if (sqr(diff) < varThreshold * sum(var))
226                 {
227                     wsum -= w;
228                     float dw = learningRate * (1.0f - w);
229 
230                     var = clamp(var, learningRate, diff, minVar);
231 
232                     float sortKey_prev = w / ::sqrtf(sum(var));
233                     gmm_sortKey(k * frame.rows + y, x) = sortKey_prev;
234 
235                     float weight_prev = w + dw;
236                     gmm_weight(k * frame.rows + y, x) = weight_prev;
237 
238                     WorkT mean_prev = mu + learningRate * diff;
239                     gmm_mean(k * frame.rows + y, x) = mean_prev;
240 
241                     WorkT var_prev = var;
242                     gmm_var(k * frame.rows + y, x) = var_prev;
243 
244                     int k1 = k - 1;
245 
246                     if (k1 >= 0)
247                     {
248                         float sortKey_next = gmm_sortKey(k1 * frame.rows + y, x);
249                         float weight_next = gmm_weight(k1 * frame.rows + y, x);
250                         WorkT mean_next = gmm_mean(k1 * frame.rows + y, x);
251                         WorkT var_next = gmm_var(k1 * frame.rows + y, x);
252 
253                         for (; sortKey_next < sortKey_prev && k1 >= 0; --k1)
254                         {
255                             gmm_sortKey(k1 * frame.rows + y, x) = sortKey_prev;
256                             gmm_sortKey((k1 + 1) * frame.rows + y, x) = sortKey_next;
257 
258                             gmm_weight(k1 * frame.rows + y, x) = weight_prev;
259                             gmm_weight((k1 + 1) * frame.rows + y, x) = weight_next;
260 
261                             gmm_mean(k1 * frame.rows + y, x) = mean_prev;
262                             gmm_mean((k1 + 1) * frame.rows + y, x) = mean_next;
263 
264                             gmm_var(k1 * frame.rows + y, x) = var_prev;
265                             gmm_var((k1 + 1) * frame.rows + y, x) = var_next;
266 
267                             sortKey_prev = sortKey_next;
268                             sortKey_next = k1 > 0 ? gmm_sortKey((k1 - 1) * frame.rows + y, x) : 0.0f;
269 
270                             weight_prev = weight_next;
271                             weight_next = k1 > 0 ? gmm_weight((k1 - 1) * frame.rows + y, x) : 0.0f;
272 
273                             mean_prev = mean_next;
274                             mean_next = k1 > 0 ? gmm_mean((k1 - 1) * frame.rows + y, x) : VecTraits<WorkT>::all(0.0f);
275 
276                             var_prev = var_next;
277                             var_next = k1 > 0 ? gmm_var((k1 - 1) * frame.rows + y, x) : VecTraits<WorkT>::all(0.0f);
278                         }
279                     }
280 
281                     kHit = k1 + 1;
282                     break;
283                 }
284             }
285 
286             if (kHit < 0)
287             {
288                 // no appropriate gaussian mixture found at all, remove the weakest mixture and create a new one
289                 kHit = k = ::min(k, nmixtures - 1);
290                 wsum += w0 - gmm_weight(k * frame.rows + y, x);
291 
292                 gmm_weight(k * frame.rows + y, x) = w0;
293                 gmm_mean(k * frame.rows + y, x) = pix;
294                 gmm_var(k * frame.rows + y, x) = VecTraits<WorkT>::all(var0);
295                 gmm_sortKey(k * frame.rows + y, x) = sk0;
296             }
297             else
298             {
299                 for( ; k < nmixtures; k++)
300                     wsum += gmm_weight(k * frame.rows + y, x);
301             }
302 
303             float wscale = 1.0f / wsum;
304             wsum = 0;
305             for (k = 0; k < nmixtures; ++k)
306             {
307                 float w = gmm_weight(k * frame.rows + y, x);
308                 wsum += w *= wscale;
309 
310                 gmm_weight(k * frame.rows + y, x) = w;
311                 gmm_sortKey(k * frame.rows + y, x) *= wscale;
312 
313                 if (wsum > backgroundRatio && kForeground < 0)
314                     kForeground = k + 1;
315             }
316 
317             fgmask(y, x) = (uchar)(-(kHit >= kForeground));
318         }
319 
320         template <typename SrcT, typename WorkT>
mog_withLearning_caller(PtrStepSzb frame,PtrStepSzb fgmask,PtrStepSzf weight,PtrStepSzf sortKey,PtrStepSzb mean,PtrStepSzb var,int nmixtures,float varThreshold,float backgroundRatio,float learningRate,float minVar,cudaStream_t stream)321         void mog_withLearning_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var,
322                                      int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar,
323                                      cudaStream_t stream)
324         {
325             dim3 block(32, 8);
326             dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
327 
328             cudaSafeCall( cudaFuncSetCacheConfig(mog_withLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
329 
330             mog_withLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask,
331                                                                       weight, sortKey, (PtrStepSz<WorkT>) mean, (PtrStepSz<WorkT>) var,
332                                                                       nmixtures, varThreshold, backgroundRatio, learningRate, minVar);
333 
334             cudaSafeCall( cudaGetLastError() );
335 
336             if (stream == 0)
337                 cudaSafeCall( cudaDeviceSynchronize() );
338         }
339 
340         ///////////////////////////////////////////////////////////////
341         // MOG
342 
mog_gpu(PtrStepSzb frame,int cn,PtrStepSzb fgmask,PtrStepSzf weight,PtrStepSzf sortKey,PtrStepSzb mean,PtrStepSzb var,int nmixtures,float varThreshold,float learningRate,float backgroundRatio,float noiseSigma,cudaStream_t stream)343         void mog_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma, cudaStream_t stream)
344         {
345             typedef void (*withoutLearning_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream);
346             typedef void (*withLearning_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar, cudaStream_t stream);
347 
348             static const withoutLearning_t withoutLearning[] =
349             {
350                 0, mog_withoutLearning_caller<uchar, float>, 0, mog_withoutLearning_caller<uchar3, float3>, mog_withoutLearning_caller<uchar4, float4>
351             };
352             static const withLearning_t withLearning[] =
353             {
354                 0, mog_withLearning_caller<uchar, float>, 0, mog_withLearning_caller<uchar3, float3>, mog_withLearning_caller<uchar4, float4>
355             };
356 
357             const float minVar = noiseSigma * noiseSigma;
358 
359             if (learningRate > 0.0f)
360                 withLearning[cn](frame, fgmask, weight, sortKey, mean, var, nmixtures, varThreshold, backgroundRatio, learningRate, minVar, stream);
361             else
362                 withoutLearning[cn](frame, fgmask, weight, mean, var, nmixtures, varThreshold, backgroundRatio, stream);
363         }
364 
365         template <typename WorkT, typename OutT>
getBackgroundImage(const PtrStepf gmm_weight,const PtrStep<WorkT> gmm_mean,PtrStepSz<OutT> dst,const int nmixtures,const float backgroundRatio)366         __global__ void getBackgroundImage(const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStepSz<OutT> dst, const int nmixtures, const float backgroundRatio)
367         {
368             const int x = blockIdx.x * blockDim.x + threadIdx.x;
369             const int y = blockIdx.y * blockDim.y + threadIdx.y;
370 
371             if (x >= dst.cols || y >= dst.rows)
372                 return;
373 
374             WorkT meanVal = VecTraits<WorkT>::all(0.0f);
375             float totalWeight = 0.0f;
376 
377             for (int mode = 0; mode < nmixtures; ++mode)
378             {
379                 float weight = gmm_weight(mode * dst.rows + y, x);
380 
381                 WorkT mean = gmm_mean(mode * dst.rows + y, x);
382                 meanVal = meanVal + weight * mean;
383 
384                 totalWeight += weight;
385 
386                 if(totalWeight > backgroundRatio)
387                     break;
388             }
389 
390             meanVal = meanVal * (1.f / totalWeight);
391 
392             dst(y, x) = saturate_cast<OutT>(meanVal);
393         }
394 
395         template <typename WorkT, typename OutT>
getBackgroundImage_caller(PtrStepSzf weight,PtrStepSzb mean,PtrStepSzb dst,int nmixtures,float backgroundRatio,cudaStream_t stream)396         void getBackgroundImage_caller(PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream)
397         {
398             dim3 block(32, 8);
399             dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
400 
401             cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage<WorkT, OutT>, cudaFuncCachePreferL1) );
402 
403             getBackgroundImage<WorkT, OutT><<<grid, block, 0, stream>>>(weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst, nmixtures, backgroundRatio);
404             cudaSafeCall( cudaGetLastError() );
405 
406             if (stream == 0)
407                 cudaSafeCall( cudaDeviceSynchronize() );
408         }
409 
getBackgroundImage_gpu(int cn,PtrStepSzf weight,PtrStepSzb mean,PtrStepSzb dst,int nmixtures,float backgroundRatio,cudaStream_t stream)410         void getBackgroundImage_gpu(int cn, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream)
411         {
412             typedef void (*func_t)(PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream);
413 
414             static const func_t funcs[] =
415             {
416                 0, getBackgroundImage_caller<float, uchar>, 0, getBackgroundImage_caller<float3, uchar3>, getBackgroundImage_caller<float4, uchar4>
417             };
418 
419             funcs[cn](weight, mean, dst, nmixtures, backgroundRatio, stream);
420         }
421     }
422 }}}
423 
424 
425 #endif /* CUDA_DISABLER */
426