1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #if !defined CUDA_DISABLER
44 
45 #include "opencv2/core/cuda/common.hpp"
46 #include "opencv2/core/cuda/vec_traits.hpp"
47 #include "opencv2/core/cuda/vec_math.hpp"
48 #include "opencv2/core/cuda/limits.hpp"
49 
50 namespace cv { namespace cuda { namespace device
51 {
52     namespace mog2
53     {
54         ///////////////////////////////////////////////////////////////
55         // Utility
56 
cvt(uchar val)57         __device__ __forceinline__ float cvt(uchar val)
58         {
59             return val;
60         }
cvt(const uchar3 & val)61         __device__ __forceinline__ float3 cvt(const uchar3& val)
62         {
63             return make_float3(val.x, val.y, val.z);
64         }
cvt(const uchar4 & val)65         __device__ __forceinline__ float4 cvt(const uchar4& val)
66         {
67             return make_float4(val.x, val.y, val.z, val.w);
68         }
69 
sqr(float val)70         __device__ __forceinline__ float sqr(float val)
71         {
72             return val * val;
73         }
sqr(const float3 & val)74         __device__ __forceinline__ float sqr(const float3& val)
75         {
76             return val.x * val.x + val.y * val.y + val.z * val.z;
77         }
sqr(const float4 & val)78         __device__ __forceinline__ float sqr(const float4& val)
79         {
80             return val.x * val.x + val.y * val.y + val.z * val.z;
81         }
82 
sum(float val)83         __device__ __forceinline__ float sum(float val)
84         {
85             return val;
86         }
sum(const float3 & val)87         __device__ __forceinline__ float sum(const float3& val)
88         {
89             return val.x + val.y + val.z;
90         }
sum(const float4 & val)91         __device__ __forceinline__ float sum(const float4& val)
92         {
93             return val.x + val.y + val.z;
94         }
95 
96         template <class Ptr2D>
swap(Ptr2D & ptr,int x,int y,int k,int rows)97         __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows)
98         {
99             typename Ptr2D::elem_type val = ptr(k * rows + y, x);
100             ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);
101             ptr((k + 1) * rows + y, x) = val;
102         }
103 
104         ///////////////////////////////////////////////////////////////
105         // MOG2
106 
107         __constant__ int           c_nmixtures;
108         __constant__ float         c_Tb;
109         __constant__ float         c_TB;
110         __constant__ float         c_Tg;
111         __constant__ float         c_varInit;
112         __constant__ float         c_varMin;
113         __constant__ float         c_varMax;
114         __constant__ float         c_tau;
115         __constant__ unsigned char c_shadowVal;
116 
loadConstants(int nmixtures,float Tb,float TB,float Tg,float varInit,float varMin,float varMax,float tau,unsigned char shadowVal)117         void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
118         {
119             varMin = ::fminf(varMin, varMax);
120             varMax = ::fmaxf(varMin, varMax);
121 
122             cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
123             cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
124             cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
125             cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
126             cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
127             cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
128             cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
129             cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
130             cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
131         }
132 
133         template <bool detectShadows, typename SrcT, typename WorkT>
mog2(const PtrStepSz<SrcT> frame,PtrStepb fgmask,PtrStepb modesUsed,PtrStepf gmm_weight,PtrStepf gmm_variance,PtrStep<WorkT> gmm_mean,const float alphaT,const float alpha1,const float prune)134         __global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
135                              PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
136                              const float alphaT, const float alpha1, const float prune)
137         {
138             const int x = blockIdx.x * blockDim.x + threadIdx.x;
139             const int y = blockIdx.y * blockDim.y + threadIdx.y;
140 
141             if (x >= frame.cols || y >= frame.rows)
142                 return;
143 
144             WorkT pix = cvt(frame(y, x));
145 
146             //calculate distances to the modes (+ sort)
147             //here we need to go in descending order!!!
148 
149             bool background = false; // true - the pixel classified as background
150 
151             //internal:
152 
153             bool fitsPDF = false; //if it remains zero a new GMM mode will be added
154 
155             int nmodes = modesUsed(y, x);
156             int nNewModes = nmodes; //current number of modes in GMM
157 
158             float totalWeight = 0.0f;
159 
160             //go through all modes
161 
162             for (int mode = 0; mode < nmodes; ++mode)
163             {
164                 //need only weight if fit is found
165                 float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;
166                 int swap_count = 0;
167                 //fit not found yet
168                 if (!fitsPDF)
169                 {
170                     //check if it belongs to some of the remaining modes
171                     float var = gmm_variance(mode * frame.rows + y, x);
172 
173                     WorkT mean = gmm_mean(mode * frame.rows + y, x);
174 
175                     //calculate difference and distance
176                     WorkT diff = mean - pix;
177                     float dist2 = sqr(diff);
178 
179                     //background? - Tb - usually larger than Tg
180                     if (totalWeight < c_TB && dist2 < c_Tb * var)
181                         background = true;
182 
183                     //check fit
184                     if (dist2 < c_Tg * var)
185                     {
186                         //belongs to the mode
187                         fitsPDF = true;
188 
189                         //update distribution
190 
191                         //update weight
192                         weight += alphaT;
193                         float k = alphaT / weight;
194 
195                         //update mean
196                         gmm_mean(mode * frame.rows + y, x) = mean - k * diff;
197 
198                         //update variance
199                         float varnew = var + k * (dist2 - var);
200 
201                         //limit the variance
202                         varnew = ::fmaxf(varnew, c_varMin);
203                         varnew = ::fminf(varnew, c_varMax);
204 
205                         gmm_variance(mode * frame.rows + y, x) = varnew;
206 
207                         //sort
208                         //all other weights are at the same place and
209                         //only the matched (iModes) is higher -> just find the new place for it
210 
211                         for (int i = mode; i > 0; --i)
212                         {
213                             //check one up
214                             if (weight < gmm_weight((i - 1) * frame.rows + y, x))
215                                 break;
216 
217                             swap_count++;
218                             //swap one up
219                             swap(gmm_weight, x, y, i - 1, frame.rows);
220                             swap(gmm_variance, x, y, i - 1, frame.rows);
221                             swap(gmm_mean, x, y, i - 1, frame.rows);
222                         }
223 
224                         //belongs to the mode - bFitsPDF becomes 1
225                     }
226                 } // !fitsPDF
227 
228                 //check prune
229                 if (weight < -prune)
230                 {
231                     weight = 0.0f;
232                     nmodes--;
233                 }
234 
235                 gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value
236                 totalWeight += weight;
237             }
238 
239             //renormalize weights
240 
241             totalWeight = 1.f / totalWeight;
242             for (int mode = 0; mode < nmodes; ++mode)
243                 gmm_weight(mode * frame.rows + y, x) *= totalWeight;
244 
245             nmodes = nNewModes;
246 
247             //make new mode if needed and exit
248 
249             if (!fitsPDF)
250             {
251                 // replace the weakest or add a new one
252                 int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++;
253 
254                 if (nmodes == 1)
255                     gmm_weight(mode * frame.rows + y, x) = 1.f;
256                 else
257                 {
258                     gmm_weight(mode * frame.rows + y, x) = alphaT;
259 
260                     // renormalize all other weights
261 
262                     for (int i = 0; i < nmodes - 1; ++i)
263                         gmm_weight(i * frame.rows + y, x) *= alpha1;
264                 }
265 
266                 // init
267 
268                 gmm_mean(mode * frame.rows + y, x) = pix;
269                 gmm_variance(mode * frame.rows + y, x) = c_varInit;
270 
271                 //sort
272                 //find the new place for it
273 
274                 for (int i = nmodes - 1; i > 0; --i)
275                 {
276                     // check one up
277                     if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
278                         break;
279 
280                     //swap one up
281                     swap(gmm_weight, x, y, i - 1, frame.rows);
282                     swap(gmm_variance, x, y, i - 1, frame.rows);
283                     swap(gmm_mean, x, y, i - 1, frame.rows);
284                 }
285             }
286 
287             //set the number of modes
288             modesUsed(y, x) = nmodes;
289 
290             bool isShadow = false;
291             if (detectShadows && !background)
292             {
293                 float tWeight = 0.0f;
294 
295                 // check all the components  marked as background:
296                 for (int mode = 0; mode < nmodes; ++mode)
297                 {
298                     WorkT mean = gmm_mean(mode * frame.rows + y, x);
299 
300                     WorkT pix_mean = pix * mean;
301 
302                     float numerator = sum(pix_mean);
303                     float denominator = sqr(mean);
304 
305                     // no division by zero allowed
306                     if (denominator == 0)
307                         break;
308 
309                     // if tau < a < 1 then also check the color distortion
310                     if (numerator <= denominator && numerator >= c_tau * denominator)
311                     {
312                         float a = numerator / denominator;
313 
314                         WorkT dD = a * mean - pix;
315 
316                         if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a)
317                         {
318                             isShadow = true;
319                             break;
320                         }
321                     };
322 
323                     tWeight += gmm_weight(mode * frame.rows + y, x);
324                     if (tWeight > c_TB)
325                         break;
326                 }
327             }
328 
329             fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
330         }
331 
332         template <typename SrcT, typename WorkT>
mog2_caller(PtrStepSzb frame,PtrStepSzb fgmask,PtrStepSzb modesUsed,PtrStepSzf weight,PtrStepSzf variance,PtrStepSzb mean,float alphaT,float prune,bool detectShadows,cudaStream_t stream)333         void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
334                          float alphaT, float prune, bool detectShadows, cudaStream_t stream)
335         {
336             dim3 block(32, 8);
337             dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
338 
339             const float alpha1 = 1.0f - alphaT;
340 
341             if (detectShadows)
342             {
343                 cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
344 
345                 mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
346                                                                     weight, variance, (PtrStepSz<WorkT>) mean,
347                                                                     alphaT, alpha1, prune);
348             }
349             else
350             {
351                 cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
352 
353                 mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
354                                                                     weight, variance, (PtrStepSz<WorkT>) mean,
355                                                                     alphaT, alpha1, prune);
356             }
357 
358             cudaSafeCall( cudaGetLastError() );
359 
360             if (stream == 0)
361                 cudaSafeCall( cudaDeviceSynchronize() );
362         }
363 
mog2_gpu(PtrStepSzb frame,int cn,PtrStepSzb fgmask,PtrStepSzb modesUsed,PtrStepSzf weight,PtrStepSzf variance,PtrStepSzb mean,float alphaT,float prune,bool detectShadows,cudaStream_t stream)364         void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
365                       float alphaT, float prune, bool detectShadows, cudaStream_t stream)
366         {
367             typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
368 
369             static const func_t funcs[] =
370             {
371                 0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>
372             };
373 
374             funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream);
375         }
376 
377         template <typename WorkT, typename OutT>
getBackgroundImage2(const PtrStepSzb modesUsed,const PtrStepf gmm_weight,const PtrStep<WorkT> gmm_mean,PtrStep<OutT> dst)378         __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst)
379         {
380             const int x = blockIdx.x * blockDim.x + threadIdx.x;
381             const int y = blockIdx.y * blockDim.y + threadIdx.y;
382 
383             if (x >= modesUsed.cols || y >= modesUsed.rows)
384                 return;
385 
386             int nmodes = modesUsed(y, x);
387 
388             WorkT meanVal = VecTraits<WorkT>::all(0.0f);
389             float totalWeight = 0.0f;
390 
391             for (int mode = 0; mode < nmodes; ++mode)
392             {
393                 float weight = gmm_weight(mode * modesUsed.rows + y, x);
394 
395                 WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
396                 meanVal = meanVal + weight * mean;
397 
398                 totalWeight += weight;
399 
400                 if(totalWeight > c_TB)
401                     break;
402             }
403 
404             meanVal = meanVal * (1.f / totalWeight);
405 
406             dst(y, x) = saturate_cast<OutT>(meanVal);
407         }
408 
409         template <typename WorkT, typename OutT>
getBackgroundImage2_caller(PtrStepSzb modesUsed,PtrStepSzf weight,PtrStepSzb mean,PtrStepSzb dst,cudaStream_t stream)410         void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
411         {
412             dim3 block(32, 8);
413             dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
414 
415             cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
416 
417             getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
418             cudaSafeCall( cudaGetLastError() );
419 
420             if (stream == 0)
421                 cudaSafeCall( cudaDeviceSynchronize() );
422         }
423 
getBackgroundImage2_gpu(int cn,PtrStepSzb modesUsed,PtrStepSzf weight,PtrStepSzb mean,PtrStepSzb dst,cudaStream_t stream)424         void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
425         {
426             typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
427 
428             static const func_t funcs[] =
429             {
430                 0, getBackgroundImage2_caller<float, uchar>, 0, getBackgroundImage2_caller<float3, uchar3>, getBackgroundImage2_caller<float4, uchar4>
431             };
432 
433             funcs[cn](modesUsed, weight, mean, dst, stream);
434         }
435     }
436 }}}
437 
438 
439 #endif /* CUDA_DISABLER */
440