1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/limits.hpp"
#include "opencv2/core/cuda/utility.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "fgd.hpp"

using namespace cv::cuda;
using namespace cv::cuda::device;

namespace fgd
{
    ////////////////////////////////////////////////////////////////////////////
    // calcDiffHistogram

    const unsigned int UINT_BITS = 32U;
    const int LOG_WARP_SIZE = 5;
    const int WARP_SIZE = 1 << LOG_WARP_SIZE;
#if (__CUDA_ARCH__ < 120)
    // Pre-SM12 devices have no shared-memory atomics. Each shared histogram
    // counter stores the writing lane's id in its top LOG_WARP_SIZE bits;
    // TAG_MASK extracts the count stored in the remaining low bits.
    const unsigned int TAG_MASK = (1U << (UINT_BITS - LOG_WARP_SIZE)) - 1U;
#endif

    const int MERGE_THREADBLOCK_SIZE = 256;

    // Increment bin `data` of a per-warp shared-memory histogram.
    //
    // On SM < 1.2 the update is made race-safe without atomics: the lane tags
    // the incremented value with its own id and retries until its tagged write
    // is the one that survived (only one lane per conflicting set wins each
    // round, so the loop terminates). On newer devices a plain atomicInc with
    // a wrap-around limit of UINT_MAX is used.
    __device__ __forceinline__ void addByte(unsigned int* s_WarpHist_, unsigned int data, unsigned int threadTag)
    {
    #if (__CUDA_ARCH__ < 120)
        volatile unsigned int* s_WarpHist = s_WarpHist_;
        unsigned int count;
        do
        {
            count = s_WarpHist[data] & TAG_MASK;
            count = threadTag | (count + 1);
            s_WarpHist[data] = count;
        } while (s_WarpHist[data] != count);
    #else
        atomicInc(s_WarpHist_ + data, (unsigned int)(-1));
    #endif
    }

    // Compute partial per-channel histograms of the absolute frame difference
    // |curFrame - prevFrame| over a 1D grid of PARTIAL_HISTOGRAM_COUNT blocks
    // (grid-stride loop over all pixels). Each warp accumulates into its own
    // shared sub-histogram to reduce contention, then the block merges the
    // sub-histograms and writes one HISTOGRAM_BIN_COUNT-sized partial result
    // per block into partialBuf0/1/2 (one buffer per channel).
    // HISTOGRAM_BIN_COUNT and PARTIAL_HISTOGRAM_COUNT are defined in fgd.hpp.
    template <typename PT, typename CT>
    __global__ void calcPartialHistogram(const PtrStepSz<PT> prevFrame, const PtrStep<CT> curFrame, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2)
    {
#if (__CUDA_ARCH__ < 200)
        const int HISTOGRAM_WARP_COUNT = 4;
#else
        const int HISTOGRAM_WARP_COUNT = 6;
#endif
        const int HISTOGRAM_THREADBLOCK_SIZE = HISTOGRAM_WARP_COUNT * WARP_SIZE;
        const int HISTOGRAM_THREADBLOCK_MEMORY = HISTOGRAM_WARP_COUNT * HISTOGRAM_BIN_COUNT;

        // Per-warp sub-histogram storage, one array per color channel.
        __shared__ unsigned int s_Hist0[HISTOGRAM_THREADBLOCK_MEMORY];
        __shared__ unsigned int s_Hist1[HISTOGRAM_THREADBLOCK_MEMORY];
        __shared__ unsigned int s_Hist2[HISTOGRAM_THREADBLOCK_MEMORY];

        // Clear shared memory storage for the current threadblock before processing.
        #pragma unroll
        for (int i = 0; i < (HISTOGRAM_THREADBLOCK_MEMORY / HISTOGRAM_THREADBLOCK_SIZE); ++i)
        {
            s_Hist0[threadIdx.x + i * HISTOGRAM_THREADBLOCK_SIZE] = 0;
            s_Hist1[threadIdx.x + i * HISTOGRAM_THREADBLOCK_SIZE] = 0;
            s_Hist2[threadIdx.x + i * HISTOGRAM_THREADBLOCK_SIZE] = 0;
        }
        __syncthreads();

        const unsigned int warpId = threadIdx.x >> LOG_WARP_SIZE;

        unsigned int* s_WarpHist0 = s_Hist0 + warpId * HISTOGRAM_BIN_COUNT;
        unsigned int* s_WarpHist1 = s_Hist1 + warpId * HISTOGRAM_BIN_COUNT;
        unsigned int* s_WarpHist2 = s_Hist2 + warpId * HISTOGRAM_BIN_COUNT;

        // Tag identifying this lane for the pre-SM12 lock-free increment.
        const unsigned int tag = threadIdx.x << (UINT_BITS - LOG_WARP_SIZE);
        const int dataCount = prevFrame.rows * prevFrame.cols;
        for (unsigned int pos = blockIdx.x * HISTOGRAM_THREADBLOCK_SIZE + threadIdx.x; pos < dataCount; pos += HISTOGRAM_THREADBLOCK_SIZE * PARTIAL_HISTOGRAM_COUNT)
        {
            const unsigned int y = pos / prevFrame.cols;
            const unsigned int x = pos % prevFrame.cols;

            PT prevVal = prevFrame(y, x);
            CT curVal = curFrame(y, x);

            int3 diff = make_int3(
                ::abs(curVal.x - prevVal.x),
                ::abs(curVal.y - prevVal.y),
                ::abs(curVal.z - prevVal.z)
            );

            addByte(s_WarpHist0, diff.x, tag);
            addByte(s_WarpHist1, diff.y, tag);
            addByte(s_WarpHist2, diff.z, tag);
        }
        __syncthreads();

        // Merge per-warp histograms into a per-block one and write to global memory.
        for (unsigned int bin = threadIdx.x; bin < HISTOGRAM_BIN_COUNT; bin += HISTOGRAM_THREADBLOCK_SIZE)
        {
            unsigned int sum0 = 0;
            unsigned int sum1 = 0;
            unsigned int sum2 = 0;

            #pragma unroll
            for (int i = 0; i < HISTOGRAM_WARP_COUNT; ++i)
            {
            #if (__CUDA_ARCH__ < 120)
                // Strip the lane-id tag bits before summing.
                sum0 += s_Hist0[bin + i * HISTOGRAM_BIN_COUNT] & TAG_MASK;
                sum1 += s_Hist1[bin + i * HISTOGRAM_BIN_COUNT] & TAG_MASK;
                sum2 += s_Hist2[bin + i * HISTOGRAM_BIN_COUNT] & TAG_MASK;
            #else
                sum0 += s_Hist0[bin + i * HISTOGRAM_BIN_COUNT];
                sum1 += s_Hist1[bin + i * HISTOGRAM_BIN_COUNT];
                sum2 += s_Hist2[bin + i * HISTOGRAM_BIN_COUNT];
            #endif
            }

            partialBuf0[blockIdx.x * HISTOGRAM_BIN_COUNT + bin] = sum0;
            partialBuf1[blockIdx.x * HISTOGRAM_BIN_COUNT + bin] = sum1;
            partialBuf2[blockIdx.x * HISTOGRAM_BIN_COUNT + bin] = sum2;
        }
    }

    // Reduce the PARTIAL_HISTOGRAM_COUNT partial histograms into the final
    // per-channel histograms. Launched with one block per histogram bin
    // (gridDim.x == HISTOGRAM_BIN_COUNT) and MERGE_THREADBLOCK_SIZE threads;
    // each block sums its bin across all partial buffers and thread 0 writes
    // the result.
    __global__ void mergeHistogram(const unsigned int* partialBuf0, const unsigned int* partialBuf1, const unsigned int* partialBuf2, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2)
    {
        unsigned int sum0 = 0;
        unsigned int sum1 = 0;
        unsigned int sum2 = 0;

        #pragma unroll
        for (unsigned int i = threadIdx.x; i < PARTIAL_HISTOGRAM_COUNT; i += MERGE_THREADBLOCK_SIZE)
        {
            sum0 += partialBuf0[blockIdx.x + i * HISTOGRAM_BIN_COUNT];
            sum1 += partialBuf1[blockIdx.x + i * HISTOGRAM_BIN_COUNT];
            sum2 += partialBuf2[blockIdx.x + i * HISTOGRAM_BIN_COUNT];
        }

        __shared__ unsigned int data0[MERGE_THREADBLOCK_SIZE];
        __shared__ unsigned int data1[MERGE_THREADBLOCK_SIZE];
        __shared__ unsigned int data2[MERGE_THREADBLOCK_SIZE];

        // Block-wide reduction of the three partial sums simultaneously.
        plus<unsigned int> op;
        reduce<MERGE_THREADBLOCK_SIZE>(smem_tuple(data0, data1, data2), thrust::tie(sum0, sum1, sum2), threadIdx.x, thrust::make_tuple(op, op, op));

        if(threadIdx.x == 0)
        {
            hist0[blockIdx.x] = sum0;
            hist1[blockIdx.x] = sum1;
            hist2[blockIdx.x] = sum2;
        }
    }

    // Host wrapper: compute the per-channel histograms of |curFrame - prevFrame|.
    // partialBuf0/1/2 are caller-provided scratch buffers of
    // PARTIAL_HISTOGRAM_COUNT * HISTOGRAM_BIN_COUNT elements each.
    // `cc20` selects the warp count to match the device-side __CUDA_ARCH__
    // choice in calcPartialHistogram (6 warps on CC >= 2.0, 4 otherwise) —
    // the two must stay in sync. Synchronizes when launched on the null stream.
    template <typename PT, typename CT>
    void calcDiffHistogram_gpu(PtrStepSzb prevFrame, PtrStepSzb curFrame,
                               unsigned int* hist0, unsigned int* hist1, unsigned int* hist2,
                               unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2,
                               bool cc20, cudaStream_t stream)
    {
        const int HISTOGRAM_WARP_COUNT = cc20 ? 6 : 4;
        const int HISTOGRAM_THREADBLOCK_SIZE = HISTOGRAM_WARP_COUNT * WARP_SIZE;

        calcPartialHistogram<PT, CT><<<PARTIAL_HISTOGRAM_COUNT, HISTOGRAM_THREADBLOCK_SIZE, 0, stream>>>(
                (PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame, partialBuf0, partialBuf1, partialBuf2);
        cudaSafeCall( cudaGetLastError() );

        mergeHistogram<<<HISTOGRAM_BIN_COUNT, MERGE_THREADBLOCK_SIZE, 0, stream>>>(partialBuf0, partialBuf1, partialBuf2, hist0, hist1, hist2);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    template void calcDiffHistogram_gpu<uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);
    template void calcDiffHistogram_gpu<uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);
    template void calcDiffHistogram_gpu<uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);
    template void calcDiffHistogram_gpu<uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);

    /////////////////////////////////////////////////////////////////////////
    // calcDiffThreshMask

    // Mark pixels whose per-channel frame difference exceeds the given
    // thresholds: changeMask(y, x) is set to 255 where any channel of
    // |curFrame - prevFrame| exceeds bestThres. Pixels below threshold are
    // left untouched — NOTE(review): this assumes the caller has zeroed
    // changeMask beforehand; verify at the call site.
    template <typename PT, typename CT>
    __global__ void calcDiffThreshMask(const PtrStepSz<PT> prevFrame, const PtrStep<CT> curFrame, uchar3 bestThres, PtrStepb changeMask)
    {
        const int y = blockIdx.y * blockDim.y + threadIdx.y;
        const int x = blockIdx.x * blockDim.x + threadIdx.x;

        // Guard must be >= : with '>' the first out-of-range row/column
        // (y == rows, x == cols) was processed, reading and writing past the
        // image whenever the size is not a multiple of the block size.
        if (y >= prevFrame.rows || x >= prevFrame.cols)
            return;

        PT prevVal = prevFrame(y, x);
        CT curVal = curFrame(y, x);

        int3 diff = make_int3(
            ::abs(curVal.x - prevVal.x),
            ::abs(curVal.y - prevVal.y),
            ::abs(curVal.z - prevVal.z)
        );

        if (diff.x > bestThres.x || diff.y > bestThres.y || diff.z > bestThres.z)
            changeMask(y, x) = 255;
    }

    // Host wrapper for calcDiffThreshMask. Launches one thread per pixel in
    // 32x8 blocks; synchronizes when launched on the null stream.
    template <typename PT, typename CT>
    void calcDiffThreshMask_gpu(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream)
    {
        dim3 block(32, 8);
        dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y));

        calcDiffThreshMask<PT, CT><<<grid, block, 0, stream>>>((PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame, bestThres, changeMask);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    template void calcDiffThreshMask_gpu<uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);
    template void calcDiffThreshMask_gpu<uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);
    template void calcDiffThreshMask_gpu<uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);
    template void calcDiffThreshMask_gpu<uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);

///////////////////////////////////////////////////////////////////////// 266 // bgfgClassification 267 268 __constant__ BGPixelStat c_stat; 269 setBGPixelStat(const BGPixelStat & stat)270 void setBGPixelStat(const BGPixelStat& stat) 271 { 272 cudaSafeCall( cudaMemcpyToSymbol(c_stat, &stat, sizeof(BGPixelStat)) ); 273 } 274 275 template <typename T> struct Output; 276 template <> struct Output<uchar3> 277 { makefgd::Output278 static __device__ __forceinline__ uchar3 make(uchar v0, uchar v1, uchar v2) 279 { 280 return make_uchar3(v0, v1, v2); 281 } 282 }; 283 template <> struct Output<uchar4> 284 { makefgd::Output285 static __device__ __forceinline__ uchar4 make(uchar v0, uchar v1, uchar v2) 286 { 287 return make_uchar4(v0, v1, v2, 255); 288 } 289 }; 290 291 template <typename PT, typename CT, typename OT> bgfgClassification(const PtrStepSz<PT> prevFrame,const PtrStep<CT> curFrame,const PtrStepb Ftd,const PtrStepb Fbd,PtrStepb foreground,int deltaC,int deltaCC,float alpha2,int N1c,int N1cc)292 __global__ void bgfgClassification(const PtrStepSz<PT> prevFrame, const PtrStep<CT> curFrame, 293 const PtrStepb Ftd, const PtrStepb Fbd, PtrStepb foreground, 294 int deltaC, int deltaCC, float alpha2, int N1c, int N1cc) 295 { 296 const int i = blockIdx.y * blockDim.y + threadIdx.y; 297 const int j = blockIdx.x * blockDim.x + threadIdx.x; 298 299 if (i > prevFrame.rows || j > prevFrame.cols) 300 return; 301 302 if (Fbd(i, j) || Ftd(i, j)) 303 { 304 float Pb = 0.0f; 305 float Pv = 0.0f; 306 float Pvb = 0.0f; 307 308 int val = 0; 309 310 // Is it a motion pixel? 
311 if (Ftd(i, j)) 312 { 313 if (!c_stat.is_trained_dyn_model(i, j)) 314 val = 1; 315 else 316 { 317 PT prevVal = prevFrame(i, j); 318 CT curVal = curFrame(i, j); 319 320 // Compare with stored CCt vectors: 321 for (int k = 0; k < N1cc && c_stat.PV_CC(i, j, k) > alpha2; ++k) 322 { 323 OT v1 = c_stat.V1_CC<OT>(i, j, k); 324 OT v2 = c_stat.V2_CC<OT>(i, j, k); 325 326 if (::abs(v1.x - prevVal.x) <= deltaCC && 327 ::abs(v1.y - prevVal.y) <= deltaCC && 328 ::abs(v1.z - prevVal.z) <= deltaCC && 329 ::abs(v2.x - curVal.x) <= deltaCC && 330 ::abs(v2.y - curVal.y) <= deltaCC && 331 ::abs(v2.z - curVal.z) <= deltaCC) 332 { 333 Pv += c_stat.PV_CC(i, j, k); 334 Pvb += c_stat.PVB_CC(i, j, k); 335 } 336 } 337 338 Pb = c_stat.Pbcc(i, j); 339 if (2 * Pvb * Pb <= Pv) 340 val = 1; 341 } 342 } 343 else if(c_stat.is_trained_st_model(i, j)) 344 { 345 CT curVal = curFrame(i, j); 346 347 // Compare with stored Ct vectors: 348 for (int k = 0; k < N1c && c_stat.PV_C(i, j, k) > alpha2; ++k) 349 { 350 OT v = c_stat.V_C<OT>(i, j, k); 351 352 if (::abs(v.x - curVal.x) <= deltaC && 353 ::abs(v.y - curVal.y) <= deltaC && 354 ::abs(v.z - curVal.z) <= deltaC) 355 { 356 Pv += c_stat.PV_C(i, j, k); 357 Pvb += c_stat.PVB_C(i, j, k); 358 } 359 } 360 Pb = c_stat.Pbc(i, j); 361 if (2 * Pvb * Pb <= Pv) 362 val = 1; 363 } 364 365 // Update foreground: 366 foreground(i, j) = static_cast<uchar>(val); 367 } // end if( change detection... 
368 } 369 370 template <typename PT, typename CT, typename OT> bgfgClassification_gpu(PtrStepSzb prevFrame,PtrStepSzb curFrame,PtrStepSzb Ftd,PtrStepSzb Fbd,PtrStepSzb foreground,int deltaC,int deltaCC,float alpha2,int N1c,int N1cc,cudaStream_t stream)371 void bgfgClassification_gpu(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, 372 int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream) 373 { 374 dim3 block(32, 8); 375 dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y)); 376 377 cudaSafeCall( cudaFuncSetCacheConfig(bgfgClassification<PT, CT, OT>, cudaFuncCachePreferL1) ); 378 379 bgfgClassification<PT, CT, OT><<<grid, block, 0, stream>>>((PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame, 380 Ftd, Fbd, foreground, 381 deltaC, deltaCC, alpha2, N1c, N1cc); 382 cudaSafeCall( cudaGetLastError() ); 383 384 if (stream == 0) 385 cudaSafeCall( cudaDeviceSynchronize() ); 386 } 387 388 template void bgfgClassification_gpu<uchar3, uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream); 389 template void bgfgClassification_gpu<uchar3, uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream); 390 template void bgfgClassification_gpu<uchar3, uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream); 391 template void bgfgClassification_gpu<uchar3, uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream); 392 template void 
bgfgClassification_gpu<uchar4, uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream); 393 template void bgfgClassification_gpu<uchar4, uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream); 394 template void bgfgClassification_gpu<uchar4, uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream); 395 template void bgfgClassification_gpu<uchar4, uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream); 396 397 //////////////////////////////////////////////////////////////////////////// 398 // updateBackgroundModel 399 400 template <typename PT, typename CT, typename OT, class PrevFramePtr2D, class CurFramePtr2D, class FtdPtr2D, class FbdPtr2D> updateBackgroundModel(int cols,int rows,const PrevFramePtr2D prevFrame,const CurFramePtr2D curFrame,const FtdPtr2D Ftd,const FbdPtr2D Fbd,PtrStepb foreground,PtrStep<OT> background,int deltaC,int deltaCC,float alpha1,float alpha2,float alpha3,int N1c,int N1cc,int N2c,int N2cc,float T)401 __global__ void updateBackgroundModel(int cols, int rows, const PrevFramePtr2D prevFrame, const CurFramePtr2D curFrame, const FtdPtr2D Ftd, const FbdPtr2D Fbd, 402 PtrStepb foreground, PtrStep<OT> background, 403 int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T) 404 { 405 const int i = blockIdx.y * blockDim.y + threadIdx.y; 406 const int j = blockIdx.x * blockDim.x + threadIdx.x; 407 408 if (i > rows || j > cols) 409 return; 410 411 const 
float MIN_PV = 1e-10f; 412 413 const uchar is_trained_dyn_model = c_stat.is_trained_dyn_model(i, j); 414 if (Ftd(i, j) || !is_trained_dyn_model) 415 { 416 const float alpha = is_trained_dyn_model ? alpha2 : alpha3; 417 418 float Pbcc = c_stat.Pbcc(i, j); 419 420 //update Pb 421 Pbcc *= (1.0f - alpha); 422 if (!foreground(i, j)) 423 { 424 Pbcc += alpha; 425 } 426 427 int min_dist = numeric_limits<int>::max(); 428 int indx = -1; 429 430 PT prevVal = prevFrame(i, j); 431 CT curVal = curFrame(i, j); 432 433 // Find best Vi match: 434 for (int k = 0; k < N2cc; ++k) 435 { 436 float PV_CC = c_stat.PV_CC(i, j, k); 437 if (!PV_CC) 438 break; 439 440 if (PV_CC < MIN_PV) 441 { 442 c_stat.PV_CC(i, j, k) = 0; 443 c_stat.PVB_CC(i, j, k) = 0; 444 continue; 445 } 446 447 c_stat.PV_CC(i, j, k) = PV_CC * (1.0f - alpha); 448 c_stat.PVB_CC(i, j, k) = c_stat.PVB_CC(i, j, k) * (1.0f - alpha); 449 450 OT v1 = c_stat.V1_CC<OT>(i, j, k); 451 452 int3 val1 = make_int3( 453 ::abs(v1.x - prevVal.x), 454 ::abs(v1.y - prevVal.y), 455 ::abs(v1.z - prevVal.z) 456 ); 457 458 OT v2 = c_stat.V2_CC<OT>(i, j, k); 459 460 int3 val2 = make_int3( 461 ::abs(v2.x - curVal.x), 462 ::abs(v2.y - curVal.y), 463 ::abs(v2.z - curVal.z) 464 ); 465 466 int dist = val1.x + val1.y + val1.z + val2.x + val2.y + val2.z; 467 468 if (dist < min_dist && 469 val1.x <= deltaCC && val1.y <= deltaCC && val1.z <= deltaCC && 470 val2.x <= deltaCC && val2.y <= deltaCC && val2.z <= deltaCC) 471 { 472 min_dist = dist; 473 indx = k; 474 } 475 } 476 477 if (indx < 0) 478 { 479 // Replace N2th elem in the table by new feature: 480 indx = N2cc - 1; 481 c_stat.PV_CC(i, j, indx) = alpha; 482 c_stat.PVB_CC(i, j, indx) = alpha; 483 484 //udate Vt 485 c_stat.V1_CC<OT>(i, j, indx) = Output<OT>::make(prevVal.x, prevVal.y, prevVal.z); 486 c_stat.V2_CC<OT>(i, j, indx) = Output<OT>::make(curVal.x, curVal.y, curVal.z); 487 } 488 else 489 { 490 // Update: 491 c_stat.PV_CC(i, j, indx) += alpha; 492 493 if (!foreground(i, j)) 494 { 495 
c_stat.PVB_CC(i, j, indx) += alpha; 496 } 497 } 498 499 //re-sort CCt table by Pv 500 const float PV_CC_indx = c_stat.PV_CC(i, j, indx); 501 const float PVB_CC_indx = c_stat.PVB_CC(i, j, indx); 502 const OT V1_CC_indx = c_stat.V1_CC<OT>(i, j, indx); 503 const OT V2_CC_indx = c_stat.V2_CC<OT>(i, j, indx); 504 for (int k = 0; k < indx; ++k) 505 { 506 if (c_stat.PV_CC(i, j, k) <= PV_CC_indx) 507 { 508 //shift elements 509 float Pv_tmp1; 510 float Pv_tmp2 = PV_CC_indx; 511 512 float Pvb_tmp1; 513 float Pvb_tmp2 = PVB_CC_indx; 514 515 OT v1_tmp1; 516 OT v1_tmp2 = V1_CC_indx; 517 518 OT v2_tmp1; 519 OT v2_tmp2 = V2_CC_indx; 520 521 for (int l = k; l <= indx; ++l) 522 { 523 Pv_tmp1 = c_stat.PV_CC(i, j, l); 524 c_stat.PV_CC(i, j, l) = Pv_tmp2; 525 Pv_tmp2 = Pv_tmp1; 526 527 Pvb_tmp1 = c_stat.PVB_CC(i, j, l); 528 c_stat.PVB_CC(i, j, l) = Pvb_tmp2; 529 Pvb_tmp2 = Pvb_tmp1; 530 531 v1_tmp1 = c_stat.V1_CC<OT>(i, j, l); 532 c_stat.V1_CC<OT>(i, j, l) = v1_tmp2; 533 v1_tmp2 = v1_tmp1; 534 535 v2_tmp1 = c_stat.V2_CC<OT>(i, j, l); 536 c_stat.V2_CC<OT>(i, j, l) = v2_tmp2; 537 v2_tmp2 = v2_tmp1; 538 } 539 540 break; 541 } 542 } 543 544 float sum1 = 0.0f; 545 float sum2 = 0.0f; 546 547 //check "once-off" changes 548 for (int k = 0; k < N1cc; ++k) 549 { 550 const float PV_CC = c_stat.PV_CC(i, j, k); 551 if (!PV_CC) 552 break; 553 554 sum1 += PV_CC; 555 sum2 += c_stat.PVB_CC(i, j, k); 556 } 557 558 if (sum1 > T) 559 c_stat.is_trained_dyn_model(i, j) = 1; 560 561 float diff = sum1 - Pbcc * sum2; 562 563 // Update stat table: 564 if (diff > T) 565 { 566 //new BG features are discovered 567 for (int k = 0; k < N1cc; ++k) 568 { 569 const float PV_CC = c_stat.PV_CC(i, j, k); 570 if (!PV_CC) 571 break; 572 573 c_stat.PVB_CC(i, j, k) = (PV_CC - Pbcc * c_stat.PVB_CC(i, j, k)) / (1.0f - Pbcc); 574 } 575 } 576 577 c_stat.Pbcc(i, j) = Pbcc; 578 } 579 580 // Handle "stationary" pixel: 581 if (!Ftd(i, j)) 582 { 583 const float alpha = c_stat.is_trained_st_model(i, j) ? 
alpha2 : alpha3; 584 585 float Pbc = c_stat.Pbc(i, j); 586 587 //update Pb 588 Pbc *= (1.0f - alpha); 589 if (!foreground(i, j)) 590 { 591 Pbc += alpha; 592 } 593 594 int min_dist = numeric_limits<int>::max(); 595 int indx = -1; 596 597 CT curVal = curFrame(i, j); 598 599 //find best Vi match 600 for (int k = 0; k < N2c; ++k) 601 { 602 float PV_C = c_stat.PV_C(i, j, k); 603 604 if (PV_C < MIN_PV) 605 { 606 c_stat.PV_C(i, j, k) = 0; 607 c_stat.PVB_C(i, j, k) = 0; 608 continue; 609 } 610 611 // Exponential decay of memory 612 c_stat.PV_C(i, j, k) = PV_C * (1.0f - alpha); 613 c_stat.PVB_C(i, j, k) = c_stat.PVB_C(i, j, k) * (1.0f - alpha); 614 615 OT v = c_stat.V_C<OT>(i, j, k); 616 int3 val = make_int3( 617 ::abs(v.x - curVal.x), 618 ::abs(v.y - curVal.y), 619 ::abs(v.z - curVal.z) 620 ); 621 622 int dist = val.x + val.y + val.z; 623 624 if (dist < min_dist && val.x <= deltaC && val.y <= deltaC && val.z <= deltaC) 625 { 626 min_dist = dist; 627 indx = k; 628 } 629 } 630 631 if (indx < 0) 632 { 633 //N2th elem in the table is replaced by a new features 634 indx = N2c - 1; 635 636 c_stat.PV_C(i, j, indx) = alpha; 637 c_stat.PVB_C(i, j, indx) = alpha; 638 639 //udate Vt 640 c_stat.V_C<OT>(i, j, indx) = Output<OT>::make(curVal.x, curVal.y, curVal.z); 641 } 642 else 643 { 644 //update 645 c_stat.PV_C(i, j, indx) += alpha; 646 647 if (!foreground(i, j)) 648 { 649 c_stat.PVB_C(i, j, indx) += alpha; 650 } 651 } 652 653 //re-sort Ct table by Pv 654 const float PV_C_indx = c_stat.PV_C(i, j, indx); 655 const float PVB_C_indx = c_stat.PVB_C(i, j, indx); 656 OT V_C_indx = c_stat.V_C<OT>(i, j, indx); 657 for (int k = 0; k < indx; ++k) 658 { 659 if (c_stat.PV_C(i, j, k) <= PV_C_indx) 660 { 661 //shift elements 662 float Pv_tmp1; 663 float Pv_tmp2 = PV_C_indx; 664 665 float Pvb_tmp1; 666 float Pvb_tmp2 = PVB_C_indx; 667 668 OT v_tmp1; 669 OT v_tmp2 = V_C_indx; 670 671 for (int l = k; l <= indx; ++l) 672 { 673 Pv_tmp1 = c_stat.PV_C(i, j, l); 674 c_stat.PV_C(i, j, l) = Pv_tmp2; 675 
Pv_tmp2 = Pv_tmp1; 676 677 Pvb_tmp1 = c_stat.PVB_C(i, j, l); 678 c_stat.PVB_C(i, j, l) = Pvb_tmp2; 679 Pvb_tmp2 = Pvb_tmp1; 680 681 v_tmp1 = c_stat.V_C<OT>(i, j, l); 682 c_stat.V_C<OT>(i, j, l) = v_tmp2; 683 v_tmp2 = v_tmp1; 684 } 685 686 break; 687 } 688 } 689 690 // Check "once-off" changes: 691 float sum1 = 0.0f; 692 float sum2 = 0.0f; 693 for (int k = 0; k < N1c; ++k) 694 { 695 const float PV_C = c_stat.PV_C(i, j, k); 696 if (!PV_C) 697 break; 698 699 sum1 += PV_C; 700 sum2 += c_stat.PVB_C(i, j, k); 701 } 702 703 if (sum1 > T) 704 c_stat.is_trained_st_model(i, j) = 1; 705 706 float diff = sum1 - Pbc * sum2; 707 708 // Update stat table: 709 if (diff > T) 710 { 711 //new BG features are discovered 712 for (int k = 0; k < N1c; ++k) 713 { 714 const float PV_C = c_stat.PV_C(i, j, k); 715 if (!PV_C) 716 break; 717 718 c_stat.PVB_C(i, j, k) = (PV_C - Pbc * c_stat.PVB_C(i, j, k)) / (1.0f - Pbc); 719 } 720 721 c_stat.Pbc(i, j) = 1.0f - Pbc; 722 } 723 else 724 { 725 c_stat.Pbc(i, j) = Pbc; 726 } 727 } // if !(change detection) at pixel (i,j) 728 729 // Update the reference BG image: 730 if (!foreground(i, j)) 731 { 732 CT curVal = curFrame(i, j); 733 734 if (!Ftd(i, j) && !Fbd(i, j)) 735 { 736 // Apply IIR filter: 737 OT oldVal = background(i, j); 738 739 int3 newVal = make_int3( 740 __float2int_rn(oldVal.x * (1.0f - alpha1) + curVal.x * alpha1), 741 __float2int_rn(oldVal.y * (1.0f - alpha1) + curVal.y * alpha1), 742 __float2int_rn(oldVal.z * (1.0f - alpha1) + curVal.z * alpha1) 743 ); 744 745 background(i, j) = Output<OT>::make( 746 static_cast<uchar>(newVal.x), 747 static_cast<uchar>(newVal.y), 748 static_cast<uchar>(newVal.z) 749 ); 750 } 751 else 752 { 753 background(i, j) = Output<OT>::make(curVal.x, curVal.y, curVal.z); 754 } 755 } 756 } 757 758 template <typename PT, typename CT, typename OT> 759 struct UpdateBackgroundModel 760 { callfgd::UpdateBackgroundModel761 static void call(PtrStepSz<PT> prevFrame, PtrStepSz<CT> curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, 
PtrStepSzb foreground, PtrStepSz<OT> background, 762 int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, 763 cudaStream_t stream) 764 { 765 dim3 block(32, 8); 766 dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y)); 767 768 cudaSafeCall( cudaFuncSetCacheConfig(updateBackgroundModel<PT, CT, OT, PtrStep<PT>, PtrStep<CT>, PtrStepb, PtrStepb>, cudaFuncCachePreferL1) ); 769 770 updateBackgroundModel<PT, CT, OT, PtrStep<PT>, PtrStep<CT>, PtrStepb, PtrStepb><<<grid, block, 0, stream>>>( 771 prevFrame.cols, prevFrame.rows, 772 prevFrame, curFrame, 773 Ftd, Fbd, foreground, background, 774 deltaC, deltaCC, alpha1, alpha2, alpha3, N1c, N1cc, N2c, N2cc, T); 775 cudaSafeCall( cudaGetLastError() ); 776 777 if (stream == 0) 778 cudaSafeCall( cudaDeviceSynchronize() ); 779 } 780 }; 781 782 template <typename PT, typename CT, typename OT> updateBackgroundModel_gpu(PtrStepSzb prevFrame,PtrStepSzb curFrame,PtrStepSzb Ftd,PtrStepSzb Fbd,PtrStepSzb foreground,PtrStepSzb background,int deltaC,int deltaCC,float alpha1,float alpha2,float alpha3,int N1c,int N1cc,int N2c,int N2cc,float T,cudaStream_t stream)783 void updateBackgroundModel_gpu(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, 784 int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, 785 cudaStream_t stream) 786 { 787 UpdateBackgroundModel<PT, CT, OT>::call(PtrStepSz<PT>(prevFrame), PtrStepSz<CT>(curFrame), Ftd, Fbd, foreground, PtrStepSz<OT>(background), 788 deltaC, deltaCC, alpha1, alpha2, alpha3, N1c, N1cc, N2c, N2cc, T, stream); 789 } 790 791 template void updateBackgroundModel_gpu<uchar3, uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, 
int N2c, int N2cc, float T, cudaStream_t stream); 792 template void updateBackgroundModel_gpu<uchar3, uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream); 793 template void updateBackgroundModel_gpu<uchar3, uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream); 794 template void updateBackgroundModel_gpu<uchar3, uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream); 795 template void updateBackgroundModel_gpu<uchar4, uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream); 796 template void updateBackgroundModel_gpu<uchar4, uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream); 797 template void updateBackgroundModel_gpu<uchar4, uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream); 
798 template void updateBackgroundModel_gpu<uchar4, uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream); 799 } 800 801 #endif /* CUDA_DISABLER */ 802