1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #include "precomp.hpp"
44 
45 using namespace cv;
46 using namespace cv::cuda;
47 
48 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
49 
create(int,double,bool,int,int,int,double,int)50 Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int, double, bool, int, int, int, double, int) { throw_no_cuda(); return Ptr<BroxOpticalFlow>(); }
51 
52 #else
53 
54 #define MIN_SIZE 32
55 
56 // CUDA resize() is fast, but it differs from the CPU analog. Disabling this flag
57 // leads to an inefficient code. It's for debug purposes only.
58 #define ENABLE_CUDA_RESIZE 1
59 
60 namespace cv { namespace cuda { namespace device { namespace optflow_farneback
61 {
62     void setPolynomialExpansionConsts(
63             int polyN, const float *g, const float *xg, const float *xxg,
64             float ig11, float ig03, float ig33, float ig55);
65 
66     void polynomialExpansionGpu(const PtrStepSzf &src, int polyN, PtrStepSzf dst, cudaStream_t stream);
67 
68     void setUpdateMatricesConsts();
69 
70     void updateMatricesGpu(
71             const PtrStepSzf flowx, const PtrStepSzf flowy, const PtrStepSzf R0, const PtrStepSzf R1,
72             PtrStepSzf M, cudaStream_t stream);
73 
74     void updateFlowGpu(
75             const PtrStepSzf M, PtrStepSzf flowx, PtrStepSzf flowy, cudaStream_t stream);
76 
77     void boxFilter5Gpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);
78 
79     void boxFilter5Gpu_CC11(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);
80 
81     void setGaussianBlurKernel(const float *gKer, int ksizeHalf);
82 
83     void gaussianBlurGpu(
84             const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
85 
86     void gaussianBlur5Gpu(
87             const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
88 
89     void gaussianBlur5Gpu_CC11(
90             const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
91 
92 }}}}
93 
94 namespace
95 {
96     class FarnebackOpticalFlowImpl : public FarnebackOpticalFlow
97     {
98     public:
FarnebackOpticalFlowImpl(int numLevels,double pyrScale,bool fastPyramids,int winSize,int numIters,int polyN,double polySigma,int flags)99         FarnebackOpticalFlowImpl(int numLevels, double pyrScale, bool fastPyramids, int winSize,
100                                  int numIters, int polyN, double polySigma, int flags) :
101             numLevels_(numLevels), pyrScale_(pyrScale), fastPyramids_(fastPyramids), winSize_(winSize),
102             numIters_(numIters), polyN_(polyN), polySigma_(polySigma), flags_(flags)
103         {
104         }
105 
getNumLevels() const106         virtual int getNumLevels() const { return numLevels_; }
setNumLevels(int numLevels)107         virtual void setNumLevels(int numLevels) { numLevels_ = numLevels; }
108 
getPyrScale() const109         virtual double getPyrScale() const { return pyrScale_; }
setPyrScale(double pyrScale)110         virtual void setPyrScale(double pyrScale) { pyrScale_ = pyrScale; }
111 
getFastPyramids() const112         virtual bool getFastPyramids() const { return fastPyramids_; }
setFastPyramids(bool fastPyramids)113         virtual void setFastPyramids(bool fastPyramids) { fastPyramids_ = fastPyramids; }
114 
getWinSize() const115         virtual int getWinSize() const { return winSize_; }
setWinSize(int winSize)116         virtual void setWinSize(int winSize) { winSize_ = winSize; }
117 
getNumIters() const118         virtual int getNumIters() const { return numIters_; }
setNumIters(int numIters)119         virtual void setNumIters(int numIters) { numIters_ = numIters; }
120 
getPolyN() const121         virtual int getPolyN() const { return polyN_; }
setPolyN(int polyN)122         virtual void setPolyN(int polyN) { polyN_ = polyN; }
123 
getPolySigma() const124         virtual double getPolySigma() const { return polySigma_; }
setPolySigma(double polySigma)125         virtual void setPolySigma(double polySigma) { polySigma_ = polySigma; }
126 
getFlags() const127         virtual int getFlags() const { return flags_; }
setFlags(int flags)128         virtual void setFlags(int flags) { flags_ = flags; }
129 
130         virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream);
131 
132     private:
133         int numLevels_;
134         double pyrScale_;
135         bool fastPyramids_;
136         int winSize_;
137         int numIters_;
138         int polyN_;
139         double polySigma_;
140         int flags_;
141 
142     private:
143         void prepareGaussian(
144                 int n, double sigma, float *g, float *xg, float *xxg,
145                 double &ig11, double &ig03, double &ig33, double &ig55);
146 
147         void setPolynomialExpansionConsts(int n, double sigma);
148 
149         void updateFlow_boxFilter(
150                 const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
151                 GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
152 
153         void updateFlow_gaussianBlur(
154                 const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
155                 GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
156 
157         void calcImpl(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &stream);
158 
159         GpuMat frames_[2];
160         GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
161         std::vector<GpuMat> pyramid0_, pyramid1_;
162     };
163 
calc(InputArray _frame0,InputArray _frame1,InputOutputArray _flow,Stream & stream)164     void FarnebackOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow, Stream& stream)
165     {
166         const GpuMat frame0 = _frame0.getGpuMat();
167         const GpuMat frame1 = _frame1.getGpuMat();
168 
169         BufferPool pool(stream);
170         GpuMat flowx = pool.getBuffer(frame0.size(), CV_32FC1);
171         GpuMat flowy = pool.getBuffer(frame0.size(), CV_32FC1);
172 
173         calcImpl(frame0, frame1, flowx, flowy, stream);
174 
175         GpuMat flows[] = {flowx, flowy};
176         cuda::merge(flows, 2, _flow, stream);
177     }
178 
allocMatFromBuf(int rows,int cols,int type,GpuMat & mat)179     GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
180     {
181         if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
182             return mat(Rect(0, 0, cols, rows));
183 
184         return mat = GpuMat(rows, cols, type);
185     }
186 
prepareGaussian(int n,double sigma,float * g,float * xg,float * xxg,double & ig11,double & ig03,double & ig33,double & ig55)187     void FarnebackOpticalFlowImpl::prepareGaussian(
188             int n, double sigma, float *g, float *xg, float *xxg,
189             double &ig11, double &ig03, double &ig33, double &ig55)
190     {
191         double s = 0.;
192         for (int x = -n; x <= n; x++)
193         {
194             g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
195             s += g[x];
196         }
197 
198         s = 1./s;
199         for (int x = -n; x <= n; x++)
200         {
201             g[x] = (float)(g[x]*s);
202             xg[x] = (float)(x*g[x]);
203             xxg[x] = (float)(x*x*g[x]);
204         }
205 
206         Mat_<double> G(6, 6);
207         G.setTo(0);
208 
209         for (int y = -n; y <= n; y++)
210         {
211             for (int x = -n; x <= n; x++)
212             {
213                 G(0,0) += g[y]*g[x];
214                 G(1,1) += g[y]*g[x]*x*x;
215                 G(3,3) += g[y]*g[x]*x*x*x*x;
216                 G(5,5) += g[y]*g[x]*x*x*y*y;
217             }
218         }
219 
220         //G[0][0] = 1.;
221         G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
222         G(4,4) = G(3,3);
223         G(3,4) = G(4,3) = G(5,5);
224 
225         // invG:
226         // [ x        e  e    ]
227         // [    y             ]
228         // [       y          ]
229         // [ e        z       ]
230         // [ e           z    ]
231         // [                u ]
232         Mat_<double> invG = G.inv(DECOMP_CHOLESKY);
233 
234         ig11 = invG(1,1);
235         ig03 = invG(0,3);
236         ig33 = invG(3,3);
237         ig55 = invG(5,5);
238     }
239 
setPolynomialExpansionConsts(int n,double sigma)240     void FarnebackOpticalFlowImpl::setPolynomialExpansionConsts(int n, double sigma)
241     {
242         std::vector<float> buf(n*6 + 3);
243         float* g = &buf[0] + n;
244         float* xg = g + n*2 + 1;
245         float* xxg = xg + n*2 + 1;
246 
247         if (sigma < FLT_EPSILON)
248             sigma = n*0.3;
249 
250         double ig11, ig03, ig33, ig55;
251         prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
252 
253         device::optflow_farneback::setPolynomialExpansionConsts(n, g, xg, xxg, static_cast<float>(ig11), static_cast<float>(ig03), static_cast<float>(ig33), static_cast<float>(ig55));
254     }
255 
updateFlow_boxFilter(const GpuMat & R0,const GpuMat & R1,GpuMat & flowx,GpuMat & flowy,GpuMat & M,GpuMat & bufM,int blockSize,bool updateMatrices,Stream streams[])256     void FarnebackOpticalFlowImpl::updateFlow_boxFilter(
257             const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
258             GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
259     {
260         if (deviceSupports(FEATURE_SET_COMPUTE_12))
261             device::optflow_farneback::boxFilter5Gpu(M, blockSize/2, bufM, StreamAccessor::getStream(streams[0]));
262         else
263             device::optflow_farneback::boxFilter5Gpu_CC11(M, blockSize/2, bufM, StreamAccessor::getStream(streams[0]));
264         swap(M, bufM);
265 
266         for (int i = 1; i < 5; ++i)
267             streams[i].waitForCompletion();
268         device::optflow_farneback::updateFlowGpu(M, flowx, flowy, StreamAccessor::getStream(streams[0]));
269 
270         if (updateMatrices)
271             device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, StreamAccessor::getStream(streams[0]));
272     }
273 
updateFlow_gaussianBlur(const GpuMat & R0,const GpuMat & R1,GpuMat & flowx,GpuMat & flowy,GpuMat & M,GpuMat & bufM,int blockSize,bool updateMatrices,Stream streams[])274     void FarnebackOpticalFlowImpl::updateFlow_gaussianBlur(
275             const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
276             GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
277     {
278         if (deviceSupports(FEATURE_SET_COMPUTE_12))
279             device::optflow_farneback::gaussianBlur5Gpu(
280                         M, blockSize/2, bufM, BORDER_REPLICATE, StreamAccessor::getStream(streams[0]));
281         else
282             device::optflow_farneback::gaussianBlur5Gpu_CC11(
283                         M, blockSize/2, bufM, BORDER_REPLICATE, StreamAccessor::getStream(streams[0]));
284         swap(M, bufM);
285 
286         device::optflow_farneback::updateFlowGpu(M, flowx, flowy, StreamAccessor::getStream(streams[0]));
287 
288         if (updateMatrices)
289             device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, StreamAccessor::getStream(streams[0]));
290     }
291 
calcImpl(const GpuMat & frame0,const GpuMat & frame1,GpuMat & flowx,GpuMat & flowy,Stream & stream)292     void FarnebackOpticalFlowImpl::calcImpl(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &stream)
293     {
294         CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
295         CV_Assert(frame0.size() == frame1.size());
296         CV_Assert(polyN_ == 5 || polyN_ == 7);
297         CV_Assert(!fastPyramids_ || std::abs(pyrScale_ - 0.5) < 1e-6);
298 
299         Stream streams[5];
300         if (stream)
301             streams[0] = stream;
302 
303         Size size = frame0.size();
304         GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY;
305 
306         flowx.create(size, CV_32F);
307         flowy.create(size, CV_32F);
308         GpuMat flowx0 = flowx;
309         GpuMat flowy0 = flowy;
310 
311         // Crop unnecessary levels
312         double scale = 1;
313         int numLevelsCropped = 0;
314         for (; numLevelsCropped < numLevels_; numLevelsCropped++)
315         {
316             scale *= pyrScale_;
317             if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
318                 break;
319         }
320 
321         frame0.convertTo(frames_[0], CV_32F, streams[0]);
322         frame1.convertTo(frames_[1], CV_32F, streams[1]);
323 
324         if (fastPyramids_)
325         {
326             // Build Gaussian pyramids using pyrDown()
327             pyramid0_.resize(numLevelsCropped + 1);
328             pyramid1_.resize(numLevelsCropped + 1);
329             pyramid0_[0] = frames_[0];
330             pyramid1_[0] = frames_[1];
331             for (int i = 1; i <= numLevelsCropped; ++i)
332             {
333                 cuda::pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);
334                 cuda::pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);
335             }
336         }
337 
338         setPolynomialExpansionConsts(polyN_, polySigma_);
339         device::optflow_farneback::setUpdateMatricesConsts();
340 
341         for (int k = numLevelsCropped; k >= 0; k--)
342         {
343             streams[0].waitForCompletion();
344 
345             scale = 1;
346             for (int i = 0; i < k; i++)
347                 scale *= pyrScale_;
348 
349             double sigma = (1./scale - 1) * 0.5;
350             int smoothSize = cvRound(sigma*5) | 1;
351             smoothSize = std::max(smoothSize, 3);
352 
353             int width = cvRound(size.width*scale);
354             int height = cvRound(size.height*scale);
355 
356             if (fastPyramids_)
357             {
358                 width = pyramid0_[k].cols;
359                 height = pyramid0_[k].rows;
360             }
361 
362             if (k > 0)
363             {
364                 curFlowX.create(height, width, CV_32F);
365                 curFlowY.create(height, width, CV_32F);
366             }
367             else
368             {
369                 curFlowX = flowx0;
370                 curFlowY = flowy0;
371             }
372 
373             if (!prevFlowX.data)
374             {
375                 if (flags_ & OPTFLOW_USE_INITIAL_FLOW)
376                 {
377                     cuda::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
378                     cuda::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
379                     curFlowX.convertTo(curFlowX, curFlowX.depth(), scale, streams[0]);
380                     curFlowY.convertTo(curFlowY, curFlowY.depth(), scale, streams[1]);
381                 }
382                 else
383                 {
384                     curFlowX.setTo(0, streams[0]);
385                     curFlowY.setTo(0, streams[1]);
386                 }
387             }
388             else
389             {
390                 cuda::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
391                 cuda::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
392                 curFlowX.convertTo(curFlowX, curFlowX.depth(), 1./pyrScale_, streams[0]);
393                 curFlowY.convertTo(curFlowY, curFlowY.depth(), 1./pyrScale_, streams[1]);
394             }
395 
396             GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
397             GpuMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
398             GpuMat R[2] =
399             {
400                 allocMatFromBuf(5*height, width, CV_32F, R_[0]),
401                 allocMatFromBuf(5*height, width, CV_32F, R_[1])
402             };
403 
404             if (fastPyramids_)
405             {
406                 device::optflow_farneback::polynomialExpansionGpu(pyramid0_[k], polyN_, R[0], StreamAccessor::getStream(streams[0]));
407                 device::optflow_farneback::polynomialExpansionGpu(pyramid1_[k], polyN_, R[1], StreamAccessor::getStream(streams[1]));
408             }
409             else
410             {
411                 GpuMat blurredFrame[2] =
412                 {
413                     allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
414                     allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
415                 };
416                 GpuMat pyrLevel[2] =
417                 {
418                     allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
419                     allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
420                 };
421 
422                 Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
423                 device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);
424 
425                 for (int i = 0; i < 2; i++)
426                 {
427                     device::optflow_farneback::gaussianBlurGpu(
428                             frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, StreamAccessor::getStream(streams[i]));
429                     cuda::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0.0, 0.0, INTER_LINEAR, streams[i]);
430                     device::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN_, R[i], StreamAccessor::getStream(streams[i]));
431                 }
432             }
433 
434             streams[1].waitForCompletion();
435             device::optflow_farneback::updateMatricesGpu(curFlowX, curFlowY, R[0], R[1], M, StreamAccessor::getStream(streams[0]));
436 
437             if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN)
438             {
439                 Mat g = getGaussianKernel(winSize_, winSize_/2*0.3f, CV_32F);
440                 device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize_/2), winSize_/2);
441             }
442             for (int i = 0; i < numIters_; i++)
443             {
444                 if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN)
445                     updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize_, i < numIters_-1, streams);
446                 else
447                     updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize_, i < numIters_-1, streams);
448             }
449 
450             prevFlowX = curFlowX;
451             prevFlowY = curFlowY;
452         }
453 
454         flowx = curFlowX;
455         flowy = curFlowY;
456 
457         if (!stream)
458             streams[0].waitForCompletion();
459     }
460 }
461 
create(int numLevels,double pyrScale,bool fastPyramids,int winSize,int numIters,int polyN,double polySigma,int flags)462 Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int numLevels, double pyrScale, bool fastPyramids, int winSize,
463                                                                  int numIters, int polyN, double polySigma, int flags)
464 {
465     return makePtr<FarnebackOpticalFlowImpl>(numLevels, pyrScale, fastPyramids, winSize,
466                                              numIters, polyN, polySigma, flags);
467 }
468 
469 #endif
470