1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 /*
44 //
45 // This implementation is based on Javier Sánchez Pérez <jsanchez@dis.ulpgc.es> implementation.
46 // Original BSD license:
47 //
48 // Copyright (c) 2011, Javier Sánchez Pérez, Enric Meinhardt Llopis
49 // All rights reserved.
50 //
51 // Redistribution and use in source and binary forms, with or without
52 // modification, are permitted provided that the following conditions are met:
53 //
54 // * Redistributions of source code must retain the above copyright notice, this
55 //   list of conditions and the following disclaimer.
56 //
57 // * Redistributions in binary form must reproduce the above copyright notice,
58 //   this list of conditions and the following disclaimer in the documentation
59 //   and/or other materials provided with the distribution.
60 //
61 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
62 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
63 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
64 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
65 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
66 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
67 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
68 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
69 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
70 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
71 // POSSIBILITY OF SUCH DAMAGE.
72 //
73 */
74 
75 #include "precomp.hpp"
76 #include "opencl_kernels_video.hpp"
77 
78 #include <limits>
79 #include <iomanip>
80 #include <iostream>
81 #include "opencv2/core/opencl/ocl_defs.hpp"
82 
83 
84 
85 using namespace cv;
86 
87 namespace {
88 
89 class OpticalFlowDual_TVL1 : public DualTVL1OpticalFlow
90 {
91 public:
92     OpticalFlowDual_TVL1();
93 
94     void calc(InputArray I0, InputArray I1, InputOutputArray flow);
95     void collectGarbage();
96 
97     CV_IMPL_PROPERTY(double, Tau, tau)
98     CV_IMPL_PROPERTY(double, Lambda, lambda)
99     CV_IMPL_PROPERTY(double, Theta, theta)
100     CV_IMPL_PROPERTY(double, Gamma, gamma)
101     CV_IMPL_PROPERTY(int, ScalesNumber, nscales)
102     CV_IMPL_PROPERTY(int, WarpingsNumber, warps)
103     CV_IMPL_PROPERTY(double, Epsilon, epsilon)
104     CV_IMPL_PROPERTY(int, InnerIterations, innerIterations)
105     CV_IMPL_PROPERTY(int, OuterIterations, outerIterations)
106     CV_IMPL_PROPERTY(bool, UseInitialFlow, useInitialFlow)
107     CV_IMPL_PROPERTY(double, ScaleStep, scaleStep)
108     CV_IMPL_PROPERTY(int, MedianFiltering, medianFiltering)
109 
110 protected:
111     double tau;
112     double lambda;
113     double theta;
114     double gamma;
115     int nscales;
116     int warps;
117     double epsilon;
118     int innerIterations;
119     int outerIterations;
120     bool useInitialFlow;
121     double scaleStep;
122     int medianFiltering;
123 
124 private:
125    void procOneScale(const Mat_<float>& I0, const Mat_<float>& I1, Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3);
126 
127     bool procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2);
128 
129     bool calc_ocl(InputArray I0, InputArray I1, InputOutputArray flow);
130     struct dataMat
131     {
132         std::vector<Mat_<float> > I0s;
133         std::vector<Mat_<float> > I1s;
134         std::vector<Mat_<float> > u1s;
135         std::vector<Mat_<float> > u2s;
136         std::vector<Mat_<float> > u3s;
137 
138         Mat_<float> I1x_buf;
139         Mat_<float> I1y_buf;
140 
141         Mat_<float> flowMap1_buf;
142         Mat_<float> flowMap2_buf;
143 
144         Mat_<float> I1w_buf;
145         Mat_<float> I1wx_buf;
146         Mat_<float> I1wy_buf;
147 
148         Mat_<float> grad_buf;
149         Mat_<float> rho_c_buf;
150 
151         Mat_<float> v1_buf;
152         Mat_<float> v2_buf;
153         Mat_<float> v3_buf;
154 
155         Mat_<float> p11_buf;
156         Mat_<float> p12_buf;
157         Mat_<float> p21_buf;
158         Mat_<float> p22_buf;
159         Mat_<float> p31_buf;
160         Mat_<float> p32_buf;
161 
162         Mat_<float> div_p1_buf;
163         Mat_<float> div_p2_buf;
164         Mat_<float> div_p3_buf;
165 
166         Mat_<float> u1x_buf;
167         Mat_<float> u1y_buf;
168         Mat_<float> u2x_buf;
169         Mat_<float> u2y_buf;
170         Mat_<float> u3x_buf;
171         Mat_<float> u3y_buf;
172     } dm;
173     struct dataUMat
174     {
175         std::vector<UMat> I0s;
176         std::vector<UMat> I1s;
177         std::vector<UMat> u1s;
178         std::vector<UMat> u2s;
179 
180         UMat I1x_buf;
181         UMat I1y_buf;
182 
183         UMat I1w_buf;
184         UMat I1wx_buf;
185         UMat I1wy_buf;
186 
187         UMat grad_buf;
188         UMat rho_c_buf;
189 
190         UMat p11_buf;
191         UMat p12_buf;
192         UMat p21_buf;
193         UMat p22_buf;
194 
195         UMat diff_buf;
196         UMat norm_buf;
197     } dum;
198 };
199 
200 namespace cv_ocl_tvl1flow
201 {
202     bool centeredGradient(const UMat &src, UMat &dx, UMat &dy);
203 
204     bool warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y,
205         UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy,
206         UMat &grad, UMat &rho);
207 
208     bool estimateU(UMat &I1wx, UMat &I1wy, UMat &grad,
209         UMat &rho_c, UMat &p11, UMat &p12,
210         UMat &p21, UMat &p22, UMat &u1,
211         UMat &u2, UMat &error, float l_t, float theta, char calc_error);
212 
213     bool estimateDualVariables(UMat &u1, UMat &u2,
214         UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut);
215 }
216 
centeredGradient(const UMat & src,UMat & dx,UMat & dy)217 bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy)
218 {
219     size_t globalsize[2] = { src.cols, src.rows };
220 
221     ocl::Kernel kernel;
222     if (!kernel.create("centeredGradientKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
223         return false;
224 
225     int idxArg = 0;
226     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));//src mat
227     idxArg = kernel.set(idxArg, (int)(src.cols));//src mat col
228     idxArg = kernel.set(idxArg, (int)(src.rows));//src mat rows
229     idxArg = kernel.set(idxArg, (int)(src.step / src.elemSize()));//src mat step
230     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dx));//res mat dx
231     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dy));//res mat dy
232     idxArg = kernel.set(idxArg, (int)(dx.step/dx.elemSize()));//res mat step
233     return kernel.run(2, globalsize, NULL, false);
234 }
235 
warpBackward(const UMat & I0,const UMat & I1,UMat & I1x,UMat & I1y,UMat & u1,UMat & u2,UMat & I1w,UMat & I1wx,UMat & I1wy,UMat & grad,UMat & rho)236 bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y,
237     UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy,
238     UMat &grad, UMat &rho)
239 {
240     size_t globalsize[2] = { I0.cols, I0.rows };
241 
242     ocl::Kernel kernel;
243     if (!kernel.create("warpBackwardKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
244         return false;
245 
246     int idxArg = 0;
247     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I0));//I0 mat
248     int I0_step = (int)(I0.step / I0.elemSize());
249     idxArg = kernel.set(idxArg, I0_step);//I0_step
250     idxArg = kernel.set(idxArg, (int)(I0.cols));//I0_col
251     idxArg = kernel.set(idxArg, (int)(I0.rows));//I0_row
252     ocl::Image2D imageI1(I1);
253     ocl::Image2D imageI1x(I1x);
254     ocl::Image2D imageI1y(I1y);
255     idxArg = kernel.set(idxArg, imageI1);//image2d_t tex_I1
256     idxArg = kernel.set(idxArg, imageI1x);//image2d_t tex_I1x
257     idxArg = kernel.set(idxArg, imageI1y);//image2d_t tex_I1y
258     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));//const float* u1
259     idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize()));//int u1_step
260     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2));//const float* u2
261     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1w));///float* I1w
262     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wx));//float* I1wx
263     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wy));//float* I1wy
264     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(grad));//float* grad
265     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(rho));//float* rho
266     idxArg = kernel.set(idxArg, (int)(I1w.step / I1w.elemSize()));//I1w_step
267     idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//u2_step
268     int u1_offset_x = (int)((u1.offset) % (u1.step));
269     u1_offset_x = (int)(u1_offset_x / u1.elemSize());
270     idxArg = kernel.set(idxArg, (int)u1_offset_x );//u1_offset_x
271     idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step));//u1_offset_y
272     int u2_offset_x = (int)((u2.offset) % (u2.step));
273     u2_offset_x = (int) (u2_offset_x / u2.elemSize());
274     idxArg = kernel.set(idxArg, (int)u2_offset_x);//u2_offset_x
275     idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step));//u2_offset_y
276     return kernel.run(2, globalsize, NULL, false);
277 }
278 
estimateU(UMat & I1wx,UMat & I1wy,UMat & grad,UMat & rho_c,UMat & p11,UMat & p12,UMat & p21,UMat & p22,UMat & u1,UMat & u2,UMat & error,float l_t,float theta,char calc_error)279 bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad,
280     UMat &rho_c, UMat &p11, UMat &p12,
281     UMat &p21, UMat &p22, UMat &u1,
282     UMat &u2, UMat &error, float l_t, float theta, char calc_error)
283 {
284     size_t globalsize[2] = { I1wx.cols, I1wx.rows };
285 
286     ocl::Kernel kernel;
287     if (!kernel.create("estimateUKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
288         return false;
289 
290     int idxArg = 0;
291     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wx)); //const float* I1wx
292     idxArg = kernel.set(idxArg, (int)(I1wx.cols)); //int I1wx_col
293     idxArg = kernel.set(idxArg, (int)(I1wx.rows)); //int I1wx_row
294     idxArg = kernel.set(idxArg, (int)(I1wx.step/I1wx.elemSize())); //int I1wx_step
295     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wy)); //const float* I1wy
296     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(grad)); //const float* grad
297     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(rho_c)); //const float* rho_c
298     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p11)); //const float* p11
299     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p12)); //const float* p12
300     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p21)); //const float* p21
301     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p22)); //const float* p22
302     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u1)); //float* u1
303     idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize())); //int u1_step
304     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u2)); //float* u2
305     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(error)); //float* error
306     idxArg = kernel.set(idxArg, (float)l_t); //float l_t
307     idxArg = kernel.set(idxArg, (float)theta); //float theta
308     idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//int u2_step
309     int u1_offset_x = (int)(u1.offset % u1.step);
310     u1_offset_x = (int) (u1_offset_x  / u1.elemSize());
311     idxArg = kernel.set(idxArg, (int)u1_offset_x); //int u1_offset_x
312     idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step)); //int u1_offset_y
313     int u2_offset_x = (int)(u2.offset % u2.step);
314     u2_offset_x = (int)(u2_offset_x / u2.elemSize());
315     idxArg = kernel.set(idxArg, (int)u2_offset_x ); //int u2_offset_x
316     idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y
317     idxArg = kernel.set(idxArg, (char)calc_error);    //char calc_error
318 
319     return kernel.run(2, globalsize, NULL, false);
320 }
321 
estimateDualVariables(UMat & u1,UMat & u2,UMat & p11,UMat & p12,UMat & p21,UMat & p22,float taut)322 bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2,
323     UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut)
324 {
325     size_t globalsize[2] = { u1.cols, u1.rows };
326 
327     ocl::Kernel kernel;
328     if (!kernel.create("estimateDualVariablesKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
329         return false;
330 
331     int idxArg = 0;
332     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));// const float* u1
333     idxArg = kernel.set(idxArg, (int)(u1.cols)); //int u1_col
334     idxArg = kernel.set(idxArg, (int)(u1.rows)); //int u1_row
335     idxArg = kernel.set(idxArg, (int)(u1.step/u1.elemSize())); //int u1_step
336     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2)); // const float* u2
337     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p11)); // float* p11
338     idxArg = kernel.set(idxArg, (int)(p11.step/p11.elemSize())); //int p11_step
339     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p12)); // float* p12
340     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p21)); // float* p21
341     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p22)); // float* p22
342     idxArg = kernel.set(idxArg, (float)(taut));    //float taut
343     idxArg = kernel.set(idxArg, (int)(u2.step/u2.elemSize())); //int u2_step
344     int u1_offset_x = (int)(u1.offset % u1.step);
345     u1_offset_x = (int)(u1_offset_x / u1.elemSize());
346     idxArg = kernel.set(idxArg, u1_offset_x); //int u1_offset_x
347     idxArg = kernel.set(idxArg, (int)(u1.offset / u1.step)); //int u1_offset_y
348     int u2_offset_x = (int)(u2.offset % u2.step);
349     u2_offset_x = (int)(u2_offset_x / u2.elemSize());
350     idxArg = kernel.set(idxArg, u2_offset_x); //int u2_offset_x
351     idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y
352 
353     return kernel.run(2, globalsize, NULL, false);
354 
355 }
356 
OpticalFlowDual_TVL1()357 OpticalFlowDual_TVL1::OpticalFlowDual_TVL1()
358 {
359     tau            = 0.25;
360     lambda         = 0.15;
361     theta          = 0.3;
362     nscales        = 5;
363     warps          = 5;
364     epsilon        = 0.01;
365     gamma          = 0.;
366     innerIterations = 30;
367     outerIterations = 10;
368     useInitialFlow = false;
369     medianFiltering = 5;
370     scaleStep      = 0.8;
371 }
372 
calc(InputArray _I0,InputArray _I1,InputOutputArray _flow)373 void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow)
374 {
375     CV_OCL_RUN(_flow.isUMat() &&
376                ocl::Image2D::isFormatSupported(CV_32F, 1, false),
377                calc_ocl(_I0, _I1, _flow))
378 
379     Mat I0 = _I0.getMat();
380     Mat I1 = _I1.getMat();
381 
382     CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
383     CV_Assert( I0.size() == I1.size() );
384     CV_Assert( I0.type() == I1.type() );
385     CV_Assert( !useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2) );
386     CV_Assert( nscales > 0 );
387     bool use_gamma = gamma != 0;
388     // allocate memory for the pyramid structure
389     dm.I0s.resize(nscales);
390     dm.I1s.resize(nscales);
391     dm.u1s.resize(nscales);
392     dm.u2s.resize(nscales);
393     dm.u3s.resize(nscales);
394 
395     I0.convertTo(dm.I0s[0], dm.I0s[0].depth(), I0.depth() == CV_8U ? 1.0 : 255.0);
396     I1.convertTo(dm.I1s[0], dm.I1s[0].depth(), I1.depth() == CV_8U ? 1.0 : 255.0);
397 
398     dm.u1s[0].create(I0.size());
399     dm.u2s[0].create(I0.size());
400     if (use_gamma) dm.u3s[0].create(I0.size());
401 
402     if (useInitialFlow)
403     {
404         Mat_<float> mv[] = { dm.u1s[0], dm.u2s[0] };
405         split(_flow.getMat(), mv);
406     }
407 
408     dm.I1x_buf.create(I0.size());
409     dm.I1y_buf.create(I0.size());
410 
411     dm.flowMap1_buf.create(I0.size());
412     dm.flowMap2_buf.create(I0.size());
413 
414     dm.I1w_buf.create(I0.size());
415     dm.I1wx_buf.create(I0.size());
416     dm.I1wy_buf.create(I0.size());
417 
418     dm.grad_buf.create(I0.size());
419     dm.rho_c_buf.create(I0.size());
420 
421     dm.v1_buf.create(I0.size());
422     dm.v2_buf.create(I0.size());
423     dm.v3_buf.create(I0.size());
424 
425     dm.p11_buf.create(I0.size());
426     dm.p12_buf.create(I0.size());
427     dm.p21_buf.create(I0.size());
428     dm.p22_buf.create(I0.size());
429     dm.p31_buf.create(I0.size());
430     dm.p32_buf.create(I0.size());
431 
432     dm.div_p1_buf.create(I0.size());
433     dm.div_p2_buf.create(I0.size());
434     dm.div_p3_buf.create(I0.size());
435 
436     dm.u1x_buf.create(I0.size());
437     dm.u1y_buf.create(I0.size());
438     dm.u2x_buf.create(I0.size());
439     dm.u2y_buf.create(I0.size());
440     dm.u3x_buf.create(I0.size());
441     dm.u3y_buf.create(I0.size());
442 
443     // create the scales
444     for (int s = 1; s < nscales; ++s)
445     {
446         resize(dm.I0s[s - 1], dm.I0s[s], Size(), scaleStep, scaleStep);
447         resize(dm.I1s[s - 1], dm.I1s[s], Size(), scaleStep, scaleStep);
448 
449         if (dm.I0s[s].cols < 16 || dm.I0s[s].rows < 16)
450         {
451             nscales = s;
452             break;
453         }
454 
455         if (useInitialFlow)
456         {
457             resize(dm.u1s[s - 1], dm.u1s[s], Size(), scaleStep, scaleStep);
458             resize(dm.u2s[s - 1], dm.u2s[s], Size(), scaleStep, scaleStep);
459 
460             multiply(dm.u1s[s], Scalar::all(scaleStep), dm.u1s[s]);
461             multiply(dm.u2s[s], Scalar::all(scaleStep), dm.u2s[s]);
462         }
463         else
464         {
465             dm.u1s[s].create(dm.I0s[s].size());
466             dm.u2s[s].create(dm.I0s[s].size());
467         }
468         if (use_gamma) dm.u3s[s].create(dm.I0s[s].size());
469     }
470     if (!useInitialFlow)
471     {
472         dm.u1s[nscales - 1].setTo(Scalar::all(0));
473         dm.u2s[nscales - 1].setTo(Scalar::all(0));
474     }
475     if (use_gamma) dm.u3s[nscales - 1].setTo(Scalar::all(0));
476     // pyramidal structure for computing the optical flow
477     for (int s = nscales - 1; s >= 0; --s)
478     {
479         // compute the optical flow at the current scale
480         procOneScale(dm.I0s[s], dm.I1s[s], dm.u1s[s], dm.u2s[s], dm.u3s[s]);
481 
482         // if this was the last scale, finish now
483         if (s == 0)
484             break;
485 
486         // otherwise, upsample the optical flow
487 
488         // zoom the optical flow for the next finer scale
489         resize(dm.u1s[s], dm.u1s[s - 1], dm.I0s[s - 1].size());
490         resize(dm.u2s[s], dm.u2s[s - 1], dm.I0s[s - 1].size());
491         if (use_gamma) resize(dm.u3s[s], dm.u3s[s - 1], dm.I0s[s - 1].size());
492 
493         // scale the optical flow with the appropriate zoom factor (don't scale u3!)
494         multiply(dm.u1s[s - 1], Scalar::all(1 / scaleStep), dm.u1s[s - 1]);
495         multiply(dm.u2s[s - 1], Scalar::all(1 / scaleStep), dm.u2s[s - 1]);
496     }
497 
498     Mat uxy[] = { dm.u1s[0], dm.u2s[0] };
499     merge(uxy, 2, _flow);
500 }
501 
calc_ocl(InputArray _I0,InputArray _I1,InputOutputArray _flow)502 bool OpticalFlowDual_TVL1::calc_ocl(InputArray _I0, InputArray _I1, InputOutputArray _flow)
503 {
504     UMat I0 = _I0.getUMat();
505     UMat I1 = _I1.getUMat();
506 
507     CV_Assert(I0.type() == CV_8UC1 || I0.type() == CV_32FC1);
508     CV_Assert(I0.size() == I1.size());
509     CV_Assert(I0.type() == I1.type());
510     CV_Assert(!useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2));
511     CV_Assert(nscales > 0);
512 
513     // allocate memory for the pyramid structure
514     dum.I0s.resize(nscales);
515     dum.I1s.resize(nscales);
516     dum.u1s.resize(nscales);
517     dum.u2s.resize(nscales);
518     //I0s_step == I1s_step
519     double alpha = I0.depth() == CV_8U ? 1.0 : 255.0;
520 
521     I0.convertTo(dum.I0s[0], CV_32F, alpha);
522     I1.convertTo(dum.I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);
523 
524     dum.u1s[0].create(I0.size(), CV_32FC1);
525     dum.u2s[0].create(I0.size(), CV_32FC1);
526 
527     if (useInitialFlow)
528     {
529         std::vector<UMat> umv;
530         umv.push_back(dum.u1s[0]);
531         umv.push_back(dum.u2s[0]);
532         cv::split(_flow,umv);
533     }
534 
535     dum.I1x_buf.create(I0.size(), CV_32FC1);
536     dum.I1y_buf.create(I0.size(), CV_32FC1);
537 
538     dum.I1w_buf.create(I0.size(), CV_32FC1);
539     dum.I1wx_buf.create(I0.size(), CV_32FC1);
540     dum.I1wy_buf.create(I0.size(), CV_32FC1);
541 
542     dum.grad_buf.create(I0.size(), CV_32FC1);
543     dum.rho_c_buf.create(I0.size(), CV_32FC1);
544 
545     dum.p11_buf.create(I0.size(), CV_32FC1);
546     dum.p12_buf.create(I0.size(), CV_32FC1);
547     dum.p21_buf.create(I0.size(), CV_32FC1);
548     dum.p22_buf.create(I0.size(), CV_32FC1);
549 
550     dum.diff_buf.create(I0.size(), CV_32FC1);
551 
552     // create the scales
553     for (int s = 1; s < nscales; ++s)
554     {
555         resize(dum.I0s[s - 1], dum.I0s[s], Size(), scaleStep, scaleStep);
556         resize(dum.I1s[s - 1], dum.I1s[s], Size(), scaleStep, scaleStep);
557 
558         if (dum.I0s[s].cols < 16 || dum.I0s[s].rows < 16)
559         {
560             nscales = s;
561             break;
562         }
563 
564         if (useInitialFlow)
565         {
566             resize(dum.u1s[s - 1], dum.u1s[s], Size(), scaleStep, scaleStep);
567             resize(dum.u2s[s - 1], dum.u2s[s], Size(), scaleStep, scaleStep);
568 
569             //scale by scale factor
570             multiply(dum.u1s[s], Scalar::all(scaleStep), dum.u1s[s]);
571             multiply(dum.u2s[s], Scalar::all(scaleStep), dum.u2s[s]);
572         }
573     }
574 
575     // pyramidal structure for computing the optical flow
576     for (int s = nscales - 1; s >= 0; --s)
577     {
578         // compute the optical flow at the current scale
579         if (!OpticalFlowDual_TVL1::procOneScale_ocl(dum.I0s[s], dum.I1s[s], dum.u1s[s], dum.u2s[s]))
580             return false;
581 
582         // if this was the last scale, finish now
583         if (s == 0)
584             break;
585 
586         // zoom the optical flow for the next finer scale
587         resize(dum.u1s[s], dum.u1s[s - 1], dum.I0s[s - 1].size());
588         resize(dum.u2s[s], dum.u2s[s - 1], dum.I0s[s - 1].size());
589 
590         // scale the optical flow with the appropriate zoom factor
591         multiply(dum.u1s[s - 1], Scalar::all(1 / scaleStep), dum.u1s[s - 1]);
592         multiply(dum.u2s[s - 1], Scalar::all(1 / scaleStep), dum.u2s[s - 1]);
593     }
594 
595     std::vector<UMat> uxy;
596     uxy.push_back(dum.u1s[0]);
597     uxy.push_back(dum.u2s[0]);
598     merge(uxy, _flow);
599     return true;
600 }
601 
602 ////////////////////////////////////////////////////////////
603 // buildFlowMap
604 
605 struct BuildFlowMapBody : ParallelLoopBody
606 {
607     void operator() (const Range& range) const;
608 
609     Mat_<float> u1;
610     Mat_<float> u2;
611     mutable Mat_<float> map1;
612     mutable Mat_<float> map2;
613 };
614 
operator ()(const Range & range) const615 void BuildFlowMapBody::operator() (const Range& range) const
616 {
617     for (int y = range.start; y < range.end; ++y)
618     {
619         const float* u1Row = u1[y];
620         const float* u2Row = u2[y];
621 
622         float* map1Row = map1[y];
623         float* map2Row = map2[y];
624 
625         for (int x = 0; x < u1.cols; ++x)
626         {
627             map1Row[x] = x + u1Row[x];
628             map2Row[x] = y + u2Row[x];
629         }
630     }
631 }
632 
buildFlowMap(const Mat_<float> & u1,const Mat_<float> & u2,Mat_<float> & map1,Mat_<float> & map2)633 void buildFlowMap(const Mat_<float>& u1, const Mat_<float>& u2, Mat_<float>& map1, Mat_<float>& map2)
634 {
635     CV_DbgAssert( u2.size() == u1.size() );
636     CV_DbgAssert( map1.size() == u1.size() );
637     CV_DbgAssert( map2.size() == u1.size() );
638 
639     BuildFlowMapBody body;
640 
641     body.u1 = u1;
642     body.u2 = u2;
643     body.map1 = map1;
644     body.map2 = map2;
645 
646     parallel_for_(Range(0, u1.rows), body);
647 }
648 
649 ////////////////////////////////////////////////////////////
650 // centeredGradient
651 
652 struct CenteredGradientBody : ParallelLoopBody
653 {
654     void operator() (const Range& range) const;
655 
656     Mat_<float> src;
657     mutable Mat_<float> dx;
658     mutable Mat_<float> dy;
659 };
660 
operator ()(const Range & range) const661 void CenteredGradientBody::operator() (const Range& range) const
662 {
663     const int last_col = src.cols - 1;
664 
665     for (int y = range.start; y < range.end; ++y)
666     {
667         const float* srcPrevRow = src[y - 1];
668         const float* srcCurRow = src[y];
669         const float* srcNextRow = src[y + 1];
670 
671         float* dxRow = dx[y];
672         float* dyRow = dy[y];
673 
674         for (int x = 1; x < last_col; ++x)
675         {
676             dxRow[x] = 0.5f * (srcCurRow[x + 1] - srcCurRow[x - 1]);
677             dyRow[x] = 0.5f * (srcNextRow[x] - srcPrevRow[x]);
678         }
679     }
680 }
681 
centeredGradient(const Mat_<float> & src,Mat_<float> & dx,Mat_<float> & dy)682 void centeredGradient(const Mat_<float>& src, Mat_<float>& dx, Mat_<float>& dy)
683 {
684     CV_DbgAssert( src.rows > 2 && src.cols > 2 );
685     CV_DbgAssert( dx.size() == src.size() );
686     CV_DbgAssert( dy.size() == src.size() );
687 
688     const int last_row = src.rows - 1;
689     const int last_col = src.cols - 1;
690 
691     // compute the gradient on the center body of the image
692     {
693         CenteredGradientBody body;
694 
695         body.src = src;
696         body.dx = dx;
697         body.dy = dy;
698 
699         parallel_for_(Range(1, last_row), body);
700     }
701 
702     // compute the gradient on the first and last rows
703     for (int x = 1; x < last_col; ++x)
704     {
705         dx(0, x) = 0.5f * (src(0, x + 1) - src(0, x - 1));
706         dy(0, x) = 0.5f * (src(1, x) - src(0, x));
707 
708         dx(last_row, x) = 0.5f * (src(last_row, x + 1) - src(last_row, x - 1));
709         dy(last_row, x) = 0.5f * (src(last_row, x) - src(last_row - 1, x));
710     }
711 
712     // compute the gradient on the first and last columns
713     for (int y = 1; y < last_row; ++y)
714     {
715         dx(y, 0) = 0.5f * (src(y, 1) - src(y, 0));
716         dy(y, 0) = 0.5f * (src(y + 1, 0) - src(y - 1, 0));
717 
718         dx(y, last_col) = 0.5f * (src(y, last_col) - src(y, last_col - 1));
719         dy(y, last_col) = 0.5f * (src(y + 1, last_col) - src(y - 1, last_col));
720     }
721 
722     // compute the gradient at the four corners
723     dx(0, 0) = 0.5f * (src(0, 1) - src(0, 0));
724     dy(0, 0) = 0.5f * (src(1, 0) - src(0, 0));
725 
726     dx(0, last_col) = 0.5f * (src(0, last_col) - src(0, last_col - 1));
727     dy(0, last_col) = 0.5f * (src(1, last_col) - src(0, last_col));
728 
729     dx(last_row, 0) = 0.5f * (src(last_row, 1) - src(last_row, 0));
730     dy(last_row, 0) = 0.5f * (src(last_row, 0) - src(last_row - 1, 0));
731 
732     dx(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row, last_col - 1));
733     dy(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row - 1, last_col));
734 }
735 
736 ////////////////////////////////////////////////////////////
737 // forwardGradient
738 
739 struct ForwardGradientBody : ParallelLoopBody
740 {
741     void operator() (const Range& range) const;
742 
743     Mat_<float> src;
744     mutable Mat_<float> dx;
745     mutable Mat_<float> dy;
746 };
747 
operator ()(const Range & range) const748 void ForwardGradientBody::operator() (const Range& range) const
749 {
750     const int last_col = src.cols - 1;
751 
752     for (int y = range.start; y < range.end; ++y)
753     {
754         const float* srcCurRow = src[y];
755         const float* srcNextRow = src[y + 1];
756 
757         float* dxRow = dx[y];
758         float* dyRow = dy[y];
759 
760         for (int x = 0; x < last_col; ++x)
761         {
762             dxRow[x] = srcCurRow[x + 1] - srcCurRow[x];
763             dyRow[x] = srcNextRow[x] - srcCurRow[x];
764         }
765     }
766 }
767 
forwardGradient(const Mat_<float> & src,Mat_<float> & dx,Mat_<float> & dy)768 void forwardGradient(const Mat_<float>& src, Mat_<float>& dx, Mat_<float>& dy)
769 {
770     CV_DbgAssert( src.rows > 2 && src.cols > 2 );
771     CV_DbgAssert( dx.size() == src.size() );
772     CV_DbgAssert( dy.size() == src.size() );
773 
774     const int last_row = src.rows - 1;
775     const int last_col = src.cols - 1;
776 
777     // compute the gradient on the central body of the image
778     {
779         ForwardGradientBody body;
780 
781         body.src = src;
782         body.dx = dx;
783         body.dy = dy;
784 
785         parallel_for_(Range(0, last_row), body);
786     }
787 
788     // compute the gradient on the last row
789     for (int x = 0; x < last_col; ++x)
790     {
791         dx(last_row, x) = src(last_row, x + 1) - src(last_row, x);
792         dy(last_row, x) = 0.0f;
793     }
794 
795     // compute the gradient on the last column
796     for (int y = 0; y < last_row; ++y)
797     {
798         dx(y, last_col) = 0.0f;
799         dy(y, last_col) = src(y + 1, last_col) - src(y, last_col);
800     }
801 
802     dx(last_row, last_col) = 0.0f;
803     dy(last_row, last_col) = 0.0f;
804 }
805 
806 ////////////////////////////////////////////////////////////
807 // divergence
808 
809 struct DivergenceBody : ParallelLoopBody
810 {
811     void operator() (const Range& range) const;
812 
813     Mat_<float> v1;
814     Mat_<float> v2;
815     mutable Mat_<float> div;
816 };
817 
operator ()(const Range & range) const818 void DivergenceBody::operator() (const Range& range) const
819 {
820     for (int y = range.start; y < range.end; ++y)
821     {
822         const float* v1Row = v1[y];
823         const float* v2PrevRow = v2[y - 1];
824         const float* v2CurRow = v2[y];
825 
826         float* divRow = div[y];
827 
828         for(int x = 1; x < v1.cols; ++x)
829         {
830             const float v1x = v1Row[x] - v1Row[x - 1];
831             const float v2y = v2CurRow[x] - v2PrevRow[x];
832 
833             divRow[x] = v1x + v2y;
834         }
835     }
836 }
837 
divergence(const Mat_<float> & v1,const Mat_<float> & v2,Mat_<float> & div)838 void divergence(const Mat_<float>& v1, const Mat_<float>& v2, Mat_<float>& div)
839 {
840     CV_DbgAssert( v1.rows > 2 && v1.cols > 2 );
841     CV_DbgAssert( v2.size() == v1.size() );
842     CV_DbgAssert( div.size() == v1.size() );
843 
844     {
845         DivergenceBody body;
846 
847         body.v1 = v1;
848         body.v2 = v2;
849         body.div = div;
850 
851         parallel_for_(Range(1, v1.rows), body);
852     }
853 
854     // compute the divergence on the first row
855     for(int x = 1; x < v1.cols; ++x)
856         div(0, x) = v1(0, x) - v1(0, x - 1) + v2(0, x);
857 
858     // compute the divergence on the first column
859     for (int y = 1; y < v1.rows; ++y)
860         div(y, 0) = v1(y, 0) + v2(y, 0) - v2(y - 1, 0);
861 
862     div(0, 0) = v1(0, 0) + v2(0, 0);
863 }
864 
865 ////////////////////////////////////////////////////////////
866 // calcGradRho
867 
868 struct CalcGradRhoBody : ParallelLoopBody
869 {
870     void operator() (const Range& range) const;
871 
872     Mat_<float> I0;
873     Mat_<float> I1w;
874     Mat_<float> I1wx;
875     Mat_<float> I1wy;
876     Mat_<float> u1;
877     Mat_<float> u2;
878     mutable Mat_<float> grad;
879     mutable Mat_<float> rho_c;
880 };
881 
operator ()(const Range & range) const882 void CalcGradRhoBody::operator() (const Range& range) const
883 {
884     for (int y = range.start; y < range.end; ++y)
885     {
886         const float* I0Row = I0[y];
887         const float* I1wRow = I1w[y];
888         const float* I1wxRow = I1wx[y];
889         const float* I1wyRow = I1wy[y];
890         const float* u1Row = u1[y];
891         const float* u2Row = u2[y];
892 
893         float* gradRow = grad[y];
894         float* rhoRow = rho_c[y];
895 
896         for (int x = 0; x < I0.cols; ++x)
897         {
898             const float Ix2 = I1wxRow[x] * I1wxRow[x];
899             const float Iy2 = I1wyRow[x] * I1wyRow[x];
900 
901             // store the |Grad(I1)|^2
902             gradRow[x] = Ix2 + Iy2;
903 
904             // compute the constant part of the rho function
905             rhoRow[x] = (I1wRow[x] - I1wxRow[x] * u1Row[x] - I1wyRow[x] * u2Row[x] - I0Row[x]);
906         }
907     }
908 }
909 
calcGradRho(const Mat_<float> & I0,const Mat_<float> & I1w,const Mat_<float> & I1wx,const Mat_<float> & I1wy,const Mat_<float> & u1,const Mat_<float> & u2,Mat_<float> & grad,Mat_<float> & rho_c)910 void calcGradRho(const Mat_<float>& I0, const Mat_<float>& I1w, const Mat_<float>& I1wx, const Mat_<float>& I1wy, const Mat_<float>& u1, const Mat_<float>& u2,
911     Mat_<float>& grad, Mat_<float>& rho_c)
912 {
913     CV_DbgAssert( I1w.size() == I0.size() );
914     CV_DbgAssert( I1wx.size() == I0.size() );
915     CV_DbgAssert( I1wy.size() == I0.size() );
916     CV_DbgAssert( u1.size() == I0.size() );
917     CV_DbgAssert( u2.size() == I0.size() );
918     CV_DbgAssert( grad.size() == I0.size() );
919     CV_DbgAssert( rho_c.size() == I0.size() );
920 
921     CalcGradRhoBody body;
922 
923     body.I0 = I0;
924     body.I1w = I1w;
925     body.I1wx = I1wx;
926     body.I1wy = I1wy;
927     body.u1 = u1;
928     body.u2 = u2;
929     body.grad = grad;
930     body.rho_c = rho_c;
931 
932     parallel_for_(Range(0, I0.rows), body);
933 }
934 
935 ////////////////////////////////////////////////////////////
936 // estimateV
937 
938 struct EstimateVBody : ParallelLoopBody
939 {
940     void operator() (const Range& range) const;
941 
942     Mat_<float> I1wx;
943     Mat_<float> I1wy;
944     Mat_<float> u1;
945     Mat_<float> u2;
946     Mat_<float> u3;
947     Mat_<float> grad;
948     Mat_<float> rho_c;
949     mutable Mat_<float> v1;
950     mutable Mat_<float> v2;
951     mutable Mat_<float> v3;
952     float l_t;
953     float gamma;
954 };
955 
operator ()(const Range & range) const956 void EstimateVBody::operator() (const Range& range) const
957 {
958     bool use_gamma = gamma != 0;
959     for (int y = range.start; y < range.end; ++y)
960     {
961         const float* I1wxRow = I1wx[y];
962         const float* I1wyRow = I1wy[y];
963         const float* u1Row = u1[y];
964         const float* u2Row = u2[y];
965         const float* u3Row = use_gamma?u3[y]:NULL;
966         const float* gradRow = grad[y];
967         const float* rhoRow = rho_c[y];
968 
969         float* v1Row = v1[y];
970         float* v2Row = v2[y];
971         float* v3Row = use_gamma ? v3[y]:NULL;
972 
973         for (int x = 0; x < I1wx.cols; ++x)
974         {
975             const float rho = use_gamma ? rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]) + gamma * u3Row[x] :
976                                           rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]);
977             float d1 = 0.0f;
978             float d2 = 0.0f;
979             float d3 = 0.0f;
980             if (rho < -l_t * gradRow[x])
981             {
982                 d1 = l_t * I1wxRow[x];
983                 d2 = l_t * I1wyRow[x];
984                 if (use_gamma) d3 = l_t * gamma;
985             }
986             else if (rho > l_t * gradRow[x])
987             {
988                 d1 = -l_t * I1wxRow[x];
989                 d2 = -l_t * I1wyRow[x];
990                 if (use_gamma) d3 = -l_t * gamma;
991             }
992             else if (gradRow[x] > std::numeric_limits<float>::epsilon())
993             {
994                 float fi = -rho / gradRow[x];
995                 d1 = fi * I1wxRow[x];
996                 d2 = fi * I1wyRow[x];
997                 if (use_gamma) d3 = fi * gamma;
998             }
999 
1000             v1Row[x] = u1Row[x] + d1;
1001             v2Row[x] = u2Row[x] + d2;
1002             if (use_gamma) v3Row[x] = u3Row[x] + d3;
1003         }
1004     }
1005 }
1006 
estimateV(const Mat_<float> & I1wx,const Mat_<float> & I1wy,const Mat_<float> & u1,const Mat_<float> & u2,const Mat_<float> & u3,const Mat_<float> & grad,const Mat_<float> & rho_c,Mat_<float> & v1,Mat_<float> & v2,Mat_<float> & v3,float l_t,float gamma)1007 void estimateV(const Mat_<float>& I1wx, const Mat_<float>& I1wy, const Mat_<float>& u1, const Mat_<float>& u2, const Mat_<float>& u3, const Mat_<float>& grad, const Mat_<float>& rho_c,
1008    Mat_<float>& v1, Mat_<float>& v2, Mat_<float>& v3, float l_t, float gamma)
1009 {
1010     CV_DbgAssert( I1wy.size() == I1wx.size() );
1011     CV_DbgAssert( u1.size() == I1wx.size() );
1012     CV_DbgAssert( u2.size() == I1wx.size() );
1013     CV_DbgAssert( grad.size() == I1wx.size() );
1014     CV_DbgAssert( rho_c.size() == I1wx.size() );
1015     CV_DbgAssert( v1.size() == I1wx.size() );
1016     CV_DbgAssert( v2.size() == I1wx.size() );
1017 
1018     EstimateVBody body;
1019     bool use_gamma = gamma != 0;
1020     body.I1wx = I1wx;
1021     body.I1wy = I1wy;
1022     body.u1 = u1;
1023     body.u2 = u2;
1024     if (use_gamma) body.u3 = u3;
1025     body.grad = grad;
1026     body.rho_c = rho_c;
1027     body.v1 = v1;
1028     body.v2 = v2;
1029     if (use_gamma) body.v3 = v3;
1030     body.l_t = l_t;
1031     body.gamma = gamma;
1032     parallel_for_(Range(0, I1wx.rows), body);
1033 }
1034 
1035 ////////////////////////////////////////////////////////////
1036 // estimateU
1037 
estimateU(const Mat_<float> & v1,const Mat_<float> & v2,const Mat_<float> & v3,const Mat_<float> & div_p1,const Mat_<float> & div_p2,const Mat_<float> & div_p3,Mat_<float> & u1,Mat_<float> & u2,Mat_<float> & u3,float theta,float gamma)1038 float estimateU(const Mat_<float>& v1, const Mat_<float>& v2, const Mat_<float>& v3,
1039             const Mat_<float>& div_p1, const Mat_<float>& div_p2, const Mat_<float>& div_p3,
1040             Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3,
1041             float theta, float gamma)
1042 {
1043     CV_DbgAssert( v2.size() == v1.size() );
1044     CV_DbgAssert( div_p1.size() == v1.size() );
1045     CV_DbgAssert( div_p2.size() == v1.size() );
1046     CV_DbgAssert( u1.size() == v1.size() );
1047     CV_DbgAssert( u2.size() == v1.size() );
1048 
1049     float error = 0.0f;
1050     bool use_gamma = gamma != 0;
1051     for (int y = 0; y < v1.rows; ++y)
1052     {
1053         const float* v1Row = v1[y];
1054         const float* v2Row = v2[y];
1055         const float* v3Row = use_gamma?v3[y]:NULL;
1056         const float* divP1Row = div_p1[y];
1057         const float* divP2Row = div_p2[y];
1058         const float* divP3Row = use_gamma?div_p3[y]:NULL;
1059 
1060         float* u1Row = u1[y];
1061         float* u2Row = u2[y];
1062         float* u3Row = use_gamma?u3[y]:NULL;
1063 
1064 
1065         for (int x = 0; x < v1.cols; ++x)
1066         {
1067             const float u1k = u1Row[x];
1068             const float u2k = u2Row[x];
1069             const float u3k = use_gamma?u3Row[x]:0;
1070 
1071             u1Row[x] = v1Row[x] + theta * divP1Row[x];
1072             u2Row[x] = v2Row[x] + theta * divP2Row[x];
1073             if (use_gamma) u3Row[x] = v3Row[x] + theta * divP3Row[x];
1074             error += use_gamma?(u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k) + (u3Row[x] - u3k) * (u3Row[x] - u3k):
1075                                (u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k);
1076         }
1077     }
1078 
1079     return error;
1080 }
1081 
1082 ////////////////////////////////////////////////////////////
1083 // estimateDualVariables
1084 
1085 struct EstimateDualVariablesBody : ParallelLoopBody
1086 {
1087     void operator() (const Range& range) const;
1088 
1089     Mat_<float> u1x;
1090     Mat_<float> u1y;
1091     Mat_<float> u2x;
1092     Mat_<float> u2y;
1093     Mat_<float> u3x;
1094     Mat_<float> u3y;
1095     mutable Mat_<float> p11;
1096     mutable Mat_<float> p12;
1097     mutable Mat_<float> p21;
1098     mutable Mat_<float> p22;
1099     mutable Mat_<float> p31;
1100     mutable Mat_<float> p32;
1101     float taut;
1102     bool use_gamma;
1103 };
1104 
operator ()(const Range & range) const1105 void EstimateDualVariablesBody::operator() (const Range& range) const
1106 {
1107     for (int y = range.start; y < range.end; ++y)
1108     {
1109         const float* u1xRow = u1x[y];
1110         const float* u1yRow = u1y[y];
1111         const float* u2xRow = u2x[y];
1112         const float* u2yRow = u2y[y];
1113         const float* u3xRow = u3x[y];
1114         const float* u3yRow = u3y[y];
1115 
1116         float* p11Row = p11[y];
1117         float* p12Row = p12[y];
1118         float* p21Row = p21[y];
1119         float* p22Row = p22[y];
1120         float* p31Row = p31[y];
1121         float* p32Row = p32[y];
1122 
1123         for (int x = 0; x < u1x.cols; ++x)
1124         {
1125             const float g1 = static_cast<float>(hypot(u1xRow[x], u1yRow[x]));
1126             const float g2 = static_cast<float>(hypot(u2xRow[x], u2yRow[x]));
1127             const float g3 = static_cast<float>(hypot(u3xRow[x], u3yRow[x]));
1128 
1129             const float ng1  = 1.0f + taut * g1;
1130             const float ng2 =  1.0f + taut * g2;
1131             const float ng3 = 1.0f + taut * g3;
1132 
1133             p11Row[x] = (p11Row[x] + taut * u1xRow[x]) / ng1;
1134             p12Row[x] = (p12Row[x] + taut * u1yRow[x]) / ng1;
1135             p21Row[x] = (p21Row[x] + taut * u2xRow[x]) / ng2;
1136             p22Row[x] = (p22Row[x] + taut * u2yRow[x]) / ng2;
1137             if (use_gamma) p31Row[x] = (p31Row[x] + taut * u3xRow[x]) / ng3;
1138             if (use_gamma) p32Row[x] = (p32Row[x] + taut * u3yRow[x]) / ng3;
1139         }
1140     }
1141 }
1142 
estimateDualVariables(const Mat_<float> & u1x,const Mat_<float> & u1y,const Mat_<float> & u2x,const Mat_<float> & u2y,const Mat_<float> & u3x,const Mat_<float> & u3y,Mat_<float> & p11,Mat_<float> & p12,Mat_<float> & p21,Mat_<float> & p22,Mat_<float> & p31,Mat_<float> & p32,float taut,bool use_gamma)1143 void estimateDualVariables(const Mat_<float>& u1x, const Mat_<float>& u1y,
1144                      const Mat_<float>& u2x, const Mat_<float>& u2y,
1145                      const Mat_<float>& u3x, const Mat_<float>& u3y,
1146                            Mat_<float>& p11, Mat_<float>& p12,
1147                      Mat_<float>& p21, Mat_<float>& p22,
1148                      Mat_<float>& p31, Mat_<float>& p32,
1149                      float taut, bool use_gamma)
1150 {
1151     CV_DbgAssert( u1y.size() == u1x.size() );
1152     CV_DbgAssert( u2x.size() == u1x.size() );
1153     CV_DbgAssert( u3x.size() == u1x.size() );
1154     CV_DbgAssert( u2y.size() == u1x.size() );
1155     CV_DbgAssert( u3y.size() == u1x.size() );
1156     CV_DbgAssert( p11.size() == u1x.size() );
1157     CV_DbgAssert( p12.size() == u1x.size() );
1158     CV_DbgAssert( p21.size() == u1x.size() );
1159     CV_DbgAssert( p22.size() == u1x.size() );
1160     CV_DbgAssert( p31.size() == u1x.size() );
1161     CV_DbgAssert( p32.size() == u1x.size() );
1162 
1163     EstimateDualVariablesBody body;
1164 
1165     body.u1x = u1x;
1166     body.u1y = u1y;
1167     body.u2x = u2x;
1168     body.u2y = u2y;
1169     body.u3x = u3x;
1170     body.u3y = u3y;
1171     body.p11 = p11;
1172     body.p12 = p12;
1173     body.p21 = p21;
1174     body.p22 = p22;
1175     body.p31 = p31;
1176     body.p32 = p32;
1177     body.taut = taut;
1178     body.use_gamma = use_gamma;
1179 
1180     parallel_for_(Range(0, u1x.rows), body);
1181 }
1182 
procOneScale_ocl(const UMat & I0,const UMat & I1,UMat & u1,UMat & u2)1183 bool OpticalFlowDual_TVL1::procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2)
1184 {
1185     using namespace cv_ocl_tvl1flow;
1186 
1187     const double scaledEpsilon = epsilon * epsilon * I0.size().area();
1188 
1189     CV_DbgAssert(I1.size() == I0.size());
1190     CV_DbgAssert(I1.type() == I0.type());
1191     CV_DbgAssert(u1.empty() || u1.size() == I0.size());
1192     CV_DbgAssert(u2.size() == u1.size());
1193 
1194     if (u1.empty())
1195     {
1196         u1.create(I0.size(), CV_32FC1);
1197         u1.setTo(Scalar::all(0));
1198 
1199         u2.create(I0.size(), CV_32FC1);
1200         u2.setTo(Scalar::all(0));
1201     }
1202 
1203     UMat I1x = dum.I1x_buf(Rect(0, 0, I0.cols, I0.rows));
1204     UMat I1y = dum.I1y_buf(Rect(0, 0, I0.cols, I0.rows));
1205 
1206     if (!centeredGradient(I1, I1x, I1y))
1207         return false;
1208 
1209     UMat I1w = dum.I1w_buf(Rect(0, 0, I0.cols, I0.rows));
1210     UMat I1wx = dum.I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
1211     UMat I1wy = dum.I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
1212 
1213     UMat grad = dum.grad_buf(Rect(0, 0, I0.cols, I0.rows));
1214     UMat rho_c = dum.rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
1215 
1216     UMat p11 = dum.p11_buf(Rect(0, 0, I0.cols, I0.rows));
1217     UMat p12 = dum.p12_buf(Rect(0, 0, I0.cols, I0.rows));
1218     UMat p21 = dum.p21_buf(Rect(0, 0, I0.cols, I0.rows));
1219     UMat p22 = dum.p22_buf(Rect(0, 0, I0.cols, I0.rows));
1220     p11.setTo(Scalar::all(0));
1221     p12.setTo(Scalar::all(0));
1222     p21.setTo(Scalar::all(0));
1223     p22.setTo(Scalar::all(0));
1224 
1225     UMat diff = dum.diff_buf(Rect(0, 0, I0.cols, I0.rows));
1226 
1227     const float l_t = static_cast<float>(lambda * theta);
1228     const float taut = static_cast<float>(tau / theta);
1229     int n;
1230 
1231     for (int warpings = 0; warpings < warps; ++warpings)
1232     {
1233         if (!warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c))
1234             return false;
1235 
1236         double error = std::numeric_limits<double>::max();
1237         double prev_error = 0;
1238 
1239         for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer)
1240         {
1241             if (medianFiltering > 1) {
1242                 cv::medianBlur(u1, u1, medianFiltering);
1243                 cv::medianBlur(u2, u2, medianFiltering);
1244             }
1245             for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner)
1246             {
1247                 // some tweaks to make sum operation less frequently
1248                 n = n_inner + n_outer*innerIterations;
1249                 char calc_error = (n & 0x1) && (prev_error < scaledEpsilon);
1250                 if (!estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22,
1251                     u1, u2, diff, l_t, static_cast<float>(theta), calc_error))
1252                     return false;
1253                 if (calc_error)
1254                 {
1255                     error = cv::sum(diff)[0];
1256                     prev_error = error;
1257                 }
1258                 else
1259                 {
1260                     error = std::numeric_limits<double>::max();
1261                     prev_error -= scaledEpsilon;
1262                 }
1263                 if (!estimateDualVariables(u1, u2, p11, p12, p21, p22, taut))
1264                     return false;
1265             }
1266         }
1267     }
1268     return true;
1269 }
1270 
procOneScale(const Mat_<float> & I0,const Mat_<float> & I1,Mat_<float> & u1,Mat_<float> & u2,Mat_<float> & u3)1271 void OpticalFlowDual_TVL1::procOneScale(const Mat_<float>& I0, const Mat_<float>& I1, Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3)
1272 {
1273     const float scaledEpsilon = static_cast<float>(epsilon * epsilon * I0.size().area());
1274 
1275     CV_DbgAssert( I1.size() == I0.size() );
1276     CV_DbgAssert( I1.type() == I0.type() );
1277     CV_DbgAssert( u1.size() == I0.size() );
1278     CV_DbgAssert( u2.size() == u1.size() );
1279 
1280     Mat_<float> I1x = dm.I1x_buf(Rect(0, 0, I0.cols, I0.rows));
1281     Mat_<float> I1y = dm.I1y_buf(Rect(0, 0, I0.cols, I0.rows));
1282     centeredGradient(I1, I1x, I1y);
1283 
1284     Mat_<float> flowMap1 = dm.flowMap1_buf(Rect(0, 0, I0.cols, I0.rows));
1285     Mat_<float> flowMap2 = dm.flowMap2_buf(Rect(0, 0, I0.cols, I0.rows));
1286 
1287     Mat_<float> I1w = dm.I1w_buf(Rect(0, 0, I0.cols, I0.rows));
1288     Mat_<float> I1wx = dm.I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
1289     Mat_<float> I1wy = dm.I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
1290 
1291     Mat_<float> grad = dm.grad_buf(Rect(0, 0, I0.cols, I0.rows));
1292     Mat_<float> rho_c = dm.rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
1293 
1294     Mat_<float> v1 = dm.v1_buf(Rect(0, 0, I0.cols, I0.rows));
1295     Mat_<float> v2 = dm.v2_buf(Rect(0, 0, I0.cols, I0.rows));
1296     Mat_<float> v3 = dm.v3_buf(Rect(0, 0, I0.cols, I0.rows));
1297 
1298     Mat_<float> p11 = dm.p11_buf(Rect(0, 0, I0.cols, I0.rows));
1299     Mat_<float> p12 = dm.p12_buf(Rect(0, 0, I0.cols, I0.rows));
1300     Mat_<float> p21 = dm.p21_buf(Rect(0, 0, I0.cols, I0.rows));
1301     Mat_<float> p22 = dm.p22_buf(Rect(0, 0, I0.cols, I0.rows));
1302     Mat_<float> p31 = dm.p31_buf(Rect(0, 0, I0.cols, I0.rows));
1303     Mat_<float> p32 = dm.p32_buf(Rect(0, 0, I0.cols, I0.rows));
1304     p11.setTo(Scalar::all(0));
1305     p12.setTo(Scalar::all(0));
1306     p21.setTo(Scalar::all(0));
1307     p22.setTo(Scalar::all(0));
1308     bool use_gamma = gamma != 0.;
1309     if (use_gamma) p31.setTo(Scalar::all(0));
1310     if (use_gamma) p32.setTo(Scalar::all(0));
1311 
1312     Mat_<float> div_p1 = dm.div_p1_buf(Rect(0, 0, I0.cols, I0.rows));
1313     Mat_<float> div_p2 = dm.div_p2_buf(Rect(0, 0, I0.cols, I0.rows));
1314     Mat_<float> div_p3 = dm.div_p3_buf(Rect(0, 0, I0.cols, I0.rows));
1315 
1316     Mat_<float> u1x = dm.u1x_buf(Rect(0, 0, I0.cols, I0.rows));
1317     Mat_<float> u1y = dm.u1y_buf(Rect(0, 0, I0.cols, I0.rows));
1318     Mat_<float> u2x = dm.u2x_buf(Rect(0, 0, I0.cols, I0.rows));
1319     Mat_<float> u2y = dm.u2y_buf(Rect(0, 0, I0.cols, I0.rows));
1320     Mat_<float> u3x = dm.u3x_buf(Rect(0, 0, I0.cols, I0.rows));
1321     Mat_<float> u3y = dm.u3y_buf(Rect(0, 0, I0.cols, I0.rows));
1322 
1323     const float l_t = static_cast<float>(lambda * theta);
1324     const float taut = static_cast<float>(tau / theta);
1325 
1326     for (int warpings = 0; warpings < warps; ++warpings)
1327     {
1328         // compute the warping of the target image and its derivatives
1329         buildFlowMap(u1, u2, flowMap1, flowMap2);
1330         remap(I1, I1w, flowMap1, flowMap2, INTER_CUBIC);
1331         remap(I1x, I1wx, flowMap1, flowMap2, INTER_CUBIC);
1332         remap(I1y, I1wy, flowMap1, flowMap2, INTER_CUBIC);
1333         //calculate I1(x+u0) and its gradient
1334         calcGradRho(I0, I1w, I1wx, I1wy, u1, u2, grad, rho_c);
1335 
1336         float error = std::numeric_limits<float>::max();
1337         for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer)
1338         {
1339             if (medianFiltering > 1) {
1340                 cv::medianBlur(u1, u1, medianFiltering);
1341                 cv::medianBlur(u2, u2, medianFiltering);
1342             }
1343             for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner)
1344             {
1345                 // estimate the values of the variable (v1, v2) (thresholding operator TH)
1346                 estimateV(I1wx, I1wy, u1, u2, u3, grad, rho_c, v1, v2, v3, l_t, static_cast<float>(gamma));
1347 
1348                 // compute the divergence of the dual variable (p1, p2, p3)
1349                 divergence(p11, p12, div_p1);
1350                 divergence(p21, p22, div_p2);
1351                 if (use_gamma) divergence(p31, p32, div_p3);
1352 
1353                 // estimate the values of the optical flow (u1, u2)
1354                 error = estimateU(v1, v2, v3, div_p1, div_p2, div_p3, u1, u2, u3, static_cast<float>(theta), static_cast<float>(gamma));
1355 
1356                 // compute the gradient of the optical flow (Du1, Du2)
1357                 forwardGradient(u1, u1x, u1y);
1358                 forwardGradient(u2, u2x, u2y);
1359                 if (use_gamma) forwardGradient(u3, u3x, u3y);
1360 
1361                 // estimate the values of the dual variable (p1, p2, p3)
1362                 estimateDualVariables(u1x, u1y, u2x, u2y, u3x, u3y, p11, p12, p21, p22, p31, p32, taut, use_gamma);
1363             }
1364         }
1365     }
1366 }
1367 
collectGarbage()1368 void OpticalFlowDual_TVL1::collectGarbage()
1369 {
1370     //dataMat structure dm
1371     dm.I0s.clear();
1372     dm.I1s.clear();
1373     dm.u1s.clear();
1374     dm.u2s.clear();
1375 
1376     dm.I1x_buf.release();
1377     dm.I1y_buf.release();
1378 
1379     dm.flowMap1_buf.release();
1380     dm.flowMap2_buf.release();
1381 
1382     dm.I1w_buf.release();
1383     dm.I1wx_buf.release();
1384     dm.I1wy_buf.release();
1385 
1386     dm.grad_buf.release();
1387     dm.rho_c_buf.release();
1388 
1389     dm.v1_buf.release();
1390     dm.v2_buf.release();
1391 
1392     dm.p11_buf.release();
1393     dm.p12_buf.release();
1394     dm.p21_buf.release();
1395     dm.p22_buf.release();
1396 
1397     dm.div_p1_buf.release();
1398     dm.div_p2_buf.release();
1399 
1400     dm.u1x_buf.release();
1401     dm.u1y_buf.release();
1402     dm.u2x_buf.release();
1403     dm.u2y_buf.release();
1404 
1405     //dataUMat structure dum
1406     dum.I0s.clear();
1407     dum.I1s.clear();
1408     dum.u1s.clear();
1409     dum.u2s.clear();
1410 
1411     dum.I1x_buf.release();
1412     dum.I1y_buf.release();
1413 
1414     dum.I1w_buf.release();
1415     dum.I1wx_buf.release();
1416     dum.I1wy_buf.release();
1417 
1418     dum.grad_buf.release();
1419     dum.rho_c_buf.release();
1420 
1421     dum.p11_buf.release();
1422     dum.p12_buf.release();
1423     dum.p21_buf.release();
1424     dum.p22_buf.release();
1425 
1426     dum.diff_buf.release();
1427     dum.norm_buf.release();
1428 }
1429 
1430 } // namespace
1431 
createOptFlow_DualTVL1()1432 Ptr<DualTVL1OpticalFlow> cv::createOptFlow_DualTVL1()
1433 {
1434     return makePtr<OpticalFlowDual_TVL1>();
1435 }
1436