1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #if !defined CUDA_DISABLER
44 
45 #include "opencv2/core/cuda/common.hpp"
46 #include "opencv2/core/cuda/border_interpolate.hpp"
47 #include "opencv2/core/cuda/limits.hpp"
48 
49 using namespace cv::cuda;
50 using namespace cv::cuda::device;
51 
52 ////////////////////////////////////////////////////////////
53 // centeredGradient
54 
55 namespace tvl1flow
56 {
centeredGradientKernel(const PtrStepSzf src,PtrStepf dx,PtrStepf dy)57     __global__ void centeredGradientKernel(const PtrStepSzf src, PtrStepf dx, PtrStepf dy)
58     {
59         const int x = blockIdx.x * blockDim.x + threadIdx.x;
60         const int y = blockIdx.y * blockDim.y + threadIdx.y;
61 
62         if (x >= src.cols || y >= src.rows)
63             return;
64 
65         dx(y, x) = 0.5f * (src(y, ::min(x + 1, src.cols - 1)) - src(y, ::max(x - 1, 0)));
66         dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x));
67     }
68 
centeredGradient(PtrStepSzf src,PtrStepSzf dx,PtrStepSzf dy,cudaStream_t stream)69     void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy, cudaStream_t stream)
70     {
71         const dim3 block(32, 8);
72         const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
73 
74         centeredGradientKernel<<<grid, block, 0, stream>>>(src, dx, dy);
75         cudaSafeCall( cudaGetLastError() );
76 
77         if (!stream)
78             cudaSafeCall( cudaDeviceSynchronize() );
79     }
80 }
81 
82 ////////////////////////////////////////////////////////////
83 // warpBackward
84 
85 namespace tvl1flow
86 {
bicubicCoeff(float x_)87     static __device__ __forceinline__ float bicubicCoeff(float x_)
88     {
89         float x = fabsf(x_);
90         if (x <= 1.0f)
91         {
92             return x * x * (1.5f * x - 2.5f) + 1.0f;
93         }
94         else if (x < 2.0f)
95         {
96             return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
97         }
98         else
99         {
100             return 0.0f;
101         }
102     }
103 
104     texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1 (false, cudaFilterModePoint, cudaAddressModeClamp);
105     texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1x(false, cudaFilterModePoint, cudaAddressModeClamp);
106     texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp);
107 
warpBackwardKernel(const PtrStepSzf I0,const PtrStepf u1,const PtrStepf u2,PtrStepf I1w,PtrStepf I1wx,PtrStepf I1wy,PtrStepf grad,PtrStepf rho)108     __global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho)
109     {
110         const int x = blockIdx.x * blockDim.x + threadIdx.x;
111         const int y = blockIdx.y * blockDim.y + threadIdx.y;
112 
113         if (x >= I0.cols || y >= I0.rows)
114             return;
115 
116         const float u1Val = u1(y, x);
117         const float u2Val = u2(y, x);
118 
119         const float wx = x + u1Val;
120         const float wy = y + u2Val;
121 
122         const int xmin = ::ceilf(wx - 2.0f);
123         const int xmax = ::floorf(wx + 2.0f);
124 
125         const int ymin = ::ceilf(wy - 2.0f);
126         const int ymax = ::floorf(wy + 2.0f);
127 
128         float sum  = 0.0f;
129         float sumx = 0.0f;
130         float sumy = 0.0f;
131         float wsum = 0.0f;
132 
133         for (int cy = ymin; cy <= ymax; ++cy)
134         {
135             for (int cx = xmin; cx <= xmax; ++cx)
136             {
137                 const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
138 
139                 sum  += w * tex2D(tex_I1 , cx, cy);
140                 sumx += w * tex2D(tex_I1x, cx, cy);
141                 sumy += w * tex2D(tex_I1y, cx, cy);
142 
143                 wsum += w;
144             }
145         }
146 
147         const float coeff = 1.0f / wsum;
148 
149         const float I1wVal  = sum  * coeff;
150         const float I1wxVal = sumx * coeff;
151         const float I1wyVal = sumy * coeff;
152 
153         I1w(y, x)  = I1wVal;
154         I1wx(y, x) = I1wxVal;
155         I1wy(y, x) = I1wyVal;
156 
157         const float Ix2 = I1wxVal * I1wxVal;
158         const float Iy2 = I1wyVal * I1wyVal;
159 
160         // store the |Grad(I1)|^2
161         grad(y, x) = Ix2 + Iy2;
162 
163         // compute the constant part of the rho function
164         const float I0Val = I0(y, x);
165         rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
166     }
167 
warpBackward(PtrStepSzf I0,PtrStepSzf I1,PtrStepSzf I1x,PtrStepSzf I1y,PtrStepSzf u1,PtrStepSzf u2,PtrStepSzf I1w,PtrStepSzf I1wx,PtrStepSzf I1wy,PtrStepSzf grad,PtrStepSzf rho,cudaStream_t stream)168     void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y,
169                       PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx,
170                       PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho,
171                       cudaStream_t stream)
172     {
173         const dim3 block(32, 8);
174         const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y));
175 
176         bindTexture(&tex_I1 , I1);
177         bindTexture(&tex_I1x, I1x);
178         bindTexture(&tex_I1y, I1y);
179 
180         warpBackwardKernel<<<grid, block, 0, stream>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
181         cudaSafeCall( cudaGetLastError() );
182 
183         if (!stream)
184             cudaSafeCall( cudaDeviceSynchronize() );
185     }
186 }
187 
188 ////////////////////////////////////////////////////////////
189 // estimateU
190 
191 namespace tvl1flow
192 {
divergence(const PtrStepf & v1,const PtrStepf & v2,int y,int x)193     __device__ float divergence(const PtrStepf& v1, const PtrStepf& v2, int y, int x)
194     {
195         if (x > 0 && y > 0)
196         {
197             const float v1x = v1(y, x) - v1(y, x - 1);
198             const float v2y = v2(y, x) - v2(y - 1, x);
199             return v1x + v2y;
200         }
201         else
202         {
203             if (y > 0)
204                 return v1(y, 0) + v2(y, 0) - v2(y - 1, 0);
205             else
206             {
207                 if (x > 0)
208                     return v1(0, x) - v1(0, x - 1) + v2(0, x);
209                 else
210                     return v1(0, 0) + v2(0, 0);
211             }
212         }
213     }
214 
estimateUKernel(const PtrStepSzf I1wx,const PtrStepf I1wy,const PtrStepf grad,const PtrStepf rho_c,const PtrStepf p11,const PtrStepf p12,const PtrStepf p21,const PtrStepf p22,const PtrStepf p31,const PtrStepf p32,PtrStepf u1,PtrStepf u2,PtrStepf u3,PtrStepf error,const float l_t,const float theta,const float gamma,const bool calcError)215     __global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy,
216                               const PtrStepf grad, const PtrStepf rho_c,
217                               const PtrStepf p11, const PtrStepf p12,
218                               const PtrStepf p21, const PtrStepf p22,
219                               const PtrStepf p31, const PtrStepf p32,
220                               PtrStepf u1, PtrStepf u2, PtrStepf u3, PtrStepf error,
221                               const float l_t, const float theta, const float gamma, const bool calcError)
222     {
223         const int x = blockIdx.x * blockDim.x + threadIdx.x;
224         const int y = blockIdx.y * blockDim.y + threadIdx.y;
225 
226         if (x >= I1wx.cols || y >= I1wx.rows)
227             return;
228 
229         const float I1wxVal = I1wx(y, x);
230         const float I1wyVal = I1wy(y, x);
231         const float gradVal = grad(y, x);
232         const float u1OldVal = u1(y, x);
233         const float u2OldVal = u2(y, x);
234         const float u3OldVal = gamma ? u3(y, x) : 0;
235 
236         const float rho = rho_c(y, x) + (I1wxVal * u1OldVal + I1wyVal * u2OldVal + gamma * u3OldVal);
237 
238         // estimate the values of the variable (v1, v2) (thresholding operator TH)
239 
240         float d1 = 0.0f;
241         float d2 = 0.0f;
242         float d3 = 0.0f;
243 
244         if (rho < -l_t * gradVal)
245         {
246             d1 = l_t * I1wxVal;
247             d2 = l_t * I1wyVal;
248             if (gamma)
249                 d3 = l_t * gamma;
250         }
251         else if (rho > l_t * gradVal)
252         {
253             d1 = -l_t * I1wxVal;
254             d2 = -l_t * I1wyVal;
255             if (gamma)
256                 d3 = -l_t * gamma;
257         }
258         else if (gradVal > numeric_limits<float>::epsilon())
259         {
260             const float fi = -rho / gradVal;
261             d1 = fi * I1wxVal;
262             d2 = fi * I1wyVal;
263             if (gamma)
264                 d3 = fi * gamma;
265         }
266 
267         const float v1 = u1OldVal + d1;
268         const float v2 = u2OldVal + d2;
269         const float v3 = u3OldVal + d3;
270 
271         // compute the divergence of the dual variable (p1, p2)
272 
273         const float div_p1 = divergence(p11, p12, y, x);
274         const float div_p2 = divergence(p21, p22, y, x);
275         const float div_p3 = gamma ? divergence(p31, p32, y, x) : 0;
276 
277         // estimate the values of the optical flow (u1, u2)
278 
279         const float u1NewVal = v1 + theta * div_p1;
280         const float u2NewVal = v2 + theta * div_p2;
281         const float u3NewVal = gamma ? v3 + theta * div_p3 : 0;
282 
283         u1(y, x) = u1NewVal;
284         u2(y, x) = u2NewVal;
285         if (gamma)
286             u3(y, x) = u3NewVal;
287 
288         if (calcError)
289         {
290             const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
291             const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
292             error(y, x) = n1 + n2;
293         }
294     }
295 
estimateU(PtrStepSzf I1wx,PtrStepSzf I1wy,PtrStepSzf grad,PtrStepSzf rho_c,PtrStepSzf p11,PtrStepSzf p12,PtrStepSzf p21,PtrStepSzf p22,PtrStepSzf p31,PtrStepSzf p32,PtrStepSzf u1,PtrStepSzf u2,PtrStepSzf u3,PtrStepSzf error,float l_t,float theta,float gamma,bool calcError,cudaStream_t stream)296     void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
297                    PtrStepSzf grad, PtrStepSzf rho_c,
298                    PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
299                    PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error,
300                    float l_t, float theta, float gamma, bool calcError,
301                    cudaStream_t stream)
302     {
303         const dim3 block(32, 8);
304         const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
305 
306         estimateUKernel<<<grid, block, 0, stream>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError);
307         cudaSafeCall( cudaGetLastError() );
308 
309         if (!stream)
310             cudaSafeCall( cudaDeviceSynchronize() );
311     }
312 }
313 
314 ////////////////////////////////////////////////////////////
315 // estimateDualVariables
316 
317 namespace tvl1flow
318 {
estimateDualVariablesKernel(const PtrStepSzf u1,const PtrStepf u2,const PtrStepSzf u3,PtrStepf p11,PtrStepf p12,PtrStepf p21,PtrStepf p22,PtrStepf p31,PtrStepf p32,const float taut,const float gamma)319     __global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, const PtrStepSzf u3,
320                                                 PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, PtrStepf p31, PtrStepf p32, const float taut, const float gamma)
321     {
322         const int x = blockIdx.x * blockDim.x + threadIdx.x;
323         const int y = blockIdx.y * blockDim.y + threadIdx.y;
324 
325         if (x >= u1.cols || y >= u1.rows)
326             return;
327 
328         const float u1x = u1(y, ::min(x + 1, u1.cols - 1)) - u1(y, x);
329         const float u1y = u1(::min(y + 1, u1.rows - 1), x) - u1(y, x);
330 
331         const float u2x = u2(y, ::min(x + 1, u1.cols - 1)) - u2(y, x);
332         const float u2y = u2(::min(y + 1, u1.rows - 1), x) - u2(y, x);
333 
334         const float u3x = gamma ? u3(y, ::min(x + 1, u1.cols - 1)) - u3(y, x) : 0;
335         const float u3y = gamma ? u3(::min(y + 1, u1.rows - 1), x) - u3(y, x) : 0;
336 
337         const float g1 = ::hypotf(u1x, u1y);
338         const float g2 = ::hypotf(u2x, u2y);
339         const float g3 = gamma ? ::hypotf(u3x, u3y) : 0;
340 
341         const float ng1 = 1.0f + taut * g1;
342         const float ng2 = 1.0f + taut * g2;
343         const float ng3 = gamma ? 1.0f + taut * g3 : 0;
344 
345         p11(y, x) = (p11(y, x) + taut * u1x) / ng1;
346         p12(y, x) = (p12(y, x) + taut * u1y) / ng1;
347         p21(y, x) = (p21(y, x) + taut * u2x) / ng2;
348         p22(y, x) = (p22(y, x) + taut * u2y) / ng2;
349         if (gamma)
350         {
351             p31(y, x) = (p31(y, x) + taut * u3x) / ng3;
352             p32(y, x) = (p32(y, x) + taut * u3y) / ng3;
353         }
354     }
355 
estimateDualVariables(PtrStepSzf u1,PtrStepSzf u2,PtrStepSzf u3,PtrStepSzf p11,PtrStepSzf p12,PtrStepSzf p21,PtrStepSzf p22,PtrStepSzf p31,PtrStepSzf p32,float taut,float gamma,cudaStream_t stream)356     void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3,
357                                PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
358                                float taut, float gamma,
359                                cudaStream_t stream)
360     {
361         const dim3 block(32, 8);
362         const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));
363 
364         estimateDualVariablesKernel<<<grid, block, 0, stream>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma);
365         cudaSafeCall( cudaGetLastError() );
366 
367         if (!stream)
368             cudaSafeCall( cudaDeviceSynchronize() );
369     }
370 }
371 
372 #endif // !defined CUDA_DISABLER
373