1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #if !defined CUDA_DISABLER 44 45 #include "opencv2/core/cuda/common.hpp" 46 #include "opencv2/core/cuda/border_interpolate.hpp" 47 #include "opencv2/core/cuda/limits.hpp" 48 49 using namespace cv::cuda; 50 using namespace cv::cuda::device; 51 52 //////////////////////////////////////////////////////////// 53 // centeredGradient 54 55 namespace tvl1flow 56 { centeredGradientKernel(const PtrStepSzf src,PtrStepf dx,PtrStepf dy)57 __global__ void centeredGradientKernel(const PtrStepSzf src, PtrStepf dx, PtrStepf dy) 58 { 59 const int x = blockIdx.x * blockDim.x + threadIdx.x; 60 const int y = blockIdx.y * blockDim.y + threadIdx.y; 61 62 if (x >= src.cols || y >= src.rows) 63 return; 64 65 dx(y, x) = 0.5f * (src(y, ::min(x + 1, src.cols - 1)) - src(y, ::max(x - 1, 0))); 66 dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x)); 67 } 68 centeredGradient(PtrStepSzf src,PtrStepSzf dx,PtrStepSzf dy,cudaStream_t stream)69 void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy, cudaStream_t stream) 70 { 71 const dim3 block(32, 8); 72 const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y)); 73 74 centeredGradientKernel<<<grid, block, 0, stream>>>(src, dx, dy); 75 cudaSafeCall( cudaGetLastError() ); 76 77 if (!stream) 78 cudaSafeCall( cudaDeviceSynchronize() ); 79 } 80 } 81 82 //////////////////////////////////////////////////////////// 83 // warpBackward 84 85 namespace tvl1flow 86 { bicubicCoeff(float x_)87 static __device__ __forceinline__ float bicubicCoeff(float x_) 88 { 89 float x = fabsf(x_); 90 if (x <= 1.0f) 91 { 92 return x * x * (1.5f * x - 2.5f) + 1.0f; 93 } 94 else if (x < 2.0f) 95 { 96 return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f; 97 } 98 else 99 { 100 return 0.0f; 101 } 102 } 103 104 texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1 (false, cudaFilterModePoint, cudaAddressModeClamp); 105 texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1x(false, cudaFilterModePoint, cudaAddressModeClamp); 106 texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp); 107 warpBackwardKernel(const PtrStepSzf I0,const PtrStepf u1,const PtrStepf u2,PtrStepf I1w,PtrStepf I1wx,PtrStepf I1wy,PtrStepf grad,PtrStepf rho)108 __global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho) 109 { 110 const int x = blockIdx.x * blockDim.x + threadIdx.x; 111 const int y = blockIdx.y * blockDim.y + threadIdx.y; 112 113 if (x >= I0.cols || y >= I0.rows) 114 return; 115 116 const float u1Val = u1(y, x); 117 const float u2Val = u2(y, x); 118 119 const float wx = x + u1Val; 120 const float wy = y + u2Val; 121 122 const int xmin = ::ceilf(wx - 2.0f); 123 const int xmax = ::floorf(wx + 2.0f); 124 125 const int ymin = ::ceilf(wy - 2.0f); 126 const int ymax = ::floorf(wy + 2.0f); 127 128 float sum = 0.0f; 129 float sumx = 0.0f; 130 float sumy = 0.0f; 131 float wsum = 0.0f; 132 133 for (int cy = ymin; cy <= ymax; ++cy) 134 { 135 for (int cx = xmin; cx <= xmax; ++cx) 136 { 137 const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); 138 139 sum += w * tex2D(tex_I1 , cx, cy); 140 sumx += w * tex2D(tex_I1x, cx, cy); 141 sumy += w * tex2D(tex_I1y, cx, cy); 142 143 wsum += w; 144 } 145 } 146 147 const float coeff = 1.0f / wsum; 148 149 const float I1wVal = sum * coeff; 150 const float I1wxVal = sumx * coeff; 151 const float I1wyVal = sumy * coeff; 152 153 I1w(y, x) = I1wVal; 154 I1wx(y, x) = I1wxVal; 155 I1wy(y, x) = I1wyVal; 156 157 const float Ix2 = I1wxVal * I1wxVal; 158 const float Iy2 = I1wyVal * I1wyVal; 159 160 // store the |Grad(I1)|^2 161 grad(y, x) = Ix2 + Iy2; 162 163 // compute the constant part of the rho function 164 const float I0Val = I0(y, x); 165 rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; 166 } 167 warpBackward(PtrStepSzf I0,PtrStepSzf I1,PtrStepSzf I1x,PtrStepSzf I1y,PtrStepSzf u1,PtrStepSzf u2,PtrStepSzf I1w,PtrStepSzf I1wx,PtrStepSzf I1wy,PtrStepSzf grad,PtrStepSzf rho,cudaStream_t stream)168 void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, 169 PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, 170 PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho, 171 cudaStream_t stream) 172 { 173 const dim3 block(32, 8); 174 const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y)); 175 176 bindTexture(&tex_I1 , I1); 177 bindTexture(&tex_I1x, I1x); 178 bindTexture(&tex_I1y, I1y); 179 180 warpBackwardKernel<<<grid, block, 0, stream>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho); 181 cudaSafeCall( cudaGetLastError() ); 182 183 if (!stream) 184 cudaSafeCall( cudaDeviceSynchronize() ); 185 } 186 } 187 188 //////////////////////////////////////////////////////////// 189 // estimateU 190 191 namespace tvl1flow 192 { divergence(const PtrStepf & v1,const PtrStepf & v2,int y,int x)193 __device__ float divergence(const PtrStepf& v1, const PtrStepf& v2, int y, int x) 194 { 195 if (x > 0 && y > 0) 196 { 197 const float v1x = v1(y, x) - v1(y, x - 1); 198 const float v2y = v2(y, x) - v2(y - 1, x); 199 return v1x + v2y; 200 } 201 else 202 { 203 if (y > 0) 204 return v1(y, 0) + v2(y, 0) - v2(y - 1, 0); 205 else 206 { 207 if (x > 0) 208 return v1(0, x) - v1(0, x - 1) + v2(0, x); 209 else 210 return v1(0, 0) + v2(0, 0); 211 } 212 } 213 } 214 estimateUKernel(const PtrStepSzf I1wx,const PtrStepf I1wy,const PtrStepf grad,const PtrStepf rho_c,const PtrStepf p11,const PtrStepf p12,const PtrStepf p21,const PtrStepf p22,const PtrStepf p31,const PtrStepf p32,PtrStepf u1,PtrStepf u2,PtrStepf u3,PtrStepf error,const float l_t,const float theta,const float gamma,const bool calcError)215 __global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy, 216 const PtrStepf grad, const PtrStepf rho_c, 217 const PtrStepf p11, const PtrStepf p12, 218 const PtrStepf p21, const PtrStepf p22, 219 const PtrStepf p31, const PtrStepf p32, 220 PtrStepf u1, PtrStepf u2, PtrStepf u3, PtrStepf error, 221 const float l_t, const float theta, const float gamma, const bool calcError) 222 { 223 const int x = blockIdx.x * blockDim.x + threadIdx.x; 224 const int y = blockIdx.y * blockDim.y + threadIdx.y; 225 226 if (x >= I1wx.cols || y >= I1wx.rows) 227 return; 228 229 const float I1wxVal = I1wx(y, x); 230 const float I1wyVal = I1wy(y, x); 231 const float gradVal = grad(y, x); 232 const float u1OldVal = u1(y, x); 233 const float u2OldVal = u2(y, x); 234 const float u3OldVal = gamma ? u3(y, x) : 0; 235 236 const float rho = rho_c(y, x) + (I1wxVal * u1OldVal + I1wyVal * u2OldVal + gamma * u3OldVal); 237 238 // estimate the values of the variable (v1, v2) (thresholding operator TH) 239 240 float d1 = 0.0f; 241 float d2 = 0.0f; 242 float d3 = 0.0f; 243 244 if (rho < -l_t * gradVal) 245 { 246 d1 = l_t * I1wxVal; 247 d2 = l_t * I1wyVal; 248 if (gamma) 249 d3 = l_t * gamma; 250 } 251 else if (rho > l_t * gradVal) 252 { 253 d1 = -l_t * I1wxVal; 254 d2 = -l_t * I1wyVal; 255 if (gamma) 256 d3 = -l_t * gamma; 257 } 258 else if (gradVal > numeric_limits<float>::epsilon()) 259 { 260 const float fi = -rho / gradVal; 261 d1 = fi * I1wxVal; 262 d2 = fi * I1wyVal; 263 if (gamma) 264 d3 = fi * gamma; 265 } 266 267 const float v1 = u1OldVal + d1; 268 const float v2 = u2OldVal + d2; 269 const float v3 = u3OldVal + d3; 270 271 // compute the divergence of the dual variable (p1, p2) 272 273 const float div_p1 = divergence(p11, p12, y, x); 274 const float div_p2 = divergence(p21, p22, y, x); 275 const float div_p3 = gamma ? divergence(p31, p32, y, x) : 0; 276 277 // estimate the values of the optical flow (u1, u2) 278 279 const float u1NewVal = v1 + theta * div_p1; 280 const float u2NewVal = v2 + theta * div_p2; 281 const float u3NewVal = gamma ? v3 + theta * div_p3 : 0; 282 283 u1(y, x) = u1NewVal; 284 u2(y, x) = u2NewVal; 285 if (gamma) 286 u3(y, x) = u3NewVal; 287 288 if (calcError) 289 { 290 const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); 291 const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); 292 error(y, x) = n1 + n2; 293 } 294 } 295 estimateU(PtrStepSzf I1wx,PtrStepSzf I1wy,PtrStepSzf grad,PtrStepSzf rho_c,PtrStepSzf p11,PtrStepSzf p12,PtrStepSzf p21,PtrStepSzf p22,PtrStepSzf p31,PtrStepSzf p32,PtrStepSzf u1,PtrStepSzf u2,PtrStepSzf u3,PtrStepSzf error,float l_t,float theta,float gamma,bool calcError,cudaStream_t stream)296 void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy, 297 PtrStepSzf grad, PtrStepSzf rho_c, 298 PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, 299 PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error, 300 float l_t, float theta, float gamma, bool calcError, 301 cudaStream_t stream) 302 { 303 const dim3 block(32, 8); 304 const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y)); 305 306 estimateUKernel<<<grid, block, 0, stream>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError); 307 cudaSafeCall( cudaGetLastError() ); 308 309 if (!stream) 310 cudaSafeCall( cudaDeviceSynchronize() ); 311 } 312 } 313 314 //////////////////////////////////////////////////////////// 315 // estimateDualVariables 316 317 namespace tvl1flow 318 { estimateDualVariablesKernel(const PtrStepSzf u1,const PtrStepf u2,const PtrStepSzf u3,PtrStepf p11,PtrStepf p12,PtrStepf p21,PtrStepf p22,PtrStepf p31,PtrStepf p32,const float taut,const float gamma)319 __global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, const PtrStepSzf u3, 320 PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, PtrStepf p31, PtrStepf p32, const float taut, const float gamma) 321 { 322 const int x = blockIdx.x * blockDim.x + threadIdx.x; 323 const int y = blockIdx.y * blockDim.y + threadIdx.y; 324 325 if (x >= u1.cols || y >= u1.rows) 326 return; 327 328 const float u1x = u1(y, ::min(x + 1, u1.cols - 1)) - u1(y, x); 329 const float u1y = u1(::min(y + 1, u1.rows - 1), x) - u1(y, x); 330 331 const float u2x = u2(y, ::min(x + 1, u1.cols - 1)) - u2(y, x); 332 const float u2y = u2(::min(y + 1, u1.rows - 1), x) - u2(y, x); 333 334 const float u3x = gamma ? u3(y, ::min(x + 1, u1.cols - 1)) - u3(y, x) : 0; 335 const float u3y = gamma ? u3(::min(y + 1, u1.rows - 1), x) - u3(y, x) : 0; 336 337 const float g1 = ::hypotf(u1x, u1y); 338 const float g2 = ::hypotf(u2x, u2y); 339 const float g3 = gamma ? ::hypotf(u3x, u3y) : 0; 340 341 const float ng1 = 1.0f + taut * g1; 342 const float ng2 = 1.0f + taut * g2; 343 const float ng3 = gamma ? 1.0f + taut * g3 : 0; 344 345 p11(y, x) = (p11(y, x) + taut * u1x) / ng1; 346 p12(y, x) = (p12(y, x) + taut * u1y) / ng1; 347 p21(y, x) = (p21(y, x) + taut * u2x) / ng2; 348 p22(y, x) = (p22(y, x) + taut * u2y) / ng2; 349 if (gamma) 350 { 351 p31(y, x) = (p31(y, x) + taut * u3x) / ng3; 352 p32(y, x) = (p32(y, x) + taut * u3y) / ng3; 353 } 354 } 355 estimateDualVariables(PtrStepSzf u1,PtrStepSzf u2,PtrStepSzf u3,PtrStepSzf p11,PtrStepSzf p12,PtrStepSzf p21,PtrStepSzf p22,PtrStepSzf p31,PtrStepSzf p32,float taut,float gamma,cudaStream_t stream)356 void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, 357 PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, 358 float taut, float gamma, 359 cudaStream_t stream) 360 { 361 const dim3 block(32, 8); 362 const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y)); 363 364 estimateDualVariablesKernel<<<grid, block, 0, stream>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma); 365 cudaSafeCall( cudaGetLastError() ); 366 367 if (!stream) 368 cudaSafeCall( cudaDeviceSynchronize() ); 369 } 370 } 371 372 #endif // !defined CUDA_DISABLER 373