1/*M/////////////////////////////////////////////////////////////////////////////////////// 2// 3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4// 5// By downloading, copying, installing or using the software you agree to this license. 6// If you do not agree to this license, do not download, install, 7// copy or use the software. 8// 9// 10// License Agreement 11// For Open Source Computer Vision Library 12// 13// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. 14// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. 15// Third party copyrights are property of their respective owners. 16// 17// @Authors 18// Jin Ma jin@multicorewareinc.com 19// 20// Redistribution and use in source and binary forms, with or without modification, 21// are permitted provided that the following conditions are met: 22// 23// * Redistribution's of source code must retain the above copyright notice, 24// this list of conditions and the following disclaimer. 25// 26// * Redistribution's in binary form must reproduce the above copyright notice, 27// this list of conditions and the following disclaimer in the documentation 28// and/or other materials provided with the distribution. 29// 30// * The name of the copyright holders may not be used to endorse or promote products 31// derived from this software without specific prior written permission. 32// 33// This software is provided by the copyright holders and contributors as is and 34// any express or implied warranties, including, but not limited to, the implied 35// warranties of merchantability and fitness for a particular purpose are disclaimed. 36// In no event shall the Intel Corporation or contributors be liable for any direct, 37// indirect, incidental, special, exemplary, or consequential damages 38// (including, but not limited to, procurement of substitute goods or services; 39// loss of use, data, or profits; or business interruption) however caused 40// and on any theory of liability, whether in contract, strict liability, 41// or tort (including negligence or otherwise) arising in any way out of 42// the use of this software, even if advised of the possibility of such damage. 43// 44//M*/ 45 46__kernel void centeredGradientKernel(__global const float* src_ptr, int src_col, int src_row, int src_step, 47 __global float* dx, __global float* dy, int d_step) 48{ 49 int x = get_global_id(0); 50 int y = get_global_id(1); 51 52 if((x < src_col)&&(y < src_row)) 53 { 54 int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1); 55 int src_x2 = (x - 1) > 0 ? (x -1) : 0; 56 dx[y * d_step+ x] = 0.5f * (src_ptr[y * src_step + src_x1] - src_ptr[y * src_step+ src_x2]); 57 58 int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1); 59 int src_y2 = (y - 1) > 0 ? (y - 1) : 0; 60 dy[y * d_step+ x] = 0.5f * (src_ptr[src_y1 * src_step + x] - src_ptr[src_y2 * src_step+ x]); 61 } 62 63} 64 65inline float bicubicCoeff(float x_) 66{ 67 68 float x = fabs(x_); 69 if (x <= 1.0f) 70 return x * x * (1.5f * x - 2.5f) + 1.0f; 71 else if (x < 2.0f) 72 return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f; 73 else 74 return 0.0f; 75} 76 77__kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row, 78 image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y, 79 __global const float* u1, int u1_step, 80 __global const float* u2, 81 __global float* I1w, 82 __global float* I1wx, /*int I1wx_step,*/ 83 __global float* I1wy, /*int I1wy_step,*/ 84 __global float* grad, /*int grad_step,*/ 85 __global float* rho, 86 int I1w_step, 87 int u2_step, 88 int u1_offset_x, 89 int u1_offset_y, 90 int u2_offset_x, 91 int u2_offset_y) 92{ 93 int x = get_global_id(0); 94 int y = get_global_id(1); 95 96 if(x < I0_col&&y < I0_row) 97 { 98 //float u1Val = u1(y, x); 99 float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; 100 //float u2Val = u2(y, x); 101 float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; 102 103 float wx = x + u1Val; 104 float wy = y + u2Val; 105 106 int xmin = ceil(wx - 2.0f); 107 int xmax = floor(wx + 2.0f); 108 109 int ymin = ceil(wy - 2.0f); 110 int ymax = floor(wy + 2.0f); 111 112 float sum = 0.0f; 113 float sumx = 0.0f; 114 float sumy = 0.0f; 115 float wsum = 0.0f; 116 sampler_t sampleri = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; 117 for (int cy = ymin; cy <= ymax; ++cy) 118 { 119 for (int cx = xmin; cx <= xmax; ++cx) 120 { 121 float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); 122 //sum += w * tex2D(tex_I1 , cx, cy); 123 int2 cood = (int2)(cx, cy); 124 sum += w * read_imagef(tex_I1, sampleri, cood).x; 125 //sumx += w * tex2D(tex_I1x, cx, cy); 126 sumx += w * read_imagef(tex_I1x, sampleri, cood).x; 127 //sumy += w * tex2D(tex_I1y, cx, cy); 128 sumy += w * read_imagef(tex_I1y, sampleri, cood).x; 129 wsum += w; 130 } 131 } 132 float coeff = 1.0f / wsum; 133 float I1wVal = sum * coeff; 134 float I1wxVal = sumx * coeff; 135 float I1wyVal = sumy * coeff; 136 I1w[y * I1w_step + x] = I1wVal; 137 I1wx[y * I1w_step + x] = I1wxVal; 138 I1wy[y * I1w_step + x] = I1wyVal; 139 float Ix2 = I1wxVal * I1wxVal; 140 float Iy2 = I1wyVal * I1wyVal; 141 142 // store the |Grad(I1)|^2 143 grad[y * I1w_step + x] = Ix2 + Iy2; 144 145 // compute the constant part of the rho function 146 float I0Val = I0[y * I0_step + x]; 147 rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; 148 } 149} 150 151inline float readImage(__global float *image, int x, int y, int rows, int cols, int elemCntPerRow) 152{ 153 int i0 = clamp(x, 0, cols - 1); 154 int j0 = clamp(y, 0, rows - 1); 155 156 return image[j0 * elemCntPerRow + i0]; 157} 158 159__kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row, 160 __global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y, 161 __global const float* u1, int u1_step, 162 __global const float* u2, 163 __global float* I1w, 164 __global float* I1wx, /*int I1wx_step,*/ 165 __global float* I1wy, /*int I1wy_step,*/ 166 __global float* grad, /*int grad_step,*/ 167 __global float* rho, 168 int I1w_step, 169 int u2_step, 170 int I1_step, 171 int I1x_step) 172{ 173 int x = get_global_id(0); 174 int y = get_global_id(1); 175 176 if(x < I0_col&&y < I0_row) 177 { 178 //float u1Val = u1(y, x); 179 float u1Val = u1[y * u1_step + x]; 180 //float u2Val = u2(y, x); 181 float u2Val = u2[y * u2_step + x]; 182 183 float wx = x + u1Val; 184 float wy = y + u2Val; 185 186 int xmin = ceil(wx - 2.0f); 187 int xmax = floor(wx + 2.0f); 188 189 int ymin = ceil(wy - 2.0f); 190 int ymax = floor(wy + 2.0f); 191 192 float sum = 0.0f; 193 float sumx = 0.0f; 194 float sumy = 0.0f; 195 float wsum = 0.0f; 196 197 for (int cy = ymin; cy <= ymax; ++cy) 198 { 199 for (int cx = xmin; cx <= xmax; ++cx) 200 { 201 float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); 202 203 int2 cood = (int2)(cx, cy); 204 sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step); 205 sumx += w * readImage(tex_I1x, cood.x, cood.y, I0_col, I0_row, I1x_step); 206 sumy += w * readImage(tex_I1y, cood.x, cood.y, I0_col, I0_row, I1x_step); 207 wsum += w; 208 } 209 } 210 211 float coeff = 1.0f / wsum; 212 213 float I1wVal = sum * coeff; 214 float I1wxVal = sumx * coeff; 215 float I1wyVal = sumy * coeff; 216 217 I1w[y * I1w_step + x] = I1wVal; 218 I1wx[y * I1w_step + x] = I1wxVal; 219 I1wy[y * I1w_step + x] = I1wyVal; 220 221 float Ix2 = I1wxVal * I1wxVal; 222 float Iy2 = I1wyVal * I1wyVal; 223 224 // store the |Grad(I1)|^2 225 grad[y * I1w_step + x] = Ix2 + Iy2; 226 227 // compute the constant part of the rho function 228 float I0Val = I0[y * I0_step + x]; 229 rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; 230 } 231 232} 233 234 235__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step, 236 __global const float* u2, 237 __global float* p11, int p11_step, 238 __global float* p12, 239 __global float* p21, 240 __global float* p22, 241 float taut, 242 int u2_step, 243 int u1_offset_x, 244 int u1_offset_y, 245 int u2_offset_x, 246 int u2_offset_y) 247{ 248 int x = get_global_id(0); 249 int y = get_global_id(1); 250 251 if(x < u1_col && y < u1_row) 252 { 253 int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1); 254 float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; 255 256 int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1); 257 float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; 258 259 int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1); 260 float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; 261 262 int src_y2 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1); 263 float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; 264 265 float g1 = hypot(u1x, u1y); 266 float g2 = hypot(u2x, u2y); 267 268 float ng1 = 1.0f + taut * g1; 269 float ng2 = 1.0f + taut * g2; 270 271 p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1; 272 p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1; 273 p21[y * p11_step + x] = (p21[y * p11_step + x] + taut * u2x) / ng2; 274 p22[y * p11_step + x] = (p22[y * p11_step + x] + taut * u2y) / ng2; 275 } 276 277} 278 279inline float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step) 280{ 281 282 if (x > 0 && y > 0) 283 { 284 float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1]; 285 float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x]; 286 return v1x + v2y; 287 } 288 else 289 { 290 if (y > 0) 291 return v1[y * v1_step + 0] + v2[y * v2_step + 0] - v2[(y - 1) * v2_step + 0]; 292 else 293 { 294 if (x > 0) 295 return v1[0 * v1_step + x] - v1[0 * v1_step + x - 1] + v2[0 * v2_step + x]; 296 else 297 return v1[0 * v1_step + 0] + v2[0 * v2_step + 0]; 298 } 299 } 300 301} 302 303__kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step, 304 __global const float* I1wy, /*int I1wy_step,*/ 305 __global const float* grad, /*int grad_step,*/ 306 __global const float* rho_c, /*int rho_c_step,*/ 307 __global const float* p11, /*int p11_step,*/ 308 __global const float* p12, /*int p12_step,*/ 309 __global const float* p21, /*int p21_step,*/ 310 __global const float* p22, /*int p22_step,*/ 311 __global float* u1, int u1_step, 312 __global float* u2, 313 __global float* error, float l_t, float theta, int u2_step, 314 int u1_offset_x, 315 int u1_offset_y, 316 int u2_offset_x, 317 int u2_offset_y, 318 char calc_error) 319{ 320 int x = get_global_id(0); 321 int y = get_global_id(1); 322 323 if(x < I1wx_col && y < I1wx_row) 324 { 325 float I1wxVal = I1wx[y * I1wx_step + x]; 326 float I1wyVal = I1wy[y * I1wx_step + x]; 327 float gradVal = grad[y * I1wx_step + x]; 328 float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; 329 float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; 330 331 float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal); 332 333 // estimate the values of the variable (v1, v2) (thresholding operator TH) 334 335 float d1 = 0.0f; 336 float d2 = 0.0f; 337 338 if (rho < -l_t * gradVal) 339 { 340 d1 = l_t * I1wxVal; 341 d2 = l_t * I1wyVal; 342 } 343 else if (rho > l_t * gradVal) 344 { 345 d1 = -l_t * I1wxVal; 346 d2 = -l_t * I1wyVal; 347 } 348 else if (gradVal > 1.192092896e-07f) 349 { 350 float fi = -rho / gradVal; 351 d1 = fi * I1wxVal; 352 d2 = fi * I1wyVal; 353 } 354 355 float v1 = u1OldVal + d1; 356 float v2 = u2OldVal + d2; 357 358 // compute the divergence of the dual variable (p1, p2) 359 360 float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step); 361 float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step); 362 363 // estimate the values of the optical flow (u1, u2) 364 365 float u1NewVal = v1 + theta * div_p1; 366 float u2NewVal = v2 + theta * div_p2; 367 368 u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal; 369 u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal; 370 371 if(calc_error) 372 { 373 float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); 374 float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); 375 error[y * I1wx_step + x] = n1 + n2; 376 } 377 } 378} 379