1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include <math.h>
17 
18 #include "tensorflow/examples/android/jni/object_tracking/geom.h"
19 #include "tensorflow/examples/android/jni/object_tracking/image-inl.h"
20 #include "tensorflow/examples/android/jni/object_tracking/image.h"
21 #include "tensorflow/examples/android/jni/object_tracking/time_log.h"
22 #include "tensorflow/examples/android/jni/object_tracking/utils.h"
23 
24 #include "tensorflow/examples/android/jni/object_tracking/config.h"
25 #include "tensorflow/examples/android/jni/object_tracking/flow_cache.h"
26 #include "tensorflow/examples/android/jni/object_tracking/frame_pair.h"
27 #include "tensorflow/examples/android/jni/object_tracking/image_data.h"
28 #include "tensorflow/examples/android/jni/object_tracking/keypoint.h"
29 #include "tensorflow/examples/android/jni/object_tracking/keypoint_detector.h"
30 #include "tensorflow/examples/android/jni/object_tracking/optical_flow.h"
31 
32 namespace tf_tracking {
33 
OpticalFlow(const OpticalFlowConfig * const config)34 OpticalFlow::OpticalFlow(const OpticalFlowConfig* const config)
35     : config_(config),
36       frame1_(NULL),
37       frame2_(NULL),
38       working_size_(config->image_size) {}
39 
40 
NextFrame(const ImageData * const image_data)41 void OpticalFlow::NextFrame(const ImageData* const image_data) {
42   // Special case for the first frame: make sure the image ends up in
43   // frame1_ so that keypoint detection can be done on it if desired.
44   frame1_ = (frame1_ == NULL) ? image_data : frame2_;
45   frame2_ = image_data;
46 }
47 
48 
49 // Static heart of the optical flow computation.
50 // Lucas Kanade algorithm.
FindFlowAtPoint_LK(const Image<uint8_t> & img_I,const Image<uint8_t> & img_J,const Image<int32_t> & I_x,const Image<int32_t> & I_y,const float p_x,const float p_y,float * out_g_x,float * out_g_y)51 bool OpticalFlow::FindFlowAtPoint_LK(const Image<uint8_t>& img_I,
52                                      const Image<uint8_t>& img_J,
53                                      const Image<int32_t>& I_x,
54                                      const Image<int32_t>& I_y, const float p_x,
55                                      const float p_y, float* out_g_x,
56                                      float* out_g_y) {
57   float g_x = *out_g_x;
58   float g_y = *out_g_y;
59   // Get values for frame 1.  They remain constant through the inner
60   // iteration loop.
61   float vals_I[kFlowArraySize];
62   float vals_I_x[kFlowArraySize];
63   float vals_I_y[kFlowArraySize];
64 
65   const int kPatchSize = 2 * kFlowIntegrationWindowSize + 1;
66   const float kWindowSizeFloat = static_cast<float>(kFlowIntegrationWindowSize);
67 
68 #if USE_FIXED_POINT_FLOW
69   const int fixed_x_max = RealToFixed1616(img_I.width_less_one_) - 1;
70   const int fixed_y_max = RealToFixed1616(img_I.height_less_one_) - 1;
71 #else
72   const float real_x_max = I_x.width_less_one_ - EPSILON;
73   const float real_y_max = I_x.height_less_one_ - EPSILON;
74 #endif
75 
76   // Get the window around the original point.
77   const float src_left_real = p_x - kWindowSizeFloat;
78   const float src_top_real = p_y - kWindowSizeFloat;
79   float* vals_I_ptr = vals_I;
80   float* vals_I_x_ptr = vals_I_x;
81   float* vals_I_y_ptr = vals_I_y;
82 #if USE_FIXED_POINT_FLOW
83   // Source integer coordinates.
84   const int src_left_fixed = RealToFixed1616(src_left_real);
85   const int src_top_fixed = RealToFixed1616(src_top_real);
86 
87   for (int y = 0; y < kPatchSize; ++y) {
88     const int fp_y = Clip(src_top_fixed + (y << 16), 0, fixed_y_max);
89 
90     for (int x = 0; x < kPatchSize; ++x) {
91       const int fp_x = Clip(src_left_fixed + (x << 16), 0, fixed_x_max);
92 
93       *vals_I_ptr++ = img_I.GetPixelInterpFixed1616(fp_x, fp_y);
94       *vals_I_x_ptr++ = I_x.GetPixelInterpFixed1616(fp_x, fp_y);
95       *vals_I_y_ptr++ = I_y.GetPixelInterpFixed1616(fp_x, fp_y);
96     }
97   }
98 #else
99   for (int y = 0; y < kPatchSize; ++y) {
100     const float y_pos = Clip(src_top_real + y, 0.0f, real_y_max);
101 
102     for (int x = 0; x < kPatchSize; ++x) {
103       const float x_pos = Clip(src_left_real + x, 0.0f, real_x_max);
104 
105       *vals_I_ptr++ = img_I.GetPixelInterp(x_pos, y_pos);
106       *vals_I_x_ptr++ = I_x.GetPixelInterp(x_pos, y_pos);
107       *vals_I_y_ptr++ = I_y.GetPixelInterp(x_pos, y_pos);
108     }
109   }
110 #endif
111 
112   // Compute the spatial gradient matrix about point p.
113   float G[] = { 0, 0, 0, 0 };
114   CalculateG(vals_I_x, vals_I_y, kFlowArraySize, G);
115 
116   // Find the inverse of G.
117   float G_inv[4];
118   if (!Invert2x2(G, G_inv)) {
119     return false;
120   }
121 
122 #if NORMALIZE
123   const float mean_I = ComputeMean(vals_I, kFlowArraySize);
124   const float std_dev_I = ComputeStdDev(vals_I, kFlowArraySize, mean_I);
125 #endif
126 
127   // Iterate kNumIterations times or until we converge.
128   for (int iteration = 0; iteration < kNumIterations; ++iteration) {
129     // Get values for frame 2.
130     float vals_J[kFlowArraySize];
131 
132     // Get the window around the destination point.
133     const float left_real = p_x + g_x - kWindowSizeFloat;
134     const float top_real  = p_y + g_y - kWindowSizeFloat;
135     float* vals_J_ptr = vals_J;
136 #if USE_FIXED_POINT_FLOW
137     // The top-left sub-pixel is set for the current iteration (in 16:16
138     // fixed). This is constant over one iteration.
139     const int left_fixed = RealToFixed1616(left_real);
140     const int top_fixed  = RealToFixed1616(top_real);
141 
142     for (int win_y = 0; win_y < kPatchSize; ++win_y) {
143       const int fp_y = Clip(top_fixed + (win_y << 16), 0, fixed_y_max);
144       for (int win_x = 0; win_x < kPatchSize; ++win_x) {
145         const int fp_x = Clip(left_fixed + (win_x << 16), 0, fixed_x_max);
146         *vals_J_ptr++ = img_J.GetPixelInterpFixed1616(fp_x, fp_y);
147       }
148     }
149 #else
150     for (int win_y = 0; win_y < kPatchSize; ++win_y) {
151       const float y_pos = Clip(top_real + win_y, 0.0f, real_y_max);
152       for (int win_x = 0; win_x < kPatchSize; ++win_x) {
153         const float x_pos = Clip(left_real + win_x, 0.0f, real_x_max);
154         *vals_J_ptr++ = img_J.GetPixelInterp(x_pos, y_pos);
155       }
156     }
157 #endif
158 
159 #if NORMALIZE
160     const float mean_J = ComputeMean(vals_J, kFlowArraySize);
161     const float std_dev_J = ComputeStdDev(vals_J, kFlowArraySize, mean_J);
162 
163     // TODO(andrewharp): Probably better to completely detect and handle the
164     // "corner case" where the patch is fully outside the image diagonally.
165     const float std_dev_ratio = std_dev_J > 0.0f ? std_dev_I / std_dev_J : 1.0f;
166 #endif
167 
168     // Compute image mismatch vector.
169     float b_x = 0.0f;
170     float b_y = 0.0f;
171 
172     vals_I_ptr = vals_I;
173     vals_J_ptr = vals_J;
174     vals_I_x_ptr = vals_I_x;
175     vals_I_y_ptr = vals_I_y;
176 
177     for (int win_y = 0; win_y < kPatchSize; ++win_y) {
178       for (int win_x = 0; win_x < kPatchSize; ++win_x) {
179 #if NORMALIZE
180         // Normalized Image difference.
181         const float dI =
182             (*vals_I_ptr++ - mean_I) - (*vals_J_ptr++ - mean_J) * std_dev_ratio;
183 #else
184         const float dI = *vals_I_ptr++ - *vals_J_ptr++;
185 #endif
186         b_x += dI * *vals_I_x_ptr++;
187         b_y += dI * *vals_I_y_ptr++;
188       }
189     }
190 
191     // Optical flow... solve n = G^-1 * b
192     const float n_x = (G_inv[0] * b_x) + (G_inv[1] * b_y);
193     const float n_y = (G_inv[2] * b_x) + (G_inv[3] * b_y);
194 
195     // Update best guess with residual displacement from this level and
196     // iteration.
197     g_x += n_x;
198     g_y += n_y;
199 
200     // LOGV("Iteration %d: delta (%.3f, %.3f)", iteration, n_x, n_y);
201 
202     // Abort early if we're already below the threshold.
203     if (Square(n_x) + Square(n_y) < Square(kTrackingAbortThreshold)) {
204       break;
205     }
206   }  // Iteration.
207 
208   // Copy value back into output.
209   *out_g_x = g_x;
210   *out_g_y = g_y;
211   return true;
212 }
213 
214 
215 // Pointwise flow using translational 2dof ESM.
FindFlowAtPoint_ESM(const Image<uint8_t> & img_I,const Image<uint8_t> & img_J,const Image<int32_t> & I_x,const Image<int32_t> & I_y,const Image<int32_t> & J_x,const Image<int32_t> & J_y,const float p_x,const float p_y,float * out_g_x,float * out_g_y)216 bool OpticalFlow::FindFlowAtPoint_ESM(
217     const Image<uint8_t>& img_I, const Image<uint8_t>& img_J,
218     const Image<int32_t>& I_x, const Image<int32_t>& I_y,
219     const Image<int32_t>& J_x, const Image<int32_t>& J_y, const float p_x,
220     const float p_y, float* out_g_x, float* out_g_y) {
221   float g_x = *out_g_x;
222   float g_y = *out_g_y;
223   const float area_inv = 1.0f / static_cast<float>(kFlowArraySize);
224 
225   // Get values for frame 1. They remain constant through the inner
226   // iteration loop.
227   uint8_t vals_I[kFlowArraySize];
228   uint8_t vals_J[kFlowArraySize];
229   int16_t src_gradient_x[kFlowArraySize];
230   int16_t src_gradient_y[kFlowArraySize];
231 
232   // TODO(rspring): try out the IntegerPatchAlign() method once
233   // the code for that is in ../common.
234   const float wsize_float = static_cast<float>(kFlowIntegrationWindowSize);
235   const int src_left_fixed = RealToFixed1616(p_x - wsize_float);
236   const int src_top_fixed = RealToFixed1616(p_y - wsize_float);
237   const int patch_size = 2 * kFlowIntegrationWindowSize + 1;
238 
239   // Create the keypoint template patch from a subpixel location.
240   if (!img_I.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
241                                              patch_size, patch_size, vals_I) ||
242       !I_x.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
243                                            patch_size, patch_size,
244                                            src_gradient_x) ||
245       !I_y.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
246                                            patch_size, patch_size,
247                                            src_gradient_y)) {
248     return false;
249   }
250 
251   int bright_offset = 0;
252   int sum_diff = 0;
253 
254   // The top-left sub-pixel is set for the current iteration (in 16:16 fixed).
255   // This is constant over one iteration.
256   int left_fixed = RealToFixed1616(p_x + g_x - wsize_float);
257   int top_fixed  = RealToFixed1616(p_y + g_y - wsize_float);
258 
259   // The truncated version gives the most top-left pixel that is used.
260   int left_trunc = left_fixed >> 16;
261   int top_trunc = top_fixed >> 16;
262 
263   // Compute an initial brightness offset.
264   if (kDoBrightnessNormalize &&
265       left_trunc >= 0 && top_trunc >= 0 &&
266       (left_trunc + patch_size) < img_J.width_less_one_ &&
267       (top_trunc + patch_size) < img_J.height_less_one_) {
268     int templ_index = 0;
269     const uint8_t* j_row = img_J[top_trunc] + left_trunc;
270 
271     const int j_stride = img_J.stride();
272 
273     for (int y = 0; y < patch_size; ++y, j_row += j_stride) {
274       for (int x = 0; x < patch_size; ++x) {
275         sum_diff += static_cast<int>(j_row[x]) - vals_I[templ_index++];
276       }
277     }
278 
279     bright_offset = static_cast<int>(static_cast<float>(sum_diff) * area_inv);
280   }
281 
282   // Iterate kNumIterations times or until we go out of image.
283   for (int iteration = 0; iteration < kNumIterations; ++iteration) {
284     int jtj[3] = { 0, 0, 0 };
285     int jtr[2] = { 0, 0 };
286     sum_diff = 0;
287 
288     // Extract the target image values.
289     // Extract the gradient from the target image patch and accumulate to
290     // the gradient of the source image patch.
291     if (!img_J.ExtractPatchAtSubpixelFixed1616(left_fixed, top_fixed,
292                                                patch_size, patch_size,
293                                                vals_J)) {
294       break;
295     }
296 
297     const uint8_t* templ_row = vals_I;
298     const uint8_t* extract_row = vals_J;
299     const int16_t* src_dx_row = src_gradient_x;
300     const int16_t* src_dy_row = src_gradient_y;
301 
302     for (int y = 0; y < patch_size; ++y, templ_row += patch_size,
303          src_dx_row += patch_size, src_dy_row += patch_size,
304          extract_row += patch_size) {
305       const int fp_y = top_fixed + (y << 16);
306       for (int x = 0; x < patch_size; ++x) {
307         const int fp_x = left_fixed + (x << 16);
308         int32_t target_dx = J_x.GetPixelInterpFixed1616(fp_x, fp_y);
309         int32_t target_dy = J_y.GetPixelInterpFixed1616(fp_x, fp_y);
310 
311         // Combine the two Jacobians.
312         // Right-shift by one to account for the fact that we add
313         // two Jacobians.
314         int32_t dx = (src_dx_row[x] + target_dx) >> 1;
315         int32_t dy = (src_dy_row[x] + target_dy) >> 1;
316 
317         // The current residual b - h(q) == extracted - (template + offset)
318         int32_t diff = static_cast<int32_t>(extract_row[x]) -
319                        static_cast<int32_t>(templ_row[x]) - bright_offset;
320 
321         jtj[0] += dx * dx;
322         jtj[1] += dx * dy;
323         jtj[2] += dy * dy;
324 
325         jtr[0] += dx * diff;
326         jtr[1] += dy * diff;
327 
328         sum_diff += diff;
329       }
330     }
331 
332     const float jtr1_float = static_cast<float>(jtr[0]);
333     const float jtr2_float = static_cast<float>(jtr[1]);
334 
335     // Add some baseline stability to the system.
336     jtj[0] += kEsmRegularizer;
337     jtj[2] += kEsmRegularizer;
338 
339     const int64_t prod1 = static_cast<int64_t>(jtj[0]) * jtj[2];
340     const int64_t prod2 = static_cast<int64_t>(jtj[1]) * jtj[1];
341 
342     // One ESM step.
343     const float jtj_1[4] = { static_cast<float>(jtj[2]),
344                              static_cast<float>(-jtj[1]),
345                              static_cast<float>(-jtj[1]),
346                              static_cast<float>(jtj[0]) };
347     const double det_inv = 1.0 / static_cast<double>(prod1 - prod2);
348 
349     g_x -= det_inv * (jtj_1[0] * jtr1_float + jtj_1[1] * jtr2_float);
350     g_y -= det_inv * (jtj_1[2] * jtr1_float + jtj_1[3] * jtr2_float);
351 
352     if (kDoBrightnessNormalize) {
353       bright_offset +=
354           static_cast<int>(area_inv * static_cast<float>(sum_diff) + 0.5f);
355     }
356 
357     // Update top left position.
358     left_fixed = RealToFixed1616(p_x + g_x - wsize_float);
359     top_fixed  = RealToFixed1616(p_y + g_y - wsize_float);
360 
361     left_trunc = left_fixed >> 16;
362     top_trunc = top_fixed >> 16;
363 
364     // Abort iterations if we go out of borders.
365     if (left_trunc < 0 || top_trunc < 0 ||
366         (left_trunc + patch_size) >= J_x.width_less_one_ ||
367         (top_trunc + patch_size) >= J_y.height_less_one_) {
368       break;
369     }
370   }  // Iteration.
371 
372   // Copy value back into output.
373   *out_g_x = g_x;
374   *out_g_y = g_y;
375   return true;
376 }
377 
378 
FindFlowAtPointReversible(const int level,const float u_x,const float u_y,const bool reverse_flow,float * flow_x,float * flow_y) const379 bool OpticalFlow::FindFlowAtPointReversible(
380     const int level, const float u_x, const float u_y,
381     const bool reverse_flow,
382     float* flow_x, float* flow_y) const {
383   const ImageData& frame_a = reverse_flow ? *frame2_ : *frame1_;
384   const ImageData& frame_b = reverse_flow ? *frame1_ : *frame2_;
385 
386   // Images I (prev) and J (next).
387   const Image<uint8_t>& img_I = *frame_a.GetPyramidSqrt2Level(level * 2);
388   const Image<uint8_t>& img_J = *frame_b.GetPyramidSqrt2Level(level * 2);
389 
390   // Computed gradients.
391   const Image<int32_t>& I_x = *frame_a.GetSpatialX(level);
392   const Image<int32_t>& I_y = *frame_a.GetSpatialY(level);
393   const Image<int32_t>& J_x = *frame_b.GetSpatialX(level);
394   const Image<int32_t>& J_y = *frame_b.GetSpatialY(level);
395 
396   // Shrink factor from original.
397   const float shrink_factor = (1 << level);
398 
399   // Image position vector (p := u^l), scaled for this level.
400   const float scaled_p_x = u_x / shrink_factor;
401   const float scaled_p_y = u_y / shrink_factor;
402 
403   float scaled_flow_x = *flow_x / shrink_factor;
404   float scaled_flow_y = *flow_y / shrink_factor;
405 
406   // LOGE("FindFlowAtPoint level %d: %5.2f, %5.2f (%5.2f, %5.2f)", level,
407   //     scaled_p_x, scaled_p_y, &scaled_flow_x, &scaled_flow_y);
408 
409   const bool success = kUseEsm ?
410     FindFlowAtPoint_ESM(img_I, img_J, I_x, I_y, J_x, J_y,
411                         scaled_p_x, scaled_p_y,
412                         &scaled_flow_x, &scaled_flow_y) :
413     FindFlowAtPoint_LK(img_I, img_J, I_x, I_y,
414                        scaled_p_x, scaled_p_y,
415                        &scaled_flow_x, &scaled_flow_y);
416 
417   *flow_x = scaled_flow_x * shrink_factor;
418   *flow_y = scaled_flow_y * shrink_factor;
419 
420   return success;
421 }
422 
423 
FindFlowAtPointSingleLevel(const int level,const float u_x,const float u_y,const bool filter_by_fb_error,float * flow_x,float * flow_y) const424 bool OpticalFlow::FindFlowAtPointSingleLevel(
425     const int level,
426     const float u_x, const float u_y,
427     const bool filter_by_fb_error,
428     float* flow_x, float* flow_y) const {
429   if (!FindFlowAtPointReversible(level, u_x, u_y, false, flow_x, flow_y)) {
430     return false;
431   }
432 
433   if (filter_by_fb_error) {
434     const float new_position_x = u_x + *flow_x;
435     const float new_position_y = u_y + *flow_y;
436 
437     float reverse_flow_x = 0.0f;
438     float reverse_flow_y = 0.0f;
439 
440     // Now find the backwards flow and confirm it lines up with the original
441     // starting point.
442     if (!FindFlowAtPointReversible(level, new_position_x, new_position_y,
443                                    true,
444                                    &reverse_flow_x, &reverse_flow_y)) {
445       LOGE("Backward error!");
446       return false;
447     }
448 
449     const float discrepancy_length =
450         sqrtf(Square(*flow_x + reverse_flow_x) +
451               Square(*flow_y + reverse_flow_y));
452 
453     const float flow_length = sqrtf(Square(*flow_x) + Square(*flow_y));
454 
455     return discrepancy_length <
456         (kMaxForwardBackwardErrorAllowed * flow_length);
457   }
458 
459   return true;
460 }
461 
462 
463 // An implementation of the Pyramidal Lucas-Kanade Optical Flow algorithm.
464 // See http://robots.stanford.edu/cs223b04/algo_tracking.pdf for details.
FindFlowAtPointPyramidal(const float u_x,const float u_y,const bool filter_by_fb_error,float * flow_x,float * flow_y) const465 bool OpticalFlow::FindFlowAtPointPyramidal(const float u_x, const float u_y,
466                                            const bool filter_by_fb_error,
467                                            float* flow_x, float* flow_y) const {
468   const int max_level = MAX(kMinNumPyramidLevelsToUseForAdjustment,
469                             kNumPyramidLevels - kNumCacheLevels);
470 
471   // For every level in the pyramid, update the coordinates of the best match.
472   for (int l = max_level - 1; l >= 0; --l) {
473     if (!FindFlowAtPointSingleLevel(l, u_x, u_y,
474                                     filter_by_fb_error, flow_x, flow_y)) {
475       return false;
476     }
477   }
478 
479   return true;
480 }
481 
482 }  // namespace tf_tracking
483