1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_UTILS_H_
17 #define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_UTILS_H_
18 
19 #include <stdint.h>
20 
21 #include "tensorflow/examples/android/jni/object_tracking/geom.h"
22 #include "tensorflow/examples/android/jni/object_tracking/image-inl.h"
23 #include "tensorflow/examples/android/jni/object_tracking/image.h"
24 #include "tensorflow/examples/android/jni/object_tracking/utils.h"
25 
26 
27 namespace tf_tracking {
28 
GetUV(const uint8_t * const input,Image<uint8_t> * const u,Image<uint8_t> * const v)29 inline void GetUV(const uint8_t* const input, Image<uint8_t>* const u,
30                   Image<uint8_t>* const v) {
31   const uint8_t* pUV = input;
32 
33   for (int row = 0; row < u->GetHeight(); ++row) {
34     uint8_t* u_curr = (*u)[row];
35     uint8_t* v_curr = (*v)[row];
36     for (int col = 0; col < u->GetWidth(); ++col) {
37 #ifdef __APPLE__
38       *u_curr++ = *pUV++;
39       *v_curr++ = *pUV++;
40 #else
41       *v_curr++ = *pUV++;
42       *u_curr++ = *pUV++;
43 #endif
44     }
45   }
46 }
47 
48 // Marks every point within a circle of a given radius on the given boolean
49 // image true.
50 template <typename U>
MarkImage(const int x,const int y,const int radius,Image<U> * const img)51 inline static void MarkImage(const int x, const int y, const int radius,
52                              Image<U>* const img) {
53   SCHECK(img->ValidPixel(x, y), "Marking invalid pixel in image! %d, %d", x, y);
54 
55   // Precomputed for efficiency.
56   const int squared_radius = Square(radius);
57 
58   // Mark every row in the circle.
59   for (int d_y = 0; d_y <= radius; ++d_y) {
60     const int squared_y_dist = Square(d_y);
61 
62     const int min_y = MAX(y - d_y, 0);
63     const int max_y = MIN(y + d_y, img->height_less_one_);
64 
65     // The max d_x of the circle must be strictly greater or equal to
66     // radius - d_y for any positive d_y. Thus, starting from radius - d_y will
67     // reduce the number of iterations required as compared to starting from
68     // either 0 and counting up or radius and counting down.
69     for (int d_x = radius - d_y; d_x <= radius; ++d_x) {
70       // The first time this criteria is met, we know the width of the circle at
71       // this row (without using sqrt).
72       if (squared_y_dist + Square(d_x) >= squared_radius) {
73         const int min_x = MAX(x - d_x, 0);
74         const int max_x = MIN(x + d_x, img->width_less_one_);
75 
76         // Mark both above and below the center row.
77         bool* const top_row_start = (*img)[min_y] + min_x;
78         bool* const bottom_row_start = (*img)[max_y] + min_x;
79 
80         const int x_width = max_x - min_x + 1;
81         memset(top_row_start, true, sizeof(*top_row_start) * x_width);
82         memset(bottom_row_start, true, sizeof(*bottom_row_start) * x_width);
83 
84         // This row is marked, time to move on to the next row.
85         break;
86       }
87     }
88   }
89 }
90 
91 #ifdef __ARM_NEON
92 void CalculateGNeon(
93     const float* const vals_x, const float* const vals_y,
94     const int num_vals, float* const G);
95 #endif
96 
97 // Puts the image gradient matrix about a pixel into the 2x2 float array G.
98 // vals_x should be an array of the window x gradient values, whose indices
99 // can be in any order but are parallel to the vals_y entries.
100 // See http://robots.stanford.edu/cs223b04/algo_tracking.pdf for more details.
CalculateG(const float * const vals_x,const float * const vals_y,const int num_vals,float * const G)101 inline void CalculateG(const float* const vals_x, const float* const vals_y,
102                        const int num_vals, float* const G) {
103 #ifdef __ARM_NEON
104   CalculateGNeon(vals_x, vals_y, num_vals, G);
105   return;
106 #endif
107 
108   // Non-accelerated version.
109   for (int i = 0; i < num_vals; ++i) {
110     G[0] += Square(vals_x[i]);
111     G[1] += vals_x[i] * vals_y[i];
112     G[3] += Square(vals_y[i]);
113   }
114 
115   // The matrix is symmetric, so this is a given.
116   G[2] = G[1];
117 }
118 
// Integer variant of CalculateG: adds the image gradient matrix of a window of
// 16-bit gradient samples into the 2x2 int array G.
//
// vals_x, vals_y: parallel arrays of x and y gradient values; entry i of both
//                 arrays must belong to the same pixel.
// num_vals:       number of entries in each array.
// G:              in/out. sum(x*x) is added to G[0], sum(x*y) to G[1] and
//                 sum(y*y) to G[3]; G[2] is then overwritten with G[1]. G is
//                 not zeroed here, so callers should pass in a cleared array.
inline void CalculateGInt16(const int16_t* const vals_x,
                            const int16_t* const vals_y, const int num_vals,
                            int* const G) {
  // Non-accelerated version.
  for (int i = 0; i < num_vals; ++i) {
    // Widen to int before multiplying so that all three products are computed
    // at the same precision. Squaring through a helper that is parameterized
    // on the 16-bit sample type could hand the square back as a 16-bit value,
    // which cannot hold the square of any gradient larger than 181.
    const int grad_x = vals_x[i];
    const int grad_y = vals_y[i];

    G[0] += grad_x * grad_x;
    G[1] += grad_x * grad_y;
    G[3] += grad_y * grad_y;
  }

  // The matrix is symmetric, so this is a given.
  G[2] = G[1];
}
132 
133 
134 // Puts the image gradient matrix about a pixel into the 2x2 float array G.
135 // Looks up interpolated pixels, then calls above method for implementation.
CalculateG(const int window_radius,const float center_x,const float center_y,const Image<int32_t> & I_x,const Image<int32_t> & I_y,float * const G)136 inline void CalculateG(const int window_radius, const float center_x,
137                        const float center_y, const Image<int32_t>& I_x,
138                        const Image<int32_t>& I_y, float* const G) {
139   SCHECK(I_x.ValidPixel(center_x, center_y), "Problem in calculateG!");
140 
141   // Hardcoded to allow for a max window radius of 5 (9 pixels x 9 pixels).
142   static const int kMaxWindowRadius = 5;
143   SCHECK(window_radius <= kMaxWindowRadius,
144         "Window %d > %d!", window_radius, kMaxWindowRadius);
145 
146   // Diameter of window is 2 * radius + 1 for center pixel.
147   static const int kWindowBufferSize =
148       (kMaxWindowRadius * 2 + 1) * (kMaxWindowRadius * 2 + 1);
149 
150   // Preallocate buffers statically for efficiency.
151   static int16_t vals_x[kWindowBufferSize];
152   static int16_t vals_y[kWindowBufferSize];
153 
154   const int src_left_fixed = RealToFixed1616(center_x - window_radius);
155   const int src_top_fixed = RealToFixed1616(center_y - window_radius);
156 
157   int16_t* vals_x_ptr = vals_x;
158   int16_t* vals_y_ptr = vals_y;
159 
160   const int window_size = 2 * window_radius + 1;
161   for (int y = 0; y < window_size; ++y) {
162     const int fp_y = src_top_fixed + (y << 16);
163 
164     for (int x = 0; x < window_size; ++x) {
165       const int fp_x = src_left_fixed + (x << 16);
166 
167       *vals_x_ptr++ = I_x.GetPixelInterpFixed1616(fp_x, fp_y);
168       *vals_y_ptr++ = I_y.GetPixelInterpFixed1616(fp_x, fp_y);
169     }
170   }
171 
172   int32_t g_temp[] = {0, 0, 0, 0};
173   CalculateGInt16(vals_x, vals_y, window_size * window_size, g_temp);
174 
175   for (int i = 0; i < 4; ++i) {
176     G[i] = g_temp[i];
177   }
178 }
179 
ImageCrossCorrelation(const Image<float> & image1,const Image<float> & image2,const int x_offset,const int y_offset)180 inline float ImageCrossCorrelation(const Image<float>& image1,
181                                    const Image<float>& image2,
182                                    const int x_offset, const int y_offset) {
183   SCHECK(image1.GetWidth() == image2.GetWidth() &&
184          image1.GetHeight() == image2.GetHeight(),
185         "Dimension mismatch! %dx%d vs %dx%d",
186         image1.GetWidth(), image1.GetHeight(),
187         image2.GetWidth(), image2.GetHeight());
188 
189   const int num_pixels = image1.GetWidth() * image1.GetHeight();
190   const float* data1 = image1.data();
191   const float* data2 = image2.data();
192   return ComputeCrossCorrelation(data1, data2, num_pixels);
193 }
194 
195 // Copies an arbitrary region of an image to another (floating point)
196 // image, scaling as it goes using bilinear interpolation.
CopyArea(const Image<uint8_t> & image,const BoundingBox & area_to_copy,Image<float> * const patch_image)197 inline void CopyArea(const Image<uint8_t>& image,
198                      const BoundingBox& area_to_copy,
199                      Image<float>* const patch_image) {
200   VLOG(2) << "Copying from: " << area_to_copy << std::endl;
201 
202   const int patch_width = patch_image->GetWidth();
203   const int patch_height = patch_image->GetHeight();
204 
205   const float x_dist_between_samples = patch_width > 0 ?
206       area_to_copy.GetWidth() / (patch_width - 1) : 0;
207 
208   const float y_dist_between_samples = patch_height > 0 ?
209       area_to_copy.GetHeight() / (patch_height - 1) : 0;
210 
211   for (int y_index = 0; y_index < patch_height; ++y_index) {
212     const float sample_y =
213         y_index * y_dist_between_samples + area_to_copy.top_;
214 
215     for (int x_index = 0; x_index < patch_width; ++x_index) {
216       const float sample_x =
217           x_index * x_dist_between_samples + area_to_copy.left_;
218 
219       if (image.ValidInterpPixel(sample_x, sample_y)) {
220         // TODO(andrewharp): Do area averaging when downsampling.
221         (*patch_image)[y_index][x_index] =
222             image.GetPixelInterp(sample_x, sample_y);
223       } else {
224         (*patch_image)[y_index][x_index] = -1.0f;
225       }
226     }
227   }
228 }
229 
230 
231 // Takes a floating point image and normalizes it in-place.
232 //
233 // First, negative values will be set to the mean of the non-negative pixels
234 // in the image.
235 //
236 // Then, the resulting will be normalized such that it has mean value of 0.0 and
237 // a standard deviation of 1.0.
NormalizeImage(Image<float> * const image)238 inline void NormalizeImage(Image<float>* const image) {
239   const float* const data_ptr = image->data();
240 
241   // Copy only the non-negative values to some temp memory.
242   float running_sum = 0.0f;
243   int num_data_gte_zero = 0;
244   {
245     float* const curr_data = (*image)[0];
246     for (int i = 0; i < image->data_size_; ++i) {
247       if (curr_data[i] >= 0.0f) {
248         running_sum += curr_data[i];
249         ++num_data_gte_zero;
250       } else {
251         curr_data[i] = -1.0f;
252       }
253     }
254   }
255 
256   // If none of the pixels are valid, just set the entire thing to 0.0f.
257   if (num_data_gte_zero == 0) {
258     image->Clear(0.0f);
259     return;
260   }
261 
262   const float corrected_mean = running_sum / num_data_gte_zero;
263 
264   float* curr_data = (*image)[0];
265   for (int i = 0; i < image->data_size_; ++i) {
266     const float curr_val = *curr_data;
267     *curr_data++ = curr_val < 0 ? 0 : curr_val - corrected_mean;
268   }
269 
270   const float std_dev = ComputeStdDev(data_ptr, image->data_size_, 0.0f);
271 
272   if (std_dev > 0.0f) {
273     curr_data = (*image)[0];
274     for (int i = 0; i < image->data_size_; ++i) {
275       *curr_data++ /= std_dev;
276     }
277 
278 #ifdef SANITY_CHECKS
279     LOGV("corrected_mean: %1.2f  std_dev: %1.2f", corrected_mean, std_dev);
280     const float correlation =
281         ComputeCrossCorrelation(image->data(),
282                                 image->data(),
283                                 image->data_size_);
284 
285     if (std::abs(correlation - 1.0f) > EPSILON) {
286       LOG(ERROR) << "Bad image!" << std::endl;
287       LOG(ERROR) << *image << std::endl;
288     }
289 
290     SCHECK(std::abs(correlation - 1.0f) < EPSILON,
291            "Correlation wasn't 1.0f:  %.10f", correlation);
292 #endif
293   }
294 }
295 
296 }  // namespace tf_tracking
297 
298 #endif  // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_UTILS_H_
299