1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_
17 #define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_
18 
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <ostream>

#include "tensorflow/examples/android/jni/object_tracking/geom.h"
#include "tensorflow/examples/android/jni/object_tracking/utils.h"
23 
// TODO(andrewharp): Make this a cast to uint32_t if/when we go unsigned for
// operations.
#define ZERO 0

// Pixel-coordinate sanity checks. IMAGE must be a pointer to an Image. When
// SANITY_CHECKS is defined the coordinates are validated through SCHECK;
// otherwise both macros expand to an empty block.
#ifdef SANITY_CHECKS
  // Validates integer pixel coordinates (X, Y) with IMAGE->ValidPixel().
  #define CHECK_PIXEL(IMAGE, X, Y) {\
    SCHECK((IMAGE)->ValidPixel((X), (Y)), \
          "CHECK_PIXEL(%d,%d) in %dx%d image.", \
          static_cast<int>(X), static_cast<int>(Y), \
          (IMAGE)->GetWidth(), (IMAGE)->GetHeight());\
  }

  // Validates subpixel coordinates (X, Y) with IMAGE->ValidInterpPixel().
  // The method name must match the declaration in Image exactly
  // (ValidInterpPixel, capital V).
  #define CHECK_PIXEL_INTERP(IMAGE, X, Y) {\
    SCHECK((IMAGE)->ValidInterpPixel((X), (Y)), \
          "CHECK_PIXEL_INTERP(%.2f, %.2f) in %dx%d image.", \
          static_cast<float>(X), static_cast<float>(Y), \
          (IMAGE)->GetWidth(), (IMAGE)->GetHeight());\
  }
#else
  #define CHECK_PIXEL(IMAGE, X, Y) {}
  #define CHECK_PIXEL_INTERP(IMAGE, X, Y) {}
#endif
46 
47 namespace tf_tracking {
48 
49 #ifdef SANITY_CHECKS
50 // Class which exists solely to provide bounds checking for array-style image
51 // data access.
52 template <typename T>
53 class RowData {
54  public:
RowData(T * const row_data,const int max_col)55   RowData(T* const row_data, const int max_col)
56       : row_data_(row_data), max_col_(max_col) {}
57 
58   inline T& operator[](const int col) const {
59     SCHECK(InRange(col, 0, max_col_),
60           "Column out of range: %d (%d max)", col, max_col_);
61     return row_data_[col];
62   }
63 
64   inline operator T*() const {
65     return row_data_;
66   }
67 
68  private:
69   T* const row_data_;
70   const int max_col_;
71 };
72 #endif
73 
// qsort()-compatible three-way comparator for elements of type T.
//
// a and b must each point to a valid object of type T. Returns -1 if *a
// orders before *b, 1 if *b orders before *a, and 0 otherwise. Only operator<
// is required of T. Values that are unordered with respect to each other
// (e.g. a floating point NaN against anything) compare as equivalent, so
// Comp(a, b) == -Comp(b, a) holds for every pair of inputs.
template <typename T>
int Comp(const void* a, const void* b) {
  // Bind by reference so that large element types are not copied per call.
  const T& val1 = *static_cast<const T*>(a);
  const T& val2 = *static_cast<const T*>(b);

  if (val1 < val2) {
    return -1;
  }
  if (val2 < val1) {
    return 1;
  }
  return 0;
}
88 
89 // TODO(andrewharp): Make explicit which operations support negative numbers or
90 // struct/class types in image data (possibly create fast multi-dim array class
91 // for data where pixel arithmetic does not make sense).
92 
93 // Image class optimized for working on numeric arrays as grayscale image data.
94 // Supports other data types as a 2D array class, so long as no pixel math
95 // operations are called (convolution, downsampling, etc).
96 template <typename T>
97 class Image {
98  public:
99   Image(const int width, const int height);
100   explicit Image(const Size& size);
101 
102   // Constructor that creates an image from preallocated data.
103   // Note: The image takes ownership of the data lifecycle, unless own_data is
104   // set to false.
105   Image(const int width, const int height, T* const image_data,
106         const bool own_data = true);
107 
108   ~Image();
109 
110   // Extract a pixel patch from this image, starting at a subpixel location.
111   // Uses 16:16 fixed point format for representing real values and doing the
112   // bilinear interpolation.
113   //
114   // Arguments fp_x and fp_y tell the subpixel position in fixed point format,
115   // patchwidth/patchheight give the size of the patch in pixels and
116   // to_data must be a valid pointer to a *contiguous* destination data array.
117   template<class DstType>
118   bool ExtractPatchAtSubpixelFixed1616(const int fp_x,
119                                        const int fp_y,
120                                        const int patchwidth,
121                                        const int patchheight,
122                                        DstType* to_data) const;
123 
124   Image<T>* Crop(
125       const int left, const int top, const int right, const int bottom) const;
126 
GetWidth()127   inline int GetWidth() const { return width_; }
GetHeight()128   inline int GetHeight() const { return height_; }
129 
130   // Bilinearly sample a value between pixels.  Values must be within the image.
131   inline float GetPixelInterp(const float x, const float y) const;
132 
133   // Bilinearly sample a pixels at a subpixel position using fixed point
134   // arithmetic.
135   // Avoids float<->int conversions.
136   // Values must be within the image.
137   // Arguments fp_x and fp_y tell the subpixel position in
138   // 16:16 fixed point format.
139   //
140   // Important: This function only makes sense for integer-valued images, such
141   // as Image<uint8_t> or Image<int> etc.
142   inline T GetPixelInterpFixed1616(const int fp_x_whole,
143                                    const int fp_y_whole) const;
144 
145   // Returns true iff the pixel is in the image's boundaries.
146   inline bool ValidPixel(const int x, const int y) const;
147 
148   inline BoundingBox GetContainingBox() const;
149 
150   inline bool Contains(const BoundingBox& bounding_box) const;
151 
GetMedianValue()152   inline T GetMedianValue() {
153     qsort(image_data_, data_size_, sizeof(image_data_[0]), Comp<T>);
154     return image_data_[data_size_ >> 1];
155   }
156 
157   // Returns true iff the pixel is in the image's boundaries for interpolation
158   // purposes.
159   // TODO(andrewharp): check in interpolation follow-up change.
160   inline bool ValidInterpPixel(const float x, const float y) const;
161 
162   // Safe lookup with boundary enforcement.
GetPixelClipped(const int x,const int y)163   inline T GetPixelClipped(const int x, const int y) const {
164     return (*this)[Clip(y, ZERO, height_less_one_)]
165                   [Clip(x, ZERO, width_less_one_)];
166   }
167 
168 #ifdef SANITY_CHECKS
169   inline RowData<T> operator[](const int row) {
170     SCHECK(InRange(row, 0, height_less_one_),
171           "Row out of range: %d (%d max)", row, height_less_one_);
172     return RowData<T>(image_data_ + row * stride_, width_less_one_);
173   }
174 
175   inline const RowData<T> operator[](const int row) const {
176     SCHECK(InRange(row, 0, height_less_one_),
177           "Row out of range: %d (%d max)", row, height_less_one_);
178     return RowData<T>(image_data_ + row * stride_, width_less_one_);
179   }
180 #else
181   inline T* operator[](const int row) {
182     return image_data_ + row * stride_;
183   }
184 
185   inline const T* operator[](const int row) const {
186     return image_data_ + row * stride_;
187   }
188 #endif
189 
data()190   const T* data() const { return image_data_; }
191 
stride()192   inline int stride() const { return stride_; }
193 
194   // Clears image to a single value.
Clear(const T & val)195   inline void Clear(const T& val) {
196     memset(image_data_, val, sizeof(*image_data_) * data_size_);
197   }
198 
199 #ifdef __ARM_NEON
200   void Downsample2x32ColumnsNeon(const uint8_t* const original,
201                                  const int stride, const int orig_x);
202 
203   void Downsample4x32ColumnsNeon(const uint8_t* const original,
204                                  const int stride, const int orig_x);
205 
206   void DownsampleAveragedNeon(const uint8_t* const original, const int stride,
207                               const int factor);
208 #endif
209 
210   // Naive downsampler that reduces image size by factor by averaging pixels in
211   // blocks of size factor x factor.
212   void DownsampleAveraged(const T* const original, const int stride,
213                           const int factor);
214 
215   // Naive downsampler that reduces image size by factor by averaging pixels in
216   // blocks of size factor x factor.
DownsampleAveraged(const Image<T> & original,const int factor)217   inline void DownsampleAveraged(const Image<T>& original, const int factor) {
218     DownsampleAveraged(original.data(), original.GetWidth(), factor);
219   }
220 
221   // Native downsampler that reduces image size using nearest interpolation
222   void DownsampleInterpolateNearest(const Image<T>& original);
223 
224   // Native downsampler that reduces image size using fixed-point bilinear
225   // interpolation
226   void DownsampleInterpolateLinear(const Image<T>& original);
227 
228   // Relatively efficient downsampling of an image by a factor of two with a
229   // low-pass 3x3 smoothing operation thrown in.
230   void DownsampleSmoothed3x3(const Image<T>& original);
231 
232   // Relatively efficient downsampling of an image by a factor of two with a
233   // low-pass 5x5 smoothing operation thrown in.
234   void DownsampleSmoothed5x5(const Image<T>& original);
235 
236   // Optimized Scharr filter on a single pixel in the X direction.
237   // Scharr filters are like central-difference operators, but have more
238   // rotational symmetry in their response because they also consider the
239   // diagonal neighbors.
240   template <typename U>
241   inline T ScharrPixelX(const Image<U>& original,
242                         const int center_x, const int center_y) const;
243 
244   // Optimized Scharr filter on a single pixel in the X direction.
245   // Scharr filters are like central-difference operators, but have more
246   // rotational symmetry in their response because they also consider the
247   // diagonal neighbors.
248   template <typename U>
249   inline T ScharrPixelY(const Image<U>& original,
250                         const int center_x, const int center_y) const;
251 
252   // Convolve the image with a Scharr filter in the X direction.
253   // Much faster than an equivalent generic convolution.
254   template <typename U>
255   inline void ScharrX(const Image<U>& original);
256 
257   // Convolve the image with a Scharr filter in the Y direction.
258   // Much faster than an equivalent generic convolution.
259   template <typename U>
260   inline void ScharrY(const Image<U>& original);
261 
HalfDiff(int32_t first,int32_t second)262   static inline T HalfDiff(int32_t first, int32_t second) {
263     return (second - first) / 2;
264   }
265 
266   template <typename U>
267   void DerivativeX(const Image<U>& original);
268 
269   template <typename U>
270   void DerivativeY(const Image<U>& original);
271 
272   // Generic function for convolving pixel with 3x3 filter.
273   // Filter pixels should be in row major order.
274   template <typename U>
275   inline T ConvolvePixel3x3(const Image<U>& original,
276                             const int* const filter,
277                             const int center_x, const int center_y,
278                             const int total) const;
279 
280   // Generic function for convolving an image with a 3x3 filter.
281   // TODO(andrewharp): Generalize this for any size filter.
282   template <typename U>
283   inline void Convolve3x3(const Image<U>& original,
284                           const int32_t* const filter);
285 
286   // Load this image's data from a data array. The data at pixels is assumed to
287   // have dimensions equivalent to this image's dimensions * factor.
288   inline void FromArray(const T* const pixels, const int stride,
289                         const int factor = 1);
290 
291   // Copy the image back out to an appropriately sized data array.
ToArray(T * const pixels)292   inline void ToArray(T* const pixels) const {
293     // If not subsampling, memcpy should be faster.
294     memcpy(pixels, this->image_data_, data_size_ * sizeof(T));
295   }
296 
297   // Precompute these for efficiency's sake as they're used by a lot of
298   // clipping code and loop code.
299   // TODO(andrewharp): make these only accessible by other Images.
300   const int width_less_one_;
301   const int height_less_one_;
302 
303   // The raw size of the allocated data.
304   const int data_size_;
305 
306  private:
Allocate()307   inline void Allocate() {
308     image_data_ = new T[data_size_];
309     if (image_data_ == NULL) {
310       LOGE("Couldn't allocate image data!");
311     }
312   }
313 
314   T* image_data_;
315 
316   bool own_data_;
317 
318   const int width_;
319   const int height_;
320 
321   // The image stride (offset to next row).
322   // TODO(andrewharp): Make sure that stride is honored in all code.
323   const int stride_;
324 
325   TF_DISALLOW_COPY_AND_ASSIGN(Image);
326 };
327 
328 template <typename t>
329 inline std::ostream& operator<<(std::ostream& stream, const Image<t>& image) {
330   for (int y = 0; y < image.GetHeight(); ++y) {
331     for (int x = 0; x < image.GetWidth(); ++x) {
332       stream << image[y][x] << " ";
333     }
334     stream << std::endl;
335   }
336   return stream;
337 }
338 
339 }  // namespace tf_tracking
340 
341 #endif  // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_
342