1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // This is a helper struct to package up the input and output
17 // parameters of an image resizer (the height, widths, etc.).  To
18 // reduce code duplication and ensure consistency across the different
19 // resizers, it performs the input validation.
20 
21 #ifndef TENSORFLOW_CORE_KERNELS_IMAGE_RESIZER_STATE_H_
22 #define TENSORFLOW_CORE_KERNELS_IMAGE_RESIZER_STATE_H_
23 
24 #define EIGEN_USE_THREADS
25 
26 #include <math.h>
27 #include <algorithm>
28 #include <array>
29 
30 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
31 #include "tensorflow/core/framework/bounds_check.h"
32 #include "tensorflow/core/framework/op_kernel.h"
33 #include "tensorflow/core/framework/register_types.h"
34 #include "tensorflow/core/framework/tensor.h"
35 #include "tensorflow/core/framework/tensor_shape.h"
36 #include "tensorflow/core/framework/types.h"
37 
38 namespace tensorflow {
39 
// Returns the float factor that converts an output coordinate into an input
// coordinate along one dimension.
//
// When `align_corners` is set and there is more than one output sample, the
// factor is (in_size - 1) / (out_size - 1). Otherwise it is the plain ratio
// in_size / out_size; taking that path for out_size <= 1 also keeps the
// (out_size - 1) denominator from being zero.
inline float CalculateResizeScale(int64 in_size, int64 out_size,
                                  bool align_corners) {
  const bool use_corner_alignment = align_corners && out_size > 1;
  if (use_corner_alignment) {
    return static_cast<float>(in_size - 1) / static_cast<float>(out_size - 1);
  }
  return static_cast<float>(in_size) / static_cast<float>(out_size);
}
47 
// Scaler that treats pixel centers as lying at half-integer coordinates, i.e.
// the top-left pixel is centered at (0.5, 0.5).
struct HalfPixelScaler {
  inline float operator()(const int x, const float scale) const {
    // Move to the pixel center, scale, then shift back by 0.5 because the
    // existing bilinear sampling code etc. still assumes the old coordinate
    // system.
    const float center = static_cast<float>(x) + 0.5f;
    return center * scale - 0.5f;
  }
};
57 
// Older, incorrect scaling method. It introduces a slight translation into
// every resize, which leads to inconsistent results: for example, a flip
// followed by a resize gives different results than a resize followed by a
// flip.
struct LegacyScaler {
  inline float operator()(const int x, const float scale) const {
    const float position = static_cast<float>(x);
    return position * scale;
  }
};
66 
67 struct ImageResizerState {
ImageResizerStateImageResizerState68   explicit ImageResizerState(bool align_corners, bool half_pixel_centers)
69       : align_corners_(align_corners),
70         half_pixel_centers_(half_pixel_centers) {}
71 
72   // ValidateAndCalculateOutputSize checks the bounds on the input tensors
73   // and requested size, sets up some of the resizing state such as the
74   // height_scale and width_scale, and calculates the output size.
75   // If any of these operations fails, it sets an error status in
76   // the context, which the caller must check.
ValidateAndCalculateOutputSizeImageResizerState77   void ValidateAndCalculateOutputSize(OpKernelContext* context,
78                                       const Tensor& input) {
79     OP_REQUIRES(
80         context,
81         !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_),
82         errors::InvalidArgument("If half_pixel_centers is True, "
83                                 "align_corners must be False."));
84     OP_REQUIRES(context, input.dims() == 4,
85                 errors::InvalidArgument("input must be 4-dimensional",
86                                         input.shape().DebugString()));
87     const Tensor& shape_t = context->input(1);
88     OP_REQUIRES(context, shape_t.dims() == 1,
89                 errors::InvalidArgument("shape_t must be 1-dimensional",
90                                         shape_t.shape().DebugString()));
91     OP_REQUIRES(context, shape_t.NumElements() == 2,
92                 errors::InvalidArgument("shape_t must have two elements",
93                                         shape_t.shape().DebugString()));
94     auto Svec = shape_t.vec<int32>();
95     batch_size = input.dim_size(0);
96     out_height = internal::SubtleMustCopy(Svec(0));
97     out_width = internal::SubtleMustCopy(Svec(1));
98     OP_REQUIRES(
99         context,
100         FastBoundsCheck(input.dim_size(1), std::numeric_limits<int32>::max()) &&
101             FastBoundsCheck(input.dim_size(2),
102                             std::numeric_limits<int32>::max()),
103         errors::InvalidArgument("input sizes must be between 0 and max int32"));
104 
105     in_height = static_cast<int32>(input.dim_size(1));
106     in_width = static_cast<int32>(input.dim_size(2));
107     channels = input.dim_size(3);
108     OP_REQUIRES(context, out_height > 0 && out_width > 0,
109                 errors::InvalidArgument("output dimensions must be positive"));
110     OP_REQUIRES(
111         context, channels > 0,
112         errors::InvalidArgument("image must have at least one channel"));
113     OP_REQUIRES(
114         context, input.dim_size(1) > 0 && input.dim_size(2) > 0,
115         errors::InvalidArgument("input image must be of non-zero size"));
116     height_scale = CalculateResizeScale(in_height, out_height, align_corners_);
117     width_scale = CalculateResizeScale(in_width, out_width, align_corners_);
118 
119     // Guard against overflows
120     OP_REQUIRES(context,
121                 ceilf((out_height - 1) * height_scale) <=
122                     static_cast<float>(std::numeric_limits<int64>::max()),
123                 errors::InvalidArgument(
124                     "input image height scale would cause an overflow"));
125     OP_REQUIRES(
126         context,
127         ceilf((out_width - 1) * width_scale) <= static_cast<float>(INT_MAX),
128         errors::InvalidArgument(
129             "input image width scale would cause an overflow"));
130   }
131 
132   // Calculates all the required variables, and allocates the output.
ValidateAndCreateOutputImageResizerState133   void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input) {
134     ValidateAndCalculateOutputSize(context, input);
135     if (!context->status().ok()) return;
136     OP_REQUIRES_OK(context, context->allocate_output(
137                                 0,
138                                 TensorShape({input.dim_size(0), out_height,
139                                              out_width, input.dim_size(3)}),
140                                 &output));
141   }
142 
143   int64 batch_size;
144   int64 out_height;
145   int64 out_width;
146   int64 in_height;
147   int64 in_width;
148   int64 channels;
149   float height_scale;
150   float width_scale;
151   Tensor* output = nullptr;
152 
153  private:
154   bool align_corners_;
155   bool half_pixel_centers_;
156 };
157 
158 struct ImageResizerGradientState {
ImageResizerGradientStateImageResizerGradientState159   explicit ImageResizerGradientState(bool align_corners,
160                                      bool half_pixel_centers)
161       : align_corners_(align_corners),
162         half_pixel_centers_(half_pixel_centers) {}
163 
ValidateAndCreateOutputImageResizerGradientState164   void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input,
165                                const Tensor& original_image) {
166     OP_REQUIRES(
167         context,
168         !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_),
169         errors::InvalidArgument("If half_pixel_centers is True, "
170                                 "align_corners must be False."));
171 
172     OP_REQUIRES(context, input.dims() == 4,
173                 errors::InvalidArgument("input_grad must be 4-dimensional",
174                                         input.shape().DebugString()));
175     // Resizers always produce float images, so input gradient must
176     // always be a float.
177     OP_REQUIRES(context, input.dtype() == DT_FLOAT,
178                 errors::InvalidArgument("input_grad must be of type float",
179                                         DataTypeString(input.dtype())));
180 
181     OP_REQUIRES(context, original_image.dims() == 4,
182                 errors::InvalidArgument("original_image must be 4-dimensional",
183                                         original_image.shape().DebugString()));
184 
185     // Allocate output and initialize to zeros.
186     batch_size = input.dim_size(0);
187     channels = input.dim_size(3);
188     resized_height = input.dim_size(1);
189     resized_width = input.dim_size(2);
190     original_height = original_image.dim_size(1);
191     original_width = original_image.dim_size(2);
192 
193     OP_REQUIRES(
194         context,
195         FastBoundsCheck(original_height, std::numeric_limits<int32>::max()) &&
196             FastBoundsCheck(original_width, std::numeric_limits<int32>::max()),
197         errors::InvalidArgument(
198             "original sizes must be between 0 and max int32"));
199 
200     height_scale =
201         CalculateResizeScale(original_height, resized_height, align_corners_);
202     width_scale =
203         CalculateResizeScale(original_width, resized_width, align_corners_);
204     output = nullptr;
205     OP_REQUIRES_OK(context, context->allocate_output(
206                                 0,
207                                 TensorShape({batch_size, original_height,
208                                              original_width, channels}),
209                                 &output));
210   }
211 
212   int64 batch_size;
213   int64 channels;
214   int64 resized_height;
215   int64 resized_width;
216   int64 original_height;
217   int64 original_width;
218   float height_scale;
219   float width_scale;
220   Tensor* output;
221 
222  private:
223   bool align_corners_;
224   bool half_pixel_centers_;
225 };
226 
227 }  // namespace tensorflow
228 
229 #endif  // TENSORFLOW_CORE_KERNELS_IMAGE_RESIZER_STATE_H_
230