1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 // This is a helper struct to package up the input and output
17 // parameters of an image resizer (the height, widths, etc.). To
18 // reduce code duplication and ensure consistency across the different
19 // resizers, it performs the input validation.
20
21 #ifndef TENSORFLOW_CORE_KERNELS_IMAGE_RESIZER_STATE_H_
22 #define TENSORFLOW_CORE_KERNELS_IMAGE_RESIZER_STATE_H_
23
24 #define EIGEN_USE_THREADS
25
26 #include <math.h>
27 #include <algorithm>
28 #include <array>
29
30 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
31 #include "tensorflow/core/framework/bounds_check.h"
32 #include "tensorflow/core/framework/op_kernel.h"
33 #include "tensorflow/core/framework/register_types.h"
34 #include "tensorflow/core/framework/tensor.h"
35 #include "tensorflow/core/framework/tensor_shape.h"
36 #include "tensorflow/core/framework/types.h"
37
38 namespace tensorflow {
39
// Computes the float factor that maps an index on the output axis to a
// coordinate on the input axis.
//
// When `align_corners` is set and the output has more than one element, the
// ratio is (in_size - 1) / (out_size - 1); in every other case it is the plain
// in_size / out_size ratio. The division is carried out in float.
inline float CalculateResizeScale(int64 in_size, int64 out_size,
                                  bool align_corners) {
  if (align_corners && out_size > 1) {
    return (in_size - 1) / static_cast<float>(out_size - 1);
  }
  return in_size / static_cast<float>(out_size);
}
47
// Scaler that treats pixel centers as lying at half-integer coordinates, i.e.
// the top-left pixel's center is at (0.5, 0.5).
struct HalfPixelScaler {
  // Maps output index `x` to an input coordinate using `scale`.
  float operator()(const int x, const float scale) const {
    const float pixel_center = static_cast<float>(x) + 0.5f;
    // The trailing 0.5 is subtracted because the existing bilinear sampling
    // code etc. assumes pixels are in the old coordinate system.
    return pixel_center * scale - 0.5f;
  }
};
57
// Older, incorrect scaling method. It gives every resize a slight translation,
// which leads to inconsistent results: for example, a flip followed by a
// resize gives different results than a resize followed by a flip.
struct LegacyScaler {
  // Maps output index `x` to an input coordinate by plain multiplication.
  float operator()(const int x, const float scale) const {
    const float position = static_cast<float>(x);
    return position * scale;
  }
};
66
67 struct ImageResizerState {
ImageResizerStateImageResizerState68 explicit ImageResizerState(bool align_corners, bool half_pixel_centers)
69 : align_corners_(align_corners),
70 half_pixel_centers_(half_pixel_centers) {}
71
72 // ValidateAndCalculateOutputSize checks the bounds on the input tensors
73 // and requested size, sets up some of the resizing state such as the
74 // height_scale and width_scale, and calculates the output size.
75 // If any of these operations fails, it sets an error status in
76 // the context, which the caller must check.
ValidateAndCalculateOutputSizeImageResizerState77 void ValidateAndCalculateOutputSize(OpKernelContext* context,
78 const Tensor& input) {
79 OP_REQUIRES(
80 context,
81 !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_),
82 errors::InvalidArgument("If half_pixel_centers is True, "
83 "align_corners must be False."));
84 OP_REQUIRES(context, input.dims() == 4,
85 errors::InvalidArgument("input must be 4-dimensional",
86 input.shape().DebugString()));
87 const Tensor& shape_t = context->input(1);
88 OP_REQUIRES(context, shape_t.dims() == 1,
89 errors::InvalidArgument("shape_t must be 1-dimensional",
90 shape_t.shape().DebugString()));
91 OP_REQUIRES(context, shape_t.NumElements() == 2,
92 errors::InvalidArgument("shape_t must have two elements",
93 shape_t.shape().DebugString()));
94 auto Svec = shape_t.vec<int32>();
95 batch_size = input.dim_size(0);
96 out_height = internal::SubtleMustCopy(Svec(0));
97 out_width = internal::SubtleMustCopy(Svec(1));
98 OP_REQUIRES(
99 context,
100 FastBoundsCheck(input.dim_size(1), std::numeric_limits<int32>::max()) &&
101 FastBoundsCheck(input.dim_size(2),
102 std::numeric_limits<int32>::max()),
103 errors::InvalidArgument("input sizes must be between 0 and max int32"));
104
105 in_height = static_cast<int32>(input.dim_size(1));
106 in_width = static_cast<int32>(input.dim_size(2));
107 channels = input.dim_size(3);
108 OP_REQUIRES(context, out_height > 0 && out_width > 0,
109 errors::InvalidArgument("output dimensions must be positive"));
110 OP_REQUIRES(
111 context, channels > 0,
112 errors::InvalidArgument("image must have at least one channel"));
113 OP_REQUIRES(
114 context, input.dim_size(1) > 0 && input.dim_size(2) > 0,
115 errors::InvalidArgument("input image must be of non-zero size"));
116 height_scale = CalculateResizeScale(in_height, out_height, align_corners_);
117 width_scale = CalculateResizeScale(in_width, out_width, align_corners_);
118
119 // Guard against overflows
120 OP_REQUIRES(context,
121 ceilf((out_height - 1) * height_scale) <=
122 static_cast<float>(std::numeric_limits<int64>::max()),
123 errors::InvalidArgument(
124 "input image height scale would cause an overflow"));
125 OP_REQUIRES(
126 context,
127 ceilf((out_width - 1) * width_scale) <= static_cast<float>(INT_MAX),
128 errors::InvalidArgument(
129 "input image width scale would cause an overflow"));
130 }
131
132 // Calculates all the required variables, and allocates the output.
ValidateAndCreateOutputImageResizerState133 void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input) {
134 ValidateAndCalculateOutputSize(context, input);
135 if (!context->status().ok()) return;
136 OP_REQUIRES_OK(context, context->allocate_output(
137 0,
138 TensorShape({input.dim_size(0), out_height,
139 out_width, input.dim_size(3)}),
140 &output));
141 }
142
143 int64 batch_size;
144 int64 out_height;
145 int64 out_width;
146 int64 in_height;
147 int64 in_width;
148 int64 channels;
149 float height_scale;
150 float width_scale;
151 Tensor* output = nullptr;
152
153 private:
154 bool align_corners_;
155 bool half_pixel_centers_;
156 };
157
158 struct ImageResizerGradientState {
ImageResizerGradientStateImageResizerGradientState159 explicit ImageResizerGradientState(bool align_corners,
160 bool half_pixel_centers)
161 : align_corners_(align_corners),
162 half_pixel_centers_(half_pixel_centers) {}
163
ValidateAndCreateOutputImageResizerGradientState164 void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input,
165 const Tensor& original_image) {
166 OP_REQUIRES(
167 context,
168 !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_),
169 errors::InvalidArgument("If half_pixel_centers is True, "
170 "align_corners must be False."));
171
172 OP_REQUIRES(context, input.dims() == 4,
173 errors::InvalidArgument("input_grad must be 4-dimensional",
174 input.shape().DebugString()));
175 // Resizers always produce float images, so input gradient must
176 // always be a float.
177 OP_REQUIRES(context, input.dtype() == DT_FLOAT,
178 errors::InvalidArgument("input_grad must be of type float",
179 DataTypeString(input.dtype())));
180
181 OP_REQUIRES(context, original_image.dims() == 4,
182 errors::InvalidArgument("original_image must be 4-dimensional",
183 original_image.shape().DebugString()));
184
185 // Allocate output and initialize to zeros.
186 batch_size = input.dim_size(0);
187 channels = input.dim_size(3);
188 resized_height = input.dim_size(1);
189 resized_width = input.dim_size(2);
190 original_height = original_image.dim_size(1);
191 original_width = original_image.dim_size(2);
192
193 OP_REQUIRES(
194 context,
195 FastBoundsCheck(original_height, std::numeric_limits<int32>::max()) &&
196 FastBoundsCheck(original_width, std::numeric_limits<int32>::max()),
197 errors::InvalidArgument(
198 "original sizes must be between 0 and max int32"));
199
200 height_scale =
201 CalculateResizeScale(original_height, resized_height, align_corners_);
202 width_scale =
203 CalculateResizeScale(original_width, resized_width, align_corners_);
204 output = nullptr;
205 OP_REQUIRES_OK(context, context->allocate_output(
206 0,
207 TensorShape({batch_size, original_height,
208 original_width, channels}),
209 &output));
210 }
211
212 int64 batch_size;
213 int64 channels;
214 int64 resized_height;
215 int64 resized_width;
216 int64 original_height;
217 int64 original_width;
218 float height_scale;
219 float width_scale;
220 Tensor* output;
221
222 private:
223 bool align_corners_;
224 bool half_pixel_centers_;
225 };
226
227 } // namespace tensorflow
228
229 #endif // TENSORFLOW_CORE_KERNELS_IMAGE_RESIZER_STATE_H_
230