1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 // See docs in ../ops/image_ops.cc
17 #define EIGEN_USE_THREADS
18
19 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
20 #define EIGEN_USE_GPU
21 #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
22
#include "tensorflow/core/kernels/image/resize_bilinear_op.h"

#include <algorithm>
#include <cmath>
#include <memory>
#include <type_traits>
#include <utility>
#include <vector>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/cast_op.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/image_resizer_state.h"
37
38 namespace tensorflow {
39
40 typedef Eigen::ThreadPoolDevice CPUDevice;
41 typedef Eigen::GpuDevice GPUDevice;
42
43 template <typename Device, typename T>
44 class ResizeBilinearOp : public OpKernel {
45 public:
ResizeBilinearOp(OpKernelConstruction * context)46 explicit ResizeBilinearOp(OpKernelConstruction* context) : OpKernel(context) {
47 OP_REQUIRES_OK(context, context->GetAttr("align_corners", &align_corners_));
48 OP_REQUIRES_OK(
49 context, context->GetAttr("half_pixel_centers", &half_pixel_centers_));
50 }
51
Compute(OpKernelContext * context)52 void Compute(OpKernelContext* context) override {
53 const Tensor& input = context->input(0);
54 ImageResizerState st(align_corners_, half_pixel_centers_);
55 st.ValidateAndCreateOutput(context, input);
56
57 if (!context->status().ok()) return;
58
59 // Return if the output is empty.
60 if (st.output->NumElements() == 0) return;
61
62 typename TTypes<T, 4>::ConstTensor image_data(input.tensor<T, 4>());
63 TTypes<float, 4>::Tensor output_data = st.output->tensor<float, 4>();
64
65 functor::ResizeBilinear<Device, T>()(
66 context->eigen_device<Device>(), image_data, st.height_scale,
67 st.width_scale, half_pixel_centers_, output_data);
68 }
69
70 private:
71 bool align_corners_;
72 bool half_pixel_centers_;
73 };
74
75 namespace {
76 // Compute the interpolation indices only once.
77 struct CachedInterpolation {
78 int64 lower; // Lower source index used in the interpolation
79 int64 upper; // Upper source index used in the interpolation
80 // 1-D linear interpolation scale (see:
81 // https://en.wikipedia.org/wiki/Bilinear_interpolation)
82 float lerp;
83 };
84
85 template <bool half_pixel_centers>
compute_interpolation_weights(const Eigen::Index out_size,const Eigen::Index in_size,const float scale,CachedInterpolation * interpolation)86 inline void compute_interpolation_weights(const Eigen::Index out_size,
87 const Eigen::Index in_size,
88 const float scale,
89 CachedInterpolation* interpolation) {
90 typedef typename std::conditional<half_pixel_centers, HalfPixelScaler,
91 LegacyScaler>::type Scaler;
92 Scaler scaler;
93 for (Eigen::Index i = 0; i < out_size; ++i) {
94 const float in = scaler(i, scale);
95 const float in_f = std::floor(in);
96 interpolation[i].lower =
97 std::max(static_cast<Eigen::Index>(in_f), static_cast<Eigen::Index>(0));
98 interpolation[i].upper =
99 std::min(static_cast<Eigen::Index>(std::ceil(in)), in_size - 1);
100 interpolation[i].lerp = in - in_f;
101 }
102 }
103
/**
 * Bilinearly blends four corner samples.
 *
 * The top pair and the bottom pair are each interpolated horizontally with
 * weight `x_lerp`; the two intermediate values are then interpolated
 * vertically with weight `y_lerp`. A weight of 0 selects the left (or top)
 * value and a weight of 1 selects the right (or bottom) value.
 */
inline float compute_lerp(const float top_left, const float top_right,
                          const float bottom_left, const float bottom_right,
                          const float x_lerp, const float y_lerp) {
  // 1-D linear interpolation from `from` towards `to`.
  const auto mix = [](const float from, const float to, const float weight) {
    return from + (to - from) * weight;
  };
  const float top_row = mix(top_left, top_right, x_lerp);
  const float bottom_row = mix(bottom_left, bottom_right, x_lerp);
  return mix(top_row, bottom_row, y_lerp);
}
115
116 // Casts from float16 to T.
117 template <typename Device, typename T>
118 struct CastFloatTo {
operator ()tensorflow::__anonc82806cb0111::CastFloatTo119 void operator()(const Device& d, typename TTypes<float>::ConstFlat input,
120 typename TTypes<T>::Flat output) {
121 output.device(d) = input.template cast<T>();
122 }
123 };
124
125 template <typename T>
126 struct CastFloatTo<GPUDevice, T> {
operator ()tensorflow::__anonc82806cb0111::CastFloatTo127 void operator()(const GPUDevice& d, typename TTypes<float>::ConstFlat input,
128 typename TTypes<T>::Flat output) {
129 // Use existing cast functor instead of directly casting Eigen tensor, as
130 // otherwise we need to instantiate the cast function in a .cu.cc file
131 functor::CastFunctor<GPUDevice, T, float> cast;
132 cast(d, output, input);
133 }
134 };
135
136 } // namespace
137
138 namespace generator {
139 template <typename T, bool half_pixel_centers>
140 class ResizeBilinearGenerator {
141 public:
ResizeBilinearGenerator(typename TTypes<T,4>::ConstTensor input,const Eigen::Index output_height,const Eigen::Index output_width,const float height_scale,const float width_scale)142 EIGEN_ALWAYS_INLINE ResizeBilinearGenerator(
143 typename TTypes<T, 4>::ConstTensor input,
144 const Eigen::Index output_height, const Eigen::Index output_width,
145 const float height_scale, const float width_scale)
146 : input_(input), ys_(output_height), xs_(output_width) {
147 const Eigen::Index input_height = input.dimension(1);
148 const Eigen::Index input_width = input.dimension(2);
149 compute_interpolation_weights<half_pixel_centers>(
150 output_height, input_height, height_scale, ys_.data());
151 compute_interpolation_weights<half_pixel_centers>(output_width, input_width,
152 width_scale, xs_.data());
153 }
154
operator ()(const Eigen::array<Eigen::Index,4> & coords) const155 EIGEN_ALWAYS_INLINE float operator()(
156 const Eigen::array<Eigen::Index, 4>& coords) const {
157 const Eigen::Index b = coords[0];
158 const Eigen::Index y = coords[1];
159 const Eigen::Index x = coords[2];
160 const Eigen::Index c = coords[3];
161
162 const float top_left = input_(b, ys_[y].lower, xs_[x].lower, c);
163 const float top_right = input_(b, ys_[y].lower, xs_[x].upper, c);
164 const float bottom_left = input_(b, ys_[y].upper, xs_[x].lower, c);
165 const float bottom_right = input_(b, ys_[y].upper, xs_[x].upper, c);
166 const float ys_lerp = ys_[y].lerp;
167 const float xs_lerp = xs_[x].lerp;
168 return compute_lerp(top_left, top_right, bottom_left, bottom_right, xs_lerp,
169 ys_lerp);
170 }
171
172 private:
173 typename TTypes<T, 4>::ConstTensor input_;
174 std::vector<CachedInterpolation> ys_, xs_;
175 };
176 } // namespace generator
177
178 // Partial specialization of ResizeBilinear functor for a CPUDevice.
179 namespace functor {
180 template <typename T>
181 struct ResizeBilinear<CPUDevice, T> {
operator ()tensorflow::functor::ResizeBilinear182 void operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor images,
183 const float height_scale, const float width_scale,
184 bool half_pixel_centers,
185 typename TTypes<float, 4>::Tensor output) {
186 const Eigen::Index input_height = images.dimension(1);
187 const Eigen::Index input_width = images.dimension(2);
188
189 const Eigen::Index output_height = output.dimension(1);
190 const Eigen::Index output_width = output.dimension(2);
191
192 // Handle no-op resizes efficiently.
193 if (output_height == input_height && output_width == input_width) {
194 output = images.template cast<float>();
195 return;
196 }
197
198 if (half_pixel_centers) {
199 generator::ResizeBilinearGenerator<T, true> generator(
200 images, output_height, output_width, height_scale, width_scale);
201 output.device(d) = output.generate(std::move(generator));
202 } else {
203 generator::ResizeBilinearGenerator<T, false> generator(
204 images, output_height, output_width, height_scale, width_scale);
205 output.device(d) = output.generate(std::move(generator));
206 }
207 }
208 };
209 } // namespace functor
210
211 template <typename Device, typename T>
212 class ResizeBilinearOpGrad : public OpKernel {
213 public:
ResizeBilinearOpGrad(OpKernelConstruction * context)214 explicit ResizeBilinearOpGrad(OpKernelConstruction* context)
215 : OpKernel(context) {
216 OP_REQUIRES_OK(context, context->GetAttr("align_corners", &align_corners_));
217 OP_REQUIRES_OK(
218 context, context->GetAttr("half_pixel_centers", &half_pixel_centers_));
219 }
220
Compute(OpKernelContext * context)221 void Compute(OpKernelContext* context) override {
222 // Validate input.
223 // First argument is gradient with respect to resized image.
224 const Tensor& input = context->input(0);
225 const Tensor& original_image = context->input(1);
226
227 ImageResizerGradientState st(align_corners_, half_pixel_centers_);
228 st.ValidateAndCreateOutput(context, input, original_image);
229
230 if (!context->status().ok()) return;
231
232 TTypes<float, 4>::ConstTensor input_grad = input.tensor<float, 4>();
233
234 if (!std::is_same<T, Eigen::half>::value &&
235 !std::is_same<T, Eigen::bfloat16>::value) {
236 typename TTypes<T, 4>::Tensor output_grad(st.output->tensor<T, 4>());
237 functor::ResizeBilinearGrad<Device, T>()(
238 context->eigen_device<Device>(), input_grad, st.height_scale,
239 st.width_scale, half_pixel_centers_, output_grad);
240 } else {
241 // Accumulate output to float instead of half/bfloat16 tensor, since float
242 // accumulation is more numerically stable and GPU half implementation is
243 // slow.
244 // TODO(b/165759037): Create optimized and numerically stable half and
245 // bfloat16 implementation
246 Tensor output_grad;
247 OP_REQUIRES_OK(context, context->allocate_temp(
248 DT_FLOAT, st.output->shape(), &output_grad));
249 functor::ResizeBilinearGrad<Device, float>()(
250 context->eigen_device<Device>(), input_grad, st.height_scale,
251 st.width_scale, half_pixel_centers_, output_grad.tensor<float, 4>());
252 const Tensor& output_grad_const = output_grad;
253 CastFloatTo<Device, T>{}(context->template eigen_device<Device>(),
254 output_grad_const.template flat<float>(),
255 st.output->template flat<T>());
256 }
257 }
258
259 private:
260 bool align_corners_;
261 bool half_pixel_centers_;
262 };
263
264 // Partial specialization of ResizeBilinearGrad functor for a CPUDevice.
265 namespace functor {
266
267 template <typename T>
268 struct ResizeBilinearGrad<CPUDevice, T> {
269 template <typename Scaler>
ResizeGradCoretensorflow::functor::ResizeBilinearGrad270 void ResizeGradCore(const Scaler& scaler,
271 typename TTypes<float, 4>::ConstTensor input_grad,
272 const float height_scale, const float width_scale,
273 typename TTypes<T, 4>::Tensor output_grad) {
274 const Eigen::Index batch = output_grad.dimension(0);
275 const Eigen::Index original_height = output_grad.dimension(1);
276 const Eigen::Index original_width = output_grad.dimension(2);
277 const Eigen::Index channels = output_grad.dimension(3);
278
279 const Eigen::Index resized_height = input_grad.dimension(1);
280 const Eigen::Index resized_width = input_grad.dimension(2);
281
282 output_grad.setZero();
283
284 // Each resized output pixel was computed as a weighted average of four
285 // input pixels. Here we find the four input pixel locations that
286 // contributed to each output pixel and propagate the gradient at the output
287 // pixel location to each of those four input pixel locations in the same
288 // proportions that they originally contributed to the output pixel.
289 // Here is the forward-propagation pseudo-code, for reference:
290 // resized(b, y, x, c) = top_left * (1 - y) * (1 - x)
291 // + top_right * (1 - y) * x
292 // + bottom_left * y * (1 - x)
293 // + bottom_right * y * x
294 for (Eigen::Index b = 0; b < batch; ++b) {
295 for (Eigen::Index y = 0; y < resized_height; ++y) {
296 const float in_y = scaler(y, height_scale);
297 const Eigen::Index top_y_index =
298 std::max(static_cast<Eigen::Index>(floorf(in_y)),
299 static_cast<Eigen::Index>(0));
300 const Eigen::Index bottom_y_index = std::min(
301 static_cast<Eigen::Index>(ceilf(in_y)), original_height - 1);
302 const float y_lerp = in_y - floorf(in_y);
303 const float inverse_y_lerp = (1.0f - y_lerp);
304 for (Eigen::Index x = 0; x < resized_width; ++x) {
305 const float in_x = scaler(x, width_scale);
306 const Eigen::Index left_x_index =
307 std::max(static_cast<Eigen::Index>(floorf(in_x)),
308 static_cast<Eigen::Index>(0));
309 const Eigen::Index right_x_index = std::min(
310 static_cast<Eigen::Index>(ceilf(in_x)), original_width - 1);
311 const float x_lerp = in_x - floorf(in_x);
312 const float inverse_x_lerp = (1.0f - x_lerp);
313 // TODO(b/158287314): Look into vectorizing this.
314 for (Eigen::Index c = 0; c < channels; ++c) {
315 output_grad(b, top_y_index, left_x_index, c) +=
316 T(input_grad(b, y, x, c) * inverse_y_lerp * inverse_x_lerp);
317 output_grad(b, top_y_index, right_x_index, c) +=
318 T(input_grad(b, y, x, c) * inverse_y_lerp * x_lerp);
319 output_grad(b, bottom_y_index, left_x_index, c) +=
320 T(input_grad(b, y, x, c) * y_lerp * inverse_x_lerp);
321 output_grad(b, bottom_y_index, right_x_index, c) +=
322 T(input_grad(b, y, x, c) * y_lerp * x_lerp);
323 }
324 }
325 }
326 }
327 }
operator ()tensorflow::functor::ResizeBilinearGrad328 void operator()(const CPUDevice& d,
329 typename TTypes<float, 4>::ConstTensor input_grad,
330 const float height_scale, const float width_scale,
331 const bool half_pixel_centers,
332 typename TTypes<T, 4>::Tensor output_grad) {
333 if (half_pixel_centers) {
334 return ResizeGradCore(HalfPixelScaler(), input_grad, height_scale,
335 width_scale, output_grad);
336 } else {
337 return ResizeGradCore(LegacyScaler(), input_grad, height_scale,
338 width_scale, output_grad);
339 }
340 }
341 };
342
343 } // namespace functor
344
// Registers the CPU "ResizeBilinear" kernel for element type T. The "size"
// input is declared as living in host memory.
#define REGISTER_KERNEL(T)                            \
  REGISTER_KERNEL_BUILDER(Name("ResizeBilinear")      \
                              .Device(DEVICE_CPU)     \
                              .TypeConstraint<T>("T") \
                              .HostMemory("size"),    \
                          ResizeBilinearOp<CPUDevice, T>);

// One forward CPU kernel per type enumerated by TF_CALL_REAL_NUMBER_TYPES.
TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL);

#undef REGISTER_KERNEL

// Registers the CPU "ResizeBilinearGrad" kernel for element type T.
#define REGISTER_GRAD_KERNEL(T)                                             \
  REGISTER_KERNEL_BUILDER(                                                  \
      Name("ResizeBilinearGrad").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      ResizeBilinearOpGrad<CPUDevice, T>);

// The CPU gradient kernel is registered for half, float, double and bfloat16
// only.
TF_CALL_half(REGISTER_GRAD_KERNEL);
TF_CALL_float(REGISTER_GRAD_KERNEL);
TF_CALL_double(REGISTER_GRAD_KERNEL);
TF_CALL_bfloat16(REGISTER_GRAD_KERNEL);

#undef REGISTER_GRAD_KERNEL
367
// GPU registrations; compiled only for CUDA or ROCm builds.
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

// Registers the GPU "ResizeBilinear" kernel for element type T. The "size"
// input is declared as living in host memory.
#define REGISTER_KERNEL(T)                            \
  REGISTER_KERNEL_BUILDER(Name("ResizeBilinear")      \
                              .Device(DEVICE_GPU)     \
                              .TypeConstraint<T>("T") \
                              .HostMemory("size"),    \
                          ResizeBilinearOp<GPUDevice, T>);

// One forward GPU kernel per type enumerated by TF_CALL_GPU_NUMBER_TYPES.
TF_CALL_GPU_NUMBER_TYPES(REGISTER_KERNEL);

#undef REGISTER_KERNEL

// Registers the GPU "ResizeBilinearGrad" kernel for element type T.
#define REGISTER_GRAD_KERNEL(T)                                             \
  REGISTER_KERNEL_BUILDER(                                                  \
      Name("ResizeBilinearGrad").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
      ResizeBilinearOpGrad<GPUDevice, T>);

// One gradient GPU kernel per type enumerated by TF_CALL_GPU_NUMBER_TYPES.
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GRAD_KERNEL);

#undef REGISTER_GRAD_KERNEL

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
391
392 } // namespace tensorflow
393