1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // See docs in ../ops/image_ops.cc
17 #define EIGEN_USE_THREADS
18 
19 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
20 #define EIGEN_USE_GPU
21 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
22 
23 #include "tensorflow/core/kernels/image/resize_bilinear_op.h"
24 
25 #include <memory>
26 
27 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
28 #include "tensorflow/core/framework/op_kernel.h"
29 #include "tensorflow/core/framework/register_types.h"
30 #include "tensorflow/core/framework/tensor.h"
31 #include "tensorflow/core/framework/tensor_shape.h"
32 #include "tensorflow/core/framework/types.h"
33 #include "tensorflow/core/kernels/cast_op.h"
34 #include "tensorflow/core/lib/core/status.h"
35 #include "tensorflow/core/platform/logging.h"
36 #include "tensorflow/core/util/image_resizer_state.h"
37 
38 namespace tensorflow {
39 
40 typedef Eigen::ThreadPoolDevice CPUDevice;
41 typedef Eigen::GpuDevice GPUDevice;
42 
43 template <typename Device, typename T>
44 class ResizeBilinearOp : public OpKernel {
45  public:
ResizeBilinearOp(OpKernelConstruction * context)46   explicit ResizeBilinearOp(OpKernelConstruction* context) : OpKernel(context) {
47     OP_REQUIRES_OK(context, context->GetAttr("align_corners", &align_corners_));
48     OP_REQUIRES_OK(
49         context, context->GetAttr("half_pixel_centers", &half_pixel_centers_));
50   }
51 
Compute(OpKernelContext * context)52   void Compute(OpKernelContext* context) override {
53     const Tensor& input = context->input(0);
54     ImageResizerState st(align_corners_, half_pixel_centers_);
55     st.ValidateAndCreateOutput(context, input);
56 
57     if (!context->status().ok()) return;
58 
59     // Return if the output is empty.
60     if (st.output->NumElements() == 0) return;
61 
62     typename TTypes<T, 4>::ConstTensor image_data(input.tensor<T, 4>());
63     TTypes<float, 4>::Tensor output_data = st.output->tensor<float, 4>();
64 
65     functor::ResizeBilinear<Device, T>()(
66         context->eigen_device<Device>(), image_data, st.height_scale,
67         st.width_scale, half_pixel_centers_, output_data);
68   }
69 
70  private:
71   bool align_corners_;
72   bool half_pixel_centers_;
73 };
74 
75 namespace {
76 // Compute the interpolation indices only once.
77 struct CachedInterpolation {
78   int64 lower;  // Lower source index used in the interpolation
79   int64 upper;  // Upper source index used in the interpolation
80   // 1-D linear interpolation scale (see:
81   // https://en.wikipedia.org/wiki/Bilinear_interpolation)
82   float lerp;
83 };
84 
85 template <bool half_pixel_centers>
compute_interpolation_weights(const Eigen::Index out_size,const Eigen::Index in_size,const float scale,CachedInterpolation * interpolation)86 inline void compute_interpolation_weights(const Eigen::Index out_size,
87                                           const Eigen::Index in_size,
88                                           const float scale,
89                                           CachedInterpolation* interpolation) {
90   typedef typename std::conditional<half_pixel_centers, HalfPixelScaler,
91                                     LegacyScaler>::type Scaler;
92   Scaler scaler;
93   for (Eigen::Index i = 0; i < out_size; ++i) {
94     const float in = scaler(i, scale);
95     const float in_f = std::floor(in);
96     interpolation[i].lower =
97         std::max(static_cast<Eigen::Index>(in_f), static_cast<Eigen::Index>(0));
98     interpolation[i].upper =
99         std::min(static_cast<Eigen::Index>(std::ceil(in)), in_size - 1);
100     interpolation[i].lerp = in - in_f;
101   }
102 }
103 
/**
 * Bilinearly blends four corner samples.
 *
 * The top pair and the bottom pair are each interpolated horizontally with
 * x_lerp, and the two results are then interpolated vertically with y_lerp.
 * A weight of 0 selects the left/top sample, a weight of 1 the right/bottom
 * sample.
 */
inline float compute_lerp(const float top_left, const float top_right,
                          const float bottom_left, const float bottom_right,
                          const float x_lerp, const float y_lerp) {
  // 1-D linear interpolation from `from` towards `to` by weight `t`.
  const auto lerp = [](const float from, const float to, const float t) {
    return from + (to - from) * t;
  };
  const float top_row = lerp(top_left, top_right, x_lerp);
  const float bottom_row = lerp(bottom_left, bottom_right, x_lerp);
  return lerp(top_row, bottom_row, y_lerp);
}
115 
116 // Casts from float16 to T.
117 template <typename Device, typename T>
118 struct CastFloatTo {
operator ()tensorflow::__anonc82806cb0111::CastFloatTo119   void operator()(const Device& d, typename TTypes<float>::ConstFlat input,
120                   typename TTypes<T>::Flat output) {
121     output.device(d) = input.template cast<T>();
122   }
123 };
124 
125 template <typename T>
126 struct CastFloatTo<GPUDevice, T> {
operator ()tensorflow::__anonc82806cb0111::CastFloatTo127   void operator()(const GPUDevice& d, typename TTypes<float>::ConstFlat input,
128                   typename TTypes<T>::Flat output) {
129     // Use existing cast functor instead of directly casting Eigen tensor, as
130     // otherwise we need to instantiate the cast function in a .cu.cc file
131     functor::CastFunctor<GPUDevice, T, float> cast;
132     cast(d, output, input);
133   }
134 };
135 
136 }  // namespace
137 
138 namespace generator {
139 template <typename T, bool half_pixel_centers>
140 class ResizeBilinearGenerator {
141  public:
ResizeBilinearGenerator(typename TTypes<T,4>::ConstTensor input,const Eigen::Index output_height,const Eigen::Index output_width,const float height_scale,const float width_scale)142   EIGEN_ALWAYS_INLINE ResizeBilinearGenerator(
143       typename TTypes<T, 4>::ConstTensor input,
144       const Eigen::Index output_height, const Eigen::Index output_width,
145       const float height_scale, const float width_scale)
146       : input_(input), ys_(output_height), xs_(output_width) {
147     const Eigen::Index input_height = input.dimension(1);
148     const Eigen::Index input_width = input.dimension(2);
149     compute_interpolation_weights<half_pixel_centers>(
150         output_height, input_height, height_scale, ys_.data());
151     compute_interpolation_weights<half_pixel_centers>(output_width, input_width,
152                                                       width_scale, xs_.data());
153   }
154 
operator ()(const Eigen::array<Eigen::Index,4> & coords) const155   EIGEN_ALWAYS_INLINE float operator()(
156       const Eigen::array<Eigen::Index, 4>& coords) const {
157     const Eigen::Index b = coords[0];
158     const Eigen::Index y = coords[1];
159     const Eigen::Index x = coords[2];
160     const Eigen::Index c = coords[3];
161 
162     const float top_left = input_(b, ys_[y].lower, xs_[x].lower, c);
163     const float top_right = input_(b, ys_[y].lower, xs_[x].upper, c);
164     const float bottom_left = input_(b, ys_[y].upper, xs_[x].lower, c);
165     const float bottom_right = input_(b, ys_[y].upper, xs_[x].upper, c);
166     const float ys_lerp = ys_[y].lerp;
167     const float xs_lerp = xs_[x].lerp;
168     return compute_lerp(top_left, top_right, bottom_left, bottom_right, xs_lerp,
169                         ys_lerp);
170   }
171 
172  private:
173   typename TTypes<T, 4>::ConstTensor input_;
174   std::vector<CachedInterpolation> ys_, xs_;
175 };
176 }  // namespace generator
177 
178 // Partial specialization of ResizeBilinear functor for a CPUDevice.
179 namespace functor {
180 template <typename T>
181 struct ResizeBilinear<CPUDevice, T> {
operator ()tensorflow::functor::ResizeBilinear182   void operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor images,
183                   const float height_scale, const float width_scale,
184                   bool half_pixel_centers,
185                   typename TTypes<float, 4>::Tensor output) {
186     const Eigen::Index input_height = images.dimension(1);
187     const Eigen::Index input_width = images.dimension(2);
188 
189     const Eigen::Index output_height = output.dimension(1);
190     const Eigen::Index output_width = output.dimension(2);
191 
192     // Handle no-op resizes efficiently.
193     if (output_height == input_height && output_width == input_width) {
194       output = images.template cast<float>();
195       return;
196     }
197 
198     if (half_pixel_centers) {
199       generator::ResizeBilinearGenerator<T, true> generator(
200           images, output_height, output_width, height_scale, width_scale);
201       output.device(d) = output.generate(std::move(generator));
202     } else {
203       generator::ResizeBilinearGenerator<T, false> generator(
204           images, output_height, output_width, height_scale, width_scale);
205       output.device(d) = output.generate(std::move(generator));
206     }
207   }
208 };
209 }  // namespace functor
210 
211 template <typename Device, typename T>
212 class ResizeBilinearOpGrad : public OpKernel {
213  public:
ResizeBilinearOpGrad(OpKernelConstruction * context)214   explicit ResizeBilinearOpGrad(OpKernelConstruction* context)
215       : OpKernel(context) {
216     OP_REQUIRES_OK(context, context->GetAttr("align_corners", &align_corners_));
217     OP_REQUIRES_OK(
218         context, context->GetAttr("half_pixel_centers", &half_pixel_centers_));
219   }
220 
Compute(OpKernelContext * context)221   void Compute(OpKernelContext* context) override {
222     // Validate input.
223     // First argument is gradient with respect to resized image.
224     const Tensor& input = context->input(0);
225     const Tensor& original_image = context->input(1);
226 
227     ImageResizerGradientState st(align_corners_, half_pixel_centers_);
228     st.ValidateAndCreateOutput(context, input, original_image);
229 
230     if (!context->status().ok()) return;
231 
232     TTypes<float, 4>::ConstTensor input_grad = input.tensor<float, 4>();
233 
234     if (!std::is_same<T, Eigen::half>::value &&
235         !std::is_same<T, Eigen::bfloat16>::value) {
236       typename TTypes<T, 4>::Tensor output_grad(st.output->tensor<T, 4>());
237       functor::ResizeBilinearGrad<Device, T>()(
238           context->eigen_device<Device>(), input_grad, st.height_scale,
239           st.width_scale, half_pixel_centers_, output_grad);
240     } else {
241       // Accumulate output to float instead of half/bfloat16 tensor, since float
242       // accumulation is more numerically stable and GPU half implementation is
243       // slow.
244       // TODO(b/165759037): Create optimized and numerically stable half and
245       // bfloat16 implementation
246       Tensor output_grad;
247       OP_REQUIRES_OK(context, context->allocate_temp(
248                                   DT_FLOAT, st.output->shape(), &output_grad));
249       functor::ResizeBilinearGrad<Device, float>()(
250           context->eigen_device<Device>(), input_grad, st.height_scale,
251           st.width_scale, half_pixel_centers_, output_grad.tensor<float, 4>());
252       const Tensor& output_grad_const = output_grad;
253       CastFloatTo<Device, T>{}(context->template eigen_device<Device>(),
254                                output_grad_const.template flat<float>(),
255                                st.output->template flat<T>());
256     }
257   }
258 
259  private:
260   bool align_corners_;
261   bool half_pixel_centers_;
262 };
263 
264 // Partial specialization of ResizeBilinearGrad functor for a CPUDevice.
265 namespace functor {
266 
267 template <typename T>
268 struct ResizeBilinearGrad<CPUDevice, T> {
269   template <typename Scaler>
ResizeGradCoretensorflow::functor::ResizeBilinearGrad270   void ResizeGradCore(const Scaler& scaler,
271                       typename TTypes<float, 4>::ConstTensor input_grad,
272                       const float height_scale, const float width_scale,
273                       typename TTypes<T, 4>::Tensor output_grad) {
274     const Eigen::Index batch = output_grad.dimension(0);
275     const Eigen::Index original_height = output_grad.dimension(1);
276     const Eigen::Index original_width = output_grad.dimension(2);
277     const Eigen::Index channels = output_grad.dimension(3);
278 
279     const Eigen::Index resized_height = input_grad.dimension(1);
280     const Eigen::Index resized_width = input_grad.dimension(2);
281 
282     output_grad.setZero();
283 
284     // Each resized output pixel was computed as a weighted average of four
285     // input pixels. Here we find the four input pixel locations that
286     // contributed to each output pixel and propagate the gradient at the output
287     // pixel location to each of those four input pixel locations in the same
288     // proportions that they originally contributed to the output pixel.
289     // Here is the forward-propagation pseudo-code, for reference:
290     // resized(b, y, x, c) = top_left     * (1 - y) * (1 - x)
291     //                     + top_right    * (1 - y) *      x
292     //                     + bottom_left  *      y  * (1 - x)
293     //                     + bottom_right *      y  *      x
294     for (Eigen::Index b = 0; b < batch; ++b) {
295       for (Eigen::Index y = 0; y < resized_height; ++y) {
296         const float in_y = scaler(y, height_scale);
297         const Eigen::Index top_y_index =
298             std::max(static_cast<Eigen::Index>(floorf(in_y)),
299                      static_cast<Eigen::Index>(0));
300         const Eigen::Index bottom_y_index = std::min(
301             static_cast<Eigen::Index>(ceilf(in_y)), original_height - 1);
302         const float y_lerp = in_y - floorf(in_y);
303         const float inverse_y_lerp = (1.0f - y_lerp);
304         for (Eigen::Index x = 0; x < resized_width; ++x) {
305           const float in_x = scaler(x, width_scale);
306           const Eigen::Index left_x_index =
307               std::max(static_cast<Eigen::Index>(floorf(in_x)),
308                        static_cast<Eigen::Index>(0));
309           const Eigen::Index right_x_index = std::min(
310               static_cast<Eigen::Index>(ceilf(in_x)), original_width - 1);
311           const float x_lerp = in_x - floorf(in_x);
312           const float inverse_x_lerp = (1.0f - x_lerp);
313           // TODO(b/158287314): Look into vectorizing this.
314           for (Eigen::Index c = 0; c < channels; ++c) {
315             output_grad(b, top_y_index, left_x_index, c) +=
316                 T(input_grad(b, y, x, c) * inverse_y_lerp * inverse_x_lerp);
317             output_grad(b, top_y_index, right_x_index, c) +=
318                 T(input_grad(b, y, x, c) * inverse_y_lerp * x_lerp);
319             output_grad(b, bottom_y_index, left_x_index, c) +=
320                 T(input_grad(b, y, x, c) * y_lerp * inverse_x_lerp);
321             output_grad(b, bottom_y_index, right_x_index, c) +=
322                 T(input_grad(b, y, x, c) * y_lerp * x_lerp);
323           }
324         }
325       }
326     }
327   }
operator ()tensorflow::functor::ResizeBilinearGrad328   void operator()(const CPUDevice& d,
329                   typename TTypes<float, 4>::ConstTensor input_grad,
330                   const float height_scale, const float width_scale,
331                   const bool half_pixel_centers,
332                   typename TTypes<T, 4>::Tensor output_grad) {
333     if (half_pixel_centers) {
334       return ResizeGradCore(HalfPixelScaler(), input_grad, height_scale,
335                             width_scale, output_grad);
336     } else {
337       return ResizeGradCore(LegacyScaler(), input_grad, height_scale,
338                             width_scale, output_grad);
339     }
340   }
341 };
342 
343 }  // namespace functor
344 
345 #define REGISTER_KERNEL(T)                            \
346   REGISTER_KERNEL_BUILDER(Name("ResizeBilinear")      \
347                               .Device(DEVICE_CPU)     \
348                               .TypeConstraint<T>("T") \
349                               .HostMemory("size"),    \
350                           ResizeBilinearOp<CPUDevice, T>);
351 
352 TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL);
353 
354 #undef REGISTER_KERNEL
355 
356 #define REGISTER_GRAD_KERNEL(T)                                             \
357   REGISTER_KERNEL_BUILDER(                                                  \
358       Name("ResizeBilinearGrad").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
359       ResizeBilinearOpGrad<CPUDevice, T>);
360 
361 TF_CALL_half(REGISTER_GRAD_KERNEL);
362 TF_CALL_float(REGISTER_GRAD_KERNEL);
363 TF_CALL_double(REGISTER_GRAD_KERNEL);
364 TF_CALL_bfloat16(REGISTER_GRAD_KERNEL);
365 
366 #undef REGISTER_GRAD_KERNEL
367 
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

// GPU forward kernels: one "ResizeBilinear" registration for each type that
// TF_CALL_GPU_NUMBER_TYPES expands, with the "size" input declared as
// HostMemory.
#define REGISTER_KERNEL(T)             \
  REGISTER_KERNEL_BUILDER(             \
      Name("ResizeBilinear")           \
          .Device(DEVICE_GPU)          \
          .TypeConstraint<T>("T")      \
          .HostMemory("size"),         \
      ResizeBilinearOp<GPUDevice, T>);

TF_CALL_GPU_NUMBER_TYPES(REGISTER_KERNEL);

#undef REGISTER_KERNEL

// GPU gradient kernels, registered for the same set of types.
#define REGISTER_GRAD_KERNEL(T)                        \
  REGISTER_KERNEL_BUILDER(Name("ResizeBilinearGrad")   \
                              .Device(DEVICE_GPU)      \
                              .TypeConstraint<T>("T"), \
                          ResizeBilinearOpGrad<GPUDevice, T>);

TF_CALL_GPU_NUMBER_TYPES(REGISTER_GRAD_KERNEL);

#undef REGISTER_GRAD_KERNEL

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
391 
392 }  // namespace tensorflow
393