1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // See docs in ../ops/nn_ops.cc.
17 
18 #define EIGEN_USE_THREADS
19 
#include "tensorflow/core/kernels/pad_op.h"

#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
36 
37 namespace tensorflow {
38 
39 typedef Eigen::ThreadPoolDevice CPUDevice;
40 typedef Eigen::GpuDevice GPUDevice;
41 #ifdef TENSORFLOW_USE_SYCL
42 typedef Eigen::SyclDevice SYCLDevice;
43 #endif  // TENSORFLOW_USE_SYCL
44 
45 template <typename Device, typename T, typename Tpadding>
46 class PadOp : public OpKernel {
47  public:
PadOp(OpKernelConstruction * context)48   explicit PadOp(OpKernelConstruction* context) : OpKernel(context) {}
49 
Compute(OpKernelContext * context)50   void Compute(OpKernelContext* context) override {
51     const Tensor& in0 = context->input(0);
52     const Tensor& in1 = context->input(1);
53     const int dims = in0.dims();
54     static const int kMinDims = 0;
55     static const int kMaxDims = 6;
56     OP_REQUIRES(context, kMinDims <= dims && dims <= kMaxDims,
57                 errors::Unimplemented("inputs rank not in [", kMinDims, ",",
58                                       kMaxDims, "]: ", dims));
59     OP_REQUIRES(
60         context,
61         TensorShapeUtils::IsMatrix(in1.shape()) && in1.dim_size(1) == 2,
62         errors::InvalidArgument("paddings must be a matrix with 2 columns: ",
63                                 in1.shape().DebugString()));
64     const int fixed_dims =
65         (allow_legacy_scalars() && dims == 0 && in1.dim_size(0) == 1) ? 1
66                                                                       : dims;
67     OP_REQUIRES(
68         context, fixed_dims == in1.dim_size(0),
69         errors::InvalidArgument(
70             "The first dimension of paddings must be the rank of inputs",
71             in1.shape().DebugString(), " ", in0.shape().DebugString()));
72 
73     T pad_value = T();
74     if (context->num_inputs() == 3) {
75       const Tensor& constant_values = context->input(2);
76       OP_REQUIRES(
77           context, TensorShapeUtils::IsScalar(constant_values.shape()),
78           errors::InvalidArgument("constant_values must be a scalar. Found: ",
79                                   constant_values.shape().DebugString()));
80       pad_value = context->input(2).scalar<T>()();
81     }
82 
83     // Compute the shape of the output tensor, and allocate it.
84     TensorShape output_shape;
85     typename TTypes<Tpadding>::ConstMatrix paddings = in1.matrix<Tpadding>();
86     for (int d = 0; d < fixed_dims; ++d) {
87       const Tpadding before_d =
88           paddings(d, 0);                       // Pad before existing elements.
89       const Tpadding after_d = paddings(d, 1);  // Pad after existing elements.
90       OP_REQUIRES(context, before_d >= 0 && after_d >= 0,
91                   errors::InvalidArgument("Paddings must be non-negative: ",
92                                           before_d, " ", after_d));
93       const int64 size_d =
94           (allow_legacy_scalars() && d == in0.dims()) ? 1 : in0.dim_size(d);
95       output_shape.AddDim(before_d + size_d + after_d);
96     }
97 
98     // If there is no padding to be done, forward the input to output.
99     if (output_shape.num_elements() == in0.NumElements()) {
100       // When num_elements == 0, shape may have changed.
101       Tensor out;
102       CHECK(out.CopyFrom(in0, output_shape));
103       context->set_output(0, out);
104       return;
105     }
106 
107     TensorShape collapsed_input_shape;
108     TensorShape collapsed_output_shape;
109     Tensor collapsed_paddings;
110     if (fixed_dims > 1 &&
111         CollapseAdjacentNonPaddedDimensions(
112             in0.shape(), in1, output_shape, &collapsed_input_shape,
113             &collapsed_paddings, &collapsed_output_shape)) {
114       Tensor collapsed_input;
115       CHECK(collapsed_input.CopyFrom(in0, collapsed_input_shape));
116       Tensor collapsed_output;
117       AllocatorAttributes alloc_attrs;
118       alloc_attrs.set_on_host(context->input_memory_type(0) == HOST_MEMORY);
119       OP_REQUIRES_OK(context,
120                      context->allocate_temp(collapsed_input.dtype(),
121                                             collapsed_output_shape,
122                                             &collapsed_output, alloc_attrs));
123       const Tensor& collapsed_paddings_ref = collapsed_paddings;
124       typename TTypes<Tpadding>::ConstMatrix collapsed_paddings_matrix =
125           collapsed_paddings_ref.matrix<Tpadding>();
126 
127       OperateWithVariableRank(context, collapsed_input_shape.dims(),
128                               collapsed_input, collapsed_paddings_matrix,
129                               pad_value, &collapsed_output);
130 
131       Tensor output;
132       CHECK(output.CopyFrom(collapsed_output, output_shape));
133       context->set_output(0, output);
134     } else {
135       Tensor* output = nullptr;
136       OP_REQUIRES_OK(context,
137                      context->allocate_output(0, output_shape, &output));
138       OperateWithVariableRank(context, fixed_dims, in0, paddings, pad_value,
139                               output);
140     }
141   }
142 
143  private:
144   // Collapses adjacent dimensions that are not padded to one dimension for
145   // speed. Returns true if any two dimensions are collapsed. For example,
146   //
147   //   Pad(input_shape=[8, 28, 28, 3],
148   //       paddings=[[0, 0], [0, 0], [0, 0], [0, 1]]
149   // is equivalent to
150   //   Pad(input_shape=[6272, 3],
151   //       paddings=[[0, 0], [0, 1]])
152   //
153   // input_shape: the original input shape.
154   // paddings_as_tensor: the original paddings.
155   // output_shape: the original output shape.
156   // collapsed_input_shape: the input shape after collapsing.
157   // collapsed_paddings_as_tensor: the paddings after collapsing.
158   // collapsed_output_shape: the output shape after collapsing.
CollapseAdjacentNonPaddedDimensions(const TensorShape & input_shape,const Tensor & paddings_as_tensor,const TensorShape & output_shape,TensorShape * collapsed_input_shape,Tensor * collapsed_paddings_as_tensor,TensorShape * collapsed_output_shape)159   static bool CollapseAdjacentNonPaddedDimensions(
160       const TensorShape& input_shape, const Tensor& paddings_as_tensor,
161       const TensorShape& output_shape, TensorShape* collapsed_input_shape,
162       Tensor* collapsed_paddings_as_tensor,
163       TensorShape* collapsed_output_shape) {
164     bool collapsed = false;
165     typename TTypes<Tpadding>::ConstMatrix paddings =
166         paddings_as_tensor.matrix<Tpadding>();
167     std::vector<std::pair<int, int>> collapsed_paddings;
168     int i = 0;
169     while (i < paddings.dimension(0)) {
170       if (paddings(i, 0) != 0 || paddings(i, 1) != 0) {
171         // If padded, copy the original dimension over.
172         collapsed_input_shape->InsertDim(collapsed_input_shape->dims(),
173                                          input_shape.dim_size(i));
174         collapsed_output_shape->InsertDim(collapsed_output_shape->dims(),
175                                           output_shape.dim_size(i));
176         collapsed_paddings.push_back({paddings(i, 0), paddings(i, 1)});
177         ++i;
178       } else {
179         // If not padded, find the next dimension that is padded and collapse
180         // all dimensions in between to one dimension.
181         int64 collapsed_input_dim_size = input_shape.dim_size(i);
182         int64 collapsed_output_dim_size = output_shape.dim_size(i);
183         ++i;
184         while (i < paddings.dimension(0) && paddings(i, 0) == 0 &&
185                paddings(i, 1) == 0) {
186           collapsed = true;
187           collapsed_input_dim_size *= input_shape.dim_size(i);
188           collapsed_output_dim_size *= output_shape.dim_size(i);
189           ++i;
190         }
191         collapsed_input_shape->InsertDim(collapsed_input_shape->dims(),
192                                          collapsed_input_dim_size);
193         collapsed_output_shape->InsertDim(collapsed_output_shape->dims(),
194                                           collapsed_output_dim_size);
195         collapsed_paddings.push_back({0, 0});
196       }
197     }
198 
199     // Copy collapsed_paddings to collapsed_paddings_as_tensor.
200     *collapsed_paddings_as_tensor =
201         Tensor(paddings_as_tensor.dtype(),
202                TensorShape({static_cast<int64>(collapsed_paddings.size()), 2}));
203     auto collapsed_paddings_as_matrix =
204         collapsed_paddings_as_tensor->matrix<Tpadding>();
205     for (size_t i = 0; i < collapsed_paddings.size(); ++i) {
206       collapsed_paddings_as_matrix(i, 0) = collapsed_paddings[i].first;
207       collapsed_paddings_as_matrix(i, 1) = collapsed_paddings[i].second;
208     }
209     return collapsed;
210   }
211 
OperateWithVariableRank(OpKernelContext * context,int fixed_dims,const Tensor & input,typename TTypes<Tpadding>::ConstMatrix paddings,T pad_value,Tensor * output)212   void OperateWithVariableRank(OpKernelContext* context, int fixed_dims,
213                                const Tensor& input,
214                                typename TTypes<Tpadding>::ConstMatrix paddings,
215                                T pad_value, Tensor* output) {
216     // Invoke the dims-specific implementation.
217     switch (fixed_dims) {
218       case 0:
219         Operate<0>(context, input.tensor<T, 0>(), paddings, pad_value, output);
220         break;
221       case 1:
222         // TODO(irving): Once Pad doesn't need a scalar special case,
223         // change flat to tensor.  That is, once !allow_legacy_scalars().
224         Operate<1>(context, input.flat<T>(), paddings, pad_value, output);
225         break;
226       case 2:
227         Operate<2>(context, input.tensor<T, 2>(), paddings, pad_value, output);
228         break;
229       case 3:
230         Operate<3>(context, input.tensor<T, 3>(), paddings, pad_value, output);
231         break;
232       case 4:
233         Operate<4>(context, input.tensor<T, 4>(), paddings, pad_value, output);
234         break;
235       case 5:
236         Operate<5>(context, input.tensor<T, 5>(), paddings, pad_value, output);
237         break;
238       case 6:
239         Operate<6>(context, input.tensor<T, 6>(), paddings, pad_value, output);
240         break;
241       default:
242         OP_REQUIRES(context, false,
243                     errors::InvalidArgument("Only ranks up to 6 supported: ",
244                                             input.shape().DebugString()));
245     }
246   }
247 
248   template <int Dims>
Operate(OpKernelContext * context,typename TTypes<T,Dims>::ConstTensor input,typename TTypes<Tpadding>::ConstMatrix paddings,T pad_value,Tensor * output)249   void Operate(OpKernelContext* context,
250                typename TTypes<T, Dims>::ConstTensor input,
251                typename TTypes<Tpadding>::ConstMatrix paddings, T pad_value,
252                Tensor* output) {
253     CHECK_EQ(Dims, paddings.dimension(0));
254     CHECK_EQ(2, paddings.dimension(1));
255     Eigen::array<Eigen::IndexPair<Tpadding>, Dims> paddings_array;
256     for (int i = 0; i < Dims; ++i) {
257       paddings_array[i] = {paddings(i, 0), paddings(i, 1)};
258     }
259     functor::Pad<Device, T, Tpadding, Dims> functor;
260     functor(context->eigen_device<Device>(), output->tensor<T, Dims>(), input,
261             paddings_array, pad_value);
262   }
263 };
264 
265 #define REGISTER_KERNEL(type)                                     \
266   REGISTER_KERNEL_BUILDER(Name("Pad")                             \
267                               .Device(DEVICE_CPU)                 \
268                               .TypeConstraint<type>("T")          \
269                               .TypeConstraint<int32>("Tpaddings") \
270                               .HostMemory("paddings"),            \
271                           PadOp<CPUDevice, type, int32>);         \
272   REGISTER_KERNEL_BUILDER(Name("Pad")                             \
273                               .Device(DEVICE_CPU)                 \
274                               .TypeConstraint<type>("T")          \
275                               .TypeConstraint<int64>("Tpaddings") \
276                               .HostMemory("paddings"),            \
277                           PadOp<CPUDevice, type, int64>);         \
278   REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
279                               .Device(DEVICE_CPU)                 \
280                               .TypeConstraint<type>("T")          \
281                               .TypeConstraint<int32>("Tpaddings") \
282                               .HostMemory("paddings")             \
283                               .HostMemory("constant_values"),     \
284                           PadOp<CPUDevice, type, int32>);         \
285   REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
286                               .Device(DEVICE_CPU)                 \
287                               .TypeConstraint<type>("T")          \
288                               .TypeConstraint<int64>("Tpaddings") \
289                               .HostMemory("paddings")             \
290                               .HostMemory("constant_values"),     \
291                           PadOp<CPUDevice, type, int64>);
292 
293 TF_CALL_POD_TYPES(REGISTER_KERNEL);
294 TF_CALL_string(REGISTER_KERNEL);
295 #undef REGISTER_KERNEL
296 
297 #if GOOGLE_CUDA
298 // Forward declarations of the functor specializations for GPU.
299 namespace functor {
300 #define DECLARE_GPU_SPEC(T, Dims)                                         \
301   template <>                                                             \
302   void Pad<GPUDevice, T, int32, Dims>::operator()(                        \
303       const GPUDevice& d, typename TTypes<T, Dims>::Tensor output,        \
304       typename TTypes<T, Dims>::ConstTensor input,                        \
305       Eigen::array<Eigen::IndexPair<int32>, Dims> paddings, T pad_value); \
306   extern template struct Pad<GPUDevice, T, int32, Dims>;                  \
307   template <>                                                             \
308   void Pad<GPUDevice, T, int64, Dims>::operator()(                        \
309       const GPUDevice& d, typename TTypes<T, Dims>::Tensor output,        \
310       typename TTypes<T, Dims>::ConstTensor input,                        \
311       Eigen::array<Eigen::IndexPair<int64>, Dims> paddings, T pad_value); \
312   extern template struct Pad<GPUDevice, T, int64, Dims>;
313 
314 #define DECLARE_GPU_SPECS(T) \
315   DECLARE_GPU_SPEC(T, 0);    \
316   DECLARE_GPU_SPEC(T, 1);    \
317   DECLARE_GPU_SPEC(T, 2);    \
318   DECLARE_GPU_SPEC(T, 3);    \
319   DECLARE_GPU_SPEC(T, 4);    \
320   DECLARE_GPU_SPEC(T, 5);    \
321   DECLARE_GPU_SPEC(T, 6);
322 
323 TF_CALL_GPU_ALL_TYPES(DECLARE_GPU_SPECS);
324 TF_CALL_int8(DECLARE_GPU_SPECS);
325 TF_CALL_uint8(DECLARE_GPU_SPECS);
326 }  // namespace functor
327 
328 // Registration of the GPU implementations.
329 #define REGISTER_GPU_KERNEL(T)                                    \
330   REGISTER_KERNEL_BUILDER(Name("Pad")                             \
331                               .Device(DEVICE_GPU)                 \
332                               .TypeConstraint<T>("T")             \
333                               .TypeConstraint<int32>("Tpaddings") \
334                               .HostMemory("paddings"),            \
335                           PadOp<GPUDevice, T, int32>);            \
336   REGISTER_KERNEL_BUILDER(Name("Pad")                             \
337                               .Device(DEVICE_GPU)                 \
338                               .TypeConstraint<T>("T")             \
339                               .TypeConstraint<int64>("Tpaddings") \
340                               .HostMemory("paddings"),            \
341                           PadOp<GPUDevice, T, int64>);            \
342   REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
343                               .Device(DEVICE_GPU)                 \
344                               .TypeConstraint<T>("T")             \
345                               .TypeConstraint<int32>("Tpaddings") \
346                               .HostMemory("paddings")             \
347                               .HostMemory("constant_values"),     \
348                           PadOp<GPUDevice, T, int32>)             \
349   REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
350                               .Device(DEVICE_GPU)                 \
351                               .TypeConstraint<T>("T")             \
352                               .TypeConstraint<int64>("Tpaddings") \
353                               .HostMemory("paddings")             \
354                               .HostMemory("constant_values"),     \
355                           PadOp<GPUDevice, T, int64>)
356 
357 TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNEL);
358 TF_CALL_int8(REGISTER_GPU_KERNEL);
359 TF_CALL_uint8(REGISTER_GPU_KERNEL);
360 
361 // A special GPU kernel for int32.
362 // TODO(b/25387198): Also enable int32 in device memory. This kernel
363 // registration requires all int32 inputs and outputs to be in host memory.
364 REGISTER_KERNEL_BUILDER(Name("Pad")
365                             .Device(DEVICE_GPU)
366                             .TypeConstraint<int32>("T")
367                             .TypeConstraint<int32>("Tpaddings")
368                             .HostMemory("input")
369                             .HostMemory("paddings")
370                             .HostMemory("output"),
371                         PadOp<CPUDevice, int32, int32>);
372 REGISTER_KERNEL_BUILDER(Name("Pad")
373                             .Device(DEVICE_GPU)
374                             .TypeConstraint<int32>("T")
375                             .TypeConstraint<int64>("Tpaddings")
376                             .HostMemory("input")
377                             .HostMemory("paddings")
378                             .HostMemory("output"),
379                         PadOp<CPUDevice, int32, int64>);
380 REGISTER_KERNEL_BUILDER(Name("PadV2")
381                             .Device(DEVICE_GPU)
382                             .TypeConstraint<int32>("T")
383                             .TypeConstraint<int32>("Tpaddings")
384                             .HostMemory("input")
385                             .HostMemory("paddings")
386                             .HostMemory("constant_values")
387                             .HostMemory("output"),
388                         PadOp<CPUDevice, int32, int32>);
389 REGISTER_KERNEL_BUILDER(Name("PadV2")
390                             .Device(DEVICE_GPU)
391                             .TypeConstraint<int32>("T")
392                             .TypeConstraint<int64>("Tpaddings")
393                             .HostMemory("input")
394                             .HostMemory("paddings")
395                             .HostMemory("constant_values")
396                             .HostMemory("output"),
397                         PadOp<CPUDevice, int32, int64>);
398 #endif
399 
400 #ifdef TENSORFLOW_USE_SYCL
401 // Registration of the GPU implementations.
402 #define REGISTER_SYCL_KERNEL(T)                                   \
403   REGISTER_KERNEL_BUILDER(Name("Pad")                             \
404                               .Device(DEVICE_SYCL)                \
405                               .TypeConstraint<T>("T")             \
406                               .TypeConstraint<int32>("Tpaddings") \
407                               .HostMemory("paddings"),            \
408                           PadOp<SYCLDevice, T, int32>);           \
409   REGISTER_KERNEL_BUILDER(Name("Pad")                             \
410                               .Device(DEVICE_SYCL)                \
411                               .TypeConstraint<T>("T")             \
412                               .TypeConstraint<int64>("Tpaddings") \
413                               .HostMemory("paddings"),            \
414                           PadOp<SYCLDevice, T, int64>);           \
415   REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
416                               .Device(DEVICE_SYCL)                \
417                               .TypeConstraint<T>("T")             \
418                               .TypeConstraint<int32>("Tpaddings") \
419                               .HostMemory("paddings")             \
420                               .HostMemory("constant_values"),     \
421                           PadOp<SYCLDevice, T, int32>)            \
422   REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
423                               .Device(DEVICE_SYCL)                \
424                               .TypeConstraint<T>("T")             \
425                               .TypeConstraint<int64>("Tpaddings") \
426                               .HostMemory("paddings")             \
427                               .HostMemory("constant_values"),     \
428                           PadOp<SYCLDevice, T, int64>)
429 
430 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL);
431 REGISTER_KERNEL_BUILDER(Name("Pad")
432                             .Device(DEVICE_SYCL)
433                             .TypeConstraint<int32>("T")
434                             .TypeConstraint<int32>("Tpaddings")
435                             .HostMemory("input")
436                             .HostMemory("paddings")
437                             .HostMemory("output"),
438                         PadOp<CPUDevice, int32, int32>);
439 REGISTER_KERNEL_BUILDER(Name("Pad")
440                             .Device(DEVICE_SYCL)
441                             .TypeConstraint<int32>("T")
442                             .TypeConstraint<int64>("Tpaddings")
443                             .HostMemory("input")
444                             .HostMemory("paddings")
445                             .HostMemory("output"),
446                         PadOp<CPUDevice, int32, int64>);
447 REGISTER_KERNEL_BUILDER(Name("PadV2")
448                             .Device(DEVICE_SYCL)
449                             .TypeConstraint<int32>("T")
450                             .TypeConstraint<int32>("Tpaddings")
451                             .HostMemory("input")
452                             .HostMemory("paddings")
453                             .HostMemory("constant_values")
454                             .HostMemory("output"),
455                         PadOp<CPUDevice, int32, int32>);
456 REGISTER_KERNEL_BUILDER(Name("PadV2")
457                             .Device(DEVICE_SYCL)
458                             .TypeConstraint<int32>("T")
459                             .TypeConstraint<int64>("Tpaddings")
460                             .HostMemory("input")
461                             .HostMemory("paddings")
462                             .HostMemory("constant_values")
463                             .HostMemory("output"),
464                         PadOp<CPUDevice, int32, int64>);
465 #undef REGISTER_SYCL_KERNEL
466 #endif  // TENSORFLOW_USE_SYCL
467 
468 }  // end namespace tensorflow
469