/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/nn_ops.cc.

#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/pad_op.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
#ifdef TENSORFLOW_USE_SYCL
typedef Eigen::SyclDevice SYCLDevice;
#endif  // TENSORFLOW_USE_SYCL

template <typename Device, typename T, typename Tpadding>
class PadOp : public OpKernel {
 public:
  explicit PadOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& in0 = context->input(0);
    const Tensor& in1 = context->input(1);
    const int dims = in0.dims();
    static const int kMinDims = 0;
    static const int kMaxDims = 6;
    OP_REQUIRES(context, kMinDims <= dims && dims <= kMaxDims,
                errors::Unimplemented("inputs rank not in [", kMinDims, ",",
                                      kMaxDims, "]: ", dims));
    OP_REQUIRES(
        context,
        TensorShapeUtils::IsMatrix(in1.shape()) && in1.dim_size(1) == 2,
        errors::InvalidArgument("paddings must be a matrix with 2 columns: ",
                                in1.shape().DebugString()));
    const int fixed_dims =
        (allow_legacy_scalars() && dims == 0 && in1.dim_size(0) == 1) ? 1
                                                                      : dims;
    OP_REQUIRES(
        context, fixed_dims == in1.dim_size(0),
        errors::InvalidArgument(
            "The first dimension of paddings must be the rank of inputs: ",
            in1.shape().DebugString(), " ", in0.shape().DebugString()));

    T pad_value = T();
    if (context->num_inputs() == 3) {
      const Tensor& constant_values = context->input(2);
      OP_REQUIRES(
          context, TensorShapeUtils::IsScalar(constant_values.shape()),
          errors::InvalidArgument("constant_values must be a scalar. Found: ",
                                  constant_values.shape().DebugString()));
      pad_value = context->input(2).scalar<T>()();
    }

    // Compute the shape of the output tensor, and allocate it.
    TensorShape output_shape;
    typename TTypes<Tpadding>::ConstMatrix paddings = in1.matrix<Tpadding>();
    for (int d = 0; d < fixed_dims; ++d) {
      const Tpadding before_d =
          paddings(d, 0);  // Pad before existing elements.
      const Tpadding after_d = paddings(d, 1);  // Pad after existing elements.
      OP_REQUIRES(context, before_d >= 0 && after_d >= 0,
                  errors::InvalidArgument("Paddings must be non-negative: ",
                                          before_d, " ", after_d));
      const int64 size_d =
          (allow_legacy_scalars() && d == in0.dims()) ? 1 : in0.dim_size(d);
      output_shape.AddDim(before_d + size_d + after_d);
    }

    // If there is no padding to be done, forward the input to output.
    if (output_shape.num_elements() == in0.NumElements()) {
      // When num_elements == 0, shape may have changed.
      Tensor out;
      CHECK(out.CopyFrom(in0, output_shape));
      context->set_output(0, out);
      return;
    }

    TensorShape collapsed_input_shape;
    TensorShape collapsed_output_shape;
    Tensor collapsed_paddings;
    if (fixed_dims > 1 &&
        CollapseAdjacentNonPaddedDimensions(
            in0.shape(), in1, output_shape, &collapsed_input_shape,
            &collapsed_paddings, &collapsed_output_shape)) {
      Tensor collapsed_input;
      CHECK(collapsed_input.CopyFrom(in0, collapsed_input_shape));
      Tensor collapsed_output;
      AllocatorAttributes alloc_attrs;
      alloc_attrs.set_on_host(context->input_memory_type(0) == HOST_MEMORY);
      OP_REQUIRES_OK(context,
                     context->allocate_temp(collapsed_input.dtype(),
                                            collapsed_output_shape,
                                            &collapsed_output, alloc_attrs));
      const Tensor& collapsed_paddings_ref = collapsed_paddings;
      typename TTypes<Tpadding>::ConstMatrix collapsed_paddings_matrix =
          collapsed_paddings_ref.matrix<Tpadding>();

      OperateWithVariableRank(context, collapsed_input_shape.dims(),
                              collapsed_input, collapsed_paddings_matrix,
                              pad_value, &collapsed_output);

      Tensor output;
      CHECK(output.CopyFrom(collapsed_output, output_shape));
      context->set_output(0, output);
    } else {
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context,
                     context->allocate_output(0, output_shape, &output));
      OperateWithVariableRank(context, fixed_dims, in0, paddings, pad_value,
                              output);
    }
  }

 private:
  // Collapses adjacent dimensions that are not padded into one dimension for
  // speed. Returns true if any two dimensions were collapsed. For example,
  //
  //   Pad(input_shape=[8, 28, 28, 3],
  //       paddings=[[0, 0], [0, 0], [0, 0], [0, 1]])
  // is equivalent to
  //   Pad(input_shape=[6272, 3],
  //       paddings=[[0, 0], [0, 1]])
  //
  // A standalone sketch illustrating this collapsing step appears at the end
  // of this file.
  //
  // input_shape: the original input shape.
  // paddings_as_tensor: the original paddings.
  // output_shape: the original output shape.
  // collapsed_input_shape: the input shape after collapsing.
  // collapsed_paddings_as_tensor: the paddings after collapsing.
  // collapsed_output_shape: the output shape after collapsing.
  static bool CollapseAdjacentNonPaddedDimensions(
      const TensorShape& input_shape, const Tensor& paddings_as_tensor,
      const TensorShape& output_shape, TensorShape* collapsed_input_shape,
      Tensor* collapsed_paddings_as_tensor,
      TensorShape* collapsed_output_shape) {
    bool collapsed = false;
    typename TTypes<Tpadding>::ConstMatrix paddings =
        paddings_as_tensor.matrix<Tpadding>();
    // Use Tpadding (not int) pairs so that int64 padding values are not
    // narrowed when copied into the collapsed paddings.
    std::vector<std::pair<Tpadding, Tpadding>> collapsed_paddings;
    int i = 0;
    while (i < paddings.dimension(0)) {
      if (paddings(i, 0) != 0 || paddings(i, 1) != 0) {
        // If padded, copy the original dimension over.
        collapsed_input_shape->InsertDim(collapsed_input_shape->dims(),
                                         input_shape.dim_size(i));
        collapsed_output_shape->InsertDim(collapsed_output_shape->dims(),
                                          output_shape.dim_size(i));
        collapsed_paddings.push_back({paddings(i, 0), paddings(i, 1)});
        ++i;
      } else {
        // If not padded, find the next dimension that is padded and collapse
        // all dimensions in between to one dimension.
        int64 collapsed_input_dim_size = input_shape.dim_size(i);
        int64 collapsed_output_dim_size = output_shape.dim_size(i);
        ++i;
        while (i < paddings.dimension(0) && paddings(i, 0) == 0 &&
               paddings(i, 1) == 0) {
          collapsed = true;
          collapsed_input_dim_size *= input_shape.dim_size(i);
          collapsed_output_dim_size *= output_shape.dim_size(i);
          ++i;
        }
        collapsed_input_shape->InsertDim(collapsed_input_shape->dims(),
                                         collapsed_input_dim_size);
        collapsed_output_shape->InsertDim(collapsed_output_shape->dims(),
                                          collapsed_output_dim_size);
        collapsed_paddings.push_back({0, 0});
      }
    }

    // Copy collapsed_paddings to collapsed_paddings_as_tensor.
    *collapsed_paddings_as_tensor =
        Tensor(paddings_as_tensor.dtype(),
               TensorShape({static_cast<int64>(collapsed_paddings.size()), 2}));
    auto collapsed_paddings_as_matrix =
        collapsed_paddings_as_tensor->matrix<Tpadding>();
    for (size_t i = 0; i < collapsed_paddings.size(); ++i) {
      collapsed_paddings_as_matrix(i, 0) = collapsed_paddings[i].first;
      collapsed_paddings_as_matrix(i, 1) = collapsed_paddings[i].second;
    }
    return collapsed;
  }

  void OperateWithVariableRank(OpKernelContext* context, int fixed_dims,
                               const Tensor& input,
                               typename TTypes<Tpadding>::ConstMatrix paddings,
                               T pad_value, Tensor* output) {
    // Invoke the dims-specific implementation.
    switch (fixed_dims) {
      case 0:
        Operate<0>(context, input.tensor<T, 0>(), paddings, pad_value, output);
        break;
      case 1:
        // TODO(irving): Once Pad doesn't need a scalar special case,
        // change flat to tensor.  That is, once !allow_legacy_scalars().
        Operate<1>(context, input.flat<T>(), paddings, pad_value, output);
        break;
      case 2:
        Operate<2>(context, input.tensor<T, 2>(), paddings, pad_value, output);
        break;
      case 3:
        Operate<3>(context, input.tensor<T, 3>(), paddings, pad_value, output);
        break;
      case 4:
        Operate<4>(context, input.tensor<T, 4>(), paddings, pad_value, output);
        break;
      case 5:
        Operate<5>(context, input.tensor<T, 5>(), paddings, pad_value, output);
        break;
      case 6:
        Operate<6>(context, input.tensor<T, 6>(), paddings, pad_value, output);
        break;
      default:
        OP_REQUIRES(context, false,
                    errors::InvalidArgument("Only ranks up to 6 supported: ",
                                            input.shape().DebugString()));
    }
  }

  template <int Dims>
  void Operate(OpKernelContext* context,
               typename TTypes<T, Dims>::ConstTensor input,
               typename TTypes<Tpadding>::ConstMatrix paddings, T pad_value,
               Tensor* output) {
    CHECK_EQ(Dims, paddings.dimension(0));
    CHECK_EQ(2, paddings.dimension(1));
    Eigen::array<Eigen::IndexPair<Tpadding>, Dims> paddings_array;
    for (int i = 0; i < Dims; ++i) {
      paddings_array[i] = {paddings(i, 0), paddings(i, 1)};
    }
    functor::Pad<Device, T, Tpadding, Dims> functor;
    functor(context->eigen_device<Device>(), output->tensor<T, Dims>(), input,
            paddings_array, pad_value);
  }
};

#define REGISTER_KERNEL(type)                                     \
  REGISTER_KERNEL_BUILDER(Name("Pad")                             \
                              .Device(DEVICE_CPU)                 \
                              .TypeConstraint<type>("T")          \
                              .TypeConstraint<int32>("Tpaddings") \
                              .HostMemory("paddings"),            \
                          PadOp<CPUDevice, type, int32>);         \
  REGISTER_KERNEL_BUILDER(Name("Pad")                             \
                              .Device(DEVICE_CPU)                 \
                              .TypeConstraint<type>("T")          \
                              .TypeConstraint<int64>("Tpaddings") \
                              .HostMemory("paddings"),            \
                          PadOp<CPUDevice, type, int64>);         \
  REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
                              .Device(DEVICE_CPU)                 \
                              .TypeConstraint<type>("T")          \
                              .TypeConstraint<int32>("Tpaddings") \
                              .HostMemory("paddings")             \
                              .HostMemory("constant_values"),     \
                          PadOp<CPUDevice, type, int32>);         \
  REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
                              .Device(DEVICE_CPU)                 \
                              .TypeConstraint<type>("T")          \
                              .TypeConstraint<int64>("Tpaddings") \
                              .HostMemory("paddings")             \
                              .HostMemory("constant_values"),     \
                          PadOp<CPUDevice, type, int64>);

TF_CALL_POD_TYPES(REGISTER_KERNEL);
TF_CALL_string(REGISTER_KERNEL);
#undef REGISTER_KERNEL

#if GOOGLE_CUDA
// Forward declarations of the functor specializations for GPU.
namespace functor {
#define DECLARE_GPU_SPEC(T, Dims)                                          \
  template <>                                                              \
  void Pad<GPUDevice, T, int32, Dims>::operator()(                        \
      const GPUDevice& d, typename TTypes<T, Dims>::Tensor output,        \
      typename TTypes<T, Dims>::ConstTensor input,                        \
      Eigen::array<Eigen::IndexPair<int32>, Dims> paddings, T pad_value); \
  extern template struct Pad<GPUDevice, T, int32, Dims>;                  \
  template <>                                                              \
  void Pad<GPUDevice, T, int64, Dims>::operator()(                        \
      const GPUDevice& d, typename TTypes<T, Dims>::Tensor output,        \
      typename TTypes<T, Dims>::ConstTensor input,                        \
      Eigen::array<Eigen::IndexPair<int64>, Dims> paddings, T pad_value); \
  extern template struct Pad<GPUDevice, T, int64, Dims>;

#define DECLARE_GPU_SPECS(T) \
  DECLARE_GPU_SPEC(T, 0);    \
  DECLARE_GPU_SPEC(T, 1);    \
  DECLARE_GPU_SPEC(T, 2);    \
  DECLARE_GPU_SPEC(T, 3);    \
  DECLARE_GPU_SPEC(T, 4);    \
  DECLARE_GPU_SPEC(T, 5);    \
  DECLARE_GPU_SPEC(T, 6);

TF_CALL_GPU_ALL_TYPES(DECLARE_GPU_SPECS);
TF_CALL_int8(DECLARE_GPU_SPECS);
TF_CALL_uint8(DECLARE_GPU_SPECS);
}  // namespace functor

// Registration of the GPU implementations.
#define REGISTER_GPU_KERNEL(T)                                    \
  REGISTER_KERNEL_BUILDER(Name("Pad")                             \
                              .Device(DEVICE_GPU)                 \
                              .TypeConstraint<T>("T")             \
                              .TypeConstraint<int32>("Tpaddings") \
                              .HostMemory("paddings"),            \
                          PadOp<GPUDevice, T, int32>);            \
  REGISTER_KERNEL_BUILDER(Name("Pad")                             \
                              .Device(DEVICE_GPU)                 \
                              .TypeConstraint<T>("T")             \
                              .TypeConstraint<int64>("Tpaddings") \
                              .HostMemory("paddings"),            \
                          PadOp<GPUDevice, T, int64>);            \
  REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
                              .Device(DEVICE_GPU)                 \
                              .TypeConstraint<T>("T")             \
                              .TypeConstraint<int32>("Tpaddings") \
                              .HostMemory("paddings")             \
                              .HostMemory("constant_values"),     \
                          PadOp<GPUDevice, T, int32>);            \
  REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
                              .Device(DEVICE_GPU)                 \
                              .TypeConstraint<T>("T")             \
                              .TypeConstraint<int64>("Tpaddings") \
                              .HostMemory("paddings")             \
                              .HostMemory("constant_values"),     \
                          PadOp<GPUDevice, T, int64>);

TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNEL);
TF_CALL_int8(REGISTER_GPU_KERNEL);
TF_CALL_uint8(REGISTER_GPU_KERNEL);

// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
364 REGISTER_KERNEL_BUILDER(Name("Pad") 365 .Device(DEVICE_GPU) 366 .TypeConstraint<int32>("T") 367 .TypeConstraint<int32>("Tpaddings") 368 .HostMemory("input") 369 .HostMemory("paddings") 370 .HostMemory("output"), 371 PadOp<CPUDevice, int32, int32>); 372 REGISTER_KERNEL_BUILDER(Name("Pad") 373 .Device(DEVICE_GPU) 374 .TypeConstraint<int32>("T") 375 .TypeConstraint<int64>("Tpaddings") 376 .HostMemory("input") 377 .HostMemory("paddings") 378 .HostMemory("output"), 379 PadOp<CPUDevice, int32, int64>); 380 REGISTER_KERNEL_BUILDER(Name("PadV2") 381 .Device(DEVICE_GPU) 382 .TypeConstraint<int32>("T") 383 .TypeConstraint<int32>("Tpaddings") 384 .HostMemory("input") 385 .HostMemory("paddings") 386 .HostMemory("constant_values") 387 .HostMemory("output"), 388 PadOp<CPUDevice, int32, int32>); 389 REGISTER_KERNEL_BUILDER(Name("PadV2") 390 .Device(DEVICE_GPU) 391 .TypeConstraint<int32>("T") 392 .TypeConstraint<int64>("Tpaddings") 393 .HostMemory("input") 394 .HostMemory("paddings") 395 .HostMemory("constant_values") 396 .HostMemory("output"), 397 PadOp<CPUDevice, int32, int64>); 398 #endif 399 400 #ifdef TENSORFLOW_USE_SYCL 401 // Registration of the GPU implementations. 402 #define REGISTER_SYCL_KERNEL(T) \ 403 REGISTER_KERNEL_BUILDER(Name("Pad") \ 404 .Device(DEVICE_SYCL) \ 405 .TypeConstraint<T>("T") \ 406 .TypeConstraint<int32>("Tpaddings") \ 407 .HostMemory("paddings"), \ 408 PadOp<SYCLDevice, T, int32>); \ 409 REGISTER_KERNEL_BUILDER(Name("Pad") \ 410 .Device(DEVICE_SYCL) \ 411 .TypeConstraint<T>("T") \ 412 .TypeConstraint<int64>("Tpaddings") \ 413 .HostMemory("paddings"), \ 414 PadOp<SYCLDevice, T, int64>); \ 415 REGISTER_KERNEL_BUILDER(Name("PadV2") \ 416 .Device(DEVICE_SYCL) \ 417 .TypeConstraint<T>("T") \ 418 .TypeConstraint<int32>("Tpaddings") \ 419 .HostMemory("paddings") \ 420 .HostMemory("constant_values"), \ 421 PadOp<SYCLDevice, T, int32>) \ 422 REGISTER_KERNEL_BUILDER(Name("PadV2") \ 423 .Device(DEVICE_SYCL) \ 424 .TypeConstraint<T>("T") \ 425 .TypeConstraint<int64>("Tpaddings") \ 426 .HostMemory("paddings") \ 427 .HostMemory("constant_values"), \ 428 PadOp<SYCLDevice, T, int64>) 429 430 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL); 431 REGISTER_KERNEL_BUILDER(Name("Pad") 432 .Device(DEVICE_SYCL) 433 .TypeConstraint<int32>("T") 434 .TypeConstraint<int32>("Tpaddings") 435 .HostMemory("input") 436 .HostMemory("paddings") 437 .HostMemory("output"), 438 PadOp<CPUDevice, int32, int32>); 439 REGISTER_KERNEL_BUILDER(Name("Pad") 440 .Device(DEVICE_SYCL) 441 .TypeConstraint<int32>("T") 442 .TypeConstraint<int64>("Tpaddings") 443 .HostMemory("input") 444 .HostMemory("paddings") 445 .HostMemory("output"), 446 PadOp<CPUDevice, int32, int64>); 447 REGISTER_KERNEL_BUILDER(Name("PadV2") 448 .Device(DEVICE_SYCL) 449 .TypeConstraint<int32>("T") 450 .TypeConstraint<int32>("Tpaddings") 451 .HostMemory("input") 452 .HostMemory("paddings") 453 .HostMemory("constant_values") 454 .HostMemory("output"), 455 PadOp<CPUDevice, int32, int32>); 456 REGISTER_KERNEL_BUILDER(Name("PadV2") 457 .Device(DEVICE_SYCL) 458 .TypeConstraint<int32>("T") 459 .TypeConstraint<int64>("Tpaddings") 460 .HostMemory("input") 461 .HostMemory("paddings") 462 .HostMemory("constant_values") 463 .HostMemory("output"), 464 PadOp<CPUDevice, int32, int64>); 465 #undef REGISTER_SYCL_KERNEL 466 #endif // TENSORFLOW_USE_SYCL 467 468 } // end namespace tensorflow 469