1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // See docs in ../ops/nn_ops.cc.
17 
18 #define EIGEN_USE_THREADS
19 
20 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
21 #include "tensorflow/core/framework/numeric_op.h"
22 #include "tensorflow/core/framework/op_kernel.h"
23 #include "tensorflow/core/framework/tensor.h"
24 #include "tensorflow/core/framework/tensor_shape.h"
25 #include "tensorflow/core/kernels/ops_util.h"
26 #include "tensorflow/core/kernels/pooling_ops_common.h"
27 #include "tensorflow/core/lib/core/errors.h"
28 #include "tensorflow/core/platform/logging.h"
29 #include "tensorflow/core/util/padding.h"
30 #include "tensorflow/core/util/tensor_format.h"
31 
32 namespace tensorflow {
33 
34 typedef Eigen::ThreadPoolDevice CPUDevice;
35 
36 template <typename Device, typename T>
37 class QuantizedAvgPoolingOp : public OpKernel {
38  public:
QuantizedAvgPoolingOp(OpKernelConstruction * context)39   explicit QuantizedAvgPoolingOp(OpKernelConstruction* context)
40       : OpKernel(context) {
41     OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
42     OP_REQUIRES(context, ksize_.size() == 4,
43                 errors::InvalidArgument("Sliding window ksize field must "
44                                         "specify 4 dimensions"));
45     OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
46     OP_REQUIRES(context, stride_.size() == 4,
47                 errors::InvalidArgument("Sliding window strides field must "
48                                         "specify 4 dimensions"));
49     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
50     OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
51                 errors::Unimplemented(
52                     "Pooling is not yet supported on the batch dimension."));
53   }
54 
Compute(OpKernelContext * context)55   void Compute(OpKernelContext* context) override {
56     const Tensor& tensor_in = context->input(0);
57     PoolParameters params{context,  ksize_,      stride_,
58                           padding_, FORMAT_NHWC, tensor_in.shape()};
59     if (!context->status().ok()) {
60       return;
61     }
62 
63     const float min_input = context->input(1).flat<float>()(0);
64     const float max_input = context->input(2).flat<float>()(0);
65 
66     OP_REQUIRES(context, params.depth_window == 1,
67                 errors::Unimplemented("Non-spatial pooling is not "
68                                       "yet supported. Volunteers? :)"));
69 
70     OP_REQUIRES(context, tensor_in.dims() == 4,
71                 errors::InvalidArgument("tensor_in must be 4-dimensional"));
72 
73     Tensor* output = nullptr;
74     OP_REQUIRES_OK(context, context->allocate_output(
75                                 0, params.forward_output_shape(), &output));
76     const int32 highest = static_cast<int32>(Eigen::NumTraits<T>::highest());
77     const int32 lowest = static_cast<int32>(Eigen::NumTraits<T>::lowest());
78 
79     // TODO(vrv): Switch this to the Eigen::Tensor version of
80     // SpatialAvgPooling once that version is running quickly.
81     Tensor int32_output(DT_INT32, params.forward_output_shape());
82     // Cast input to int32 tensor and call SpatialAvgPool.
83     Tensor int32_input(DT_INT32, tensor_in.shape());
84     int32_input.flat<int32>() = tensor_in.flat<T>().template cast<int32>();
85     SpatialAvgPool<Device, int32>(context, &int32_output, int32_input, params,
86                                   padding_);
87 
88     // Clamp the int32 output back into quantized space.
89     output->flat<T>() = int32_output.flat<int32>()
90                             .cwiseMax(lowest)
91                             .cwiseMin(highest)
92                             .template cast<T>();
93 
94     Tensor* output_min = nullptr;
95     OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
96     output_min->flat<float>()(0) = min_input;
97     Tensor* output_max = nullptr;
98     OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max));
99     output_max->flat<float>()(0) = max_input;
100   }
101 
102  private:
103   std::vector<int32> ksize_;
104   std::vector<int32> stride_;
105   Padding padding_;
106 };
107 
108 template <typename Device, typename T>
109 class QuantizedMaxPoolingOp : public MaxPoolingOp<Device, T> {
110  public:
QuantizedMaxPoolingOp(OpKernelConstruction * context)111   explicit QuantizedMaxPoolingOp(OpKernelConstruction* context)
112       : MaxPoolingOp<Device, T>(context) {}
113 
Compute(OpKernelContext * context)114   void Compute(OpKernelContext* context) override {
115     const float min_input = context->input(1).flat<float>()(0);
116     const float max_input = context->input(2).flat<float>()(0);
117     MaxPoolingOp<Device, T>::Compute(context);
118     Tensor* output_min = nullptr;
119     OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
120     output_min->flat<float>()(0) = min_input;
121     Tensor* output_max = nullptr;
122     OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max));
123     output_max->flat<float>()(0) = max_input;
124   }
125 };
126 
127 REGISTER_KERNEL_BUILDER(
128     Name("QuantizedAvgPool").Device(DEVICE_CPU).TypeConstraint<quint8>("T"),
129     QuantizedAvgPoolingOp<CPUDevice, quint8>);
130 
131 REGISTER_KERNEL_BUILDER(
132     Name("QuantizedMaxPool").Device(DEVICE_CPU).TypeConstraint<quint8>("T"),
133     QuantizedMaxPoolingOp<CPUDevice, quint8>);
134 
135 }  // namespace tensorflow
136