1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #define EIGEN_USE_THREADS
16 
17 #include <algorithm>
18 #include <cmath>
19 #include <random>
20 #include <vector>
21 
22 #include "tensorflow/core/kernels/fractional_pool_common.h"
23 
24 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
25 #include "tensorflow/core/framework/numeric_op.h"
26 #include "tensorflow/core/framework/op_kernel.h"
27 #include "tensorflow/core/lib/random/random.h"
28 #include "tensorflow/core/platform/logging.h"
29 #include "tensorflow/core/platform/mutex.h"
30 #include "tensorflow/core/util/guarded_philox_random.h"
31 
32 namespace tensorflow {
33 typedef Eigen::ThreadPoolDevice CPUDevice;
34 
35 template <typename T>
36 class FractionalAvgPoolOp : public OpKernel {
37  public:
FractionalAvgPoolOp(OpKernelConstruction * context)38   explicit FractionalAvgPoolOp(OpKernelConstruction* context)
39       : OpKernel(context) {
40     OP_REQUIRES_OK(context, context->GetAttr("pooling_ratio", &pooling_ratio_));
41     OP_REQUIRES_OK(context, context->GetAttr("pseudo_random", &pseudo_random_));
42     OP_REQUIRES_OK(context, context->GetAttr("overlapping", &overlapping_));
43     OP_REQUIRES(context, pooling_ratio_.size() == 4,
44                 errors::InvalidArgument(
45                     "pooling_ratio field must specify 4 dimensions"));
46     OP_REQUIRES(
47         context, pooling_ratio_[0] == 1 || pooling_ratio_[3] == 1,
48         errors::Unimplemented("Fractional average pooling is not yet "
49                               "supported on the batch nor channel dimension."));
50     OP_REQUIRES_OK(context, context->GetAttr("deterministic", &deterministic_));
51     OP_REQUIRES_OK(context, context->GetAttr("seed", &seed_));
52     OP_REQUIRES_OK(context, context->GetAttr("seed2", &seed2_));
53     if (deterministic_) {
54       // If both seeds are not set when deterministic_ is true, force set seeds.
55       if ((seed_ == 0) && (seed2_ == 0)) {
56         seed_ = random::New64();
57         seed2_ = random::New64();
58       }
59     } else {
60       OP_REQUIRES(
61           context, (seed_ == 0) && (seed2_ == 0),
62           errors::InvalidArgument(
63               "Both seed and seed2 should be 0 if deterministic is false."));
64     }
65   }
66 
Compute(OpKernelContext * context)67   void Compute(OpKernelContext* context) override {
68     typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
69         ConstEigenMatrixMap;
70     typedef Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
71         EigenMatrixMap;
72 
73     constexpr int tensor_in_and_out_dims = 4;
74 
75     const Tensor& tensor_in = context->input(0);
76     OP_REQUIRES(context, tensor_in.dims() == tensor_in_and_out_dims,
77                 errors::InvalidArgument("tensor_in must be 4-dimensional"));
78 
79     std::vector<int> input_size(tensor_in_and_out_dims);
80     std::vector<int> output_size(tensor_in_and_out_dims);
81     for (int i = 0; i < tensor_in_and_out_dims; ++i) {
82       input_size[i] = tensor_in.dim_size(i);
83     }
84     // Output size.
85     for (int i = 0; i < tensor_in_and_out_dims; ++i) {
86       output_size[i] =
87           static_cast<int>(floor(input_size[i] / pooling_ratio_[i]));
88       DCHECK_GT(output_size[i], 0);
89     }
90 
91     // Generate pooling sequence.
92     std::vector<int64> row_cum_seq;
93     std::vector<int64> col_cum_seq;
94     GuardedPhiloxRandom generator;
95     generator.Init(seed_, seed2_);
96     row_cum_seq = GeneratePoolingSequence(input_size[1], output_size[1],
97                                           &generator, pseudo_random_);
98     col_cum_seq = GeneratePoolingSequence(input_size[2], output_size[2],
99                                           &generator, pseudo_random_);
100 
101     // Prepare output.
102     Tensor* output_tensor = nullptr;
103     OP_REQUIRES_OK(context, context->allocate_output(
104                                 0,
105                                 TensorShape({output_size[0], output_size[1],
106                                              output_size[2], output_size[3]}),
107                                 &output_tensor));
108     Tensor* output_row_seq_tensor = nullptr;
109     OP_REQUIRES_OK(context,
110                    context->allocate_output(
111                        1, TensorShape({static_cast<int64>(row_cum_seq.size())}),
112                        &output_row_seq_tensor));
113     Tensor* output_col_seq_tensor = nullptr;
114     OP_REQUIRES_OK(context,
115                    context->allocate_output(
116                        2, TensorShape({static_cast<int64>(col_cum_seq.size())}),
117                        &output_col_seq_tensor));
118 
119     ConstEigenMatrixMap in_mat(tensor_in.flat<T>().data(), input_size[3],
120                                input_size[2] * input_size[1] * input_size[0]);
121 
122     EigenMatrixMap out_mat(output_tensor->flat<T>().data(), output_size[3],
123                            output_size[2] * output_size[1] * output_size[0]);
124     // out_count corresponds to number of elements in each pooling cell.
125     Eigen::Matrix<T, Eigen::Dynamic, 1> out_count(out_mat.cols());
126 
127     // Initializes the output tensor and out_count with 0.
128     out_mat.setZero();
129     out_count.setZero();
130 
131     auto output_row_seq_flat = output_row_seq_tensor->flat<int64>();
132     auto output_col_seq_flat = output_col_seq_tensor->flat<int64>();
133 
134     // Set output tensors.
135     for (int i = 0; i < row_cum_seq.size(); ++i) {
136       output_row_seq_flat(i) = row_cum_seq[i];
137     }
138 
139     for (int i = 0; i < col_cum_seq.size(); ++i) {
140       output_col_seq_flat(i) = col_cum_seq[i];
141     }
142 
143     // For both input and output,
144     // 0: batch
145     // 1: row / row
146     // 2: col / col
147     // 3: depth / channel
148     const int64 row_max = input_size[1] - 1;
149     const int64 col_max = input_size[2] - 1;
150     for (int64 b = 0; b < input_size[0]; ++b) {
151       // row sequence.
152       for (int64 hs = 0; hs < row_cum_seq.size() - 1; ++hs) {
153         // row start and end.
154         const int64 row_start = row_cum_seq[hs];
155         int64 row_end =
156             overlapping_ ? row_cum_seq[hs + 1] : row_cum_seq[hs + 1] - 1;
157         row_end = std::min(row_end, row_max);
158 
159         // col sequence.
160         for (int64 ws = 0; ws < col_cum_seq.size() - 1; ++ws) {
161           const int64 out_offset =
162               (b * output_size[1] + hs) * output_size[2] + ws;
163           // col start and end.
164           const int64 col_start = col_cum_seq[ws];
165           int64 col_end =
166               overlapping_ ? col_cum_seq[ws + 1] : col_cum_seq[ws + 1] - 1;
167           col_end = std::min(col_end, col_max);
168           for (int64 h = row_start; h <= row_end; ++h) {
169             for (int64 w = col_start; w <= col_end; ++w) {
170               const int64 in_offset =
171                   (b * input_size[1] + h) * input_size[2] + w;
172               out_mat.col(out_offset) += in_mat.col(in_offset);
173               out_count(out_offset)++;
174             }
175           }
176         }
177       }
178     }
179     DCHECK_GT(out_count.minCoeff(), 0);
180     out_mat.array().rowwise() /= out_count.transpose().array();
181   }
182 
183  private:
184   bool deterministic_;
185   int64 seed_;
186   int64 seed2_;
187   std::vector<float> pooling_ratio_;
188   bool pseudo_random_;
189   bool overlapping_;
190 };
191 
192 #define REGISTER_FRACTIONALAVGPOOL(type)                                      \
193   REGISTER_KERNEL_BUILDER(                                                    \
194       Name("FractionalAvgPool").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
195       FractionalAvgPoolOp<type>)
196 
197 REGISTER_FRACTIONALAVGPOOL(int32);
198 REGISTER_FRACTIONALAVGPOOL(int64);
199 REGISTER_FRACTIONALAVGPOOL(float);
200 REGISTER_FRACTIONALAVGPOOL(double);
201 
202 #undef REGISTER_FRACTIONALAVGPOOL
203 
204 template <class T>
205 class FractionalAvgPoolGradOp : public OpKernel {
206  public:
FractionalAvgPoolGradOp(OpKernelConstruction * context)207   explicit FractionalAvgPoolGradOp(OpKernelConstruction* context)
208       : OpKernel(context) {
209     OP_REQUIRES_OK(context, context->GetAttr("overlapping", &overlapping_));
210   }
211 
Compute(OpKernelContext * context)212   void Compute(OpKernelContext* context) override {
213     // Here's the basic idea:
214     // Batch and depth dimension are independent from row and col dimension. And
215     // because FractionalAvgPool currently only support pooling along row and
216     // col, we can basically think of this 4D tensor backpropagation as
217     // operation of a series of 2D planes.
218     //
219     // For each element of a 'slice' (2D plane) of output_backprop, we need to
220     // figure out its contributors when doing FractionalAvgPool operation. This
221     // can be done based on row_pooling_sequence, col_pooling_seq and
222     // overlapping.
223     // Once we figure out the original contributors, we just need to evenly
224     // divide the value of this element among these contributors.
225     //
226     // Internally, we divide the out_backprop tensor and store it in a temporary
227     // tensor of double type. And cast it to the corresponding type.
228     typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
229         ConstEigenMatrixMap;
230     typedef Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>>
231         EigenDoubleMatrixMap;
232 
233     // Grab the inputs.
234     const Tensor& orig_input_tensor_shape = context->input(0);
235     OP_REQUIRES(context,
236                 orig_input_tensor_shape.dims() == 1 &&
237                     orig_input_tensor_shape.NumElements() == 4,
238                 errors::InvalidArgument("original input tensor shape must be"
239                                         "1-dimensional and 4 elements"));
240     const Tensor& out_backprop = context->input(1);
241     const Tensor& row_seq_tensor = context->input(2);
242     const Tensor& col_seq_tensor = context->input(3);
243 
244     const int64 out_batch = out_backprop.dim_size(0);
245     const int64 out_rows = out_backprop.dim_size(1);
246     const int64 out_cols = out_backprop.dim_size(2);
247     const int64 out_depth = out_backprop.dim_size(3);
248 
249     auto row_seq_tensor_flat = row_seq_tensor.flat<int64>();
250     auto col_seq_tensor_flat = col_seq_tensor.flat<int64>();
251     auto orig_input_tensor_shape_flat = orig_input_tensor_shape.flat<int64>();
252 
253     const int64 in_batch = orig_input_tensor_shape_flat(0);
254     const int64 in_rows = orig_input_tensor_shape_flat(1);
255     const int64 in_cols = orig_input_tensor_shape_flat(2);
256     const int64 in_depth = orig_input_tensor_shape_flat(3);
257 
258     constexpr int tensor_in_and_out_dims = 4;
259     // Transform orig_input_tensor_shape into TensorShape
260     TensorShape in_shape;
261     for (auto i = 0; i < tensor_in_and_out_dims; ++i) {
262       in_shape.AddDim(orig_input_tensor_shape_flat(i));
263     }
264 
265     // Create intermediate in_backprop.
266     Tensor in_backprop_tensor_temp;
267     OP_REQUIRES_OK(context, context->forward_input_or_allocate_temp(
268                                 {0}, DataTypeToEnum<double>::v(), in_shape,
269                                 &in_backprop_tensor_temp));
270     in_backprop_tensor_temp.flat<double>().setZero();
271     // Transform 4D tensor to 2D matrix.
272     EigenDoubleMatrixMap in_backprop_tensor_temp_mat(
273         in_backprop_tensor_temp.flat<double>().data(), in_depth,
274         in_cols * in_rows * in_batch);
275     ConstEigenMatrixMap out_backprop_mat(out_backprop.flat<T>().data(),
276                                          out_depth,
277                                          out_cols * out_rows * out_batch);
278     // Loop through each element of out_backprop and evenly distribute the
279     // element to the corresponding pooling cell.
280     const int64 in_max_row_index = in_rows - 1;
281     const int64 in_max_col_index = in_cols - 1;
282     for (int64 b = 0; b < out_batch; ++b) {
283       for (int64 r = 0; r < out_rows; ++r) {
284         const int64 in_row_start = row_seq_tensor_flat(r);
285         int64 in_row_end = overlapping_ ? row_seq_tensor_flat(r + 1)
286                                         : row_seq_tensor_flat(r + 1) - 1;
287         in_row_end = std::min(in_row_end, in_max_row_index);
288         for (int64 c = 0; c < out_cols; ++c) {
289           const int64 in_col_start = col_seq_tensor_flat(c);
290           int64 in_col_end = overlapping_ ? col_seq_tensor_flat(c + 1)
291                                           : col_seq_tensor_flat(c + 1) - 1;
292           in_col_end = std::min(in_col_end, in_max_col_index);
293 
294           const int64 num_elements_in_pooling_cell =
295               (in_row_end - in_row_start + 1) * (in_col_end - in_col_start + 1);
296           const int64 out_index = (b * out_rows + r) * out_cols + c;
297           // Now we can evenly distribute out_backprop(b, h, w, *) to
298           // in_backprop(b, hs:he, ws:we, *).
299           for (int64 in_r = in_row_start; in_r <= in_row_end; ++in_r) {
300             for (int64 in_c = in_col_start; in_c <= in_col_end; ++in_c) {
301               const int64 in_index = (b * in_rows + in_r) * in_cols + in_c;
302               // Walk through each channel (depth).
303               for (int64 d = 0; d < out_depth; ++d) {
304                 const double out_backprop_element = static_cast<double>(
305                     out_backprop_mat.coeffRef(d, out_index));
306                 double& in_backprop_ref =
307                     in_backprop_tensor_temp_mat.coeffRef(d, in_index);
308                 in_backprop_ref +=
309                     out_backprop_element / num_elements_in_pooling_cell;
310               }
311             }
312           }
313         }
314       }
315     }
316 
317     // Depending on the type, cast double to type T.
318     Tensor* in_backprop_tensor = nullptr;
319     OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
320                                 {0}, 0, in_shape, &in_backprop_tensor));
321     auto in_backprop_tensor_flat = in_backprop_tensor->flat<T>();
322     auto in_backprop_tensor_temp_flat = in_backprop_tensor_temp.flat<double>();
323     for (int64 i = 0; i < in_backprop_tensor_flat.size(); ++i) {
324       in_backprop_tensor_flat(i) =
325           static_cast<T>(in_backprop_tensor_temp_flat(i));
326     }
327   }
328 
329  private:
330   bool overlapping_;
331 };
332 
333 #define REGISTER_FRACTIONALAVGPOOLGRAD(type)              \
334   REGISTER_KERNEL_BUILDER(Name("FractionalAvgPoolGrad")   \
335                               .Device(DEVICE_CPU)         \
336                               .TypeConstraint<type>("T"), \
337                           FractionalAvgPoolGradOp<type>)
338 
339 REGISTER_FRACTIONALAVGPOOLGRAD(int32);
340 REGISTER_FRACTIONALAVGPOOLGRAD(int64);
341 REGISTER_FRACTIONALAVGPOOLGRAD(float);
342 REGISTER_FRACTIONALAVGPOOLGRAD(double);
343 
344 #undef REGISTER_FRACTIONALAVGPOOLGRAD
345 }  // namespace tensorflow
346