1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // This is an internal header file intended to only be included as the
17 // front-matter in the implementation files of various reduction ops.  It
18 // is a header file because we split the various reduction ops into their
19 // own compilation units to get more parallelism in compilation.
20 
21 #ifndef TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_
22 #define TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_
23 
24 #define EIGEN_USE_THREADS
25 
26 #include "third_party/eigen3/Eigen/Core"
27 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
28 
29 #include "tensorflow/core/framework/numeric_op.h"
30 #include "tensorflow/core/framework/op_kernel.h"
31 #include "tensorflow/core/framework/register_types.h"
32 #include "tensorflow/core/framework/tensor.h"
33 #include "tensorflow/core/framework/types.h"
34 #include "tensorflow/core/kernels/reduction_ops.h"
35 #include "tensorflow/core/kernels/transpose_functor.h"
36 #include "tensorflow/core/lib/core/status.h"
37 #include "tensorflow/core/lib/gtl/inlined_vector.h"
38 #include "tensorflow/core/platform/logging.h"
39 
40 namespace tensorflow {
41 
42 typedef Eigen::ThreadPoolDevice CPUDevice;
43 typedef Eigen::GpuDevice GPUDevice;
44 #ifdef TENSORFLOW_USE_SYCL
45 typedef Eigen::SyclDevice SYCLDevice;
46 #endif  // TENSORFLOW_USE_SYCL
47 
48 template <typename Device>
49 struct Constants {
50   // Derive Index type. int (32-bit) or long (64-bit) depending on the
51   // compile-time configuration. "float" here is not relevant.
52   // TODO(zhifengc): Moves the definition to TTypes.
53   typedef TTypes<float>::Tensor::Index Index;
54   Eigen::array<Index, 1> kZero;
55   Eigen::array<Index, 1> kOne;
56   Eigen::array<Index, 2> kZeroTwo;
57 
ConstantsConstants58   Constants() {
59     kZero[0] = 0;
60     kOne[0] = 1;
61     kZeroTwo[0] = 0;
62     kZeroTwo[1] = 2;
63   }
64 };
65 
#ifdef EIGEN_HAS_INDEX_LIST
// Variant of the reduction-axis constants that encodes each axis in the type
// (Eigen::type2index) instead of storing it in a runtime array. Only
// available when Eigen is built with IndexList support.
struct ConstantsBase {
  // Axis list {0}.
  const Eigen::IndexList<Eigen::type2index<0>> kZero;
  // Axis list {1}.
  const Eigen::IndexList<Eigen::type2index<1>> kOne;
  // Axis list {0, 2}.
  const Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<2>> kZeroTwo;
};

// The CPU device uses the IndexList-based constants.
template <>
struct Constants<CPUDevice> : public ConstantsBase {};

#ifdef TENSORFLOW_USE_SYCL
// The SYCL device uses the IndexList-based constants as well.
template <>
struct Constants<SYCLDevice> : public ConstantsBase {};
#endif  // TENSORFLOW_USE_SYCL
#endif  // EIGEN_HAS_INDEX_LIST
79 
80 class ReductionHelper {
81  public:
82   ReductionHelper() : reduce_first_axis_(false) {}
83 
84   Status Simplify(const Tensor& data, const Tensor& axis, const bool keep_dims);
85 
86   // We need to do roughly:
87   //   tmp_out = allocate(out_reshape())
88   //   tmp_out.reshape(out_reshape) = data.reshape(data_reshape).reduce(axes)
89   //   out = tmp_out.reshape(out_shape)
90 
91   // The reduction result must be allocated with this shape.
92   TensorShape out_reshape() const;
93 
94   // The final output shape must be allocated with this shape.
95   TensorShape out_shape() const;
96 
97   // The reduction is on a reshaped tensor of this rank.
98   int ndims() const { return data_reshape_.size(); }
99 
100   // True if need to reduce the 0-th dimension.
101   bool reduce_first_axis() const { return reduce_first_axis_; }
102 
103   // The output is reshaped.
104   template <typename T, int N>
105   typename TTypes<T, N>::Tensor out(Tensor* out) {
106     return out->shaped<T, N>(out_reshape_);
107   }
108 
109   // The input is reshaped.
110   template <typename T, int N>
111   typename TTypes<T, N>::ConstTensor in(const Tensor& data) {
112     return data.shaped<T, N>(data_reshape_);
113   }
114 
115   // Shape of shuffled input
116   TensorShape data_reshape() const {
117     TensorShape shape;
118     for (auto s : data_reshape_) shape.AddDim(s);
119     return shape;
120   }
121 
122   // Shape with all reduction dimensions at the end
123   TensorShape shuffled_shape();
124 
125   // Permutation of reduced dims needed to put reduction dimensions at the end
126   gtl::InlinedVector<int32, 8> permutation();
127 
128  private:
129   bool reduce_first_axis_;  // True if need to reduce the 0-th dimension.
130   gtl::InlinedVector<int64, 4> data_reshape_;  // Reshape data before reduction.
131   gtl::InlinedVector<int64, 4> out_shape_;     // The final output shape.
132   gtl::InlinedVector<int64, 4> out_reshape_;   // Reshape output for reduction.
133 };
134 
135 // For operations where the output is a reduction function along some
136 // dimensions of the input.
137 template <typename Device, class T, typename Tperm, typename Reducer>
138 class ReductionOp : public OpKernel {
139  public:
140   explicit ReductionOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
141     const DataType dt = DataTypeToEnum<T>::v();
142     const DataType pt = DataTypeToEnum<Tperm>::v();
143     OP_REQUIRES_OK(ctx, ctx->MatchSignature({dt, pt}, {dt}));
144 
145     OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
146   }
147 
148   void Compute(OpKernelContext* ctx) override {
149     const Tensor& data = ctx->input(0);
150     const Tensor& axes = ctx->input(1);
151     VLOG(1) << "data shape: " << data.shape().DebugString();
152     VLOG(1) << "axes      : " << axes.SummarizeValue(10);
153 
154     ReductionHelper helper;
155     OP_REQUIRES_OK(ctx, helper.Simplify(data, axes, keep_dims_));
156     CHECK_GE(helper.ndims(), 0);
157 
158     if (helper.ndims() == 0 ||
159         (helper.ndims() == 1 && !helper.reduce_first_axis())) {
160       // Special case. Reduces nothing.  It is unclear why this is
161       // necessary, but tests fail without it.  Look into why this
162       // case occurs.
163       Tensor out;
164       if (!out.CopyFrom(data, helper.out_shape())) {
165         ctx->SetStatus(errors::Internal("Error during reduction copy."));
166       }
167       ctx->set_output(0, out);
168       return;
169     }
170 
171     // We must allocate temp tensors using the same alloc attr as
172     // output(0) because it is returned as output(0) in the end.
173     const AllocatorAttributes alloc_attr = ctx->output_alloc_attr(0);
174 
175     // A temporary tensor whose size matches the size of the reduced
176     // output.
177     Tensor tmp_out;
178     OP_REQUIRES_OK(
179         ctx, ctx->allocate_temp(ctx->expected_output_dtype(0),
180                                 helper.out_reshape(), &tmp_out, alloc_attr));
181 
182     typedef functor::ReduceFunctor<Device, Reducer> Functor;
183     Constants<Device> constants;
184     const Device& d = ctx->eigen_device<Device>();
185     Reducer reducer;
186 
187     if (tmp_out.NumElements() == 0) {
188       // Nothing to do, fall through to final reshaping.
189     } else if (data.NumElements() == 0) {
190       // Degenerate reduction where the input is empty but the output is
191       // nonempty (thus tmp_out.NumElements() > 0), and we must fill the output
192       // with identity elements.  Example: tf.reduce_sum(tf.zeros((0, 3)), [0]).
193       // Eigen sometimes crashes in this case, so we do it manually.
194       Functor::FillIdentity(d, tmp_out.flat<T>(), reducer);
195     } else if ((helper.ndims() == 1) && helper.reduce_first_axis()) {
196       // Reduce to a scalar.
197       Functor::Reduce(ctx, helper.out<T, 0>(&tmp_out), helper.in<T, 1>(data),
198                       constants.kZero, reducer);
199     } else if ((helper.ndims() == 2) && helper.reduce_first_axis()) {
200       // Can be viewed as a reduction of a matrix along 1st dimension.
201       Functor::Reduce(ctx, helper.out<T, 1>(&tmp_out), helper.in<T, 2>(data),
202                       constants.kZero, reducer);
203     } else if ((helper.ndims() == 2) && !helper.reduce_first_axis()) {
204       // Can be viewed as a reduction of a matrix along 2nd dimension.
205       Functor::Reduce(ctx, helper.out<T, 1>(&tmp_out), helper.in<T, 2>(data),
206                       constants.kOne, reducer);
207     } else if ((helper.ndims() == 3) && helper.reduce_first_axis()) {
208       // Can be viewed as a reduction of a 3D tensor along 1st and 3rd
209       // dimensions.
210       Functor::Reduce(ctx, helper.out<T, 1>(&tmp_out), helper.in<T, 3>(data),
211                       constants.kZeroTwo, reducer);
212     } else if ((helper.ndims() == 3) && !helper.reduce_first_axis()) {
213       // Can be viewed as a reduction of a 3D tensor along 2nd dimension.
214       Functor::Reduce(ctx, helper.out<T, 2>(&tmp_out), helper.in<T, 3>(data),
215                       constants.kOne, reducer);
216     } else {
217       // If we don't hit one of the cases above, transpose the data so that
218       // all reduced dimensions are last and reuse the 2-D -> 1-D case.
219       Tensor data_reshaped;
220       CHECK(data_reshaped.CopyFrom(data, helper.data_reshape()));
221       Tensor shuffled;
222       OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
223                                              helper.shuffled_shape(), &shuffled,
224                                              alloc_attr));
225       OP_REQUIRES_OK(
226           ctx, DoTranspose(d, data_reshaped, helper.permutation(), &shuffled));
227       const int64 unreduced = tmp_out.NumElements();
228       const int64 reduced = shuffled.NumElements() / unreduced;
229       const Tensor& const_shuffled = shuffled;
230       Functor::Reduce(ctx, tmp_out.flat<T>(),
231                       const_shuffled.shaped<T, 2>({unreduced, reduced}),
232                       constants.kOne, reducer);
233     }
234 
235     // Set the real output using the contents of the reduction but the
236     // real expected output shape.  The number of elements should
237     // match between the two shapes.
238     Tensor out;
239     if (!out.CopyFrom(tmp_out, helper.out_shape())) {
240       ctx->SetStatus(errors::Internal("Error during reduction copy."));
241     }
242     ctx->set_output(0, out);
243   }
244 
245  private:
246   // True if the number of dimensions should be maintained.
247   bool keep_dims_;
248 };
249 
250 namespace functor {
251 
252 template <typename Device, typename Reducer>
253 struct ReduceFunctorBase {
254   template <typename OUT_T, typename IN_T, typename ReductionAxes>
255   static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in,
256                      const ReductionAxes& reduction_axes,
257                      const Reducer& reducer) {
258     const Device& d = ctx->eigen_device<Device>();
259     ReduceEigenImpl<Device, OUT_T, IN_T, ReductionAxes, Reducer> reducer_impl;
260     reducer_impl(d, out, in, reduction_axes, reducer);
261   }
262 
263   template <typename OUT_T>
264   static void FillIdentity(const Device& d, OUT_T out, const Reducer& reducer) {
265     FillIdentityEigenImpl(d, out, reducer);
266   }
267 };
268 
269 template <typename Reducer>
270 struct ReduceFunctor<CPUDevice, Reducer>
271     : ReduceFunctorBase<CPUDevice, Reducer> {};
272 #if TENSORFLOW_USE_SYCL
273 template <typename Reducer>
274 struct ReduceFunctor<SYCLDevice, Reducer>
275     : ReduceFunctorBase<SYCLDevice, Reducer> {};
276 #endif  // TENSORFLOW_USE_SYCL
277 
278 }  // namespace functor
279 }  // namespace tensorflow
280 
281 #endif  // TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_
282