1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_
17 #define TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_
18 
19 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
20 #include "tensorflow/core/kernels/eigen_volume_patch.h"
21 
22 namespace Eigen {
23 
24 /** SpatialMaxPooling
25  * \ingroup CXX11_NeuralNetworks_Module
26  *
27  * \brief Applies a max-pooling over a multichannel input image.
28  *
 * The input parameter is expected to be a tensor with a rank of 4 (channels,
 * height, width, others in col-major, and the reverse of that in row-major).
31  *
32  * The result can be assigned to a tensor of rank equal to the rank of the
33  * input. The dimensions of the result will be channels, height, width, and
34  * others (in col-major, and the reverse of that if the input was row-major).
35  *
36  * The order of the width and height dimensions can be swapped if needed.
37  *
38  */
// The two declarations below denote the same expression type; the
// EIGEN_HAS_INDEX_LIST variant encodes the reduction dimensions in the type
// (Eigen::IndexList of type2index) so the compiler can optimize the reduction.
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        const Eigen::array<int, 2>,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        typename internal::conditional<
            internal::traits<Input>::Layout == ColMajor,
            const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
            const Eigen::IndexList<Eigen::type2index<2>,
                                   Eigen::type2index<3> > >::type,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#endif
SpatialMaxPooling(const Input& input, DenseIndex patchRows,
                  DenseIndex patchCols, DenseIndex strideRows,
                  DenseIndex strideCols, const PaddingType padding_type,
                  DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
  // Spatial (2D) pooling is only defined for rank-4 inputs.
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);

  typedef typename internal::traits<Input>::Index TensorIndex;
  // TensorRef gives cheap access to the input's dimensions without forcing
  // evaluation of the (possibly lazy) input expression.
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // Effective patch extents once the in-patch (dilation) strides are applied:
  // k samples taken every s inputs span k + (k - 1) * (s - 1) input elements.
  const DenseIndex patchRowsEff =
      patchRows + (patchRows - 1) * (in_strideRows - 1);
  const DenseIndex patchColsEff =
      patchCols + (patchCols - 1) * (in_strideCols - 1);

  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
  // Row/col dimension indices are mirrored for row-major layouts.
  static const int idxRows = isColMajor ? 1 : 2;
  static const int idxCols = isColMajor ? 2 : 1;

  // Molds the output of the reduction into the shape expected by the user.
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output height
  // - 3rd dim: output width
  // - 4th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID padding: only windows that fit entirely inside the input count.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRowsEff + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchColsEff + 1,
        strideCols);
  } else {
    // Otherwise (SAME padding): one output per stride step; windows at the
    // borders may extend into padding.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[3] = in.dimension(3);

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 2> reduction_dims;
  if (isColMajor) {
    reduction_dims[0] = 1;
    reduction_dims[1] = 2;
  } else {
    reduction_dims[0] = 2;
    reduction_dims[1] = 3;
  }
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  typename internal::conditional<
      internal::traits<Input>::Layout == ColMajor,
      const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
      const Eigen::IndexList<Eigen::type2index<2>,
                             Eigen::type2index<3> > >::type reduction_dims;
#endif

  // Padded elements are filled with the scalar type's lowest value so they
  // can never win the max reduction.
  return input
      .extract_image_patches(
          patchRows, patchCols, strideRows, strideCols, in_strideRows,
          in_strideCols, padding_type,
          Eigen::NumTraits<typename internal::remove_const<
              typename internal::traits<Input>::Scalar>::type>::lowest())
      .maximum(reduction_dims)
      .reshape(post_reduce_dims);
}
139 
140 /** CuboidMaxPooling
141  * \ingroup CXX11_NeuralNetworks_Module
142  *
143  * \brief Applies a max-pooling over a multichannel input volume.
144  *
145  * The input parameter is expected to be a tensor with a rank of 5 (channels,
146  * depth, height, width, others in col-major, and the reverse of that in
147  * row-major).
148  *
149  * The result can be assigned to a tensor of rank equal to the rank of the
150  * input. The dimensions of the result will be channels, depth, height, width,
151  * and others (in col-major, and the reverse of that if the input was
152  * row-major).
153  *
154  * The order of the depth, width and height dimensions can be swapped if
155  * needed.
156  *
157  */
// The two declarations below denote the same expression type; the
// EIGEN_HAS_INDEX_LIST variant encodes the reduction dimension in the type.
// NOTE(review): the declared reduction scalar is hard-coded to float
// (internal::MaxReducer<float> and the float padding value below), so this
// overload effectively targets float inputs — confirm before using it with
// other scalar types.
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<float>, const Eigen::array<int, 1>,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<float>,
        const Eigen::IndexList<Eigen::type2index<1> >,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#endif
CuboidMaxPooling(const Input& input, DenseIndex patchPlanes,
                 DenseIndex patchRows, DenseIndex patchCols,
                 DenseIndex stridePlanes, DenseIndex strideRows,
                 DenseIndex strideCols, const PaddingType padding_type) {
  // Cuboid (3D) pooling is only defined for rank-5 inputs.
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);

  typedef typename internal::traits<Input>::Index TensorIndex;
  // TensorRef gives cheap access to the input's dimensions without forcing
  // evaluation of the (possibly lazy) input expression.
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // Plane/row/col dimension indices are mirrored for row-major layouts;
  // rows sit in the middle either way.
  static const int idxPlanes = isColMajor ? 1 : 3;
  static const int idxRows = 2;
  static const int idxCols = isColMajor ? 3 : 1;

  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output depth
  // - 3rd dim: output height
  // - 4th dim: output width
  // - 5th dim and beyond: everything else including batch size
  Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID padding: only windows that fit entirely inside the input count.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)) - patchPlanes + 1,
        stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRows + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchCols + 1,
        strideCols);
  } else {
    // Otherwise (SAME padding): one output per stride step.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)), stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[4] = in.dimension(4);

  // Collapse the patch expression to rank 3 — (pre dims, patch size, post
  // dims) — so a single reduction over dimension 1 maxes each patch.
  Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
  pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
  if (isColMajor) {
    pre_reduce_dims[0] = post_reduce_dims[0];
    pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
                         post_reduce_dims[3] * post_reduce_dims[4];
  } else {
    pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
                         post_reduce_dims[2] * post_reduce_dims[3];
    pre_reduce_dims[2] = post_reduce_dims[4];
  }

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 1> reduction_dims;
  reduction_dims[0] = 1;
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
#endif
  // Padded elements are filled with -highest() (== lowest() for float) so
  // they can never win the max reduction.
  return input
      .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
                              strideRows, strideCols, padding_type,
                              -Eigen::NumTraits<float>::highest())
      .reshape(pre_reduce_dims)
      .maximum(reduction_dims)
      .reshape(post_reduce_dims);
}
257 
258 /** SpatialAvgPooling
259  * \ingroup CXX11_NeuralNetworks_Module
260  *
261  * \brief Applies an average pooling over a multichannel input image.
262  *
263  * The input parameter is expected to be a tensor with a rank of 4 (channels,
264  * height, width, others in col-major, and the reverse of that in row-major).
265  *
266  * The result can be assigned to a tensor of rank equal to the rank of the
267  * input. The dimensions of the result will be channels, height, width, and
268  * others (in col-major, and the reverse of that if the input was row-major).
269  *
270  * The order of the width and height dimensions can be swapped if needed.
271  *
272  */
273 namespace internal {
274 
// Reducer that computes the mean of the values it sees while skipping any
// value equal to the sentinel -Eigen::NumTraits<T>::highest(). The pooling
// functions below pass that sentinel as the patch-extraction padding value,
// so padded elements do not contribute to the average.
template <typename T>
struct AvgPoolMeanReducer {
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) && \
    !defined(__HIPCC__)
  // We only support packet access for floats.
  static constexpr bool PacketAccess = internal::is_same<T, float>::value;
#else
  static const bool PacketAccess = false;
#endif
  // Stateful: the element counts live in the reducer instance itself.
  static constexpr bool IsStateful = true;

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE AvgPoolMeanReducer() : scalarCount_(0) {
    typedef typename packet_traits<T>::type Packet;
    packetCount_ = pset1<Packet>(T(0.0));
  }

  // Scalar path: accumulate t and count it, unless it is the padding
  // sentinel.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
    if (t != -Eigen::NumTraits<T>::highest()) {
      (*accum) = (*accum) + t;
      scalarCount_++;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(0);
  }

  // Mean of the accumulated values; requires at least one non-sentinel
  // element to have been reduced.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    eigen_assert(scalarCount_ > 0);
    return accum / T(scalarCount_);
  }

#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) && \
    !defined(__HIPCC__)
// pequal(a, b): per-lane all-ones mask where a == b.
// psel(a, b, false_mask): per-lane select — b where the mask is set, else a.
#ifdef EIGEN_VECTORIZE_AVX512
#define pequal(a, b)   \
  _mm512_castsi512_ps( \
      _mm512_maskz_set1_epi32(_mm512_cmp_ps_mask(a, b, _CMP_EQ_UQ), -1))

  // The ternarylogic function immediate determines the values in the result
  // In the case below, 0xd8 implies (false_mask) ? (b) : (a)
  // For details, refer to the vpternlogd instruction table at
  // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-2c-manual.pdf

#define psel(a, b, false_mask)                        \
  _mm512_castsi512_ps(_mm512_ternarylogic_epi32(      \
      _mm512_castps_si512(a), _mm512_castps_si512(b), \
      _mm512_castps_si512(false_mask), 0xd8))
#elif defined EIGEN_VECTORIZE_AVX
#define pequal(a, b) _mm256_cmp_ps(a, b, _CMP_EQ_UQ)
#define psel(a, b, false_mask) _mm256_blendv_ps(a, b, false_mask)
#else
#define pequal(a, b) _mm_cmpeq_ps(a, b)
#define psel(a, b, false_mask) \
  _mm_or_ps(_mm_andnot_ps(false_mask, a), _mm_and_ps(false_mask, b))
#endif

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p,
                                                          Packet* accum) {
    reducePacketWithType(static_cast<T>(0), p, accum);
  }

  // Vectorized path: mask out the sentinel lanes, add the remaining lanes to
  // the accumulator, and bump the matching per-lane counters.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacketWithType(
      T, const Packet& p, Packet* accum) {
    Packet skip_mask =
        pequal(p, pset1<Packet>(-Eigen::NumTraits<T>::highest()));
    (*accum) = padd<Packet>(*accum, psel(p, pset1<Packet>(0), skip_mask));
    packetCount_ = padd<Packet>(
        packetCount_, psel(pset1<Packet>(1), pset1<Packet>(0), skip_mask));
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(0);
  }

  // Per-lane mean for the packet accumulator.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
  finalizePacket(const Packet& vaccum) const {
    return pdiv(vaccum, packetCount_);
  }
  // Combined mean across the scalar and packet accumulators.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T
  finalizeBoth(const T saccum, const Packet& vaccum) const {
    return (saccum + predux(vaccum)) / (scalarCount_ + predux(packetCount_));
  }
#endif

 protected:
  typedef typename packet_traits<T>::type Packet;
  // Number of non-sentinel elements seen by the scalar path.
  int scalarCount_;
  // Per-lane non-sentinel element counts for the packet path.
  Packet packetCount_;
};
370 
// Traits for the float mean-with-padding reducer on generic (CPU) devices:
// packet access mirrors the reducer's own PacketAccess condition.
template <typename Device>
struct reducer_traits<AvgPoolMeanReducer<float>, Device> {
  enum {
    Cost = 1,
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) && \
    !defined(__HIPCC__)
    // We only support packet access for floats.
    PacketAccess = true,
#else
    PacketAccess = false,
#endif
    IsStateful = true,
    IsExactlyAssociative = false
  };
};
386 
// On GPU devices packet access is disabled for this reducer.
template <>
struct reducer_traits<AvgPoolMeanReducer<float>, GpuDevice> {
  enum {
    Cost = 1,
    PacketAccess = false,
    IsStateful = true,
    IsExactlyAssociative = false
  };
};
396 
397 }  // namespace internal
398 
// The two declarations below denote the same expression type; the
// EIGEN_HAS_INDEX_LIST variant encodes the reduction dimensions in the type
// (Eigen::IndexList of type2index) so the compiler can optimize the reduction.
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        const Eigen::array<int, 2>,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        typename internal::conditional<
            internal::traits<Input>::Layout == ColMajor,
            const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
            const Eigen::IndexList<Eigen::type2index<2>,
                                   Eigen::type2index<3> > >::type,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#endif
SpatialAvgPooling(const Input& input, DenseIndex patchRows,
                  DenseIndex patchCols, DenseIndex strideRows,
                  DenseIndex strideCols, const PaddingType padding_type,
                  DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
  // Spatial (2D) pooling is only defined for rank-4 inputs.
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);

  typedef typename internal::traits<Input>::Index TensorIndex;
  // TensorRef gives cheap access to the input's dimensions without forcing
  // evaluation of the (possibly lazy) input expression.
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // Effective patch extents once the in-patch (dilation) strides are applied:
  // k samples taken every s inputs span k + (k - 1) * (s - 1) input elements.
  const DenseIndex patchRowsEff =
      patchRows + (patchRows - 1) * (in_strideRows - 1);
  const DenseIndex patchColsEff =
      patchCols + (patchCols - 1) * (in_strideCols - 1);

  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
  // Row/col dimension indices are mirrored for row-major layouts.
  static const int idxRows = isColMajor ? 1 : 2;
  static const int idxCols = isColMajor ? 2 : 1;

  // Molds the output of the reduction into the shape expected by the user.
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output height
  // - 3rd dim: output width
  // - 4th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID padding: only windows that fit entirely inside the input count.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRowsEff + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchColsEff + 1,
        strideCols);
  } else {
    // Otherwise (SAME padding): one output per stride step.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[3] = in.dimension(3);

  typedef typename internal::remove_const<
      typename internal::traits<Input>::Scalar>::type CoeffReturnType;
  // Reducer that averages while ignoring the -highest() padding sentinel,
  // so border windows divide by the count of real elements only.
  internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 2> reduction_dims;
  if (isColMajor) {
    reduction_dims[0] = 1;
    reduction_dims[1] = 2;
  } else {
    reduction_dims[0] = 2;
    reduction_dims[1] = 3;
  }
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  typename internal::conditional<
      internal::traits<Input>::Layout == ColMajor,
      const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
      const Eigen::IndexList<Eigen::type2index<2>,
                             Eigen::type2index<3> > >::type reduction_dims;
#endif
  // The padding value here must match the sentinel AvgPoolMeanReducer skips.
  return input
      .extract_image_patches(patchRows, patchCols, strideRows, strideCols,
                             in_strideRows, in_strideCols, padding_type,
                             -Eigen::NumTraits<CoeffReturnType>::highest())
      .reduce(reduction_dims, mean_with_nan)
      .reshape(post_reduce_dims);
}
500 
501 /** CuboidAvgPooling
502  * \ingroup CXX11_NeuralNetworks_Module
503  *
504  * \brief Applies an average pooling over a multichannel input volume.
505  *
 * The input parameter is expected to be a tensor with a rank of 5 (channels,
 * depth, height, width, others in col-major, and the reverse of that in
 * row-major).
508  *
 * The result can be assigned to a tensor of rank equal to the rank of the
 * input. The dimensions of the result will be channels, depth, height, width,
 * and others (in col-major, and the reverse of that if the input was
 * row-major).
512  *
513  * The order of the depth, width and height dimensions can be swapped if
514  * needed.
515  *
516  */
// The two declarations below denote the same expression type; the
// EIGEN_HAS_INDEX_LIST variant encodes the reduction dimension in the type.
// NOTE(review): the declared reduction scalar is hard-coded to float
// (internal::AvgPoolMeanReducer<float> and the float padding value below),
// so this overload effectively targets float inputs — confirm before using
// it with other scalar types.
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<float>, const Eigen::array<int, 1>,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<float>,
        const Eigen::IndexList<Eigen::type2index<1> >,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#endif
CuboidAvgPooling(const Input& input, DenseIndex patchPlanes,
                 DenseIndex patchRows, DenseIndex patchCols,
                 DenseIndex stridePlanes, DenseIndex strideRows,
                 DenseIndex strideCols, const PaddingType padding_type) {
  // Cuboid (3D) pooling is only defined for rank-5 inputs.
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);

  typedef typename internal::traits<Input>::Index TensorIndex;
  // TensorRef gives cheap access to the input's dimensions without forcing
  // evaluation of the (possibly lazy) input expression.
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // Plane/row/col dimension indices are mirrored for row-major layouts;
  // rows sit in the middle either way.
  static const int idxPlanes = isColMajor ? 1 : 3;
  static const int idxRows = 2;
  static const int idxCols = isColMajor ? 3 : 1;
  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output depth
  // - 3rd dim: output height
  // - 4th dim: output width
  // - 5th dim and beyond: everything else including batch size
  Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID padding: only windows that fit entirely inside the input count.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)) - patchPlanes + 1,
        stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRows + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchCols + 1,
        strideCols);
  } else {
    // Otherwise (SAME padding): one output per stride step.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)), stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[4] = in.dimension(4);

  // Collapse the patch expression to rank 3 — (pre dims, patch size, post
  // dims) — so a single reduction over dimension 1 averages each patch.
  Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
  pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
  if (isColMajor) {
    pre_reduce_dims[0] = post_reduce_dims[0];
    pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
                         post_reduce_dims[3] * post_reduce_dims[4];
  } else {
    pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
                         post_reduce_dims[2] * post_reduce_dims[3];
    pre_reduce_dims[2] = post_reduce_dims[4];
  }

  typedef typename internal::remove_const<
      typename internal::traits<Input>::Scalar>::type CoeffReturnType;
  // Reducer that averages while ignoring the -highest() padding sentinel,
  // so border windows divide by the count of real elements only.
  internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 1> reduction_dims;
  reduction_dims[0] = 1;
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
#endif
  // The padding value here must match the sentinel AvgPoolMeanReducer skips.
  return input
      .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
                              strideRows, strideCols, padding_type,
                              -Eigen::NumTraits<float>::highest())
      .reshape(pre_reduce_dims)
      .reduce(reduction_dims, mean_with_nan)
      .reshape(post_reduce_dims);
}
619 
620 }  // end namespace Eigen
621 
622 #endif  // TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_
623