1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_
17 #define TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_
18 
19 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
20 #include "tensorflow/core/kernels/eigen_volume_patch.h"
21 
22 namespace Eigen {
23 
24 /** SpatialMaxPooling
25  * \ingroup CXX11_NeuralNetworks_Module
26  *
27  * \brief Applies a max-pooling over a multichannel input image.
28  *
29  * The input parameter is expected to be a with a rank of 4 (channels, height,
30  * width, others in col-major, and the reverse of that in row-major).
31  *
32  * The result can be assigned to a tensor of rank equal to the rank of the
33  * input. The dimensions of the result will be channels, height, width, and
34  * others (in col-major, and the reverse of that if the input was row-major).
35  *
36  * The order of the width and height dimensions can be swapped if needed.
37  *
38  */
39 #if !defined(EIGEN_HAS_INDEX_LIST)
40 template <typename Input>
41 EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
42     const Eigen::DSizes<typename internal::traits<Input>::Index,
43                         internal::traits<Input>::NumDimensions>,
44     const TensorReductionOp<
45         internal::MaxReducer<typename internal::remove_const<
46             typename internal::traits<Input>::Scalar>::type>,
47         const Eigen::array<int, 2>,
48         const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
49 #else
50 template <typename Input>
51 EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
52     const Eigen::DSizes<typename internal::traits<Input>::Index,
53                         internal::traits<Input>::NumDimensions>,
54     const TensorReductionOp<
55         internal::MaxReducer<typename internal::remove_const<
56             typename internal::traits<Input>::Scalar>::type>,
57         typename internal::conditional<
58             internal::traits<Input>::Layout == ColMajor,
59             const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
60             const Eigen::IndexList<Eigen::type2index<2>,
61                                    Eigen::type2index<3> > >::type,
62         const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
63 #endif
64 SpatialMaxPooling(const Input& input, DenseIndex patchRows,
65                   DenseIndex patchCols, DenseIndex strideRows,
66                   DenseIndex strideCols, const PaddingType padding_type,
67                   DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
68   EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
69                       YOU_MADE_A_PROGRAMMING_MISTAKE);
70 
71   typedef typename internal::traits<Input>::Index TensorIndex;
72   TensorRef<Tensor<typename internal::traits<Input>::Scalar,
73                    internal::traits<Input>::NumDimensions,
74                    internal::traits<Input>::Layout, TensorIndex> >
75       in(input);
76 
77   const DenseIndex patchRowsEff =
78       patchRows + (patchRows - 1) * (in_strideRows - 1);
79   const DenseIndex patchColsEff =
80       patchCols + (patchCols - 1) * (in_strideCols - 1);
81 
82   static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
83   static const int idxRows = isColMajor ? 1 : 2;
84   static const int idxCols = isColMajor ? 2 : 1;
85 
86   // Molds the output of the reduction into the shape expected by the user.
87   // (assuming col-major):
88   // - 1st dim: channels
89   // - 2nd dim: output height
90   // - 3rd dim: output width
91   // - 4th dim and beyond: everything else including batch size
92   Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
93       post_reduce_dims;
94   post_reduce_dims[0] = in.dimension(0);
95   if (padding_type == PADDING_VALID) {
96     post_reduce_dims[idxRows] = Eigen::divup(
97         static_cast<DenseIndex>(in.dimension(idxRows)) - patchRowsEff + 1,
98         strideRows);
99     post_reduce_dims[idxCols] = Eigen::divup(
100         static_cast<DenseIndex>(in.dimension(idxCols)) - patchColsEff + 1,
101         strideCols);
102   } else {
103     post_reduce_dims[idxRows] = Eigen::divup(
104         static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
105     post_reduce_dims[idxCols] = Eigen::divup(
106         static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
107   }
108   post_reduce_dims[3] = in.dimension(3);
109 
110 #if !defined(EIGEN_HAS_INDEX_LIST)
111   // nvcc doesn't support cxx11
112   Eigen::array<int, 2> reduction_dims;
113   if (isColMajor) {
114     reduction_dims[0] = 1;
115     reduction_dims[1] = 2;
116   } else {
117     reduction_dims[0] = 2;
118     reduction_dims[1] = 3;
119   }
120 #else
121   // Take advantage of cxx11 to give the compiler information it can use to
122   // optimize the code.
123   typename internal::conditional<
124       internal::traits<Input>::Layout == ColMajor,
125       const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
126       const Eigen::IndexList<Eigen::type2index<2>,
127                              Eigen::type2index<3> > >::type reduction_dims;
128 #endif
129 
130   return input
131       .extract_image_patches(
132           patchRows, patchCols, strideRows, strideCols, in_strideRows,
133           in_strideCols, padding_type,
134           -Eigen::NumTraits<typename internal::remove_const<
135               typename internal::traits<Input>::Scalar>::type>::highest())
136       .maximum(reduction_dims)
137       .reshape(post_reduce_dims);
138 }
139 
140 /** CuboidMaxPooling
141  * \ingroup CXX11_NeuralNetworks_Module
142  *
143  * \brief Applies a max-pooling over a multichannel input volume.
144  *
145  * The input parameter is expected to be a tensor with a rank of 5 (channels,
146  * depth, height, width, others in col-major, and the reverse of that in
147  * row-major).
148  *
149  * The result can be assigned to a tensor of rank equal to the rank of the
150  * input. The dimensions of the result will be channels, depth, height, width,
151  * and others (in col-major, and the reverse of that if the input was
152  * row-major).
153  *
154  * The order of the depth, width and height dimensions can be swapped if
155  * needed.
156  *
157  */
158 #if !defined(EIGEN_HAS_INDEX_LIST)
159 template <typename Input>
160 EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
161     const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
162     const TensorReductionOp<
163         internal::MaxReducer<float>, const Eigen::array<int, 1>,
164         const TensorReshapingOp<
165             const Eigen::DSizes<DenseIndex, 3>,
166             const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
167                                       const Input> > > >
168 #else
169 template <typename Input>
170 EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
171     const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
172     const TensorReductionOp<
173         internal::MaxReducer<float>,
174         const Eigen::IndexList<Eigen::type2index<1> >,
175         const TensorReshapingOp<
176             const Eigen::DSizes<DenseIndex, 3>,
177             const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
178                                       const Input> > > >
179 #endif
CuboidMaxPooling(const Input & input,DenseIndex patchPlanes,DenseIndex patchRows,DenseIndex patchCols,DenseIndex stridePlanes,DenseIndex strideRows,DenseIndex strideCols,const PaddingType padding_type)180 CuboidMaxPooling(const Input& input, DenseIndex patchPlanes,
181                  DenseIndex patchRows, DenseIndex patchCols,
182                  DenseIndex stridePlanes, DenseIndex strideRows,
183                  DenseIndex strideCols, const PaddingType padding_type) {
184   EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
185                       YOU_MADE_A_PROGRAMMING_MISTAKE);
186   static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
187 
188   typedef typename internal::traits<Input>::Index TensorIndex;
189   TensorRef<Tensor<typename internal::traits<Input>::Scalar,
190                    internal::traits<Input>::NumDimensions,
191                    internal::traits<Input>::Layout, TensorIndex> >
192       in(input);
193 
194   static const int idxPlanes = isColMajor ? 1 : 3;
195   static const int idxRows = 2;
196   static const int idxCols = isColMajor ? 3 : 1;
197 
198   // Molds the output of the reduction into the shape expected by the used
199   // (assuming col-major):
200   // - 1st dim: channels
201   // - 2nd dim: output depth
202   // - 3rd dim: output height
203   // - 4th dim: output width
204   // - 5th dim and beyond: everything else including batch size
205   Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>
206       post_reduce_dims;
207   post_reduce_dims[0] = in.dimension(0);
208   if (padding_type == PADDING_VALID) {
209     post_reduce_dims[idxPlanes] = Eigen::divup(
210         static_cast<DenseIndex>(in.dimension(idxPlanes)) - patchPlanes + 1,
211         stridePlanes);
212     post_reduce_dims[idxRows] = Eigen::divup(
213         static_cast<DenseIndex>(in.dimension(idxRows)) - patchRows + 1,
214         strideRows);
215     post_reduce_dims[idxCols] = Eigen::divup(
216         static_cast<DenseIndex>(in.dimension(idxCols)) - patchCols + 1,
217         strideCols);
218   } else {
219     post_reduce_dims[idxPlanes] = Eigen::divup(
220         static_cast<DenseIndex>(in.dimension(idxPlanes)), stridePlanes);
221     post_reduce_dims[idxRows] = Eigen::divup(
222         static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
223     post_reduce_dims[idxCols] = Eigen::divup(
224         static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
225   }
226   post_reduce_dims[4] = in.dimension(4);
227 
228   Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
229   pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
230   if (isColMajor) {
231     pre_reduce_dims[0] = post_reduce_dims[0];
232     pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
233                          post_reduce_dims[3] * post_reduce_dims[4];
234   } else {
235     pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
236                          post_reduce_dims[2] * post_reduce_dims[3];
237     pre_reduce_dims[2] = post_reduce_dims[4];
238   }
239 
240 #if !defined(EIGEN_HAS_INDEX_LIST)
241   // nvcc doesn't support cxx11
242   Eigen::array<int, 1> reduction_dims;
243   reduction_dims[0] = 1;
244 #else
245   // Take advantage of cxx11 to give the compiler information it can use to
246   // optimize the code.
247   Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
248 #endif
249   return input
250       .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
251                               strideRows, strideCols, padding_type,
252                               -Eigen::NumTraits<float>::highest())
253       .reshape(pre_reduce_dims)
254       .maximum(reduction_dims)
255       .reshape(post_reduce_dims);
256 }
257 
258 /** SpatialAvgPooling
259  * \ingroup CXX11_NeuralNetworks_Module
260  *
261  * \brief Applies an average pooling over a multichannel input image.
262  *
263  * The input parameter is expected to be a tensor with a rank of 4 (channels,
264  * height, width, others in col-major, and the reverse of that in row-major).
265  *
266  * The result can be assigned to a tensor of rank equal to the rank of the
267  * input. The dimensions of the result will be channels, height, width, and
268  * others (in col-major, and the reverse of that if the input was row-major).
269  *
270  * The order of the width and height dimensions can be swapped if needed.
271  *
272  */
273 namespace internal {
274 
// Stateful mean reducer used by the average pooling helpers in this file.
//
// It sums the values it is given and counts them, skipping every value equal
// to -NumTraits<T>::highest(). The average pooling functions in this file pass
// that same value as the padding value when extracting patches, so padded
// entries contribute neither to the sum nor to the count.
//
// Scalar contributions are counted in scalarCount_, packet contributions
// lane by lane in packetCount_.
template <typename T>
struct AvgPoolMeanReducer {
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
  // We only support packet access for floats.
  static const bool PacketAccess = internal::is_same<T, float>::value;
#else
  static const bool PacketAccess = false;
#endif
  static const bool IsStateful = true;

  // Starts with both counters (scalar and per-lane) at zero.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE AvgPoolMeanReducer() : scalarCount_(0) {
    typedef typename packet_traits<T>::type Packet;
    packetCount_ = pset1<Packet>(T(0.0));
  }

  // Adds t to *accum and counts it, unless t is the skip sentinel.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
    if (t != -Eigen::NumTraits<T>::highest()) {
      (*accum) = (*accum) + t;
      scalarCount_++;
    }
  }

  // Initial value of the scalar accumulator.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(0);
  }

  // Returns the mean of the scalars accumulated so far. At least one value
  // must have been counted (checked with eigen_assert).
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    eigen_assert(scalarCount_ > 0);
    return accum / T(scalarCount_);
  }

#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
  // pequal(a, b): per-lane equality mask of two float packets.
  // psel(a, b, false_mask): per lane, b where false_mask is set, a otherwise.
  // One implementation is picked per instruction set (AVX512, AVX, SSE).
  // NOTE(review): both macros are defined in the middle of the struct and are
  // never #undef'd, so they stay defined for every file that includes this
  // header.
#ifdef EIGEN_VECTORIZE_AVX512
#define pequal(a, b)   \
  _mm512_castsi512_ps( \
      _mm512_maskz_set1_epi32(_mm512_cmp_ps_mask(a, b, _CMP_EQ_UQ), -1))

  // The ternarylogic function immediate determines the values in the result
  // In the case below, 0xd8 implies (false_mask) ? (b) : (a)
  // For details, refer to the vpternlogd instruction table at
  // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-2c-manual.pdf

#define psel(a, b, false_mask)                        \
  _mm512_castsi512_ps(_mm512_ternarylogic_epi32(      \
      _mm512_castps_si512(a), _mm512_castps_si512(b), \
      _mm512_castps_si512(false_mask), 0xd8))
#elif defined EIGEN_VECTORIZE_AVX
#define pequal(a, b) _mm256_cmp_ps(a, b, _CMP_EQ_UQ)
#define psel(a, b, false_mask) _mm256_blendv_ps(a, b, false_mask)
#else
#define pequal(a, b) _mm_cmpeq_ps(a, b)
#define psel(a, b, false_mask) \
  _mm_or_ps(_mm_andnot_ps(false_mask, a), _mm_and_ps(false_mask, b))
#endif

  // Packet counterpart of reduce(); forwards to reducePacketWithType with a
  // dummy value of type T.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p,
                                                          Packet* accum) {
    reducePacketWithType(static_cast<T>(0), p, accum);
  }

  // Accumulates the lanes of p into *accum. Lanes equal to the skip sentinel
  // add 0 to the sum and 0 to the per-lane count; all other lanes add their
  // value to the sum and 1 to the count. The first (unnamed) argument is not
  // used by the body.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacketWithType(
      T, const Packet& p, Packet* accum) {
    Packet skip_mask =
        pequal(p, pset1<Packet>(-Eigen::NumTraits<T>::highest()));
    (*accum) = padd<Packet>(*accum, psel(p, pset1<Packet>(0), skip_mask));
    packetCount_ = padd<Packet>(
        packetCount_, psel(pset1<Packet>(1), pset1<Packet>(0), skip_mask));
  }

  // Initial value of the packet accumulator: all lanes zero.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(0);
  }

  // Per-lane mean: each lane of vaccum divided by that lane's count.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
  finalizePacket(const Packet& vaccum) const {
    return pdiv(vaccum, packetCount_);
  }
  // Mean over everything accumulated through both the scalar and the packet
  // paths: (scalar sum + sum of all lanes) / (scalar count + sum of all lane
  // counts).
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T
  finalizeBoth(const T saccum, const Packet& vaccum) const {
    return (saccum + predux(vaccum)) / (scalarCount_ + predux(packetCount_));
  }
#endif

 protected:
  typedef typename packet_traits<T>::type Packet;
  // Number of values accepted through reduce().
  int scalarCount_;
  // Per-lane number of values accepted through reducePacket().
  Packet packetCount_;
};
368 
369 template <typename Device>
370 struct reducer_traits<AvgPoolMeanReducer<float>, Device> {
371   enum {
372     Cost = 1,
373 #if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
374     // We only support packet access for floats.
375     PacketAccess = true,
376 #else
377     PacketAccess = false,
378 #endif
379     IsStateful = true,
380     IsExactlyAssociative = false
381   };
382 };
383 
384 template <>
385 struct reducer_traits<AvgPoolMeanReducer<float>, GpuDevice> {
386   enum {
387     Cost = 1,
388     PacketAccess = false,
389     IsStateful = true,
390     IsExactlyAssociative = false
391   };
392 };
393 
394 }  // namespace internal
395 
396 #if !defined(EIGEN_HAS_INDEX_LIST)
397 template <typename Input>
398 EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
399     const Eigen::DSizes<typename internal::traits<Input>::Index,
400                         internal::traits<Input>::NumDimensions>,
401     const TensorReductionOp<
402         internal::AvgPoolMeanReducer<typename internal::remove_const<
403             typename internal::traits<Input>::Scalar>::type>,
404         const Eigen::array<int, 2>,
405         const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
406 #else
407 template <typename Input>
408 EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
409     const Eigen::DSizes<typename internal::traits<Input>::Index,
410                         internal::traits<Input>::NumDimensions>,
411     const TensorReductionOp<
412         internal::AvgPoolMeanReducer<typename internal::remove_const<
413             typename internal::traits<Input>::Scalar>::type>,
414         typename internal::conditional<
415             internal::traits<Input>::Layout == ColMajor,
416             const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
417             const Eigen::IndexList<Eigen::type2index<2>,
418                                    Eigen::type2index<3> > >::type,
419         const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
420 #endif
421 SpatialAvgPooling(const Input& input, DenseIndex patchRows,
422                   DenseIndex patchCols, DenseIndex strideRows,
423                   DenseIndex strideCols, const PaddingType padding_type,
424                   DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
425   EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
426                       YOU_MADE_A_PROGRAMMING_MISTAKE);
427 
428   typedef typename internal::traits<Input>::Index TensorIndex;
429   TensorRef<Tensor<typename internal::traits<Input>::Scalar,
430                    internal::traits<Input>::NumDimensions,
431                    internal::traits<Input>::Layout, TensorIndex> >
432       in(input);
433 
434   const DenseIndex patchRowsEff =
435       patchRows + (patchRows - 1) * (in_strideRows - 1);
436   const DenseIndex patchColsEff =
437       patchCols + (patchCols - 1) * (in_strideCols - 1);
438 
439   static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
440   static const int idxRows = isColMajor ? 1 : 2;
441   static const int idxCols = isColMajor ? 2 : 1;
442 
443   // Molds the output of the reduction into the shape expected by the user.
444   // (assuming col-major):
445   // - 1st dim: channels
446   // - 2nd dim: output height
447   // - 3rd dim: output width
448   // - 4th dim and beyond: everything else including batch size
449   Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
450       post_reduce_dims;
451   post_reduce_dims[0] = in.dimension(0);
452   if (padding_type == PADDING_VALID) {
453     post_reduce_dims[idxRows] = Eigen::divup(
454         static_cast<DenseIndex>(in.dimension(idxRows)) - patchRowsEff + 1,
455         strideRows);
456     post_reduce_dims[idxCols] = Eigen::divup(
457         static_cast<DenseIndex>(in.dimension(idxCols)) - patchColsEff + 1,
458         strideCols);
459   } else {
460     post_reduce_dims[idxRows] = Eigen::divup(
461         static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
462     post_reduce_dims[idxCols] = Eigen::divup(
463         static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
464   }
465   post_reduce_dims[3] = in.dimension(3);
466 
467   typedef typename internal::remove_const<
468       typename internal::traits<Input>::Scalar>::type CoeffReturnType;
469   internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;
470 
471 #if !defined(EIGEN_HAS_INDEX_LIST)
472   // nvcc doesn't support cxx11
473   Eigen::array<int, 2> reduction_dims;
474   if (isColMajor) {
475     reduction_dims[0] = 1;
476     reduction_dims[1] = 2;
477   } else {
478     reduction_dims[0] = 2;
479     reduction_dims[1] = 3;
480   }
481 #else
482   // Take advantage of cxx11 to give the compiler information it can use to
483   // optimize the code.
484   typename internal::conditional<
485       internal::traits<Input>::Layout == ColMajor,
486       const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
487       const Eigen::IndexList<Eigen::type2index<2>,
488                              Eigen::type2index<3> > >::type reduction_dims;
489 #endif
490   return input
491       .extract_image_patches(patchRows, patchCols, strideRows, strideCols,
492                              in_strideRows, in_strideCols, padding_type,
493                              -Eigen::NumTraits<CoeffReturnType>::highest())
494       .reduce(reduction_dims, mean_with_nan)
495       .reshape(post_reduce_dims);
496 }
497 
498 /** CuboidAvgPooling
499  * \ingroup CXX11_NeuralNetworks_Module
500  *
501  * \brief Applies an average pooling over a multichannel input volume.
502  *
503  * The input parameter is expected to be a tensor with a rank of 5 (channels,
504  * depth, height, width, others, and the reverse of that in row-major).
505  *
506  * The result can be assigned to a tensor of rank equal to the rank of the
507  * input. The dimensions of the result will be channels, depth, width, and
508  * others (in col-major, and the reverse of that if the input was row-major).
509  *
510  * The order of the depth, width and height dimensions can be swapped if
511  * needed.
512  *
513  */
514 #if !defined(EIGEN_HAS_INDEX_LIST)
515 template <typename Input>
516 EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
517     const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
518     const TensorReductionOp<
519         internal::AvgPoolMeanReducer<float>, const Eigen::array<int, 1>,
520         const TensorReshapingOp<
521             const Eigen::DSizes<DenseIndex, 3>,
522             const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
523                                       const Input> > > >
524 #else
525 template <typename Input>
526 EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
527     const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
528     const TensorReductionOp<
529         internal::AvgPoolMeanReducer<float>,
530         const Eigen::IndexList<Eigen::type2index<1> >,
531         const TensorReshapingOp<
532             const Eigen::DSizes<DenseIndex, 3>,
533             const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
534                                       const Input> > > >
535 #endif
536 CuboidAvgPooling(const Input& input, DenseIndex patchPlanes,
537                  DenseIndex patchRows, DenseIndex patchCols,
538                  DenseIndex stridePlanes, DenseIndex strideRows,
539                  DenseIndex strideCols, const PaddingType padding_type) {
540   EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
541                       YOU_MADE_A_PROGRAMMING_MISTAKE);
542   static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
543 
544   typedef typename internal::traits<Input>::Index TensorIndex;
545   TensorRef<Tensor<typename internal::traits<Input>::Scalar,
546                    internal::traits<Input>::NumDimensions,
547                    internal::traits<Input>::Layout, TensorIndex> >
548       in(input);
549 
550   static const int idxPlanes = isColMajor ? 1 : 3;
551   static const int idxRows = 2;
552   static const int idxCols = isColMajor ? 3 : 1;
553   // Molds the output of the reduction into the shape expected by the used
554   // (assuming col-major):
555   // - 1st dim: channels
556   // - 2nd dim: outupt depth
557   // - 3rd dim: output height
558   // - 4th dim: output width
559   // - 5th dim and beyond: everything else including batch size
560   Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>
561       post_reduce_dims;
562   post_reduce_dims[0] = in.dimension(0);
563   if (padding_type == PADDING_VALID) {
564     post_reduce_dims[idxPlanes] = Eigen::divup(
565         static_cast<DenseIndex>(in.dimension(idxPlanes)) - patchPlanes + 1,
566         stridePlanes);
567     post_reduce_dims[idxRows] = Eigen::divup(
568         static_cast<DenseIndex>(in.dimension(idxRows)) - patchRows + 1,
569         strideRows);
570     post_reduce_dims[idxCols] = Eigen::divup(
571         static_cast<DenseIndex>(in.dimension(idxCols)) - patchCols + 1,
572         strideCols);
573   } else {
574     post_reduce_dims[idxPlanes] = Eigen::divup(
575         static_cast<DenseIndex>(in.dimension(idxPlanes)), stridePlanes);
576     post_reduce_dims[idxRows] = Eigen::divup(
577         static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
578     post_reduce_dims[idxCols] = Eigen::divup(
579         static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
580   }
581   post_reduce_dims[4] = in.dimension(4);
582 
583   Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
584   pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
585   if (isColMajor) {
586     pre_reduce_dims[0] = post_reduce_dims[0];
587     pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
588                          post_reduce_dims[3] * post_reduce_dims[4];
589   } else {
590     pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
591                          post_reduce_dims[2] * post_reduce_dims[3];
592     pre_reduce_dims[2] = post_reduce_dims[4];
593   }
594 
595   typedef typename internal::remove_const<
596       typename internal::traits<Input>::Scalar>::type CoeffReturnType;
597   internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;
598 
599 #if !defined(EIGEN_HAS_INDEX_LIST)
600   // nvcc doesn't support cxx11
601   Eigen::array<int, 1> reduction_dims;
602   reduction_dims[0] = 1;
603 #else
604   // Take advantage of cxx11 to give the compiler information it can use to
605   // optimize the code.
606   Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
607 #endif
608   return input
609       .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
610                               strideRows, strideCols, padding_type,
611                               -Eigen::NumTraits<float>::highest())
612       .reshape(pre_reduce_dims)
613       .reduce(reduction_dims, mean_with_nan)
614       .reshape(post_reduce_dims);
615 }
616 
617 }  // end namespace Eigen
618 
619 #endif  // TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_
620