1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_REDUX_H
12 #define EIGEN_REDUX_H
13 
14 namespace Eigen {
15 
16 namespace internal {
17 
18 // TODO
19 //  * implement other kind of vectorization
20 //  * factorize code
21 
22 /***************************************************************************
23 * Part 1 : the logic deciding a strategy for vectorization and unrolling
24 ***************************************************************************/
25 
26 template<typename Func, typename Derived>
27 struct redux_traits
28 {
29 public:
30     typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
31   enum {
32     PacketSize = unpacket_traits<PacketType>::size,
33     InnerMaxSize = int(Derived::IsRowMajor)
34                  ? Derived::MaxColsAtCompileTime
35                  : Derived::MaxRowsAtCompileTime
36   };
37 
38   enum {
39     MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
40                   && (functor_traits<Func>::PacketAccess),
41     MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
42     MaySliceVectorize  = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
43   };
44 
45 public:
46   enum {
47     Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
48               : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
49                                         : int(DefaultTraversal)
50   };
51 
52 public:
53   enum {
54     Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
55          : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
56     UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
57   };
58 
59 public:
60   enum {
61     Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
62   };
63 
64 #ifdef EIGEN_DEBUG_ASSIGN
debugredux_traits65   static void debug()
66   {
67     std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
68     std::cerr.setf(std::ios::hex, std::ios::basefield);
69     EIGEN_DEBUG_VAR(Derived::Flags)
70     std::cerr.unsetf(std::ios::hex);
71     EIGEN_DEBUG_VAR(InnerMaxSize)
72     EIGEN_DEBUG_VAR(PacketSize)
73     EIGEN_DEBUG_VAR(MightVectorize)
74     EIGEN_DEBUG_VAR(MayLinearVectorize)
75     EIGEN_DEBUG_VAR(MaySliceVectorize)
76     EIGEN_DEBUG_VAR(Traversal)
77     EIGEN_DEBUG_VAR(UnrollingLimit)
78     EIGEN_DEBUG_VAR(Unrolling)
79     std::cerr << std::endl;
80   }
81 #endif
82 };
83 
84 /***************************************************************************
85 * Part 2 : unrollers
86 ***************************************************************************/
87 
88 /*** no vectorization ***/
89 
90 template<typename Func, typename Derived, int Start, int Length>
91 struct redux_novec_unroller
92 {
93   enum {
94     HalfLength = Length/2
95   };
96 
97   typedef typename Derived::Scalar Scalar;
98 
99   EIGEN_DEVICE_FUNC
runredux_novec_unroller100   static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
101   {
102     return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
103                 redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
104   }
105 };
106 
107 template<typename Func, typename Derived, int Start>
108 struct redux_novec_unroller<Func, Derived, Start, 1>
109 {
110   enum {
111     outer = Start / Derived::InnerSizeAtCompileTime,
112     inner = Start % Derived::InnerSizeAtCompileTime
113   };
114 
115   typedef typename Derived::Scalar Scalar;
116 
117   EIGEN_DEVICE_FUNC
118   static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
119   {
120     return mat.coeffByOuterInner(outer, inner);
121   }
122 };
123 
124 // This is actually dead code and will never be called. It is required
125 // to prevent false warnings regarding failed inlining though
126 // for 0 length run() will never be called at all.
127 template<typename Func, typename Derived, int Start>
128 struct redux_novec_unroller<Func, Derived, Start, 0>
129 {
130   typedef typename Derived::Scalar Scalar;
131   EIGEN_DEVICE_FUNC
132   static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
133 };
134 
135 /*** vectorization ***/
136 
137 template<typename Func, typename Derived, int Start, int Length>
138 struct redux_vec_unroller
139 {
140   enum {
141     PacketSize = redux_traits<Func, Derived>::PacketSize,
142     HalfLength = Length/2
143   };
144 
145   typedef typename Derived::Scalar Scalar;
146   typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
147 
148   static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
149   {
150     return func.packetOp(
151             redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
152             redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
153   }
154 };
155 
156 template<typename Func, typename Derived, int Start>
157 struct redux_vec_unroller<Func, Derived, Start, 1>
158 {
159   enum {
160     index = Start * redux_traits<Func, Derived>::PacketSize,
161     outer = index / int(Derived::InnerSizeAtCompileTime),
162     inner = index % int(Derived::InnerSizeAtCompileTime),
163     alignment = Derived::Alignment
164   };
165 
166   typedef typename Derived::Scalar Scalar;
167   typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
168 
169   static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
170   {
171     return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
172   }
173 };
174 
175 /***************************************************************************
176 * Part 3 : implementation of all cases
177 ***************************************************************************/
178 
179 template<typename Func, typename Derived,
180          int Traversal = redux_traits<Func, Derived>::Traversal,
181          int Unrolling = redux_traits<Func, Derived>::Unrolling
182 >
183 struct redux_impl;
184 
185 template<typename Func, typename Derived>
186 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
187 {
188   typedef typename Derived::Scalar Scalar;
189   EIGEN_DEVICE_FUNC
190   static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
191   {
192     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
193     Scalar res;
194     res = mat.coeffByOuterInner(0, 0);
195     for(Index i = 1; i < mat.innerSize(); ++i)
196       res = func(res, mat.coeffByOuterInner(0, i));
197     for(Index i = 1; i < mat.outerSize(); ++i)
198       for(Index j = 0; j < mat.innerSize(); ++j)
199         res = func(res, mat.coeffByOuterInner(i, j));
200     return res;
201   }
202 };
203 
204 template<typename Func, typename Derived>
205 struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
206   : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
207 {};
208 
209 template<typename Func, typename Derived>
210 struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
211 {
212   typedef typename Derived::Scalar Scalar;
213   typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
214 
215   static Scalar run(const Derived &mat, const Func& func)
216   {
217     const Index size = mat.size();
218 
219     const Index packetSize = redux_traits<Func, Derived>::PacketSize;
220     const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
221     enum {
222       alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
223       alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
224     };
225     const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
226     const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
227     const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
228     const Index alignedEnd2 = alignedStart + alignedSize2;
229     const Index alignedEnd  = alignedStart + alignedSize;
230     Scalar res;
231     if(alignedSize)
232     {
233       PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
234       if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
235       {
236         PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
237         for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
238         {
239           packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
240           packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
241         }
242 
243         packet_res0 = func.packetOp(packet_res0,packet_res1);
244         if(alignedEnd>alignedEnd2)
245           packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
246       }
247       res = func.predux(packet_res0);
248 
249       for(Index index = 0; index < alignedStart; ++index)
250         res = func(res,mat.coeff(index));
251 
252       for(Index index = alignedEnd; index < size; ++index)
253         res = func(res,mat.coeff(index));
254     }
255     else // too small to vectorize anything.
256          // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
257     {
258       res = mat.coeff(0);
259       for(Index index = 1; index < size; ++index)
260         res = func(res,mat.coeff(index));
261     }
262 
263     return res;
264   }
265 };
266 
267 // NOTE: for SliceVectorizedTraversal we simply bypass unrolling
268 template<typename Func, typename Derived, int Unrolling>
269 struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
270 {
271   typedef typename Derived::Scalar Scalar;
272   typedef typename redux_traits<Func, Derived>::PacketType PacketType;
273 
274   EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
275   {
276     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
277     const Index innerSize = mat.innerSize();
278     const Index outerSize = mat.outerSize();
279     enum {
280       packetSize = redux_traits<Func, Derived>::PacketSize
281     };
282     const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
283     Scalar res;
284     if(packetedInnerSize)
285     {
286       PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
287       for(Index j=0; j<outerSize; ++j)
288         for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
289           packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));
290 
291       res = func.predux(packet_res);
292       for(Index j=0; j<outerSize; ++j)
293         for(Index i=packetedInnerSize; i<innerSize; ++i)
294           res = func(res, mat.coeffByOuterInner(j,i));
295     }
296     else // too small to vectorize anything.
297          // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
298     {
299       res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
300     }
301 
302     return res;
303   }
304 };
305 
306 template<typename Func, typename Derived>
307 struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
308 {
309   typedef typename Derived::Scalar Scalar;
310 
311   typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
312   enum {
313     PacketSize = redux_traits<Func, Derived>::PacketSize,
314     Size = Derived::SizeAtCompileTime,
315     VectorizedSize = (Size / PacketSize) * PacketSize
316   };
317   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
318   {
319     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
320     if (VectorizedSize > 0) {
321       Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
322       if (VectorizedSize != Size)
323         res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
324       return res;
325     }
326     else {
327       return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
328     }
329   }
330 };
331 
332 // evaluator adaptor
333 template<typename _XprType>
334 class redux_evaluator
335 {
336 public:
337   typedef _XprType XprType;
338   EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
339 
340   typedef typename XprType::Scalar Scalar;
341   typedef typename XprType::CoeffReturnType CoeffReturnType;
342   typedef typename XprType::PacketScalar PacketScalar;
343   typedef typename XprType::PacketReturnType PacketReturnType;
344 
345   enum {
346     MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
347     MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
348     // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
349     Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
350     IsRowMajor = XprType::IsRowMajor,
351     SizeAtCompileTime = XprType::SizeAtCompileTime,
352     InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
353     CoeffReadCost = evaluator<XprType>::CoeffReadCost,
354     Alignment = evaluator<XprType>::Alignment
355   };
356 
357   EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
358   EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
359   EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
360   EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
361   EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
362 
363   EIGEN_DEVICE_FUNC
364   CoeffReturnType coeff(Index row, Index col) const
365   { return m_evaluator.coeff(row, col); }
366 
367   EIGEN_DEVICE_FUNC
368   CoeffReturnType coeff(Index index) const
369   { return m_evaluator.coeff(index); }
370 
371   template<int LoadMode, typename PacketType>
372   PacketType packet(Index row, Index col) const
373   { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
374 
375   template<int LoadMode, typename PacketType>
376   PacketType packet(Index index) const
377   { return m_evaluator.template packet<LoadMode,PacketType>(index); }
378 
379   EIGEN_DEVICE_FUNC
380   CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
381   { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
382 
383   template<int LoadMode, typename PacketType>
384   PacketType packetByOuterInner(Index outer, Index inner) const
385   { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
386 
387   const XprType & nestedExpression() const { return m_xpr; }
388 
389 protected:
390   internal::evaluator<XprType> m_evaluator;
391   const XprType &m_xpr;
392 };
393 
394 } // end namespace internal
395 
396 /***************************************************************************
397 * Part 4 : public API
398 ***************************************************************************/
399 
400 
401 /** \returns the result of a full redux operation on the whole matrix or vector using \a func
402   *
403   * The template parameter \a BinaryOp is the type of the functor \a func which must be
404   * an associative operator. Both current C++98 and C++11 functor styles are handled.
405   *
406   * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
407   */
408 template<typename Derived>
409 template<typename Func>
410 typename internal::traits<Derived>::Scalar
411 DenseBase<Derived>::redux(const Func& func) const
412 {
413   eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
414 
415   typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
416   ThisEvaluator thisEval(derived());
417 
418   return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
419 }
420 
421 /** \returns the minimum of all coefficients of \c *this.
422   * \warning the result is undefined if \c *this contains NaN.
423   */
424 template<typename Derived>
425 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
426 DenseBase<Derived>::minCoeff() const
427 {
428   return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
429 }
430 
431 /** \returns the maximum of all coefficients of \c *this.
432   * \warning the result is undefined if \c *this contains NaN.
433   */
434 template<typename Derived>
435 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
436 DenseBase<Derived>::maxCoeff() const
437 {
438   return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
439 }
440 
441 /** \returns the sum of all coefficients of \c *this
442   *
443   * If \c *this is empty, then the value 0 is returned.
444   *
445   * \sa trace(), prod(), mean()
446   */
447 template<typename Derived>
448 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
449 DenseBase<Derived>::sum() const
450 {
451   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
452     return Scalar(0);
453   return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
454 }
455 
456 /** \returns the mean of all coefficients of *this
457 *
458 * \sa trace(), prod(), sum()
459 */
460 template<typename Derived>
461 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
462 DenseBase<Derived>::mean() const
463 {
464 #ifdef __INTEL_COMPILER
465   #pragma warning push
466   #pragma warning ( disable : 2259 )
467 #endif
468   return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
469 #ifdef __INTEL_COMPILER
470   #pragma warning pop
471 #endif
472 }
473 
474 /** \returns the product of all coefficients of *this
475   *
476   * Example: \include MatrixBase_prod.cpp
477   * Output: \verbinclude MatrixBase_prod.out
478   *
479   * \sa sum(), mean(), trace()
480   */
481 template<typename Derived>
482 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
483 DenseBase<Derived>::prod() const
484 {
485   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
486     return Scalar(1);
487   return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
488 }
489 
490 /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
491   *
492   * \c *this can be any matrix, not necessarily square.
493   *
494   * \sa diagonal(), sum()
495   */
496 template<typename Derived>
497 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
498 MatrixBase<Derived>::trace() const
499 {
500   return derived().diagonal().sum();
501 }
502 
503 } // end namespace Eigen
504 
505 #endif // EIGEN_REDUX_H
506