• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
5 // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_COEFFBASED_PRODUCT_H
12 #define EIGEN_COEFFBASED_PRODUCT_H
13 
14 namespace Eigen {
15 
16 namespace internal {
17 
18 /*********************************************************************************
19 *  Coefficient based product implementation.
20 *  It is designed for the following use cases:
21 *  - small fixed sizes
22 *  - lazy products
23 *********************************************************************************/
24 
25 /* Since the all the dimensions of the product are small, here we can rely
26  * on the generic Assign mechanism to evaluate the product per coeff (or packet).
27  *
28  * Note that here the inner-loops should always be unrolled.
29  */
30 
31 template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
32 struct product_coeff_impl;
33 
34 template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
35 struct product_packet_impl;
36 
37 template<typename LhsNested, typename RhsNested, int NestingFlags>
38 struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
39 {
40   typedef MatrixXpr XprKind;
41   typedef typename remove_all<LhsNested>::type _LhsNested;
42   typedef typename remove_all<RhsNested>::type _RhsNested;
43   typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
44   typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
45                                            typename traits<_RhsNested>::StorageKind>::ret StorageKind;
46   typedef typename promote_index_type<typename traits<_LhsNested>::Index,
47                                          typename traits<_RhsNested>::Index>::type Index;
48 
49   enum {
50       LhsCoeffReadCost = _LhsNested::CoeffReadCost,
51       RhsCoeffReadCost = _RhsNested::CoeffReadCost,
52       LhsFlags = _LhsNested::Flags,
53       RhsFlags = _RhsNested::Flags,
54 
55       RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
56       ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
57       InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
58 
59       MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
60       MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
61 
62       LhsRowMajor = LhsFlags & RowMajorBit,
63       RhsRowMajor = RhsFlags & RowMajorBit,
64 
65       SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
66 
67       CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
68                       && (ColsAtCompileTime == Dynamic
69                           || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
70                               && (RhsFlags&AlignedBit)
71                              )
72                          ),
73 
74       CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
75                       && (RowsAtCompileTime == Dynamic
76                           || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
77                               && (LhsFlags&AlignedBit)
78                              )
79                          ),
80 
81       EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
82                      : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
83                      : (RhsRowMajor && !CanVectorizeLhs),
84 
85       Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
86             | (EvalToRowMajor ? RowMajorBit : 0)
87             | NestingFlags
88             | (LhsFlags & RhsFlags & AlignedBit)
89             // TODO enable vectorization for mixed types
90             | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
91 
92       CoeffReadCost = InnerSize == Dynamic ? Dynamic
93                     : InnerSize == 0 ? 0
94                     : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
95                       + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
96 
97       /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
98       * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
99       * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
100       * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
101       */
102       CanVectorizeInner =    SameType
103                           && LhsRowMajor
104                           && (!RhsRowMajor)
105                           && (LhsFlags & RhsFlags & ActualPacketAccessBit)
106                           && (LhsFlags & RhsFlags & AlignedBit)
107                           && (InnerSize % packet_traits<Scalar>::size == 0)
108     };
109 };
110 
111 } // end namespace internal
112 
113 template<typename LhsNested, typename RhsNested, int NestingFlags>
114 class CoeffBasedProduct
115   : internal::no_assignment_operator,
116     public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
117 {
118   public:
119 
120     typedef MatrixBase<CoeffBasedProduct> Base;
121     EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
122     typedef typename Base::PlainObject PlainObject;
123 
124   private:
125 
126     typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
127     typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
128 
129     enum {
130       PacketSize = internal::packet_traits<Scalar>::size,
131       InnerSize  = internal::traits<CoeffBasedProduct>::InnerSize,
132       Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
133       CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
134     };
135 
136     typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
137                                    Unroll ? (InnerSize==0 ? 0 : InnerSize-1) : Dynamic,
138                                    _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
139 
140     typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
141 
142   public:
143 
144     inline CoeffBasedProduct(const CoeffBasedProduct& other)
145       : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
146     {}
147 
148     template<typename Lhs, typename Rhs>
149     inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
150       : m_lhs(lhs), m_rhs(rhs)
151     {
152       // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
153       // We still allow to mix T and complex<T>.
154       EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
155         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
156       eigen_assert(lhs.cols() == rhs.rows()
157         && "invalid matrix product"
158         && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
159     }
160 
161     EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
162     EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
163 
164     EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
165     {
166       Scalar res;
167       ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
168       return res;
169     }
170 
171     /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
172      * which is why we don't set the LinearAccessBit.
173      */
174     EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
175     {
176       Scalar res;
177       const Index row = RowsAtCompileTime == 1 ? 0 : index;
178       const Index col = RowsAtCompileTime == 1 ? index : 0;
179       ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
180       return res;
181     }
182 
183     template<int LoadMode>
184     EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
185     {
186       PacketScalar res;
187       internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
188                               Unroll ? (InnerSize==0 ? 0 : InnerSize-1) : Dynamic,
189                               _LhsNested, _RhsNested, PacketScalar, LoadMode>
190         ::run(row, col, m_lhs, m_rhs, res);
191       return res;
192     }
193 
194     // Implicit conversion to the nested type (trigger the evaluation of the product)
195     EIGEN_STRONG_INLINE operator const PlainObject& () const
196     {
197       m_result.lazyAssign(*this);
198       return m_result;
199     }
200 
201     const _LhsNested& lhs() const { return m_lhs; }
202     const _RhsNested& rhs() const { return m_rhs; }
203 
204     const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
205     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
206 
207     template<int DiagonalIndex>
208     const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
209     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
210 
211     const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
212     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
213 
214   protected:
215     typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
216     typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
217 
218     mutable PlainObject m_result;
219 };
220 
221 namespace internal {
222 
223 // here we need to overload the nested rule for products
224 // such that the nested type is a const reference to a plain matrix
225 template<typename Lhs, typename Rhs, int N, typename PlainObject>
226 struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
227 {
228   typedef PlainObject const& type;
229 };
230 
231 /***************************************************************************
232 * Normal product .coeff() implementation (with meta-unrolling)
233 ***************************************************************************/
234 
235 /**************************************
236 *** Scalar path  - no vectorization ***
237 **************************************/
238 
239 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
240 struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
241 {
242   typedef typename Lhs::Index Index;
243   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
244   {
245     product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
246     res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
247   }
248 };
249 
250 template<typename Lhs, typename Rhs, typename RetScalar>
251 struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
252 {
253   typedef typename Lhs::Index Index;
254   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
255   {
256     res = lhs.coeff(row, 0) * rhs.coeff(0, col);
257   }
258 };
259 
260 template<typename Lhs, typename Rhs, typename RetScalar>
261 struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
262 {
263   typedef typename Lhs::Index Index;
264   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
265   {
266     res = (lhs.row(row).transpose().cwiseProduct( rhs.col(col) )).sum();
267   }
268 };
269 
270 /*******************************************
271 *** Scalar path with inner vectorization ***
272 *******************************************/
273 
274 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
275 struct product_coeff_vectorized_unroller
276 {
277   typedef typename Lhs::Index Index;
278   enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
279   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
280   {
281     product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
282     pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
283   }
284 };
285 
286 template<typename Lhs, typename Rhs, typename Packet>
287 struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
288 {
289   typedef typename Lhs::Index Index;
290   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
291   {
292     pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
293   }
294 };
295 
296 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
297 struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
298 {
299   typedef typename Lhs::PacketScalar Packet;
300   typedef typename Lhs::Index Index;
301   enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
302   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
303   {
304     Packet pres;
305     product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
306     product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
307     res = predux(pres);
308   }
309 };
310 
311 template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
312 struct product_coeff_vectorized_dyn_selector
313 {
314   typedef typename Lhs::Index Index;
315   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
316   {
317     res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
318   }
319 };
320 
321 // NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
322 // NOTE maybe they are now useless since we have a specialization for Block<Matrix>
323 template<typename Lhs, typename Rhs, int RhsCols>
324 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
325 {
326   typedef typename Lhs::Index Index;
327   static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
328   {
329     res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
330   }
331 };
332 
333 template<typename Lhs, typename Rhs, int LhsRows>
334 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
335 {
336   typedef typename Lhs::Index Index;
337   static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
338   {
339     res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
340   }
341 };
342 
343 template<typename Lhs, typename Rhs>
344 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
345 {
346   typedef typename Lhs::Index Index;
347   static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
348   {
349     res = lhs.transpose().cwiseProduct(rhs).sum();
350   }
351 };
352 
353 template<typename Lhs, typename Rhs, typename RetScalar>
354 struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
355 {
356   typedef typename Lhs::Index Index;
357   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
358   {
359     product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
360   }
361 };
362 
363 /*******************
364 *** Packet path  ***
365 *******************/
366 
367 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
368 struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
369 {
370   typedef typename Lhs::Index Index;
371   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
372   {
373     product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
374     res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
375   }
376 };
377 
378 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
379 struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
380 {
381   typedef typename Lhs::Index Index;
382   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
383   {
384     product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
385     res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
386   }
387 };
388 
389 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
390 struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
391 {
392   typedef typename Lhs::Index Index;
393   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
394   {
395     res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
396   }
397 };
398 
399 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
400 struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
401 {
402   typedef typename Lhs::Index Index;
403   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
404   {
405     res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
406   }
407 };
408 
409 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
410 struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
411 {
412   typedef typename Lhs::Index Index;
413   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
414   {
415     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
416     res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
417       for(Index i = 1; i < lhs.cols(); ++i)
418         res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
419   }
420 };
421 
422 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
423 struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
424 {
425   typedef typename Lhs::Index Index;
426   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
427   {
428     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
429     res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
430       for(Index i = 1; i < lhs.cols(); ++i)
431         res =  pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
432   }
433 };
434 
435 } // end namespace internal
436 
437 } // end namespace Eigen
438 
439 #endif // EIGEN_COEFFBASED_PRODUCT_H
440