1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> 5 // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> 6 // 7 // This Source Code Form is subject to the terms of the Mozilla 8 // Public License v. 2.0. If a copy of the MPL was not distributed 9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10 11 #ifndef EIGEN_COEFFBASED_PRODUCT_H 12 #define EIGEN_COEFFBASED_PRODUCT_H 13 14 namespace Eigen { 15 16 namespace internal { 17 18 /********************************************************************************* 19 * Coefficient based product implementation. 20 * It is designed for the following use cases: 21 * - small fixed sizes 22 * - lazy products 23 *********************************************************************************/ 24 25 /* Since the all the dimensions of the product are small, here we can rely 26 * on the generic Assign mechanism to evaluate the product per coeff (or packet). 27 * 28 * Note that here the inner-loops should always be unrolled. 
*/

// Forward declaration of the scalar-path product kernel. Traversal selects
// DefaultTraversal vs. InnerVectorizedTraversal; UnrollingIndex is the inner
// index currently being peeled, or Dynamic for a runtime loop.
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl;

// Forward declaration of the packet-path product kernel (computes one packet
// of the result), dispatched on the storage order of the result.
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl;

// Expression traits for CoeffBasedProduct: result scalar/storage/index types,
// compile-time sizes, coefficient-read cost, and vectorization capabilities.
template<typename LhsNested, typename RhsNested, int NestingFlags>
struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
{
  typedef MatrixXpr XprKind;
  typedef typename remove_all<LhsNested>::type _LhsNested;
  typedef typename remove_all<RhsNested>::type _RhsNested;
  // Result scalar type; allows mixing T with complex<T> (see the static
  // assertion in the CoeffBasedProduct constructor below).
  typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
  typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
                                        typename traits<_RhsNested>::StorageKind>::ret StorageKind;
  typedef typename promote_index_type<typename traits<_LhsNested>::Index,
                                      typename traits<_RhsNested>::Index>::type Index;

  enum {
    LhsCoeffReadCost = _LhsNested::CoeffReadCost,
    RhsCoeffReadCost = _RhsNested::CoeffReadCost,
    LhsFlags = _LhsNested::Flags,
    RhsFlags = _RhsNested::Flags,

    RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
    ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
    // Common inner dimension (lhs columns == rhs rows); prefers a fixed size
    // over Dynamic when only one operand knows it at compile time.
    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),

    MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,

    LhsRowMajor = LhsFlags & RowMajorBit,
    RhsRowMajor = RhsFlags & RowMajorBit,

    SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,

    // The rhs side can be vectorized when it is row-major with packet access
    // and its column count is either Dynamic or a multiple of the packet size
    // with aligned storage.
    CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
                    && (ColsAtCompileTime == Dynamic
                        || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
                              && (RhsFlags&AlignedBit)
                           )
                       ),

    // Mirror condition for the lhs side, which must be column-major.
    CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
                    && (RowsAtCompileTime == Dynamic
                        || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
                              && (LhsFlags&AlignedBit)
                           )
                       ),

    // Vectors keep their natural orientation; otherwise follow the rhs
    // ordering unless the lhs path can be vectorized.
    EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
                   : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
                   : (RhsRowMajor && !CanVectorizeLhs),

    Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
          | (EvalToRowMajor ? RowMajorBit : 0)
          | NestingFlags
          | (LhsFlags & RhsFlags & AlignedBit)
          // TODO enable vectorization for mixed types
          | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),

    // Cost of one product coefficient: InnerSize multiplies and operand
    // reads, plus (InnerSize - 1) additions; 0-sized inner dimension is free.
    CoeffReadCost = InnerSize == Dynamic ? Dynamic
                  : InnerSize == 0 ? 0
                  : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
                    + (InnerSize - 1) * NumTraits<Scalar>::AddCost,

    /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
     * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
     * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
     * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
     */
    CanVectorizeInner =    SameType
                        && LhsRowMajor
                        && (!RhsRowMajor)
                        && (LhsFlags & RhsFlags & ActualPacketAccessBit)
                        && (LhsFlags & RhsFlags & AlignedBit)
                        && (InnerSize % packet_traits<Scalar>::size == 0)
  };
};

} // end namespace internal

// Product expression evaluated coefficient by coefficient through the
// generic Assign mechanism; intended for small fixed sizes and lazy products.
template<typename LhsNested, typename RhsNested, int NestingFlags>
class CoeffBasedProduct
  : internal::no_assignment_operator,
    public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
{
  public:

    typedef MatrixBase<CoeffBasedProduct> Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
    typedef typename Base::PlainObject PlainObject;

  private:

    typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
    typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;

    enum {
      PacketSize = internal::packet_traits<Scalar>::size,
      InnerSize = internal::traits<CoeffBasedProduct>::InnerSize,
      // Unroll the inner loop only when its total cost is statically known
      // and below the global unrolling threshold.
      Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
      CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
    };

    // Kernel used by the scalar coeff() accessors below. The unrolling depth
    // is InnerSize-1 (the last peeled index), 0 for an empty inner dimension,
    // or Dynamic when unrolling is disabled.
    typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
                                         Unroll ? (InnerSize==0 ? 0 : InnerSize-1) : Dynamic,
                                         _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;

    // Same product but flagged for nesting by reference (used by diagonal()).
    typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;

  public:

    inline CoeffBasedProduct(const CoeffBasedProduct& other)
      : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
    {}

    template<typename Lhs, typename Rhs>
    inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
      : m_lhs(lhs), m_rhs(rhs)
    {
      // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
      // We still allow to mix T and complex<T>.
      EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
      eigen_assert(lhs.cols() == rhs.rows()
        && "invalid matrix product"
        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
    }

    EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
    EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }

    // One coefficient of the product: the dot product of lhs row `row` with
    // rhs column `col`, computed by the (possibly unrolled) kernel.
    EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
    {
      Scalar res;
      ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
      return res;
    }

    /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
     * which is why we don't set the LinearAccessBit.
     */
    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
    {
      Scalar res;
      // Map the linear index onto (row, col) of a row or column vector.
      const Index row = RowsAtCompileTime == 1 ? 0 : index;
      const Index col = RowsAtCompileTime == 1 ? index : 0;
      ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
      return res;
    }

    // One packet of the product at (row, col), dispatched on the result's
    // storage order with the same unrolling policy as the scalar path.
    template<int LoadMode>
    EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
    {
      PacketScalar res;
      internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
                                    Unroll ? (InnerSize==0 ? 0 : InnerSize-1) : Dynamic,
                                    _LhsNested, _RhsNested, PacketScalar, LoadMode>
        ::run(row, col, m_lhs, m_rhs, res);
      return res;
    }

    // Implicit conversion to the nested type (trigger the evaluation of the product)
    EIGEN_STRONG_INLINE operator const PlainObject& () const
    {
      m_result.lazyAssign(*this);
      return m_result;
    }

    const _LhsNested& lhs() const { return m_lhs; }
    const _RhsNested& rhs() const { return m_rhs; }

    // The reinterpret_casts below only change the NestingFlags template
    // parameter of the product type, so that Diagonal nests it by reference.
    const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }

    template<int DiagonalIndex>
    const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }

    const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }

  protected:
    typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
    typename internal::add_const_on_value_type<RhsNested>::type m_rhs;

    // Cache filled by the conversion-to-PlainObject operator above.
    mutable PlainObject m_result;
};

namespace internal {

// here we need to overload the nested rule for products
// such that the nested type is a const reference to a plain matrix
template<typename Lhs, typename Rhs, int N, typename PlainObject>
struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
{
  typedef PlainObject const& type;
};

/***************************************************************************
* Normal product .coeff() implementation (with meta-unrolling)
***************************************************************************/

/**************************************
*** Scalar path - no vectorization ***
**************************************/

template<int UnrollingIndex,
typename Lhs, typename Rhs, typename RetScalar> 240 struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> 241 { 242 typedef typename Lhs::Index Index; 243 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) 244 { 245 product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res); 246 res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); 247 } 248 }; 249 250 template<typename Lhs, typename Rhs, typename RetScalar> 251 struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> 252 { 253 typedef typename Lhs::Index Index; 254 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) 255 { 256 res = lhs.coeff(row, 0) * rhs.coeff(0, col); 257 } 258 }; 259 260 template<typename Lhs, typename Rhs, typename RetScalar> 261 struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> 262 { 263 typedef typename Lhs::Index Index; 264 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) 265 { 266 res = (lhs.row(row).transpose().cwiseProduct( rhs.col(col) )).sum(); 267 } 268 }; 269 270 /******************************************* 271 *** Scalar path with inner vectorization *** 272 *******************************************/ 273 274 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet> 275 struct product_coeff_vectorized_unroller 276 { 277 typedef typename Lhs::Index Index; 278 enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; 279 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) 280 { 281 product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); 282 pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template 
packet<Aligned>(UnrollingIndex, col) )); 283 } 284 }; 285 286 template<typename Lhs, typename Rhs, typename Packet> 287 struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> 288 { 289 typedef typename Lhs::Index Index; 290 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) 291 { 292 pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col)); 293 } 294 }; 295 296 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> 297 struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> 298 { 299 typedef typename Lhs::PacketScalar Packet; 300 typedef typename Lhs::Index Index; 301 enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; 302 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) 303 { 304 Packet pres; 305 product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); 306 product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res); 307 res = predux(pres); 308 } 309 }; 310 311 template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime> 312 struct product_coeff_vectorized_dyn_selector 313 { 314 typedef typename Lhs::Index Index; 315 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 316 { 317 res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); 318 } 319 }; 320 321 // NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower 322 // NOTE maybe they are now useless since we have a specialization for Block<Matrix> 323 template<typename Lhs, typename Rhs, int RhsCols> 324 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> 325 { 326 typedef typename Lhs::Index Index; 327 static 
EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 328 { 329 res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); 330 } 331 }; 332 333 template<typename Lhs, typename Rhs, int LhsRows> 334 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> 335 { 336 typedef typename Lhs::Index Index; 337 static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 338 { 339 res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); 340 } 341 }; 342 343 template<typename Lhs, typename Rhs> 344 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> 345 { 346 typedef typename Lhs::Index Index; 347 static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 348 { 349 res = lhs.transpose().cwiseProduct(rhs).sum(); 350 } 351 }; 352 353 template<typename Lhs, typename Rhs, typename RetScalar> 354 struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar> 355 { 356 typedef typename Lhs::Index Index; 357 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) 358 { 359 product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res); 360 } 361 }; 362 363 /******************* 364 *** Packet path *** 365 *******************/ 366 367 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> 368 struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> 369 { 370 typedef typename Lhs::Index Index; 371 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) 372 { 373 product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); 374 res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); 375 } 376 }; 

// Column-major result packet: the lhs is read as packets and each rhs
// coefficient is broadcast (pset1), mirroring the RowMajor kernel above.
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
    res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
  }
};

// Recursion terminators: the first inner-product term initializes res.
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
  }
};

// Runtime inner dimension: initialize from the first term, then loop over
// the remaining lhs columns (the inner size).
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
  {
    // res would be left uninitialized for an empty inner dimension.
    eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
    for(Index i = 1; i < lhs.cols(); ++i)
      res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
  {
    // res would be left uninitialized for an empty inner dimension.
    eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
    for(Index i = 1; i < lhs.cols(); ++i)
      res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
  }
};

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_COEFFBASED_PRODUCT_H