// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_COEFFBASED_PRODUCT_H
#define EIGEN_COEFFBASED_PRODUCT_H

namespace Eigen {

namespace internal {

/*********************************************************************************
*  Coefficient based product implementation.
*  It is designed for the following use cases:
*  - small fixed sizes
*  - lazy products
*********************************************************************************/

/* Since all the dimensions of the product are small, we can rely on the generic
 * Assign mechanism to evaluate the product per coefficient (or per packet).
 *
 * Note that the inner loops should always be unrolled here.
 */
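/* Illustrative usage sketch (not part of the library code): for a small fixed-size
 * product such as
 *
 *   Eigen::Matrix3f A, B;
 *   Eigen::Matrix3f C = A.lazyProduct(B);   // A*B typically takes the same
 *                                           // coefficient-based path for small sizes
 *
 * each coefficient C(i,j) is evaluated through the regular Assign mechanism as the
 * fully unrolled sum A(i,0)*B(0,j) + A(i,1)*B(1,j) + A(i,2)*B(2,j).
 */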
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl;

template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl;

template<typename LhsNested, typename RhsNested, int NestingFlags>
struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
{
  typedef MatrixXpr XprKind;
  typedef typename remove_all<LhsNested>::type _LhsNested;
  typedef typename remove_all<RhsNested>::type _RhsNested;
  typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
  typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
                                        typename traits<_RhsNested>::StorageKind>::ret StorageKind;
  typedef typename promote_index_type<typename traits<_LhsNested>::Index,
                                      typename traits<_RhsNested>::Index>::type Index;

  enum {
    LhsCoeffReadCost = _LhsNested::CoeffReadCost,
    RhsCoeffReadCost = _RhsNested::CoeffReadCost,
    LhsFlags = _LhsNested::Flags,
    RhsFlags = _RhsNested::Flags,

    RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
    ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),

    MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,

    LhsRowMajor = LhsFlags & RowMajorBit,
    RhsRowMajor = RhsFlags & RowMajorBit,

    SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,

    CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
                    && (ColsAtCompileTime == Dynamic
                          || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
                              && (RhsFlags&AlignedBit)
                             )
                       ),

    CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
                    && (RowsAtCompileTime == Dynamic
                          || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
                              && (LhsFlags&AlignedBit)
                             )
                       ),

    EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
                   : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
                   : (RhsRowMajor && !CanVectorizeLhs),

    Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
          | (EvalToRowMajor ? RowMajorBit : 0)
          | NestingFlags
          | (LhsFlags & RhsFlags & AlignedBit)
          // TODO enable vectorization for mixed types
          | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),

    CoeffReadCost = InnerSize == Dynamic ? Dynamic
                  : InnerSize == 0 ? 0
                  : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
                    + (InnerSize - 1) * NumTraits<Scalar>::AddCost,

    /* CanVectorizeInner deserves a special explanation. It does not affect the product flags, and it is not used
     * outside of Product. Even if the Product itself is not a packet-access expression, there is still a chance
     * that its inner loop can be vectorized. This is the meaning of CanVectorizeInner. Since it does not affect
     * the Flags, it is safe to make this value depend on ActualPacketAccessBit, which does not affect the ABI.
     */
    CanVectorizeInner =    SameType
                        && LhsRowMajor
                        && (!RhsRowMajor)
                        && (LhsFlags & RhsFlags & ActualPacketAccessBit)
                        && (LhsFlags & RhsFlags & AlignedBit)
                        && (InnerSize % packet_traits<Scalar>::size == 0)
  };
};
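/* Worked example (illustrative): for a 3x3 product of plain float matrices, where
 * NumTraits<float>::MulCost, NumTraits<float>::AddCost and both coefficient read
 * costs are 1, InnerSize == 3 and therefore
 *
 *   CoeffReadCost = 3 * (1 + 1 + 1)   // one mul + one lhs read + one rhs read per term
 *                 + 2 * 1             // two adds
 *                 = 11
 *
 * which is well below the default EIGEN_UNROLLING_LIMIT, so the inner loop of such a
 * product is fully unrolled (see the Unroll enum in CoeffBasedProduct below).
 */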
} // end namespace internal

template<typename LhsNested, typename RhsNested, int NestingFlags>
class CoeffBasedProduct
  : internal::no_assignment_operator,
    public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
{
  public:

    typedef MatrixBase<CoeffBasedProduct> Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
    typedef typename Base::PlainObject PlainObject;

  private:

    typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
    typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;

    enum {
      PacketSize = internal::packet_traits<Scalar>::size,
      InnerSize  = internal::traits<CoeffBasedProduct>::InnerSize,
      Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
      CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
    };

    typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
                                         Unroll ? InnerSize : Dynamic,
                                         _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;

    typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;

  public:

    inline CoeffBasedProduct(const CoeffBasedProduct& other)
      : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
    {}

    template<typename Lhs, typename Rhs>
    inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
      : m_lhs(lhs), m_rhs(rhs)
    {
      // We don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
      // We still allow mixing T and complex<T>.
      EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
      eigen_assert(lhs.cols() == rhs.rows()
        && "invalid matrix product"
        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
    }

    EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
    EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }

    EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
    {
      Scalar res;
      ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
      return res;
    }

    /* Allow index-based non-packet access. It is however impossible to allow index-based packet access,
     * which is why we don't set the LinearAccessBit.
     */
    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
    {
      Scalar res;
      const Index row = RowsAtCompileTime == 1 ? 0 : index;
      const Index col = RowsAtCompileTime == 1 ? index : 0;
      ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
      return res;
    }

    template<int LoadMode>
    EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
    {
      PacketScalar res;
      internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
                                    Unroll ? InnerSize : Dynamic,
                                    _LhsNested, _RhsNested, PacketScalar, LoadMode>
        ::run(row, col, m_lhs, m_rhs, res);
      return res;
    }

    // Implicit conversion to the nested type (triggers the evaluation of the product)
    EIGEN_STRONG_INLINE operator const PlainObject& () const
    {
      m_result.lazyAssign(*this);
      return m_result;
    }

    const _LhsNested& lhs() const { return m_lhs; }
    const _RhsNested& rhs() const { return m_rhs; }

    const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }

    template<int DiagonalIndex>
    const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }

    const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }

  protected:
    typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
    typename internal::add_const_on_value_type<RhsNested>::type m_rhs;

    mutable PlainObject m_result;
};
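/* Illustrative note (not library code): the diagonal() overloads above re-nest the
 * product as a lazy coefficient-based expression, so that, assuming small fixed-size
 * matrices,
 *
 *   Eigen::Matrix4f A, B;
 *   float t = (A * B).diagonal().sum();
 *
 * only evaluates the 4 diagonal coefficients (4 inner products of length 4) rather
 * than the full 4x4 product.
 */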
namespace internal {

// Here we need to overload the nested rule for products
// such that the nested type is a const reference to a plain matrix.
template<typename Lhs, typename Rhs, int N, typename PlainObject>
struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
{
  typedef PlainObject const& type;
};

/***************************************************************************
* Normal product .coeff() implementation (with meta-unrolling)
***************************************************************************/

/**************************************
*** Scalar path - no vectorization ***
**************************************/

template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
    product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
    res += lhs.coeff(row, UnrollingIndex-1) * rhs.coeff(UnrollingIndex-1, col);
  }
};

// Base case of the unrolling: the first term of the sum.
template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, 1, Lhs, Rhs, RetScalar>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
  }
};

// Empty inner dimension: the coefficient is simply zero.
template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res)
  {
    res = RetScalar(0);
  }
};

// Non-unrolled case: evaluate the coefficient as a runtime coefficient-wise reduction.
template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
  {
    res = (lhs.row(row).transpose().cwiseProduct( rhs.col(col) )).sum();
  }
};

/*******************************************
*** Scalar path with inner vectorization ***
*******************************************/

template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
struct product_coeff_vectorized_unroller
{
  typedef typename Lhs::Index Index;
  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
  {
    product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
    pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
  }
};

template<typename Lhs, typename Rhs, typename Packet>
struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
  {
    pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
  }
};

template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<InnerVectorizedTraversal, 0, Lhs, Rhs, RetScalar>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res)
  {
    res = 0;
  }
};

template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
  typedef typename Lhs::PacketScalar Packet;
  typedef typename Lhs::Index Index;
  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
    Packet pres;
    product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
    res = predux(pres);
  }
};
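/* Illustrative expansion (not library code), assuming a 4-lane packet (e.g. float
 * with SSE) and InnerSize == 8: product_coeff_impl<InnerVectorizedTraversal, 8, ...>
 * effectively computes
 *
 *   Packet p = pmul(lhs.packet<Aligned>(row, 0), rhs.packet<Aligned>(0, col));
 *   p        = padd(p, pmul(lhs.packet<Aligned>(row, 4), rhs.packet<Aligned>(4, col)));
 *   res      = predux(p);   // horizontal sum of the 4 lanes
 *
 * i.e. the length-8 inner product is performed with two packet multiplies, one packet
 * add and one final horizontal reduction.
 */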
template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
struct product_coeff_vectorized_dyn_selector
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
  }
};

// NOTE the following 3 specializations exist because taking .col(0) on a vector is a bit slower.
// NOTE maybe they are now useless since we have a specialization for Block<Matrix>.
template<typename Lhs, typename Rhs, int RhsCols>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
  }
};

template<typename Lhs, typename Rhs, int LhsRows>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
  }
};

template<typename Lhs, typename Rhs>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = lhs.transpose().cwiseProduct(rhs).sum();
  }
};

template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
  }
};

/*******************
*** Packet path ***
*******************/

template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
    res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode>(UnrollingIndex-1, col), res);
  }
};

template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
    res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
  }
};
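/* Illustrative expansion (not library code), assuming a column-major lhs, a 4-lane
 * packet and an inner size of 2: product_packet_impl<ColMajor, 2, ...> computes 4
 * vertically adjacent coefficients of the result column `col` at once:
 *
 *   res = pmul (lhs.packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
 *   res = pmadd(lhs.packet<LoadMode>(row, 1), pset1<Packet>(rhs.coeff(1, col)), res);
 *
 * i.e. each rhs coefficient is broadcast across the packet and multiplied with a packet
 * loaded from the corresponding lhs column.
 */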
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res)
  {
    res = pset1<Packet>(0);
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res)
  {
    res = pset1<Packet>(0);
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
  {
    res = pset1<Packet>(0);
    for(Index i = 0; i < lhs.cols(); ++i)
      res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
  {
    res = pset1<Packet>(0);
    for(Index i = 0; i < lhs.cols(); ++i)
      res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
  }
};

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_COEFFBASED_PRODUCT_H