1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
11 #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
12 
13 namespace Eigen {
14 
15 namespace internal {
16 
17 // pack a selfadjoint block diagonal for use with the gebp_kernel
18 template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
19 struct symm_pack_lhs
20 {
21   template<int BlockRows> inline
packsymm_pack_lhs22   void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
23   {
24     // normal copy
25     for(Index k=0; k<i; k++)
26       for(Index w=0; w<BlockRows; w++)
27         blockA[count++] = lhs(i+w,k);           // normal
28     // symmetric copy
29     Index h = 0;
30     for(Index k=i; k<i+BlockRows; k++)
31     {
32       for(Index w=0; w<h; w++)
33         blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
34 
35       blockA[count++] = numext::real(lhs(k,k));   // real (diagonal)
36 
37       for(Index w=h+1; w<BlockRows; w++)
38         blockA[count++] = lhs(i+w, k);          // normal
39       ++h;
40     }
41     // transposed copy
42     for(Index k=i+BlockRows; k<cols; k++)
43       for(Index w=0; w<BlockRows; w++)
44         blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
45   }
operatorsymm_pack_lhs46   void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
47   {
48     enum { PacketSize = packet_traits<Scalar>::size };
49     const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
50     Index count = 0;
51     //Index peeled_mc3 = (rows/Pack1)*Pack1;
52 
53     const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
54     const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
55     const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
56 
57     if(Pack1>=3*PacketSize)
58       for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
59         pack<3*PacketSize>(blockA, lhs, cols, i, count);
60 
61     if(Pack1>=2*PacketSize)
62       for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
63         pack<2*PacketSize>(blockA, lhs, cols, i, count);
64 
65     if(Pack1>=1*PacketSize)
66       for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
67         pack<1*PacketSize>(blockA, lhs, cols, i, count);
68 
69     // do the same with mr==1
70     for(Index i=peeled_mc1; i<rows; i++)
71     {
72       for(Index k=0; k<i; k++)
73         blockA[count++] = lhs(i, k);                   // normal
74 
75       blockA[count++] = numext::real(lhs(i, i));       // real (diagonal)
76 
77       for(Index k=i+1; k<cols; k++)
78         blockA[count++] = numext::conj(lhs(k, i));     // transposed
79     }
80   }
81 };
82 
83 template<typename Scalar, typename Index, int nr, int StorageOrder>
84 struct symm_pack_rhs
85 {
86   enum { PacketSize = packet_traits<Scalar>::size };
operatorsymm_pack_rhs87   void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
88   {
89     Index end_k = k2 + rows;
90     Index count = 0;
91     const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
92     Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
93     Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
94 
95     // first part: normal case
96     for(Index j2=0; j2<k2; j2+=nr)
97     {
98       for(Index k=k2; k<end_k; k++)
99       {
100         blockB[count+0] = rhs(k,j2+0);
101         blockB[count+1] = rhs(k,j2+1);
102         if (nr>=4)
103         {
104           blockB[count+2] = rhs(k,j2+2);
105           blockB[count+3] = rhs(k,j2+3);
106         }
107         if (nr>=8)
108         {
109           blockB[count+4] = rhs(k,j2+4);
110           blockB[count+5] = rhs(k,j2+5);
111           blockB[count+6] = rhs(k,j2+6);
112           blockB[count+7] = rhs(k,j2+7);
113         }
114         count += nr;
115       }
116     }
117 
118     // second part: diagonal block
119     Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
120     if(nr>=8)
121     {
122       for(Index j2=k2; j2<end8; j2+=8)
123       {
124         // again we can split vertically in three different parts (transpose, symmetric, normal)
125         // transpose
126         for(Index k=k2; k<j2; k++)
127         {
128           blockB[count+0] = numext::conj(rhs(j2+0,k));
129           blockB[count+1] = numext::conj(rhs(j2+1,k));
130           blockB[count+2] = numext::conj(rhs(j2+2,k));
131           blockB[count+3] = numext::conj(rhs(j2+3,k));
132           blockB[count+4] = numext::conj(rhs(j2+4,k));
133           blockB[count+5] = numext::conj(rhs(j2+5,k));
134           blockB[count+6] = numext::conj(rhs(j2+6,k));
135           blockB[count+7] = numext::conj(rhs(j2+7,k));
136           count += 8;
137         }
138         // symmetric
139         Index h = 0;
140         for(Index k=j2; k<j2+8; k++)
141         {
142           // normal
143           for (Index w=0 ; w<h; ++w)
144             blockB[count+w] = rhs(k,j2+w);
145 
146           blockB[count+h] = numext::real(rhs(k,k));
147 
148           // transpose
149           for (Index w=h+1 ; w<8; ++w)
150             blockB[count+w] = numext::conj(rhs(j2+w,k));
151           count += 8;
152           ++h;
153         }
154         // normal
155         for(Index k=j2+8; k<end_k; k++)
156         {
157           blockB[count+0] = rhs(k,j2+0);
158           blockB[count+1] = rhs(k,j2+1);
159           blockB[count+2] = rhs(k,j2+2);
160           blockB[count+3] = rhs(k,j2+3);
161           blockB[count+4] = rhs(k,j2+4);
162           blockB[count+5] = rhs(k,j2+5);
163           blockB[count+6] = rhs(k,j2+6);
164           blockB[count+7] = rhs(k,j2+7);
165           count += 8;
166         }
167       }
168     }
169     if(nr>=4)
170     {
171       for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
172       {
173         // again we can split vertically in three different parts (transpose, symmetric, normal)
174         // transpose
175         for(Index k=k2; k<j2; k++)
176         {
177           blockB[count+0] = numext::conj(rhs(j2+0,k));
178           blockB[count+1] = numext::conj(rhs(j2+1,k));
179           blockB[count+2] = numext::conj(rhs(j2+2,k));
180           blockB[count+3] = numext::conj(rhs(j2+3,k));
181           count += 4;
182         }
183         // symmetric
184         Index h = 0;
185         for(Index k=j2; k<j2+4; k++)
186         {
187           // normal
188           for (Index w=0 ; w<h; ++w)
189             blockB[count+w] = rhs(k,j2+w);
190 
191           blockB[count+h] = numext::real(rhs(k,k));
192 
193           // transpose
194           for (Index w=h+1 ; w<4; ++w)
195             blockB[count+w] = numext::conj(rhs(j2+w,k));
196           count += 4;
197           ++h;
198         }
199         // normal
200         for(Index k=j2+4; k<end_k; k++)
201         {
202           blockB[count+0] = rhs(k,j2+0);
203           blockB[count+1] = rhs(k,j2+1);
204           blockB[count+2] = rhs(k,j2+2);
205           blockB[count+3] = rhs(k,j2+3);
206           count += 4;
207         }
208       }
209     }
210 
211     // third part: transposed
212     if(nr>=8)
213     {
214       for(Index j2=k2+rows; j2<packet_cols8; j2+=8)
215       {
216         for(Index k=k2; k<end_k; k++)
217         {
218           blockB[count+0] = numext::conj(rhs(j2+0,k));
219           blockB[count+1] = numext::conj(rhs(j2+1,k));
220           blockB[count+2] = numext::conj(rhs(j2+2,k));
221           blockB[count+3] = numext::conj(rhs(j2+3,k));
222           blockB[count+4] = numext::conj(rhs(j2+4,k));
223           blockB[count+5] = numext::conj(rhs(j2+5,k));
224           blockB[count+6] = numext::conj(rhs(j2+6,k));
225           blockB[count+7] = numext::conj(rhs(j2+7,k));
226           count += 8;
227         }
228       }
229     }
230     if(nr>=4)
231     {
232       for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
233       {
234         for(Index k=k2; k<end_k; k++)
235         {
236           blockB[count+0] = numext::conj(rhs(j2+0,k));
237           blockB[count+1] = numext::conj(rhs(j2+1,k));
238           blockB[count+2] = numext::conj(rhs(j2+2,k));
239           blockB[count+3] = numext::conj(rhs(j2+3,k));
240           count += 4;
241         }
242       }
243     }
244 
245     // copy the remaining columns one at a time (=> the same with nr==1)
246     for(Index j2=packet_cols4; j2<cols; ++j2)
247     {
248       // transpose
249       Index half = (std::min)(end_k,j2);
250       for(Index k=k2; k<half; k++)
251       {
252         blockB[count] = numext::conj(rhs(j2,k));
253         count += 1;
254       }
255 
256       if(half==j2 && half<k2+rows)
257       {
258         blockB[count] = numext::real(rhs(j2,j2));
259         count += 1;
260       }
261       else
262         half--;
263 
264       // normal
265       for(Index k=half+1; k<k2+rows; k++)
266       {
267         blockB[count] = rhs(k,j2);
268         count += 1;
269       }
270     }
271   }
272 };
273 
274 /* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
275  * the general matrix matrix product.
276  */
277 template <typename Scalar, typename Index,
278           int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
279           int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
280           int ResStorageOrder>
281 struct product_selfadjoint_matrix;
282 
283 template <typename Scalar, typename Index,
284           int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
285           int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
286 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
287 {
288 
289   static EIGEN_STRONG_INLINE void run(
290     Index rows, Index cols,
291     const Scalar* lhs, Index lhsStride,
292     const Scalar* rhs, Index rhsStride,
293     Scalar* res,       Index resStride,
294     const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
295   {
296     product_selfadjoint_matrix<Scalar, Index,
297       EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
298       RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
299       EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
300       LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
301       ColMajor>
302       ::run(cols, rows,  rhs, rhsStride,  lhs, lhsStride,  res, resStride,  alpha, blocking);
303   }
304 };
305 
306 template <typename Scalar, typename Index,
307           int LhsStorageOrder, bool ConjugateLhs,
308           int RhsStorageOrder, bool ConjugateRhs>
309 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
310 {
311 
312   static EIGEN_DONT_INLINE void run(
313     Index rows, Index cols,
314     const Scalar* _lhs, Index lhsStride,
315     const Scalar* _rhs, Index rhsStride,
316     Scalar* res,        Index resStride,
317     const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
318 };
319 
320 template <typename Scalar, typename Index,
321           int LhsStorageOrder, bool ConjugateLhs,
322           int RhsStorageOrder, bool ConjugateRhs>
323 EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
324     Index rows, Index cols,
325     const Scalar* _lhs, Index lhsStride,
326     const Scalar* _rhs, Index rhsStride,
327     Scalar* _res,        Index resStride,
328     const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
329   {
330     Index size = rows;
331 
332     typedef gebp_traits<Scalar,Scalar> Traits;
333 
334     typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
335     typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
336     typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
337     typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
338     LhsMapper lhs(_lhs,lhsStride);
339     LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
340     RhsMapper rhs(_rhs,rhsStride);
341     ResMapper res(_res, resStride);
342 
343     Index kc = blocking.kc();                   // cache block size along the K direction
344     Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
345     // kc must be smaller than mc
346     kc = (std::min)(kc,mc);
347     std::size_t sizeA = kc*mc;
348     std::size_t sizeB = kc*cols;
349     ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
350     ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
351 
352     gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
353     symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
354     gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
355     gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
356 
357     for(Index k2=0; k2<size; k2+=kc)
358     {
359       const Index actual_kc = (std::min)(k2+kc,size)-k2;
360 
361       // we have selected one row panel of rhs and one column panel of lhs
362       // pack rhs's panel into a sequential chunk of memory
363       // and expand each coeff to a constant packet for further reuse
364       pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);
365 
366       // the select lhs's panel has to be split in three different parts:
367       //  1 - the transposed panel above the diagonal block => transposed packed copy
368       //  2 - the diagonal block => special packed copy
369       //  3 - the panel below the diagonal block => generic packed copy
370       for(Index i2=0; i2<k2; i2+=mc)
371       {
372         const Index actual_mc = (std::min)(i2+mc,k2)-i2;
373         // transposed packed copy
374         pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
375 
376         gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
377       }
378       // the block diagonal
379       {
380         const Index actual_mc = (std::min)(k2+kc,size)-k2;
381         // symmetric packed copy
382         pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
383 
384         gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
385       }
386 
387       for(Index i2=k2+kc; i2<size; i2+=mc)
388       {
389         const Index actual_mc = (std::min)(i2+mc,size)-i2;
390         gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
391           (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
392 
393         gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
394       }
395     }
396   }
397 
398 // matrix * selfadjoint product
399 template <typename Scalar, typename Index,
400           int LhsStorageOrder, bool ConjugateLhs,
401           int RhsStorageOrder, bool ConjugateRhs>
402 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
403 {
404 
405   static EIGEN_DONT_INLINE void run(
406     Index rows, Index cols,
407     const Scalar* _lhs, Index lhsStride,
408     const Scalar* _rhs, Index rhsStride,
409     Scalar* res,        Index resStride,
410     const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
411 };
412 
413 template <typename Scalar, typename Index,
414           int LhsStorageOrder, bool ConjugateLhs,
415           int RhsStorageOrder, bool ConjugateRhs>
416 EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
417     Index rows, Index cols,
418     const Scalar* _lhs, Index lhsStride,
419     const Scalar* _rhs, Index rhsStride,
420     Scalar* _res,        Index resStride,
421     const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
422   {
423     Index size = cols;
424 
425     typedef gebp_traits<Scalar,Scalar> Traits;
426 
427     typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
428     typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
429     LhsMapper lhs(_lhs,lhsStride);
430     ResMapper res(_res,resStride);
431 
432     Index kc = blocking.kc();                   // cache block size along the K direction
433     Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
434     std::size_t sizeA = kc*mc;
435     std::size_t sizeB = kc*cols;
436     ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
437     ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
438 
439     gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
440     gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
441     symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
442 
443     for(Index k2=0; k2<size; k2+=kc)
444     {
445       const Index actual_kc = (std::min)(k2+kc,size)-k2;
446 
447       pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
448 
449       // => GEPP
450       for(Index i2=0; i2<rows; i2+=mc)
451       {
452         const Index actual_mc = (std::min)(i2+mc,rows)-i2;
453         pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
454 
455         gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
456       }
457     }
458   }
459 
460 } // end namespace internal
461 
462 /***************************************************************************
463 * Wrapper to product_selfadjoint_matrix
464 ***************************************************************************/
465 
466 namespace internal {
467 
468 template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
469 struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
470 {
471   typedef typename Product<Lhs,Rhs>::Scalar Scalar;
472 
473   typedef internal::blas_traits<Lhs> LhsBlasTraits;
474   typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
475   typedef internal::blas_traits<Rhs> RhsBlasTraits;
476   typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
477 
478   enum {
479     LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
480     LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
481     RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
482     RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
483   };
484 
485   template<typename Dest>
486   static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
487   {
488     eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
489 
490     typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
491     typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
492 
493     Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
494                                * RhsBlasTraits::extractScalarFactor(a_rhs);
495 
496     typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
497               Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;
498 
499     BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);
500 
501     internal::product_selfadjoint_matrix<Scalar, Index,
502       EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
503       NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
504       EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
505       NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
506       internal::traits<Dest>::Flags&RowMajorBit  ? RowMajor : ColMajor>
507       ::run(
508         lhs.rows(), rhs.cols(),                 // sizes
509         &lhs.coeffRef(0,0), lhs.outerStride(),  // lhs info
510         &rhs.coeffRef(0,0), rhs.outerStride(),  // rhs info
511         &dst.coeffRef(0,0), dst.outerStride(),  // result info
512         actualAlpha, blocking                   // alpha
513       );
514   }
515 };
516 
517 } // end namespace internal
518 
519 } // end namespace Eigen
520 
521 #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
522