// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H

namespace Eigen {

namespace internal {

// pack a selfadjoint block diagonal for use with the gebp_kernel
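// (Sketch, for orientation.) Only one triangle of the selfadjoint lhs is read;
// the packed block reconstructs the missing triangle on the fly so that
// gebp_kernel can consume it as a plain dense panel. For a lower-stored block
// of BlockRows==3 rows starting at row/column i, the packed column emitted for
// depth index k of the diagonal block is
//   [ conj(lhs(k,i)) .. conj(lhs(k,k-1)), real(lhs(k,k)), lhs(k+1,k) .. lhs(i+2,k) ]
// i.e. the entries above the diagonal are mirrored from the stored ones with
// conjugation, and the diagonal entry is forced to be real.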
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
struct symm_pack_lhs
{
  template<int BlockRows> inline
  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
  {
    // normal copy
    for(Index k=0; k<i; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = lhs(i+w,k);           // normal
    // symmetric copy
    Index h = 0;
    for(Index k=i; k<i+BlockRows; k++)
    {
      for(Index w=0; w<h; w++)
        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed

      blockA[count++] = numext::real(lhs(k,k));   // real (diagonal)

      for(Index w=h+1; w<BlockRows; w++)
        blockA[count++] = lhs(i+w, k);          // normal
      ++h;
    }
    // transposed copy
    for(Index k=i+BlockRows; k<cols; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
  }
  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
  {
    const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
    Index count = 0;
    Index peeled_mc = (rows/Pack1)*Pack1;
    for(Index i=0; i<peeled_mc; i+=Pack1)
    {
      pack<Pack1>(blockA, lhs, cols, i, count);
    }

    if(rows-peeled_mc>=Pack2)
    {
      pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
      peeled_mc += Pack2;
    }

    // do the same with mr==1
    for(Index i=peeled_mc; i<rows; i++)
    {
      for(Index k=0; k<i; k++)
        blockA[count++] = lhs(i, k);              // normal

      blockA[count++] = numext::real(lhs(i, i));       // real (diagonal)

      for(Index k=i+1; k<cols; k++)
        blockA[count++] = numext::conj(lhs(k, i));     // transposed
    }
  }
};

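// symm_pack_rhs plays the analogous role for a selfadjoint rhs. Relative to the
// current diagonal block located at k2, the packed panel is assembled from three
// horizontal parts: columns left of the block (plain copy), the diagonal block
// itself (mirrored copy with a real diagonal), and columns right of the block
// (conjugate-transposed copy), followed by a scalar loop for the columns that
// do not fill a complete group of nr.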
template<typename Scalar, typename Index, int nr, int StorageOrder>
struct symm_pack_rhs
{
  enum { PacketSize = packet_traits<Scalar>::size };
  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
  {
    Index end_k = k2 + rows;
    Index count = 0;
    const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
    Index packet_cols = (cols/nr)*nr;

    // first part: normal case
    for(Index j2=0; j2<k2; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // second part: diagonal block
    for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
    {
      // again we can split vertically into three different parts (transpose, symmetric, normal)
      // transpose
      for(Index k=k2; k<j2; k++)
      {
        blockB[count+0] = numext::conj(rhs(j2+0,k));
        blockB[count+1] = numext::conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
        }
        count += nr;
      }
      // symmetric
      Index h = 0;
      for(Index k=j2; k<j2+nr; k++)
      {
        // normal
        for (Index w=0 ; w<h; ++w)
          blockB[count+w] = rhs(k,j2+w);

        blockB[count+h] = numext::real(rhs(k,k));

        // transpose
        for (Index w=h+1 ; w<nr; ++w)
          blockB[count+w] = numext::conj(rhs(j2+w,k));
        count += nr;
        ++h;
      }
      // normal
      for(Index k=j2+nr; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // third part: transposed
    for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = numext::conj(rhs(j2+0,k));
        blockB[count+1] = numext::conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
        }
        count += nr;
      }
    }

    // copy the remaining columns one at a time (=> the same with nr==1)
    for(Index j2=packet_cols; j2<cols; ++j2)
    {
      // transpose
      Index half = (std::min)(end_k,j2);
      for(Index k=k2; k<half; k++)
      {
        blockB[count] = numext::conj(rhs(j2,k));
        count += 1;
      }

      if(half==j2 && half<k2+rows)
      {
        blockB[count] = numext::real(rhs(j2,j2));
        count += 1;
      }
      else
        half--;

      // normal
      for(Index k=half+1; k<k2+rows; k++)
      {
        blockB[count] = rhs(k,j2);
        count += 1;
      }
    }
  }
};

/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
 * the general matrix matrix product.
 */
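// A minimal usage sketch (not part of this file's machinery): assuming a
// dynamically sized, lower-stored selfadjoint matrix A, the expression below
// is what ends up routed to product_selfadjoint_matrix::run().
//
//   #include <Eigen/Dense>
//   using namespace Eigen;
//   MatrixXd A = MatrixXd::Random(100,100);       // only the Lower part is referenced
//   MatrixXd B = MatrixXd::Random(100,20);
//   MatrixXd C = A.selfadjointView<Lower>() * B;  // SYMM-like product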
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
          int ResStorageOrder>
struct product_selfadjoint_matrix;

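// The RowMajor-result specialization that follows does not implement a kernel
// of its own: it evaluates the transposed product C^T = B^T * A^T with the
// ColMajor kernel, swapping the roles of lhs and rhs. A general operand is
// transposed by flipping its storage order; a selfadjoint operand equals its
// own adjoint, so its transpose is obtained from the same storage by toggling
// conjugation instead. The EIGEN_LOGICAL_XOR expressions below encode this.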
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
{

  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols,
    const Scalar* lhs, Index lhsStride,
    const Scalar* rhs, Index rhsStride,
    Scalar* res,       Index resStride,
    const Scalar& alpha)
  {
    product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
      EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
      ColMajor>
      ::run(cols, rows,  rhs, rhsStride,  lhs, lhsStride,  res, resStride,  alpha);
  }
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resStride,
    const Scalar& alpha);
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resStride,
    const Scalar& alpha)
  {
    Index size = rows;

    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
    const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);

    typedef gebp_traits<Scalar,Scalar> Traits;

    Index kc = size;  // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    // kc must not be larger than mc: the packed kc x kc diagonal block has to fit into blockA, which holds kc*mc scalars
    kc = (std::min)(kc,mc);

    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      // we have selected one row panel of rhs and one column panel of lhs
      // pack rhs's panel into a sequential chunk of memory
      // and expand each coeff to a constant packet for further reuse
      pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);

      // the selected lhs's panel has to be split into three different parts:
      //  1 - the transposed panel above the diagonal block => transposed packed copy
      //  2 - the diagonal block => special packed copy
      //  3 - the panel below the diagonal block => generic packed copy
      for(Index i2=0; i2<k2; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,k2)-i2;
        // transposed packed copy
        pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
      // the block diagonal
      {
        const Index actual_mc = (std::min)(k2+kc,size)-k2;
        // symmetric packed copy
        pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }

      for(Index i2=k2+kc; i2<size; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,size)-i2;
        gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
          (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }

// matrix * selfadjoint product
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resStride,
    const Scalar& alpha);
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resStride,
    const Scalar& alpha)
  {
    Index size = cols;

    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);

    typedef gebp_traits<Scalar,Scalar> Traits;

    Index kc = size; // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

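      // pack the current horizontal panel of the selfadjoint rhs; symm_pack_rhs
      // expands the stored triangle so the panel reaches gebp_kernel as a plain
      // dense block (k2 locates the diagonal block inside the panel)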
      pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);

      // => GEPP
      for(Index i2=0; i2<rows; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,rows)-i2;
        pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }

} // end namespace internal

/***************************************************************************
* Wrapper to product_selfadjoint_matrix
***************************************************************************/

namespace internal {
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
  : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
}

template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
  : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
{
  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)

  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}

  enum {
    LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
    LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
    RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
    RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
  };

  template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
  {
    eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());

    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                               * RhsBlasTraits::extractScalarFactor(m_rhs);

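    // map each operand onto the form expected by the kernel (which reads the
    // lower triangle): an Upper-stored selfadjoint view is accessed through the
    // opposite storage order and, for complex scalars, with its conjugation
    // flag toggled, hence the EIGEN_LOGICAL_XOR terms below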
    internal::product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(LhsIsUpper,
                        internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
      EIGEN_LOGICAL_XOR(RhsIsUpper,
                        internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
      internal::traits<Dest>::Flags&RowMajorBit  ? RowMajor : ColMajor>
      ::run(
        lhs.rows(), rhs.cols(),                 // sizes
        &lhs.coeffRef(0,0),    lhs.outerStride(),  // lhs info
        &rhs.coeffRef(0,0),    rhs.outerStride(),  // rhs info
        &dst.coeffRef(0,0), dst.outerStride(),  // result info
        actualAlpha                             // alpha
      );
  }
};

} // end namespace Eigen

#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H