1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #include "common.h"
11 
EIGEN_BLAS_FUNC(gemm)12 int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc)
13 {
14 //   std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " << *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n";
15   typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar, internal::level3_blocking<Scalar,Scalar>&, Eigen::internal::GemmParallelInfo<DenseIndex>*);
16   static functype func[12];
17 
18   static bool init = false;
19   if(!init)
20   {
21     for(int k=0; k<12; ++k)
22       func[k] = 0;
23     func[NOTR  | (NOTR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,ColMajor,false,ColMajor>::run);
24     func[TR    | (NOTR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,false,ColMajor>::run);
25     func[ADJ   | (NOTR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor>::run);
26     func[NOTR  | (TR   << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,false,ColMajor>::run);
27     func[TR    | (TR   << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,false,ColMajor>::run);
28     func[ADJ   | (TR   << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,false,ColMajor>::run);
29     func[NOTR  | (ADJ  << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor>::run);
30     func[TR    | (ADJ  << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,Conj, ColMajor>::run);
31     func[ADJ   | (ADJ  << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,Conj, ColMajor>::run);
32     init = true;
33   }
34 
35   Scalar* a = reinterpret_cast<Scalar*>(pa);
36   Scalar* b = reinterpret_cast<Scalar*>(pb);
37   Scalar* c = reinterpret_cast<Scalar*>(pc);
38   Scalar alpha  = *reinterpret_cast<Scalar*>(palpha);
39   Scalar beta   = *reinterpret_cast<Scalar*>(pbeta);
40 
41   int info = 0;
42   if(OP(*opa)==INVALID)                                               info = 1;
43   else if(OP(*opb)==INVALID)                                          info = 2;
44   else if(*m<0)                                                       info = 3;
45   else if(*n<0)                                                       info = 4;
46   else if(*k<0)                                                       info = 5;
47   else if(*lda<std::max(1,(OP(*opa)==NOTR)?*m:*k))                    info = 8;
48   else if(*ldb<std::max(1,(OP(*opb)==NOTR)?*k:*n))                    info = 10;
49   else if(*ldc<std::max(1,*m))                                        info = 13;
50   if(info)
51     return xerbla_(SCALAR_SUFFIX_UP"GEMM ",&info,6);
52 
53   if(beta!=Scalar(1))
54   {
55     if(beta==Scalar(0)) matrix(c, *m, *n, *ldc).setZero();
56     else                matrix(c, *m, *n, *ldc) *= beta;
57   }
58 
59   internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*m,*n,*k);
60 
61   int code = OP(*opa) | (OP(*opb) << 2);
62   func[code](*m, *n, *k, a, *lda, b, *ldb, c, *ldc, alpha, blocking, 0);
63   return 0;
64 }
65 
EIGEN_BLAS_FUNC(trsm)66 int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha,  RealScalar *pa, int *lda, RealScalar *pb, int *ldb)
67 {
68 //   std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n";
69   typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, internal::level3_blocking<Scalar,Scalar>&);
70   static functype func[32];
71 
72   static bool init = false;
73   if(!init)
74   {
75     for(int k=0; k<32; ++k)
76       func[k] = 0;
77 
78     func[NOTR  | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0,          false,ColMajor,ColMajor>::run);
79     func[TR    | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0,          false,RowMajor,ColMajor>::run);
80     func[ADJ   | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0,          Conj, RowMajor,ColMajor>::run);
81 
82     func[NOTR  | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0,          false,ColMajor,ColMajor>::run);
83     func[TR    | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0,          false,RowMajor,ColMajor>::run);
84     func[ADJ   | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0,          Conj, RowMajor,ColMajor>::run);
85 
86     func[NOTR  | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0,          false,ColMajor,ColMajor>::run);
87     func[TR    | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0,          false,RowMajor,ColMajor>::run);
88     func[ADJ   | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0,          Conj, RowMajor,ColMajor>::run);
89 
90     func[NOTR  | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0,          false,ColMajor,ColMajor>::run);
91     func[TR    | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0,          false,RowMajor,ColMajor>::run);
92     func[ADJ   | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0,          Conj, RowMajor,ColMajor>::run);
93 
94 
95     func[NOTR  | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,ColMajor,ColMajor>::run);
96     func[TR    | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,RowMajor,ColMajor>::run);
97     func[ADJ   | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,Conj, RowMajor,ColMajor>::run);
98 
99     func[NOTR  | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,ColMajor,ColMajor>::run);
100     func[TR    | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,RowMajor,ColMajor>::run);
101     func[ADJ   | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,Conj, RowMajor,ColMajor>::run);
102 
103     func[NOTR  | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,ColMajor,ColMajor>::run);
104     func[TR    | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,RowMajor,ColMajor>::run);
105     func[ADJ   | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,Conj, RowMajor,ColMajor>::run);
106 
107     func[NOTR  | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,ColMajor,ColMajor>::run);
108     func[TR    | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,RowMajor,ColMajor>::run);
109     func[ADJ   | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,Conj, RowMajor,ColMajor>::run);
110 
111     init = true;
112   }
113 
114   Scalar* a = reinterpret_cast<Scalar*>(pa);
115   Scalar* b = reinterpret_cast<Scalar*>(pb);
116   Scalar  alpha = *reinterpret_cast<Scalar*>(palpha);
117 
118   int info = 0;
119   if(SIDE(*side)==INVALID)                                            info = 1;
120   else if(UPLO(*uplo)==INVALID)                                       info = 2;
121   else if(OP(*opa)==INVALID)                                          info = 3;
122   else if(DIAG(*diag)==INVALID)                                       info = 4;
123   else if(*m<0)                                                       info = 5;
124   else if(*n<0)                                                       info = 6;
125   else if(*lda<std::max(1,(SIDE(*side)==LEFT)?*m:*n))                 info = 9;
126   else if(*ldb<std::max(1,*m))                                        info = 11;
127   if(info)
128     return xerbla_(SCALAR_SUFFIX_UP"TRSM ",&info,6);
129 
130   int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4);
131 
132   if(SIDE(*side)==LEFT)
133   {
134     internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m);
135     func[code](*m, *n, a, *lda, b, *ldb, blocking);
136   }
137   else
138   {
139     internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*n);
140     func[code](*n, *m, a, *lda, b, *ldb, blocking);
141   }
142 
143   if(alpha!=Scalar(1))
144     matrix(b,*m,*n,*ldb) *= alpha;
145 
146   return 0;
147 }
148 
149 
150 // b = alpha*op(a)*b  for side = 'L'or'l'
151 // b = alpha*b*op(a)  for side = 'R'or'r'
EIGEN_BLAS_FUNC(trmm)152 int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha,  RealScalar *pa, int *lda, RealScalar *pb, int *ldb)
153 {
154 //   std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n";
155   typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
156   static functype func[32];
157   static bool init = false;
158   if(!init)
159   {
160     for(int k=0; k<32; ++k)
161       func[k] = 0;
162 
163     func[NOTR  | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0,          true, ColMajor,false,ColMajor,false,ColMajor>::run);
164     func[TR    | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0,          true, RowMajor,false,ColMajor,false,ColMajor>::run);
165     func[ADJ   | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0,          true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
166 
167     func[NOTR  | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0,          false,ColMajor,false,ColMajor,false,ColMajor>::run);
168     func[TR    | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0,          false,ColMajor,false,RowMajor,false,ColMajor>::run);
169     func[ADJ   | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0,          false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
170 
171     func[NOTR  | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0,          true, ColMajor,false,ColMajor,false,ColMajor>::run);
172     func[TR    | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0,          true, RowMajor,false,ColMajor,false,ColMajor>::run);
173     func[ADJ   | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0,          true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
174 
175     func[NOTR  | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0,          false,ColMajor,false,ColMajor,false,ColMajor>::run);
176     func[TR    | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0,          false,ColMajor,false,RowMajor,false,ColMajor>::run);
177     func[ADJ   | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0,          false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
178 
179     func[NOTR  | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run);
180     func[TR    | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run);
181     func[ADJ   | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
182 
183     func[NOTR  | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run);
184     func[TR    | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run);
185     func[ADJ   | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
186 
187     func[NOTR  | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run);
188     func[TR    | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run);
189     func[ADJ   | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
190 
191     func[NOTR  | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run);
192     func[TR    | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run);
193     func[ADJ   | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
194 
195     init = true;
196   }
197 
198   Scalar* a = reinterpret_cast<Scalar*>(pa);
199   Scalar* b = reinterpret_cast<Scalar*>(pb);
200   Scalar  alpha = *reinterpret_cast<Scalar*>(palpha);
201 
202   int info = 0;
203   if(SIDE(*side)==INVALID)                                            info = 1;
204   else if(UPLO(*uplo)==INVALID)                                       info = 2;
205   else if(OP(*opa)==INVALID)                                          info = 3;
206   else if(DIAG(*diag)==INVALID)                                       info = 4;
207   else if(*m<0)                                                       info = 5;
208   else if(*n<0)                                                       info = 6;
209   else if(*lda<std::max(1,(SIDE(*side)==LEFT)?*m:*n))                 info = 9;
210   else if(*ldb<std::max(1,*m))                                        info = 11;
211   if(info)
212     return xerbla_(SCALAR_SUFFIX_UP"TRMM ",&info,6);
213 
214   int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4);
215 
216   if(*m==0 || *n==0)
217     return 1;
218 
219   // FIXME find a way to avoid this copy
220   Matrix<Scalar,Dynamic,Dynamic,ColMajor> tmp = matrix(b,*m,*n,*ldb);
221   matrix(b,*m,*n,*ldb).setZero();
222 
223   if(SIDE(*side)==LEFT)
224   {
225     internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m);
226     func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, *ldb, alpha, blocking);
227   }
228   else
229   {
230     internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*n);
231     func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, *ldb, alpha, blocking);
232   }
233   return 1;
234 }
235 
236 // c = alpha*a*b + beta*c  for side = 'L'or'l'
237 // c = alpha*b*a + beta*c  for side = 'R'or'r
EIGEN_BLAS_FUNC(symm)238 int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc)
239 {
240 //   std::cerr << "in symm " << *side << " " << *uplo << " " << *m << "x" << *n << " lda:" << *lda << " ldb:" << *ldb << " ldc:" << *ldc << " alpha:" << *palpha << " beta:" << *pbeta << "\n";
241   Scalar* a = reinterpret_cast<Scalar*>(pa);
242   Scalar* b = reinterpret_cast<Scalar*>(pb);
243   Scalar* c = reinterpret_cast<Scalar*>(pc);
244   Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
245   Scalar beta  = *reinterpret_cast<Scalar*>(pbeta);
246 
247   int info = 0;
248   if(SIDE(*side)==INVALID)                                            info = 1;
249   else if(UPLO(*uplo)==INVALID)                                       info = 2;
250   else if(*m<0)                                                       info = 3;
251   else if(*n<0)                                                       info = 4;
252   else if(*lda<std::max(1,(SIDE(*side)==LEFT)?*m:*n))                 info = 7;
253   else if(*ldb<std::max(1,*m))                                        info = 9;
254   else if(*ldc<std::max(1,*m))                                        info = 12;
255   if(info)
256     return xerbla_(SCALAR_SUFFIX_UP"SYMM ",&info,6);
257 
258   if(beta!=Scalar(1))
259   {
260     if(beta==Scalar(0)) matrix(c, *m, *n, *ldc).setZero();
261     else                matrix(c, *m, *n, *ldc) *= beta;
262   }
263 
264   if(*m==0 || *n==0)
265   {
266     return 1;
267   }
268 
269   #if ISCOMPLEX
270   // FIXME add support for symmetric complex matrix
271   int size = (SIDE(*side)==LEFT) ? (*m) : (*n);
272   Matrix<Scalar,Dynamic,Dynamic,ColMajor> matA(size,size);
273   if(UPLO(*uplo)==UP)
274   {
275     matA.triangularView<Upper>() = matrix(a,size,size,*lda);
276     matA.triangularView<Lower>() = matrix(a,size,size,*lda).transpose();
277   }
278   else if(UPLO(*uplo)==LO)
279   {
280     matA.triangularView<Lower>() = matrix(a,size,size,*lda);
281     matA.triangularView<Upper>() = matrix(a,size,size,*lda).transpose();
282   }
283   if(SIDE(*side)==LEFT)
284     matrix(c, *m, *n, *ldc) += alpha * matA * matrix(b, *m, *n, *ldb);
285   else if(SIDE(*side)==RIGHT)
286     matrix(c, *m, *n, *ldc) += alpha * matrix(b, *m, *n, *ldb) * matA;
287   #else
288   if(SIDE(*side)==LEFT)
289     if(UPLO(*uplo)==UP)       internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
290     else if(UPLO(*uplo)==LO)  internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
291     else                      return 0;
292   else if(SIDE(*side)==RIGHT)
293     if(UPLO(*uplo)==UP)       internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, RowMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);
294     else if(UPLO(*uplo)==LO)  internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);
295     else                      return 0;
296   else
297     return 0;
298   #endif
299 
300   return 0;
301 }
302 
303 // c = alpha*a*a' + beta*c  for op = 'N'or'n'
304 // c = alpha*a'*a + beta*c  for op = 'T'or't','C'or'c'
EIGEN_BLAS_FUNC(syrk)305 int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc)
306 {
307 //   std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n";
308   #if !ISCOMPLEX
309   typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&);
310   static functype func[8];
311 
312   static bool init = false;
313   if(!init)
314   {
315     for(int k=0; k<8; ++k)
316       func[k] = 0;
317 
318     func[NOTR  | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Upper>::run);
319     func[TR    | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Upper>::run);
320     func[ADJ   | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Upper>::run);
321 
322     func[NOTR  | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Lower>::run);
323     func[TR    | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Lower>::run);
324     func[ADJ   | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Lower>::run);
325 
326     init = true;
327   }
328   #endif
329 
330   Scalar* a = reinterpret_cast<Scalar*>(pa);
331   Scalar* c = reinterpret_cast<Scalar*>(pc);
332   Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
333   Scalar beta  = *reinterpret_cast<Scalar*>(pbeta);
334 
335   int info = 0;
336   if(UPLO(*uplo)==INVALID)                                            info = 1;
337   else if(OP(*op)==INVALID)                                           info = 2;
338   else if(*n<0)                                                       info = 3;
339   else if(*k<0)                                                       info = 4;
340   else if(*lda<std::max(1,(OP(*op)==NOTR)?*n:*k))                     info = 7;
341   else if(*ldc<std::max(1,*n))                                        info = 10;
342   if(info)
343     return xerbla_(SCALAR_SUFFIX_UP"SYRK ",&info,6);
344 
345   if(beta!=Scalar(1))
346   {
347     if(UPLO(*uplo)==UP)
348       if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Upper>().setZero();
349       else                matrix(c, *n, *n, *ldc).triangularView<Upper>() *= beta;
350     else
351       if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Lower>().setZero();
352       else                matrix(c, *n, *n, *ldc).triangularView<Lower>() *= beta;
353   }
354 
355   #if ISCOMPLEX
356   // FIXME add support for symmetric complex matrix
357   if(UPLO(*uplo)==UP)
358   {
359     if(OP(*op)==NOTR)
360       matrix(c, *n, *n, *ldc).triangularView<Upper>() += alpha * matrix(a,*n,*k,*lda) * matrix(a,*n,*k,*lda).transpose();
361     else
362       matrix(c, *n, *n, *ldc).triangularView<Upper>() += alpha * matrix(a,*k,*n,*lda).transpose() * matrix(a,*k,*n,*lda);
363   }
364   else
365   {
366     if(OP(*op)==NOTR)
367       matrix(c, *n, *n, *ldc).triangularView<Lower>() += alpha * matrix(a,*n,*k,*lda) * matrix(a,*n,*k,*lda).transpose();
368     else
369       matrix(c, *n, *n, *ldc).triangularView<Lower>() += alpha * matrix(a,*k,*n,*lda).transpose() * matrix(a,*k,*n,*lda);
370   }
371   #else
372   int code = OP(*op) | (UPLO(*uplo) << 2);
373   func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha);
374   #endif
375 
376   return 0;
377 }
378 
379 // c = alpha*a*b' + alpha*b*a' + beta*c  for op = 'N'or'n'
380 // c = alpha*a'*b + alpha*b'*a + beta*c  for op = 'T'or't'
EIGEN_BLAS_FUNC(syr2k)381 int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc)
382 {
383   Scalar* a = reinterpret_cast<Scalar*>(pa);
384   Scalar* b = reinterpret_cast<Scalar*>(pb);
385   Scalar* c = reinterpret_cast<Scalar*>(pc);
386   Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
387   Scalar beta  = *reinterpret_cast<Scalar*>(pbeta);
388 
389   int info = 0;
390   if(UPLO(*uplo)==INVALID)                                            info = 1;
391   else if(OP(*op)==INVALID)                                           info = 2;
392   else if(*n<0)                                                       info = 3;
393   else if(*k<0)                                                       info = 4;
394   else if(*lda<std::max(1,(OP(*op)==NOTR)?*n:*k))                     info = 7;
395   else if(*ldb<std::max(1,(OP(*op)==NOTR)?*n:*k))                     info = 9;
396   else if(*ldc<std::max(1,*n))                                        info = 12;
397   if(info)
398     return xerbla_(SCALAR_SUFFIX_UP"SYR2K",&info,6);
399 
400   if(beta!=Scalar(1))
401   {
402     if(UPLO(*uplo)==UP)
403       if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Upper>().setZero();
404       else                matrix(c, *n, *n, *ldc).triangularView<Upper>() *= beta;
405     else
406       if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Lower>().setZero();
407       else                matrix(c, *n, *n, *ldc).triangularView<Lower>() *= beta;
408   }
409 
410   if(*k==0)
411     return 1;
412 
413   if(OP(*op)==NOTR)
414   {
415     if(UPLO(*uplo)==UP)
416     {
417       matrix(c, *n, *n, *ldc).triangularView<Upper>()
418         += alpha *matrix(a, *n, *k, *lda)*matrix(b, *n, *k, *ldb).transpose()
419         +  alpha*matrix(b, *n, *k, *ldb)*matrix(a, *n, *k, *lda).transpose();
420     }
421     else if(UPLO(*uplo)==LO)
422       matrix(c, *n, *n, *ldc).triangularView<Lower>()
423         += alpha*matrix(a, *n, *k, *lda)*matrix(b, *n, *k, *ldb).transpose()
424         +  alpha*matrix(b, *n, *k, *ldb)*matrix(a, *n, *k, *lda).transpose();
425   }
426   else if(OP(*op)==TR || OP(*op)==ADJ)
427   {
428     if(UPLO(*uplo)==UP)
429       matrix(c, *n, *n, *ldc).triangularView<Upper>()
430         += alpha*matrix(a, *k, *n, *lda).transpose()*matrix(b, *k, *n, *ldb)
431         +  alpha*matrix(b, *k, *n, *ldb).transpose()*matrix(a, *k, *n, *lda);
432     else if(UPLO(*uplo)==LO)
433       matrix(c, *n, *n, *ldc).triangularView<Lower>()
434         += alpha*matrix(a, *k, *n, *lda).transpose()*matrix(b, *k, *n, *ldb)
435         +  alpha*matrix(b, *k, *n, *ldb).transpose()*matrix(a, *k, *n, *lda);
436   }
437 
438   return 0;
439 }
440 
441 
442 #if ISCOMPLEX
443 
444 // c = alpha*a*b + beta*c  for side = 'L'or'l'
445 // c = alpha*b*a + beta*c  for side = 'R'or'r
EIGEN_BLAS_FUNC(hemm)446 int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc)
447 {
448   Scalar* a = reinterpret_cast<Scalar*>(pa);
449   Scalar* b = reinterpret_cast<Scalar*>(pb);
450   Scalar* c = reinterpret_cast<Scalar*>(pc);
451   Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
452   Scalar beta  = *reinterpret_cast<Scalar*>(pbeta);
453 
454 //   std::cerr << "in hemm " << *side << " " << *uplo << " " << *m << " " << *n << " " << alpha << " " << *lda << " " << beta << " " << *ldc << "\n";
455 
456   int info = 0;
457   if(SIDE(*side)==INVALID)                                            info = 1;
458   else if(UPLO(*uplo)==INVALID)                                       info = 2;
459   else if(*m<0)                                                       info = 3;
460   else if(*n<0)                                                       info = 4;
461   else if(*lda<std::max(1,(SIDE(*side)==LEFT)?*m:*n))                 info = 7;
462   else if(*ldb<std::max(1,*m))                                        info = 9;
463   else if(*ldc<std::max(1,*m))                                        info = 12;
464   if(info)
465     return xerbla_(SCALAR_SUFFIX_UP"HEMM ",&info,6);
466 
467   if(beta==Scalar(0))       matrix(c, *m, *n, *ldc).setZero();
468   else if(beta!=Scalar(1))  matrix(c, *m, *n, *ldc) *= beta;
469 
470   if(*m==0 || *n==0)
471   {
472     return 1;
473   }
474 
475   if(SIDE(*side)==LEFT)
476   {
477     if(UPLO(*uplo)==UP)       internal::product_selfadjoint_matrix<Scalar,DenseIndex,RowMajor,true,Conj,  ColMajor,false,false, ColMajor>
478                                 ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
479     else if(UPLO(*uplo)==LO)  internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,true,false, ColMajor,false,false, ColMajor>
480                                 ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
481     else                      return 0;
482   }
483   else if(SIDE(*side)==RIGHT)
484   {
485     if(UPLO(*uplo)==UP)       matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView<Upper>();/*internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, RowMajor,true,Conj,  ColMajor>
486                                 ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);*/
487     else if(UPLO(*uplo)==LO)  internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, ColMajor,true,false, ColMajor>
488                                 ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);
489     else                      return 0;
490   }
491   else
492   {
493     return 0;
494   }
495 
496   return 0;
497 }
498 
499 // c = alpha*a*conj(a') + beta*c  for op = 'N'or'n'
500 // c = alpha*conj(a')*a + beta*c  for op  = 'C'or'c'
EIGEN_BLAS_FUNC(herk)501 int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc)
502 {
503   typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&);
504   static functype func[8];
505 
506   static bool init = false;
507   if(!init)
508   {
509     for(int k=0; k<8; ++k)
510       func[k] = 0;
511 
512     func[NOTR  | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Upper>::run);
513     func[ADJ   | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Upper>::run);
514 
515     func[NOTR  | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Lower>::run);
516     func[ADJ   | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Lower>::run);
517 
518     init = true;
519   }
520 
521   Scalar* a = reinterpret_cast<Scalar*>(pa);
522   Scalar* c = reinterpret_cast<Scalar*>(pc);
523   RealScalar alpha = *palpha;
524   RealScalar beta  = *pbeta;
525 
526 //   std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << alpha << " " << *lda << " " << beta << " " << *ldc << "\n";
527 
528   int info = 0;
529   if(UPLO(*uplo)==INVALID)                                            info = 1;
530   else if((OP(*op)==INVALID) || (OP(*op)==TR))                        info = 2;
531   else if(*n<0)                                                       info = 3;
532   else if(*k<0)                                                       info = 4;
533   else if(*lda<std::max(1,(OP(*op)==NOTR)?*n:*k))                     info = 7;
534   else if(*ldc<std::max(1,*n))                                        info = 10;
535   if(info)
536     return xerbla_(SCALAR_SUFFIX_UP"HERK ",&info,6);
537 
538   int code = OP(*op) | (UPLO(*uplo) << 2);
539 
540   if(beta!=RealScalar(1))
541   {
542     if(UPLO(*uplo)==UP)
543       if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Upper>().setZero();
544       else                matrix(c, *n, *n, *ldc).triangularView<StrictlyUpper>() *= beta;
545     else
546       if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Lower>().setZero();
547       else                matrix(c, *n, *n, *ldc).triangularView<StrictlyLower>() *= beta;
548 
549     if(beta!=Scalar(0))
550     {
551       matrix(c, *n, *n, *ldc).diagonal().real() *= beta;
552       matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
553     }
554   }
555 
556   if(*k>0 && alpha!=RealScalar(0))
557   {
558     func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha);
559     matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
560   }
561   return 0;
562 }
563 
564 // c = alpha*a*conj(b') + conj(alpha)*b*conj(a') + beta*c,  for op = 'N'or'n'
565 // c = alpha*conj(a')*b + conj(alpha)*conj(b')*a + beta*c,  for op = 'C'or'c'
EIGEN_BLAS_FUNC(her2k)566 int EIGEN_BLAS_FUNC(her2k)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc)
567 {
568   Scalar* a = reinterpret_cast<Scalar*>(pa);
569   Scalar* b = reinterpret_cast<Scalar*>(pb);
570   Scalar* c = reinterpret_cast<Scalar*>(pc);
571   Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
572   RealScalar beta  = *pbeta;
573 
574   int info = 0;
575   if(UPLO(*uplo)==INVALID)                                            info = 1;
576   else if((OP(*op)==INVALID) || (OP(*op)==TR))                        info = 2;
577   else if(*n<0)                                                       info = 3;
578   else if(*k<0)                                                       info = 4;
579   else if(*lda<std::max(1,(OP(*op)==NOTR)?*n:*k))                     info = 7;
580   else if(*lda<std::max(1,(OP(*op)==NOTR)?*n:*k))                     info = 9;
581   else if(*ldc<std::max(1,*n))                                        info = 12;
582   if(info)
583     return xerbla_(SCALAR_SUFFIX_UP"HER2K",&info,6);
584 
585   if(beta!=RealScalar(1))
586   {
587     if(UPLO(*uplo)==UP)
588       if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Upper>().setZero();
589       else                matrix(c, *n, *n, *ldc).triangularView<StrictlyUpper>() *= beta;
590     else
591       if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Lower>().setZero();
592       else                matrix(c, *n, *n, *ldc).triangularView<StrictlyLower>() *= beta;
593 
594     if(beta!=Scalar(0))
595     {
596       matrix(c, *n, *n, *ldc).diagonal().real() *= beta;
597       matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
598     }
599   }
600   else if(*k>0 && alpha!=Scalar(0))
601     matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
602 
603   if(*k==0)
604     return 1;
605 
606   if(OP(*op)==NOTR)
607   {
608     if(UPLO(*uplo)==UP)
609     {
610       matrix(c, *n, *n, *ldc).triangularView<Upper>()
611         +=            alpha *matrix(a, *n, *k, *lda)*matrix(b, *n, *k, *ldb).adjoint()
612         +  numext::conj(alpha)*matrix(b, *n, *k, *ldb)*matrix(a, *n, *k, *lda).adjoint();
613     }
614     else if(UPLO(*uplo)==LO)
615       matrix(c, *n, *n, *ldc).triangularView<Lower>()
616         += alpha*matrix(a, *n, *k, *lda)*matrix(b, *n, *k, *ldb).adjoint()
617         +  numext::conj(alpha)*matrix(b, *n, *k, *ldb)*matrix(a, *n, *k, *lda).adjoint();
618   }
619   else if(OP(*op)==ADJ)
620   {
621     if(UPLO(*uplo)==UP)
622       matrix(c, *n, *n, *ldc).triangularView<Upper>()
623         +=             alpha*matrix(a, *k, *n, *lda).adjoint()*matrix(b, *k, *n, *ldb)
624         +  numext::conj(alpha)*matrix(b, *k, *n, *ldb).adjoint()*matrix(a, *k, *n, *lda);
625     else if(UPLO(*uplo)==LO)
626       matrix(c, *n, *n, *ldc).triangularView<Lower>()
627         +=             alpha*matrix(a, *k, *n, *lda).adjoint()*matrix(b, *k, *n, *ldb)
628         +  numext::conj(alpha)*matrix(b, *k, *n, *ldb).adjoint()*matrix(a, *k, *n, *lda);
629   }
630 
631   return 1;
632 }
633 
634 #endif // ISCOMPLEX
635