1 /*
2  Copyright (c) 2011, Intel Corporation. All rights reserved.
3 
4  Redistribution and use in source and binary forms, with or without modification,
5  are permitted provided that the following conditions are met:
6 
7  * Redistributions of source code must retain the above copyright notice, this
8    list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright notice,
10    this list of conditions and the following disclaimer in the documentation
11    and/or other materials provided with the distribution.
12  * Neither the name of Intel Corporation nor the names of its contributors may
13    be used to endorse or promote products derived from this software without
14    specific prior written permission.
15 
16  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 //
27  ********************************************************************************
28  *   Content : Eigen bindings to BLAS F77
29  *   Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
30  ********************************************************************************
31 */
32 
33 #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
34 #define EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
35 
36 namespace Eigen {
37 
38 namespace internal {
39 
40 
41 /* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
42 
43 #define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
44 template <typename Index, \
45           int LhsStorageOrder, bool ConjugateLhs, \
46           int RhsStorageOrder, bool ConjugateRhs> \
47 struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
48 {\
49 \
50   static void run( \
51     Index rows, Index cols, \
52     const EIGTYPE* _lhs, Index lhsStride, \
53     const EIGTYPE* _rhs, Index rhsStride, \
54     EIGTYPE* res,        Index resStride, \
55     EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
56   { \
57     char side='L', uplo='L'; \
58     BlasIndex m, n, lda, ldb, ldc; \
59     const EIGTYPE *a, *b; \
60     EIGTYPE beta(1); \
61     MatrixX##EIGPREFIX b_tmp; \
62 \
63 /* Set transpose options */ \
64 /* Set m, n, k */ \
65     m = convert_index<BlasIndex>(rows);  \
66     n = convert_index<BlasIndex>(cols);  \
67 \
68 /* Set lda, ldb, ldc */ \
69     lda = convert_index<BlasIndex>(lhsStride); \
70     ldb = convert_index<BlasIndex>(rhsStride); \
71     ldc = convert_index<BlasIndex>(resStride); \
72 \
73 /* Set a, b, c */ \
74     if (LhsStorageOrder==RowMajor) uplo='U'; \
75     a = _lhs; \
76 \
77     if (RhsStorageOrder==RowMajor) { \
78       Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
79       b_tmp = rhs.adjoint(); \
80       b = b_tmp.data(); \
81       ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
82     } else b = _rhs; \
83 \
84     BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
85 \
86   } \
87 };
88 
89 
90 #define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
91 template <typename Index, \
92           int LhsStorageOrder, bool ConjugateLhs, \
93           int RhsStorageOrder, bool ConjugateRhs> \
94 struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
95 {\
96   static void run( \
97     Index rows, Index cols, \
98     const EIGTYPE* _lhs, Index lhsStride, \
99     const EIGTYPE* _rhs, Index rhsStride, \
100     EIGTYPE* res,        Index resStride, \
101     EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
102   { \
103     char side='L', uplo='L'; \
104     BlasIndex m, n, lda, ldb, ldc; \
105     const EIGTYPE *a, *b; \
106     EIGTYPE beta(1); \
107     MatrixX##EIGPREFIX b_tmp; \
108     Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
109 \
110 /* Set transpose options */ \
111 /* Set m, n, k */ \
112     m = convert_index<BlasIndex>(rows); \
113     n = convert_index<BlasIndex>(cols); \
114 \
115 /* Set lda, ldb, ldc */ \
116     lda = convert_index<BlasIndex>(lhsStride); \
117     ldb = convert_index<BlasIndex>(rhsStride); \
118     ldc = convert_index<BlasIndex>(resStride); \
119 \
120 /* Set a, b, c */ \
121     if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
122       Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \
123       a_tmp = lhs.conjugate(); \
124       a = a_tmp.data(); \
125       lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
126     } else a = _lhs; \
127     if (LhsStorageOrder==RowMajor) uplo='U'; \
128 \
129     if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \
130        b = _rhs; } \
131     else { \
132       if (RhsStorageOrder==ColMajor && ConjugateRhs) { \
133         Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \
134         b_tmp = rhs.conjugate(); \
135       } else \
136       if (ConjugateRhs) { \
137         Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
138         b_tmp = rhs.adjoint(); \
139       } else { \
140         Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
141         b_tmp = rhs.transpose(); \
142       } \
143       b = b_tmp.data(); \
144       ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
145     } \
146 \
147     BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
148 \
149   } \
150 };
151 
152 EIGEN_BLAS_SYMM_L(double, double, d, d)
153 EIGEN_BLAS_SYMM_L(float, float, f, s)
154 EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z)
155 EIGEN_BLAS_HEMM_L(scomplex, float, cf, c)
156 
157 
158 /* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
159 
160 #define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
161 template <typename Index, \
162           int LhsStorageOrder, bool ConjugateLhs, \
163           int RhsStorageOrder, bool ConjugateRhs> \
164 struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
165 {\
166 \
167   static void run( \
168     Index rows, Index cols, \
169     const EIGTYPE* _lhs, Index lhsStride, \
170     const EIGTYPE* _rhs, Index rhsStride, \
171     EIGTYPE* res,        Index resStride, \
172     EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
173   { \
174     char side='R', uplo='L'; \
175     BlasIndex m, n, lda, ldb, ldc; \
176     const EIGTYPE *a, *b; \
177     EIGTYPE beta(1); \
178     MatrixX##EIGPREFIX b_tmp; \
179 \
180 /* Set m, n, k */ \
181     m = convert_index<BlasIndex>(rows);  \
182     n = convert_index<BlasIndex>(cols);  \
183 \
184 /* Set lda, ldb, ldc */ \
185     lda = convert_index<BlasIndex>(rhsStride); \
186     ldb = convert_index<BlasIndex>(lhsStride); \
187     ldc = convert_index<BlasIndex>(resStride); \
188 \
189 /* Set a, b, c */ \
190     if (RhsStorageOrder==RowMajor) uplo='U'; \
191     a = _rhs; \
192 \
193     if (LhsStorageOrder==RowMajor) { \
194       Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \
195       b_tmp = lhs.adjoint(); \
196       b = b_tmp.data(); \
197       ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
198     } else b = _lhs; \
199 \
200     BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
201 \
202   } \
203 };
204 
205 
206 #define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
207 template <typename Index, \
208           int LhsStorageOrder, bool ConjugateLhs, \
209           int RhsStorageOrder, bool ConjugateRhs> \
210 struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
211 {\
212   static void run( \
213     Index rows, Index cols, \
214     const EIGTYPE* _lhs, Index lhsStride, \
215     const EIGTYPE* _rhs, Index rhsStride, \
216     EIGTYPE* res,        Index resStride, \
217     EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
218   { \
219     char side='R', uplo='L'; \
220     BlasIndex m, n, lda, ldb, ldc; \
221     const EIGTYPE *a, *b; \
222     EIGTYPE beta(1); \
223     MatrixX##EIGPREFIX b_tmp; \
224     Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
225 \
226 /* Set m, n, k */ \
227     m = convert_index<BlasIndex>(rows); \
228     n = convert_index<BlasIndex>(cols); \
229 \
230 /* Set lda, ldb, ldc */ \
231     lda = convert_index<BlasIndex>(rhsStride); \
232     ldb = convert_index<BlasIndex>(lhsStride); \
233     ldc = convert_index<BlasIndex>(resStride); \
234 \
235 /* Set a, b, c */ \
236     if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
237       Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
238       a_tmp = rhs.conjugate(); \
239       a = a_tmp.data(); \
240       lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
241     } else a = _rhs; \
242     if (RhsStorageOrder==RowMajor) uplo='U'; \
243 \
244     if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \
245        b = _lhs; } \
246     else { \
247       if (LhsStorageOrder==ColMajor && ConjugateLhs) { \
248         Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \
249         b_tmp = lhs.conjugate(); \
250       } else \
251       if (ConjugateLhs) { \
252         Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
253         b_tmp = lhs.adjoint(); \
254       } else { \
255         Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
256         b_tmp = lhs.transpose(); \
257       } \
258       b = b_tmp.data(); \
259       ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
260     } \
261 \
262     BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
263   } \
264 };
265 
266 EIGEN_BLAS_SYMM_R(double, double, d, d)
267 EIGEN_BLAS_SYMM_R(float, float, f, s)
268 EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z)
269 EIGEN_BLAS_HEMM_R(scomplex, float, cf, c)
270 
271 } // end namespace internal
272 
273 } // end namespace Eigen
274 
275 #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
276