/*
 Copyright (c) 2011, Intel Corporation. All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
 are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
 * Neither the name of Intel Corporation nor the names of its contributors may
   be used to endorse or promote products derived from this software without
   specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 ********************************************************************************
 *   Content : Eigen bindings to Intel(R) MKL
 *   MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
 ********************************************************************************
*/

#ifndef EIGEN_ASSIGN_VML_H
#define EIGEN_ASSIGN_VML_H

namespace Eigen {

namespace internal {

template<typename Op> struct vml_call
{ enum { IsSupported = 0 }; };

template<typename Dst, typename Src, typename UnaryOp>
class vml_assign_traits
{
  private:
    enum {
      DstHasDirectAccess = Dst::Flags & DirectAccessBit,
      SrcHasDirectAccess = Src::Flags & DirectAccessBit,

      StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
      InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
                : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
                : int(Dst::RowsAtCompileTime),
      InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
                   : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
                   : int(Dst::MaxRowsAtCompileTime),
      MaxSizeAtCompileTime = Dst::SizeAtCompileTime,

      MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
                    && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
      MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
      VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
      LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
      MayEnableVml = MightEnableVml && LargeEnough,
      MayLinearize = MayEnableVml && MightLinearize
    };
  public:
    enum {
      Traversal = MayLinearize ? LinearVectorizedTraversal
                : MayEnableVml ? InnerVectorizedTraversal
                : DefaultTraversal
    };
};
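// Illustrative example (added sketch, not part of the original source): for a
// plain dynamic-size expression such as
//
//   Eigen::ArrayXd a(n), b(n);
//   a = b.sin();   // CwiseUnaryOp< scalar_sin_op<double>, ... >
//
// both sides have direct access, unit inner stride, matching storage order and
// the LinearAccessBit, so vml_assign_traits selects LinearVectorizedTraversal
// and the whole buffer can be handed to VML in a single call (the size is
// Dynamic, so LargeEnough holds at compile time and the threshold is not
// checked). A non-contiguous sub-expression such as a matrix block lacks
// LinearAccessBit and instead resolves to InnerVectorizedTraversal, i.e. one
// VML call per inner vector (column or row).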
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
         int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
struct vml_assign_impl
  : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
{
};

template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
{
  typedef typename Derived1::Scalar Scalar;
  typedef typename Derived1::Index Index;
  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
  {
    // in case we want to (or have to) skip VML at runtime we can call:
    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
    const Index innerSize = dst.innerSize();
    const Index outerSize = dst.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer) {
      const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :
                                               &(src.nestedExpression().coeffRef(0, outer));
      Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
      vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
    }
  }
};

template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
{
  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
  {
    // in case we want to (or have to) skip VML at runtime we can call:
    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
    vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
  }
};

// Macros

#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
  template<typename Derived1, typename Derived2, typename UnaryOp> \
  struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \
    static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
      vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
    } \
  };

EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)


#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
#define EIGEN_MKL_VML_MODE VML_HA
#else
#define EIGEN_MKL_VML_MODE VML_LA
#endif

#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)  \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {            \
    enum { IsSupported = 1 };                                                 \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst);                        \
    }                                                                         \
  };
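// For reference (added sketch, not in the original source), an invocation used
// further below, EIGEN_MKL_VML_DECLARE_UNARY_CALL(square, vdSqr, double, double),
// expands to roughly the following, modulo whitespace:
//
//   template<> struct vml_call< scalar_square_op<double> > {
//     enum { IsSupported = 1 };
//     static inline void run(const scalar_square_op<double>& /*func*/,
//                            int size, const double* src, double* dst) {
//       vdSqr(size, (const double*)src, (double*)dst);
//     }
//   };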
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {            \
    enum { IsSupported = 1 };                                                 \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                 \
      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode);               \
    }                                                                         \
  };

#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)    \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {            \
    enum { IsSupported = 1 };                                                 \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func,     \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
      EIGENTYPE exponent = func.m_exponent;                                   \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                 \
      VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent,            \
            (VMLTYPE*)dst, &vmlMode);                                         \
    }                                                                         \
  };

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float)          \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)             \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP)                     \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                      \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)


#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)             \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float)      \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)          \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP)                  \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                   \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)


EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin,  Sin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos,  Cos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan,  Tan)
//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs,  Abs)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp,  Exp)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log,  Ln)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)

EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)

// The vm*powx functions are not available in the Windows version of MKL.
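// Note (added remark, not in the original source): the trailing-underscore
// vm?powx_ entry points below appear to be the Fortran-callable variants,
// which would explain why EIGEN_MKL_VML_DECLARE_POW_CALL above passes the
// size, exponent and precision mode by address (&size, &exponent, &vmlMode)
// rather than by value. With these specializations in place, an expression
// such as
//   a = b.pow(2.5);   // scalar_pow_op<double> on an ArrayXd (illustrative)
// can be forwarded to vmdpowx_ on non-Windows platforms.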
#ifndef _WIN32
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float,    float)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double,   double)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
#endif

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_ASSIGN_VML_H
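// Usage sketch (added note; assumes an MKL installation): these bindings are
// activated by defining EIGEN_USE_MKL_VML (or the umbrella EIGEN_USE_MKL_ALL)
// before including Eigen, e.g.
//
//   #define EIGEN_USE_MKL_ALL
//   #include <Eigen/Dense>
//
//   Eigen::ArrayXd b = Eigen::ArrayXd::Random(1 << 20);
//   Eigen::ArrayXd a = b.sin();   // large enough to be routed to MKL VML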