1 // Copyright 2015 The Gemmlowp Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // kernel_default.h: Chooses default GEMM and GEMV kernels for the 16 // host platform. 17 18 #ifndef GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ 19 #define GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ 20 21 #include "../public/bit_depth.h" 22 #include "common.h" 23 #include "kernel_reference.h" 24 25 namespace gemmlowp { 26 27 template <bool MaxProductIsLessThan4096, bool LhsAlwaysNonzero> 28 struct DefaultKernelImpl {}; 29 30 // Partial specialization implementing the logic that if we want to use 31 // a kernel for LhsAlwaysNonzero but do not have such a kernel, then we fall 32 // back to a generic kernel not taking advantage of LhsAlwaysNonzero. 33 template <bool LhsAlwaysNonzero> 34 struct DefaultKernelImpl<true, LhsAlwaysNonzero> 35 : DefaultKernelImpl<false, LhsAlwaysNonzero> {}; 36 37 // Partial specialization implementing the logic that if we want to use 38 // a kernel for MaxProductIsLessThan4096 but do not have such a kernel, then we 39 // fall back to a generic kernel not taking advantage of 40 // MaxProductIsLessThan4096. 41 template <bool MaxProductIsLessThan4096> 42 struct DefaultKernelImpl<MaxProductIsLessThan4096, true> 43 : DefaultKernelImpl<MaxProductIsLessThan4096, false> {}; 44 45 template <typename BitDepthParams> 46 struct DefaultKernel 47 : DefaultKernelImpl<(BitDepthParams::LhsRange::kMaxValue * 48 BitDepthParams::RhsRange::kMaxValue < 49 4096), 50 (BitDepthParams::LhsRange::kMinValue > 0)> {}; 51 52 } // end namespace gemmlowp 53 54 #define GEMMLOWP_SET_DEFAULT_KERNEL(MaxProductIsLessThan4096, \ 55 LhsAlwaysNonzero, Kernel) \ 56 namespace gemmlowp { \ 57 template <> \ 58 struct DefaultKernelImpl<MaxProductIsLessThan4096, LhsAlwaysNonzero> \ 59 : Kernel {}; \ 60 } 61 62 #if defined GEMMLOWP_NEON_32 63 #include "kernel_neon.h" 64 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, NEON_32_Kernel12x4Depth2) 65 GEMMLOWP_SET_DEFAULT_KERNEL(true, false, 66 NEON_32_Kernel12x4Depth2Assuming12BitProducts) 67 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, 68 NEON_32bit_GEMM_Int8Operands_LhsNonzero) 69 #elif defined GEMMLOWP_NEON_64 70 #include "kernel_neon.h" 71 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, NEON_64_Kernel12x8Depth2) 72 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, 73 NEON_64bit_GEMM_Int8Operands_LhsNonzero) 74 #elif defined(GEMMLOWP_MSA) 75 #include "kernel_msa.h" 76 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, MSA_Kernel12x8Depth2) 77 #elif defined GEMMLOWP_SSE4_32 78 #include "kernel_sse.h" 79 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, SSE4_32_Kernel4x4Depth2) 80 #elif defined GEMMLOWP_SSE4_64 81 #include "kernel_sse.h" 82 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, SSE4_64_Kernel12x4Depth2) 83 #else 84 #include "kernel_reference.h" 85 namespace gemmlowp { 86 typedef ReferenceKernel<KernelFormat< 87 KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1>, 88 KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1> > > 89 DefaultReferenceKernel; 90 } 91 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, DefaultReferenceKernel) 92 #endif 93 94 #endif // GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ 95