1 // Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // kernel_default.h: Chooses default GEMM and GEMV kernels for the
16 // host platform.
17 
18 #ifndef GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_
19 #define GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_
20 
#include <type_traits>

#include "../public/bit_depth.h"
#include "common.h"
#include "kernel_reference.h"
24 
namespace gemmlowp {

// Compile-time lookup table from two properties of the operand ranges to the
// kernel used by default:
//   MaxProductIsLessThan4096: the product of the largest Lhs value and the
//       largest Rhs value is below 4096.
//   LhsAlwaysNonzero: the smallest Lhs value is strictly positive.
//
// The primary template is deliberately empty. Actual kernels are registered
// as explicit specializations by GEMMLOWP_SET_DEFAULT_KERNEL; the partial
// specializations below only describe how to fall back when no kernel was
// registered for a given combination.
//
// The third parameter is an implementation detail used to constrain the
// partial specializations. Always leave it at its default.
template <bool MaxProductIsLessThan4096, bool LhsAlwaysNonzero,
          typename Enable = void>
struct DefaultKernelImpl {};

// Partial specialization implementing the logic that if we want to use
// a kernel for MaxProductIsLessThan4096 but do not have such a kernel, then we
// fall back to a kernel not taking advantage of MaxProductIsLessThan4096
// (LhsAlwaysNonzero is forwarded unchanged).
template <bool LhsAlwaysNonzero>
struct DefaultKernelImpl<true, LhsAlwaysNonzero>
    : DefaultKernelImpl<false, LhsAlwaysNonzero> {};

// Partial specialization implementing the logic that if we want to use
// a kernel for LhsAlwaysNonzero but do not have such a kernel, then we fall
// back to a kernel not taking advantage of LhsAlwaysNonzero.
//
// This fallback is restricted to MaxProductIsLessThan4096 == false. Without
// the restriction, DefaultKernelImpl<true, true> would match both partial
// specializations with neither being more specialized, which is a hard
// "ambiguous partial specialization" compile error. With it, <true, true>
// first drops MaxProductIsLessThan4096 (giving <false, true>) and only then,
// if still needed, drops LhsAlwaysNonzero (giving <false, false>).
//
// The tie is broken with a constrained partial specialization rather than a
// full <true, true> specialization on purpose: a full specialization would
// instantiate its base class right here, before the platform kernels have
// been registered further down in this file.
template <bool MaxProductIsLessThan4096>
struct DefaultKernelImpl<
    MaxProductIsLessThan4096, true,
    typename std::enable_if<!MaxProductIsLessThan4096>::type>
    : DefaultKernelImpl<MaxProductIsLessThan4096, false> {};

// Default kernel for a given BitDepthParams.
//
// BitDepthParams must expose nested LhsRange and RhsRange types providing
// the compile-time constants kMinValue and kMaxValue. The two
// DefaultKernelImpl flags are computed from them as follows:
//   MaxProductIsLessThan4096 = LhsRange::kMaxValue * RhsRange::kMaxValue < 4096
//   LhsAlwaysNonzero         = LhsRange::kMinValue > 0
template <typename BitDepthParams>
struct DefaultKernel
    : DefaultKernelImpl<(BitDepthParams::LhsRange::kMaxValue *
                             BitDepthParams::RhsRange::kMaxValue <
                         4096),
                        (BitDepthParams::LhsRange::kMinValue > 0)> {};

}  // end namespace gemmlowp
53 
// Registers Kernel as the default kernel for one combination of the
// DefaultKernelImpl flags, by emitting an explicit specialization of
// gemmlowp::DefaultKernelImpl that publicly derives from Kernel.
//
//   MaxProductIsLessThan4096, LhsAlwaysNonzero: the boolean template
//       arguments of the specialization being defined.
//   Kernel: the kernel type to inherit from. It is named from inside
//       namespace gemmlowp, so it may be written unqualified.
//
// The expansion opens and closes `namespace gemmlowp` itself, so invoke the
// macro at global scope, without a trailing semicolon. As with any explicit
// specialization, it must appear before the first use that would instantiate
// the same DefaultKernelImpl combination.
#define GEMMLOWP_SET_DEFAULT_KERNEL(MaxProductIsLessThan4096,            \
                                    LhsAlwaysNonzero, Kernel)            \
  namespace gemmlowp {                                                   \
  template <>                                                            \
  struct DefaultKernelImpl<MaxProductIsLessThan4096, LhsAlwaysNonzero>   \
      : public Kernel {};                                                \
  } /* namespace gemmlowp */
61 
62 #if defined GEMMLOWP_NEON_32
63 #include "kernel_neon.h"
64 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, NEON_32_Kernel12x4Depth2)
65 GEMMLOWP_SET_DEFAULT_KERNEL(true, false,
66                             NEON_32_Kernel12x4Depth2Assuming12BitProducts)
67 GEMMLOWP_SET_DEFAULT_KERNEL(false, true,
68                             NEON_32bit_GEMM_Int8Operands_LhsNonzero)
69 #elif defined GEMMLOWP_NEON_64
70 #include "kernel_neon.h"
71 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, NEON_64_Kernel12x8Depth2)
72 GEMMLOWP_SET_DEFAULT_KERNEL(false, true,
73                             NEON_64bit_GEMM_Int8Operands_LhsNonzero)
74 #elif defined(GEMMLOWP_MSA)
75 #include "kernel_msa.h"
76 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, MSA_Kernel12x8Depth2)
77 #elif defined GEMMLOWP_SSE4_32
78 #include "kernel_sse.h"
79 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, SSE4_32_Kernel4x4Depth2)
80 #elif defined GEMMLOWP_SSE4_64
81 #include "kernel_sse.h"
82 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, SSE4_64_Kernel12x4Depth2)
83 #else
84 #include "kernel_reference.h"
85 namespace gemmlowp {
86 typedef ReferenceKernel<KernelFormat<
87     KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1>,
88     KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1> > >
89     DefaultReferenceKernel;
90 }
91 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, DefaultReferenceKernel)
92 #endif
93 
94 #endif  // GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_
95