/external/gemmlowp/meta/ |
D | multi_thread_gemm.h | 29 std::uint8_t* PrepareGemmTask(const Params& params, int kernel_m, int kernel_n, in PrepareGemmTask() argument 53 task, kernel_m, kernel_n, kernel_k); in PrepareGemmTask() 58 int kernel_m, int kernel_n, int kernel_k, in PrepareGemmTasks() argument 81 params, kernel_m, kernel_n, kernel_k, scratch, i * m_chunk, m_chunk, in PrepareGemmTasks() 85 PrepareGemmTask<Executor, Params>(params, kernel_m, kernel_n, kernel_k, in PrepareGemmTasks() 92 params, kernel_m, kernel_n, kernel_k, scratch, 0, params.m, in PrepareGemmTasks() 96 PrepareGemmTask<Executor, Params>(params, kernel_m, kernel_n, kernel_k, in PrepareGemmTasks() 104 template <typename Executor, typename Params, int kernel_m, int kernel_n, 110 Gemm<Executor, Params, kernel_m, kernel_n, kernel_k>(params); in Run() 119 int kernel_m, int kernel_n, int kernel_k> [all …]
|
D | single_thread_gemm.h | 24 template <typename Executor, typename Params, int kernel_m, int kernel_n, 31 static int EstimateScratchSize(const P& params, int kernel_m, int kernel_n, in EstimateScratchSize() argument 36 const int rhs_chunks = ((params.n + kernel_n - 1) / kernel_n); in EstimateScratchSize() 40 params.right_stream, kernel_n, kernel_k); in EstimateScratchSize() 220 static int EstimateScratchSize(const P& params, int kernel_m, int kernel_n, in EstimateScratchSize() argument 229 params.right_stream, kernel_n, kernel_k); in EstimateScratchSize() 441 static int EstimateScratchSize(const P& params, int kernel_m, int kernel_n, in EstimateScratchSize() argument 490 static int EstimateScratchSize(const P& params, int kernel_m, int kernel_n, in EstimateScratchSize() argument 676 template <typename Executor, typename Params, int kernel_m, int kernel_n, 679 internal::Dispatch3DStage1<Executor, Params, kernel_m, kernel_n, kernel_k, [all …]
|
D | base.h | 101 typename OutputStream, int kernel_m, int kernel_n, int pack_size>
|
/external/gemmlowp/meta/generators/ |
D | quantized_mul_kernels_common.py | 53 def Prepare(self, emitter, registers, kernel_m, kernel_n, lhs, rhs): argument 59 self.rhs_offsets = _ReadParams(emitter, registers, rhs, kernel_n, 4) 130 def Prepare(self, emitter, registers, kernel_m, kernel_n, lhs, rhs): argument 135 self.rhs_offsets = _ReadParams(emitter, registers, rhs, kernel_n, 4) 162 def Prepare(self, emitter, registers, kernel_m, kernel_n, lhs, rhs): argument 167 self.rhs_offsets = _ReadParams(emitter, registers, rhs, kernel_n, 4) 222 kernel_n): argument 227 emitter.EmitVStoreAE(data_type, kernel_n, datum, output, None) 410 def _GenerateNxMLoadMultiplyAggregate(emitter, registers, kernel_m, kernel_n, argument 422 rhs_load = [registers.DoubleRegister() for unused_i in range(kernel_n)] [all …]
|
D | common.py | 95 def SpecializeMulKernel(self, in_type, out_type, kernel_m, kernel_n, argument 100 kernel_n, pack_size 112 self.EmitMultiply(in_type, out_type, kernel_m, kernel_n, pack_size)
|