1// Copyright 2019 Google LLC 2// 3// This source code is licensed under the BSD-style license found in the 4// LICENSE file in the root directory of this source tree. 5 6#include <assert.h> 7 8#include <xnnpack/igemm.h> 9#include <xnnpack/math.h> 10 11 12$assert ACTIVATION in ["LINEAR", "RELU", "MINMAX"] 13$MIN_F32 = "__builtin_wasm_min_f32" if WASM else "math_min_f32" 14$MAX_F32 = "__builtin_wasm_max_f32" if WASM else "math_max_f32" 15$SUFFIX = {"LINEAR": "", "RELU": "_relu", "MINMAX": "_minmax"}[ACTIVATION] 16$PARAMS = {"LINEAR": "xnn_f32_default_params", "RELU": "xnn_f32_relu_params", "MINMAX": "xnn_f32_minmax_params"}[ACTIVATION] 17void xnn_f32_igemm${SUFFIX}_ukernel_${MR}x${NR}__${"wasm" if WASM else "scalar"}( 18 size_t mr, 19 size_t nc, 20 size_t kc, 21 size_t ks, 22 const float**restrict a, 23 const float*restrict w, 24 float*restrict c, 25 size_t cm_stride, 26 size_t cn_stride, 27 size_t a_offset, 28 const float* zero, 29 const union ${PARAMS} params[restrict XNN_MIN_ELEMENTS(1)]) 30{ 31 assert(mr != 0); 32 assert(mr <= ${MR}); 33 assert(nc != 0); 34 assert(kc != 0); 35 assert(kc % sizeof(float) == 0); 36 assert(ks != 0); 37 assert(ks % (${MR} * sizeof(void*)) == 0); 38 assert(a_offset % sizeof(float) == 0); 39 assert(a != NULL); 40 assert(w != NULL); 41 assert(c != NULL); 42 43 float* c0 = c; 44 $for M in range(1, MR): 45 float* c${M} = (float*) ((uintptr_t) c${M-1} + cm_stride); 46 $if M % 2 == 0: 47 if XNN_UNPREDICTABLE(mr <= ${M}) { 48 c${M} = c${M-1}; 49 } 50 $elif M + 1 == MR: 51 if XNN_UNPREDICTABLE(mr != ${M+1}) { 52 c${M} = c${M-1}; 53 } 54 $else: 55 if XNN_UNPREDICTABLE(mr < ${M+1}) { 56 c${M} = c${M-1}; 57 } 58 59 $if ACTIVATION == "MINMAX": 60 const float vmin = params->scalar.min; 61 const float vmax = params->scalar.max; 62 do { 63 $for N in range(NR): 64 float vacc0${N} = w[${N}]; 65 $for M in range(1, MR): 66 $for N in range(NR): 67 float vacc${M}${N} = vacc0${N}; 68 w += ${NR}; 69 70 size_t p = ks; 71 do { 72 $for M in range(MR): 73 const float* restrict a${M} = a[${M}]; 74 assert(a${M} != NULL); 75 if XNN_UNPREDICTABLE(a${M} != zero) { 76 a${M} = (const float*) ((uintptr_t) a${M} + a_offset); 77 } 78 a += ${MR}; 79 80 size_t k = kc; 81 do { 82 $for M in range(MR): 83 const float va${M} = *a${M}++; 84 85 $for N in range(NR): 86 const float vb${N} = w[${N}]; 87 w += ${NR}; 88 89 $for M in range(MR): 90 $for N in range(NR): 91 vacc${M}${N} += va${M} * vb${N}; 92 93 k -= sizeof(float); 94 } while (k != 0); 95 p -= ${MR} * sizeof(void*); 96 } while (p != 0); 97 98 $if ACTIVATION == "MINMAX": 99 $for M in range(MR): 100 $for N in range(NR): 101 vacc${M}${N} = ${MAX_F32}(vacc${M}${N}, vmin); 102 103 $for M in range(MR): 104 $for N in range(NR): 105 vacc${M}${N} = ${MIN_F32}(vacc${M}${N}, vmax); 106 $elif ACTIVATION == "RELU": 107 $for M in range(MR): 108 $for N in range(NR): 109 vacc${M}${N} = ${MAX_F32}(vacc${M}${N}, 0.0f); 110 111 if XNN_LIKELY(nc >= ${NR}) { 112 $for M in reversed(range(MR)): 113 $for N in range(NR): 114 c${M}[${N}] = vacc${M}${N}; 115 c${M} = (float*) ((uintptr_t) c${M} + cn_stride); 116 117 a = (const float**restrict) ((uintptr_t) a - ks); 118 nc -= ${NR}; 119 } else { 120 $for LOG2N in reversed(range(NR.bit_length() - 1)): 121 if (nc & ${1 << LOG2N}) { 122 $for M in reversed(range(MR)): 123 $for N in range(1 << LOG2N): 124 c${M}[${N}] = vacc${M}${N}; 125 $if LOG2N != 0: 126 $for N in range(1 << (LOG2N - 1)): 127 vacc${M}${N} = vacc${M}${N + (1 << LOG2N)}; 128 c${M} += ${1 << LOG2N}; 129 } 130 131 nc = 0; 132 } 133 } while (nc != 0); 134} 135