/external/XNNPACK/src/f32-ppmm/gen/ |
D | 2x4-minmax-scalar.c | 44 float vacc1x0 = vacc0x0; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar() local 63 vacc1x0 += va1 * vb0; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar() 76 vacc1x0 = math_min_f32(vacc1x0, vmax); in xnn_f32_ppmm_minmax_ukernel_2x4__scalar() 86 vacc1x0 = math_max_f32(vacc1x0, vmin); in xnn_f32_ppmm_minmax_ukernel_2x4__scalar() 95 c1[0] = vacc1x0; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar() 112 c1[0] = vacc1x0; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar() 117 vacc1x0 = vacc1x2; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar() 124 *c1 = vacc1x0; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
|
D | 3x3-minmax-scalar.c | 47 float vacc1x0 = vacc0x0; in xnn_f32_ppmm_minmax_ukernel_3x3__scalar() local 68 vacc1x0 += va1 * vb0; in xnn_f32_ppmm_minmax_ukernel_3x3__scalar() 82 vacc1x0 = math_min_f32(vacc1x0, vmax); in xnn_f32_ppmm_minmax_ukernel_3x3__scalar() 93 vacc1x0 = math_max_f32(vacc1x0, vmin); in xnn_f32_ppmm_minmax_ukernel_3x3__scalar() 106 c1[0] = vacc1x0; in xnn_f32_ppmm_minmax_ukernel_3x3__scalar() 124 c1[0] = vacc1x0; in xnn_f32_ppmm_minmax_ukernel_3x3__scalar() 130 vacc1x0 = vacc1x2; in xnn_f32_ppmm_minmax_ukernel_3x3__scalar() 139 *c1 = vacc1x0; in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
|
D | 4x2-minmax-scalar.c | 50 float vacc1x0 = vacc0x0; in xnn_f32_ppmm_minmax_ukernel_4x2__scalar() local 71 vacc1x0 += va1 * vb0; in xnn_f32_ppmm_minmax_ukernel_4x2__scalar() 84 vacc1x0 = math_min_f32(vacc1x0, vmax); in xnn_f32_ppmm_minmax_ukernel_4x2__scalar() 94 vacc1x0 = math_max_f32(vacc1x0, vmin); in xnn_f32_ppmm_minmax_ukernel_4x2__scalar() 107 c1[0] = vacc1x0; in xnn_f32_ppmm_minmax_ukernel_4x2__scalar() 124 *c1 = vacc1x0; in xnn_f32_ppmm_minmax_ukernel_4x2__scalar()
|
D | 4x4-minmax-scalar.c | 52 float vacc1x0 = vacc0x0; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar() local 81 vacc1x0 += va1 * vb0; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar() 102 vacc1x0 = math_min_f32(vacc1x0, vmax); in xnn_f32_ppmm_minmax_ukernel_4x4__scalar() 120 vacc1x0 = math_max_f32(vacc1x0, vmin); in xnn_f32_ppmm_minmax_ukernel_4x4__scalar() 145 c1[0] = vacc1x0; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar() 168 c1[0] = vacc1x0; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar() 175 vacc1x0 = vacc1x2; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar() 186 *c1 = vacc1x0; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
|
/external/XNNPACK/src/f32-vmulcaddc/gen/ |
D | c2-minmax-scalar-2x.c | 54 float vacc1x0 = i1[0]; in xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x() local 63 vacc1x0 = vacc1x0 * vscale0 + vbias0; in xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x() 68 vacc1x0 = math_max_f32(vacc1x0, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x() 73 vacc1x0 = math_min_f32(vacc1x0, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x() 79 o1[0] = vacc1x0; in xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x()
|
D | c2-minmax-wasm-2x.c | 54 float vacc1x0 = i1[0]; in xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x() local 63 vacc1x0 = vacc1x0 * vscale0 + vbias0; in xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x() 68 vacc1x0 = __builtin_wasm_max_f32(vacc1x0, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x() 73 vacc1x0 = __builtin_wasm_min_f32(vacc1x0, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x() 79 o1[0] = vacc1x0; in xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x()
|
D | c4-minmax-wasm-2x.c | 58 float vacc1x0 = i1[0]; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x() local 73 vacc1x0 = vacc1x0 * vscale0 + vbias0; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x() 82 vacc1x0 = __builtin_wasm_max_f32(vacc1x0, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x() 91 vacc1x0 = __builtin_wasm_min_f32(vacc1x0, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x() 101 o1[0] = vacc1x0; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
|
D | c4-minmax-scalar-2x.c | 58 float vacc1x0 = i1[0]; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x() local 73 vacc1x0 = vacc1x0 * vscale0 + vbias0; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x() 82 vacc1x0 = math_max_f32(vacc1x0, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x() 91 vacc1x0 = math_min_f32(vacc1x0, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x() 101 o1[0] = vacc1x0; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x4c8-minmax-wasmsimd-ld128.c | 55 v128_t vacc1x0 = vacc0x0; in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local 75 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() 82 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() 111 …_t vacc1x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(… in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
|
D | 2x4c8-xw-minmax-wasmsimd.c | 55 v128_t vacc1x0 = vacc0x0; in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local 74 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() 75 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() 107 …_t vacc1x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(… in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
|
D | 2x4c8-minmax-wasmsimd-ld64.c | 55 v128_t vacc1x0 = vacc0x0; in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local 74 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() 75 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() 107 …_t vacc1x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(… in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
|
D | 3x4c8-minmax-wasmsimd-ld128.c | 61 v128_t vacc1x0 = vacc0x0; in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local 87 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() 96 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() 135 …_t vacc1x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(… in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c | 61 v128_t vacc1x0 = vacc0x0; in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local 86 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() 87 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() 131 …_t vacc1x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(… in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
|
D | 3x4c8-xw-minmax-wasmsimd.c | 61 v128_t vacc1x0 = vacc0x0; in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local 86 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() 87 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() 131 …_t vacc1x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
|
D | 2x4c8-xw-minmax-sse2.c | 54 __m128i vacc1x0 = vacc0x0; in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2() local 72 vacc1x0 = _mm_add_epi32(vacc1x0, _mm_madd_epi16(vxa1, vxb0)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2() 92 …t __m128i vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(vacc1x… in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2()
|
D | 2x4c8-minmax-sse2-ld64.c | 54 __m128i vacc1x0 = vacc0x0; in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64() local 73 vacc1x0 = _mm_add_epi32(vacc1x0, _mm_madd_epi16(vxa1, vxb0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64() 96 …t __m128i vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(vacc1x… in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64()
|
D | 2x4c8-minmax-sse2-ld128.c | 54 __m128i vacc1x0 = vacc0x0; in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128() local 76 vacc1x0 = _mm_add_epi32(vacc1x0, _mm_madd_epi16(vxa1, vxb0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128() 94 …t __m128i vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(vacc1x… in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x4c8-minmax-wasmsimd-ld64.c | 56 v128_t vacc1x0 = vacc0x0; in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local 87 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() 88 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() 122 …_t vacc1x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(… in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
|
D | 2x4c8-minmax-wasmsimd-ld128.c | 56 v128_t vacc1x0 = vacc0x0; in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local 88 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() 95 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() 126 …_t vacc1x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(… in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c | 60 v128_t vacc1x0 = vacc0x0; in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local 101 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() 102 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() 148 …_t vacc1x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(… in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
|
D | 3x4c8-minmax-wasmsimd-ld128.c | 60 v128_t vacc1x0 = vacc0x0; in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local 102 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() 111 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() 152 …_t vacc1x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(… in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
/external/XNNPACK/src/f32-spmm/gen/ |
D | 8x1-minmax-scalar.c | 42 float vacc1x0 = vacc0x0; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() local 63 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() 73 float vout1x0 = math_min_f32(vacc1x0, vmax); in xnn_f32_spmm_minmax_ukernel_8x1__scalar() 183 float vacc1x0 = vacc0x0; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() local 196 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() 202 float vout1x0 = math_min_f32(vacc1x0, vmax); in xnn_f32_spmm_minmax_ukernel_8x1__scalar() 266 float vacc1x0 = vacc0x0; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() local 275 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() 279 float vout1x0 = math_min_f32(vacc1x0, vmax); in xnn_f32_spmm_minmax_ukernel_8x1__scalar()
|
D | 4x1-minmax-scalar.c | 42 float vacc1x0 = vacc0x0; in xnn_f32_spmm_minmax_ukernel_4x1__scalar() local 55 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_4x1__scalar() 61 float vout1x0 = math_min_f32(vacc1x0, vmax); in xnn_f32_spmm_minmax_ukernel_4x1__scalar() 131 float vacc1x0 = vacc0x0; in xnn_f32_spmm_minmax_ukernel_4x1__scalar() local 140 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_4x1__scalar() 144 float vout1x0 = math_min_f32(vacc1x0, vmax); in xnn_f32_spmm_minmax_ukernel_4x1__scalar()
|
D | 8x2-minmax-scalar.c | 42 float vacc1x0 = vacc0x0; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() local 72 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() 90 float vout1x0 = math_min_f32(vacc1x0, vmax); in xnn_f32_spmm_minmax_ukernel_8x2__scalar() 225 float vacc1x0 = vacc0x0; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() local 243 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() 253 float vout1x0 = math_min_f32(vacc1x0, vmax); in xnn_f32_spmm_minmax_ukernel_8x2__scalar() 330 float vacc1x0 = vacc0x0; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() local 342 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() 348 float vout1x0 = math_min_f32(vacc1x0, vmax); in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
|
/external/XNNPACK/src/f32-prelu/gen/ |
D | wasm-2x4.c | 70 float vacc1x0 = __builtin_wasm_max_f32(vi1x0, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() local 83 vacc1x0 += vi1x0 * vw0; in xnn_f32_prelu_ukernel__wasm_2x4() 93 o1[0] = vacc1x0; in xnn_f32_prelu_ukernel__wasm_2x4()
|