Searched refs: vprod0x3 (Results 1 – 25 of 63) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
1x8c8-minmax-neon-mlal-padal.c:81 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
82 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
83 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
118 const int16x8_t vprod0x3 = vmull_s8(vb3, va0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
119 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
1x4c8-minmax-wasmsimd-ld128.c:75 const v128_t vprod0x3 = wasm_i16x8_mul(vxb3, vxa0); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local
76 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
79 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
1x4c8-minmax-wasmsimd-ld64.c:73 const v128_t vprod0x3 = wasm_i16x8_mul(vxa0, vxb3); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64() local
74 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
75 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
1x4c8-xw-minmax-wasmsimd.c:73 const v128_t vprod0x3 = wasm_i16x8_mul(vxa0, vxb3); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd() local
74 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
75 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
1x8c16-minmax-neon-mlal-padal.c:76 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
77 vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
78 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
2x8c8-minmax-neon-mlal-padal.c:106 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
108 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
110 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
165 const int16x8_t vprod0x3 = vmull_s8(vb3, va0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
167 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
1x16c8-minmax-neon-mlal-padal.c:97 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
98 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
99 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
166 const int16x8_t vprod0x3 = vmull_s8(vb3, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
167 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
2x4c8-minmax-wasmsimd-ld128.c:95 const v128_t vprod0x3 = wasm_i16x8_mul(vxb3, vxa0); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
96 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
102 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
2x4c8-xw-minmax-wasmsimd.c:94 const v128_t vprod0x3 = wasm_i16x8_mul(vxa0, vxb3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local
95 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
96 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
2x4c8-minmax-wasmsimd-ld64.c:94 const v128_t vprod0x3 = wasm_i16x8_mul(vxa0, vxb3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
95 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
96 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
3x8c8-minmax-neon-mlal-padal.c:131 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
134 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
137 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
212 const int16x8_t vprod0x3 = vmull_s8(vb3, va0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
215 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
2x8c16-minmax-neon-mlal-padal.c:100 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
102 vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
104 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
1x16c16-minmax-neon-mlal-padal.c:92 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
93 vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
94 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
3x4c8-minmax-wasmsimd-ld128.c:115 const v128_t vprod0x3 = wasm_i16x8_mul(vxb3, vxa0); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
116 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
125 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
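
Note: the *-neon-mlal-padal hits above all use the same widening multiply-accumulate idiom: vmull_s8 produces eight 16-bit products, vmlal_s8 folds in a second set of eight products (the c8 kernels use two consecutive 8-byte slices va0x0/va0x1 and vb3x0/vb3x1, the c16 kernels the low and high halves of a 16-byte load), and vpadalq_s16 pairwise-adds the 16-bit products into the 32-bit accumulator vacc0x3; the second hit in each c8 file is the remainder path that skips the vmlal_s8 step. Below is a minimal, self-contained sketch of that idiom. dot16_s8 is a hypothetical helper name, not an XNNPACK function.

#include <arm_neon.h>

/* Hypothetical helper (not in XNNPACK): accumulate sixteen int8*int8
 * products from va and vb into a 4-lane int32 accumulator using the
 * MULL -> MLAL -> PADAL idiom from the kernels listed above. */
static inline int32x4_t dot16_s8(int32x4_t vacc, int8x16_t va, int8x16_t vb) {
  /* Low 8 lanes: widening multiply, 8 x (int8 * int8) -> 8 x int16. */
  int16x8_t vprod = vmull_s8(vget_low_s8(vb), vget_low_s8(va));
  /* High 8 lanes: widening multiply-accumulate into the same int16 lanes. */
  vprod = vmlal_s8(vprod, vget_high_s8(vb), vget_high_s8(va));
  /* Pairwise-add adjacent int16 products, widen to int32, add to vacc. */
  return vpadalq_s16(vacc, vprod);
}

Each 32-bit lane of the result holds a partial sum; the kernels keep one such accumulator per (row, output channel) pair, e.g. vacc0x3 for row 0 and channel 3, and reduce the four lanes after the channel loop.
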
/external/XNNPACK/src/qs8-igemm/gen/
1x8c8-minmax-neon-mlal-padal.c:92 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
93 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
94 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
129 const int16x8_t vprod0x3 = vmull_s8(vb3, va0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
130 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
1x4c8-minmax-wasmsimd-ld64.c:84 const v128_t vprod0x3 = wasm_i16x8_mul(vxa0, vxb3); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64() local
85 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
86 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
1x4c8-minmax-wasmsimd-ld128.c:86 const v128_t vprod0x3 = wasm_i16x8_mul(vxb3, vxa0); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local
87 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
90 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
1x8c16-minmax-neon-mlal-padal.c:87 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
88 vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
89 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
2x8c8-minmax-neon-mlal-padal.c:119 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
121 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
123 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
178 const int16x8_t vprod0x3 = vmull_s8(vb3, va0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
180 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
1x16c8-minmax-neon-mlal-padal.c:108 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
109 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
110 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
177 const int16x8_t vprod0x3 = vmull_s8(vb3, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
178 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
2x4c8-minmax-wasmsimd-ld64.c:107 const v128_t vprod0x3 = wasm_i16x8_mul(vxa0, vxb3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
108 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
109 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
2x4c8-minmax-wasmsimd-ld128.c:108 const v128_t vprod0x3 = wasm_i16x8_mul(vxb3, vxa0); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
109 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
115 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
2x8c16-minmax-neon-mlal-padal.c:113 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
115 vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
117 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
3x8c8-minmax-neon-mlal-padal.c:146 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
149 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
152 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
227 const int16x8_t vprod0x3 = vmull_s8(vb3, va0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
230 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
1x16c16-minmax-neon-mlal-padal.c:103 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
104 vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
105 vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
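
Note: the wasmsimd hits follow the analogous 128-bit WebAssembly SIMD pattern: the int8 inputs are sign-extended to 16 bits on load, wasm_i16x8_mul forms eight exact 16-bit products, and the low and high halves of the product vector are widened to 32 bits and added into vacc0x3. Below is a minimal sketch, assuming clang/Emscripten's wasm_simd128.h; note that current headers spell the widening conversions wasm_i32x4_extend_low_i16x8/wasm_i32x4_extend_high_i16x8, whereas the generated kernels above use the older widen_* names. dot8_i16 is a hypothetical helper name, not an XNNPACK function.

#include <wasm_simd128.h>

/* Hypothetical helper (not in XNNPACK): multiply two vectors of eight
 * int16 values (int8 inputs already sign-extended) lanewise and add the
 * products into a 4-lane int32 accumulator; lane i of vacc receives
 * products i and i+4, matching the kernels listed above. */
static inline v128_t dot8_i16(v128_t vacc, v128_t vxa, v128_t vxb) {
  const v128_t vprod = wasm_i16x8_mul(vxa, vxb);                     /* 8 x int16 products */
  vacc = wasm_i32x4_add(vacc, wasm_i32x4_extend_low_i16x8(vprod));   /* products 0..3 */
  vacc = wasm_i32x4_add(vacc, wasm_i32x4_extend_high_i16x8(vprod));  /* products 4..7 */
  return vacc;
}

In the kernels above, vacc0x3 starts from the per-channel bias loaded from the packed weights, and the ld64/ld128 suffixes differ only in how the int8 operands are loaded and sign-extended before the multiply.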
