/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x4c8-minmax-xop-ld128.c | 187 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128() local 192 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128() 194 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
|
D | 3x4c8-minmax-xop-ld64.c | 189 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64() local 194 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64() 196 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
|
D | 3x4c8-minmax-sse41-ld64.c | 184 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64() local 189 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64() 191 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64()
|
D | 3x4c8-minmax-sse41-ld128.c | 182 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128() local 187 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128() 189 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128()
|
D | 3x4c8-minmax-ssse3-ld64.c | 216 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64() local 221 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64() 223 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
|
D | 3x4c8-minmax-ssse3-ld128.c | 214 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128() local 219 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128() 221 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
|
D | 3x4c8-minmax-sse2-ld128.c | 214 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128() local 219 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128() 221 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
|
D | 3x4c8-minmax-sse2-ld64.c | 216 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64() local 221 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64() 223 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64()
|
D | 3x4c8-minmax-wasmsimd-ld128.c | 199 …v128_t vacc22x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc2x0123, vacc2x0123), vout… in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local 201 v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc22x0123); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c | 195 …v128_t vacc22x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc2x0123, vacc2x0123), vout… in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local 197 v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc22x0123); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x4c8-minmax-xop-ld64.c | 172 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64() local 177 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64() 179 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
|
D | 3x4c8-xw-minmax-xop.c | 168 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop() local 173 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop() 175 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
|
D | 3x4c8-minmax-sse41-ld64.c | 167 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64() local 172 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64() 174 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
|
D | 3x4c8-xw-minmax-sse41.c | 163 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41() local 168 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41() 170 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
|
D | 3x4c8-minmax-sse41-ld128.c | 165 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128() local 170 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128() 172 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
|
D | 3x4c8-minmax-xop-ld128.c | 170 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128() local 175 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128() 177 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
|
D | 3x4c8-xw-minmax-ssse3.c | 195 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3() local 200 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3() 202 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
|
D | 3x4c8-minmax-sse2-ld128.c | 197 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128() local 202 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128() 204 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
|
D | 3x4c8-xw-minmax-sse2.c | 195 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2() local 200 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2() 202 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2()
|
D | 3x4c8-minmax-sse2-ld64.c | 199 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64() local 204 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64() 206 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
|
D | 3x4c8-minmax-ssse3-ld128.c | 197 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128() local 202 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128() 204 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
|
D | 3x4c8-minmax-ssse3-ld64.c | 199 … __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64() local 204 vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64() 206 __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64()
|
D | 3x4c8-minmax-wasmsimd-ld64.c | 178 …v128_t vacc22x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc2x0123, vacc2x0123), vout… in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local 180 v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
|
D | 3x4c8-xw-minmax-wasmsimd.c | 178 …v128_t vacc22x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc2x0123, vacc2x0123), vout… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local 180 v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
|
D | 3x4c8-minmax-wasmsimd-ld128.c | 182 …v128_t vacc22x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc2x0123, vacc2x0123), vout… in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local 184 v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc22x0123); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|