Home
last modified time | relevance | path

Searched refs:vout0123456701234567 (Results 1 – 25 of 88) sorted by relevance

1234

/external/XNNPACK/src/qs8-vaddc/gen/
Dminmax-wasmsimd-x8.c50 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() local
52 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
54 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
56 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
74 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() local
75 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
76 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
79 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
80 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
84 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
[all …]
Dminmax-wasmsimd-x16.c83 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() local
84 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
85 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
88 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
93 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
94 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
98 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
99 vout0123456701234567 = wasm_u32x4_shr(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
103 *output = wasm_i8x16_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
Dminmax-avx2-mul32-ld64-x8.c50 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8() local
52 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
67 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8() local
70 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
71 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
75 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
76 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
80 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
Dminmax-wasmsimd-x24.c95 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() local
96 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
97 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
100 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
105 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
106 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
110 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
111 vout0123456701234567 = wasm_u32x4_shr(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
115 *output = wasm_i8x16_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
Dminmax-sse41-mul32-ld32-x8.c57 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() local
59 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
80 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() local
83 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
84 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
88 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
89 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
93 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
Dminmax-xop-mul32-ld32-x8.c62 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8() local
64 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
85 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8() local
88 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
89 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
93 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
94 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
98 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
Dminmax-wasmsimd-x32.c103 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() local
104 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
105 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
108 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
113 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
114 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
118 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
119 vout0123456701234567 = wasm_u32x4_shr(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
123 *output = wasm_i8x16_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
Dminmax-sse2-mul16-ld64-x8.c64 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8() local
66 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
95 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8() local
98 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
99 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
103 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
104 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
108 *output = (int32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
Dminmax-sse41-mul16-ld64-x8.c63 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8() local
65 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
93 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8() local
96 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
97 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
101 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
102 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
106 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
Dminmax-avx2-mul32-ld64-x16.c72 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16() local
75 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
80 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
81 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
85 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
86 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
90 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
Dminmax-avx2-mul32-ld64-x24.c80 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24() local
83 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
88 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
89 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
93 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
94 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
98 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
Dminmax-avx2-mul32-ld64-x32.c84 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() local
87 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
92 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
93 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
97 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
98 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
102 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
Dminmax-sse41-mul32-ld32-x16.c92 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() local
95 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
100 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
101 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
105 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
106 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
110 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
Dminmax-xop-mul32-ld32-x16.c97 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() local
100 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
105 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
106 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
110 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
111 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
115 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-wasmsimd-x8.c54 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() local
56 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
58 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
60 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
82 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() local
83 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
84 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
87 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
88 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
92 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
[all …]
Dminmax-wasmsimd-x16.c95 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() local
96 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
97 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
100 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
105 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
106 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
110 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
111 vout0123456701234567 = wasm_u32x4_shr(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
115 *output = wasm_i8x16_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
Dminmax-avx2-mul32-ld64-x8.c53 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() local
55 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
73 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() local
76 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
77 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
81 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
82 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
86 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
Dminmax-xop-mul32-ld32-x8.c66 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() local
68 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
94 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() local
97 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
98 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
102 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
103 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
107 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
Dminmax-wasmsimd-x24.c110 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() local
111 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
112 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
115 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
120 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
121 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
125 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
126 vout0123456701234567 = wasm_u32x4_shr(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
130 *output = wasm_i8x16_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
Dminmax-sse41-mul32-ld32-x8.c61 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() local
63 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
89 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() local
92 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
93 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
97 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
98 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
102 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
Dminmax-avx2-mul32-ld64-x16.c81 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() local
84 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
89 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
90 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
94 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
95 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
99 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
Dminmax-wasmsimd-x32.c121 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() local
122 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
123 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
126 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
131 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
132 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
136 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
137 vout0123456701234567 = wasm_u32x4_shr(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
141 *output = wasm_i8x16_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
Dminmax-sse41-mul16-ld64-x8.c72 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8() local
74 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
110 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8() local
113 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
114 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
118 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
119 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
123 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
Dminmax-avx2-mul32-ld64-x24.c91 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24() local
94 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
99 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
100 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
104 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
105 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
109 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
Dminmax-sse2-mul16-ld64-x8.c74 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8() local
76 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
114 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8() local
117 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
118 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
122 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
123 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
127 *output = (int32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()

1234