/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-wasmsimd-x8.c | 50 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() local 52 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() 54 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() 56 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() 74 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() local 75 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() 76 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() 79 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() 80 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() 84 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() [all …]
|
D | minmax-wasmsimd-x16.c | 83 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() local 84 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 85 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 88 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 93 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 94 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 98 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 99 vout0123456701234567 = wasm_u32x4_shr(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 103 *output = wasm_i8x16_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
|
D | minmax-avx2-mul32-ld64-x8.c | 50 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8() local 52 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8() 67 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8() local 70 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8() 71 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8() 75 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8() 76 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8() 80 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
|
D | minmax-wasmsimd-x24.c | 95 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() local 96 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 97 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 100 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 105 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 106 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 110 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 111 vout0123456701234567 = wasm_u32x4_shr(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 115 *output = wasm_i8x16_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
|
D | minmax-sse41-mul32-ld32-x8.c | 57 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() local 59 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 80 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() local 83 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 84 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 88 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 89 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 93 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
|
D | minmax-xop-mul32-ld32-x8.c | 62 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8() local 64 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8() 85 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8() local 88 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8() 89 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8() 93 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8() 94 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8() 98 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
|
D | minmax-wasmsimd-x32.c | 103 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() local 104 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 105 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 108 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 113 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 114 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 118 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 119 vout0123456701234567 = wasm_u32x4_shr(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 123 *output = wasm_i8x16_extract_lane(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
|
D | minmax-sse2-mul16-ld64-x8.c | 64 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8() local 66 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8() 95 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8() local 98 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8() 99 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8() 103 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8() 104 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8() 108 *output = (int32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
|
D | minmax-sse41-mul16-ld64-x8.c | 63 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8() local 65 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8() 93 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8() local 96 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8() 97 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8() 101 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8() 102 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8() 106 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
|
D | minmax-avx2-mul32-ld64-x16.c | 72 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16() local 75 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16() 80 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16() 81 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16() 85 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16() 86 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16() 90 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
|
D | minmax-avx2-mul32-ld64-x24.c | 80 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24() local 83 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24() 88 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24() 89 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24() 93 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24() 94 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24() 98 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24()
|
D | minmax-avx2-mul32-ld64-x32.c | 84 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() local 87 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() 92 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() 93 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() 97 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() 98 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() 102 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
|
D | minmax-sse41-mul32-ld32-x16.c | 92 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() local 95 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 100 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 101 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 105 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 106 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 110 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
|
D | minmax-xop-mul32-ld32-x16.c | 97 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() local 100 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 105 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 106 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 110 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 111 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 115 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-wasmsimd-x8.c | 54 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() local 56 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() 58 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() 60 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() 82 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() local 83 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() 84 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() 87 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() 88 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() 92 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() [all …]
|
D | minmax-wasmsimd-x16.c | 95 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() local 96 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 97 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 100 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 105 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 106 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 110 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 111 vout0123456701234567 = wasm_u32x4_shr(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 115 *output = wasm_i8x16_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
|
D | minmax-avx2-mul32-ld64-x8.c | 53 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() local 55 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() 73 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() local 76 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() 77 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() 81 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() 82 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() 86 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
|
D | minmax-xop-mul32-ld32-x8.c | 66 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() local 68 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() 94 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() local 97 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() 98 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() 102 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() 103 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() 107 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
|
D | minmax-wasmsimd-x24.c | 110 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() local 111 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 112 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 115 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 120 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 121 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 125 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 126 vout0123456701234567 = wasm_u32x4_shr(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 130 *output = wasm_i8x16_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
|
D | minmax-sse41-mul32-ld32-x8.c | 61 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() local 63 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() 89 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() local 92 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() 93 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() 97 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() 98 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() 102 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
|
D | minmax-avx2-mul32-ld64-x16.c | 81 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() local 84 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() 89 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() 90 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() 94 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() 95 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() 99 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16()
|
D | minmax-wasmsimd-x32.c | 121 v128_t vout0123456701234567 = wasm_i8x16_narrow_i16x8(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() local 122 vout0123456701234567 = wasm_i8x16_max(vout0123456701234567, voutput_min); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 123 vout0123456701234567 = wasm_i8x16_min(vout0123456701234567, voutput_max); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 126 *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 131 *((uint32_t*) output) = (uint32_t) wasm_i32x4_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 132 vout0123456701234567 = wasm_u64x2_shr(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 136 *((uint16_t*) output) = (uint16_t) wasm_i16x8_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 137 vout0123456701234567 = wasm_u32x4_shr(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 141 *output = wasm_i8x16_extract_lane(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
|
D | minmax-sse41-mul16-ld64-x8.c | 72 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8() local 74 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8() 110 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8() local 113 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8() 114 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8() 118 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8() 119 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8() 123 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
|
D | minmax-avx2-mul32-ld64-x24.c | 91 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24() local 94 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24() 99 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24() 100 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24() 104 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24() 105 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24() 109 *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24()
|
D | minmax-sse2-mul16-ld64-x8.c | 74 const __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8() local 76 _mm_storel_epi64((__m128i*) output, vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8() 114 __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8() local 117 *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8() 118 vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8() 122 *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8() 123 vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8() 127 *output = (int32_t) _mm_cvtsi128_si32(vout0123456701234567); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
|