Home
last modified time | relevance | path

Searched refs:zw_packed (Results 1 – 25 of 25) sorted by relevance

/external/XNNPACK/src/qu8-requantization/
Dq31-neon.c84 …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin… in xnn_qu8_requantize_q31__neon() local
85 const uint8x16_t xyzw_packed = vqmovun_high_s16(vqmovun_s16(xy_packed), zw_packed); in xnn_qu8_requantize_q31__neon()
88 …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v… in xnn_qu8_requantize_q31__neon() local
89 const uint8x16_t xyzw_packed = vcombine_u8(vqmovun_s16(xy_packed), vqmovun_s16(zw_packed)); in xnn_qu8_requantize_q31__neon()
Dfp32-neon.c75 …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_rounded), w_rounded), vzero_po… in xnn_qu8_requantize_fp32__neon() local
76 const uint8x16_t xyzw_packed = vqmovun_high_s16(vqmovun_s16(xy_packed), zw_packed); in xnn_qu8_requantize_fp32__neon()
104 const int16x8_t zw_packed = vcombine_s16(vmovn_s32(z_biased), vmovn_s32(w_biased)); in xnn_qu8_requantize_fp32__neon() local
105 …t8x16_t xyzw_packed = vreinterpretq_u8_s8(vcombine_s8(vmovn_s16(xy_packed), vmovn_s16(zw_packed))); in xnn_qu8_requantize_fp32__neon()
Dprecise-neon.c116 …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin… in xnn_qu8_requantize_precise__neon() local
117 const uint8x16_t xyzw_packed = vqmovun_high_s16(vqmovun_s16(xy_packed), zw_packed); in xnn_qu8_requantize_precise__neon()
125 …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v… in xnn_qu8_requantize_precise__neon() local
126 const uint8x16_t xyzw_packed = vcombine_u8(vqmovun_s16(xy_packed), vqmovun_s16(zw_packed)); in xnn_qu8_requantize_precise__neon()
Dfp32-sse2.c75 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_rounded, w_rounded), vzero_point); in xnn_qu8_requantize_fp32__sse2() local
76 const __m128i xyzw_packed = _mm_packus_epi16(xy_packed, zw_packed); in xnn_qu8_requantize_fp32__sse2()
Dfp32-wasmsimd.c73 const v128_t zw_packed = wasm_v16x8_shuffle(z_biased, w_biased, 0, 2, 4, 6, 8, 10, 12, 14); in xnn_qu8_requantize_fp32__wasmsimd() local
74 …const v128_t xyzw_packed = wasm_v8x16_shuffle(xy_packed, zw_packed, 0, 2, 4, 6, 8, 10, 12, 14, 16,… in xnn_qu8_requantize_fp32__wasmsimd()
Dprecise-sse4.c94 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qu8_requantize_precise__sse4() local
95 const __m128i xyzw_packed = _mm_packus_epi16(xy_packed, zw_packed); in xnn_qu8_requantize_precise__sse4()
Dprecise-ssse3.c102 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qu8_requantize_precise__ssse3() local
103 const __m128i xyzw_packed = _mm_packus_epi16(xy_packed, zw_packed); in xnn_qu8_requantize_precise__ssse3()
Dq31-sse4.c110 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qu8_requantize_q31__sse4() local
111 const __m128i xyzw_packed = _mm_packus_epi16(xy_packed, zw_packed); in xnn_qu8_requantize_q31__sse4()
Dprecise-sse2.c107 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qu8_requantize_precise__sse2() local
108 const __m128i xyzw_packed = _mm_packus_epi16(xy_packed, zw_packed); in xnn_qu8_requantize_precise__sse2()
Dq31-wasmsimd.c113 …const v128_t zw_packed = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(z_scaled, w_scaled), vzer… in xnn_qu8_requantize_q31__wasmsimd() local
114 const v128_t xyzw_packed = wasm_u8x16_narrow_i16x8(xy_packed, zw_packed); in xnn_qu8_requantize_q31__wasmsimd()
Dq31-ssse3.c159 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qu8_requantize_q31__ssse3() local
160 const __m128i xyzw_packed = _mm_packus_epi16(xy_packed, zw_packed); in xnn_qu8_requantize_q31__ssse3()
Dq31-sse2.c159 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qu8_requantize_q31__sse2() local
160 const __m128i xyzw_packed = _mm_packus_epi16(xy_packed, zw_packed); in xnn_qu8_requantize_q31__sse2()
/external/XNNPACK/src/qs8-requantization/
Dq31-neon.c84 …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin… in xnn_qs8_requantize_q31__neon() local
85 const int8x16_t xyzw_packed = vqmovn_high_s16(vqmovn_s16(xy_packed), zw_packed); in xnn_qs8_requantize_q31__neon()
88 …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v… in xnn_qs8_requantize_q31__neon() local
89 const int8x16_t xyzw_packed = vcombine_s8(vqmovn_s16(xy_packed), vqmovn_s16(zw_packed)); in xnn_qs8_requantize_q31__neon()
Dfp32-neon.c75 …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_rounded), w_rounded), vzero_po… in xnn_qs8_requantize_fp32__neon() local
76 const int8x16_t xyzw_packed = vqmovn_high_s16(vqmovn_s16(xy_packed), zw_packed); in xnn_qs8_requantize_fp32__neon()
128 const int16x8_t zw_packed = vcombine_s16(vmovn_s32(z_biased), vmovn_s32(w_biased)); in xnn_qs8_requantize_fp32__neon() local
129 const int8x16_t xyzw_packed = vcombine_s8(vmovn_s16(xy_packed), vmovn_s16(zw_packed)); in xnn_qs8_requantize_fp32__neon()
Dprecise-neon.c116 …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin… in xnn_qs8_requantize_precise__neon() local
117 const int8x16_t xyzw_packed = vqmovn_high_s16(vqmovn_s16(xy_packed), zw_packed); in xnn_qs8_requantize_precise__neon()
125 …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v… in xnn_qs8_requantize_precise__neon() local
126 const int8x16_t xyzw_packed = vcombine_s8(vqmovn_s16(xy_packed), vqmovn_s16(zw_packed)); in xnn_qs8_requantize_precise__neon()
Dfp32-sse4.c75 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_rounded, w_rounded), vzero_point); in xnn_qs8_requantize_fp32__sse4() local
76 const __m128i xyzw_packed = _mm_packs_epi16(xy_packed, zw_packed); in xnn_qs8_requantize_fp32__sse4()
Dfp32-sse2.c75 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_rounded, w_rounded), vzero_point); in xnn_qs8_requantize_fp32__sse2() local
77 const __m128i zw_clamped = _mm_max_epi16(_mm_min_epi16(zw_packed, vqmax), vqmin); in xnn_qs8_requantize_fp32__sse2()
Dfp32-wasmsimd.c73 const v128_t zw_packed = wasm_v16x8_shuffle(z_biased, w_biased, 0, 2, 4, 6, 8, 10, 12, 14); in xnn_qs8_requantize_fp32__wasmsimd() local
74 …const v128_t xyzw_packed = wasm_v8x16_shuffle(xy_packed, zw_packed, 0, 2, 4, 6, 8, 10, 12, 14, 16,… in xnn_qs8_requantize_fp32__wasmsimd()
Dprecise-sse4.c94 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qs8_requantize_precise__sse4() local
95 const __m128i xyzw_packed = _mm_packs_epi16(xy_packed, zw_packed); in xnn_qs8_requantize_precise__sse4()
Dprecise-ssse3.c102 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qs8_requantize_precise__ssse3() local
104 const __m128i zw_clamped = _mm_max_epi16(_mm_min_epi16(zw_packed, vqmax), vqmin); in xnn_qs8_requantize_precise__ssse3()
Dq31-sse4.c110 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qs8_requantize_q31__sse4() local
111 const __m128i xyzw_packed = _mm_packs_epi16(xy_packed, zw_packed); in xnn_qs8_requantize_q31__sse4()
Dq31-wasmsimd.c113 …const v128_t zw_packed = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(z_scaled, w_scaled), vzer… in xnn_qs8_requantize_q31__wasmsimd() local
114 const v128_t xyzw_packed = wasm_i8x16_narrow_i16x8(xy_packed, zw_packed); in xnn_qs8_requantize_q31__wasmsimd()
Dprecise-sse2.c107 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qs8_requantize_precise__sse2() local
109 const __m128i zw_clamped = _mm_max_epi16(_mm_min_epi16(zw_packed, vqmax), vqmin); in xnn_qs8_requantize_precise__sse2()
Dq31-ssse3.c159 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qs8_requantize_q31__ssse3() local
161 const __m128i zw_clamped = _mm_max_epi16(_mm_min_epi16(zw_packed, vqmax), vqmin); in xnn_qs8_requantize_q31__ssse3()
Dq31-sse2.c159 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qs8_requantize_q31__sse2() local
161 const __m128i zw_clamped = _mm_max_epi16(_mm_min_epi16(zw_packed, vqmax), vqmin); in xnn_qs8_requantize_q31__sse2()