/external/XNNPACK/src/qu8-requantization/ |
D | q31-neon.c | 47 const int16x8_t vzero_point = vdupq_n_s16((int16_t)(uint16_t) zero_point); in xnn_qu8_requantize_q31__neon() local 83 …nst int16x8_t xy_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(x_scaled), y_scaled), vzero_point); in xnn_qu8_requantize_q31__neon() 84 …nst int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_point); in xnn_qu8_requantize_q31__neon() 87 …x8_t xy_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(x_scaled), vqmovn_s32(y_scaled)), vzero_point); in xnn_qu8_requantize_q31__neon() 88 …x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), vzero_point); in xnn_qu8_requantize_q31__neon()
|
D | fp32-sse2.c | 32 const __m128i vzero_point = _mm_set1_epi16((short) (uint16_t) zero_point); in xnn_qu8_requantize_fp32__sse2() local 74 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_rounded, y_rounded), vzero_point); in xnn_qu8_requantize_fp32__sse2() 75 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_rounded, w_rounded), vzero_point); in xnn_qu8_requantize_fp32__sse2()
|
D | precise-neon.c | 44 const int16x8_t vzero_point = vdupq_n_s16((int16_t)(uint16_t) zero_point); in xnn_qu8_requantize_precise__neon() local 115 …nst int16x8_t xy_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(x_scaled), y_scaled), vzero_point); in xnn_qu8_requantize_precise__neon() 116 …nst int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_point); in xnn_qu8_requantize_precise__neon() 124 …x8_t xy_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(x_scaled), vqmovn_s32(y_scaled)), vzero_point); in xnn_qu8_requantize_precise__neon() 125 …x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), vzero_point); in xnn_qu8_requantize_precise__neon()
|
D | fp32-neon.c | 33 const int16x8_t vzero_point = vdupq_n_s16((int16_t)(uint16_t) zero_point); in xnn_qu8_requantize_fp32__neon() local 74 …t int16x8_t xy_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(x_rounded), y_rounded), vzero_point); in xnn_qu8_requantize_fp32__neon() 75 …t int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_rounded), w_rounded), vzero_point); in xnn_qu8_requantize_fp32__neon()
|
D | precise-sse4.c | 41 const __m128i vzero_point = _mm_set1_epi16((short) (uint16_t) zero_point); in xnn_qu8_requantize_precise__sse4() local 93 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_scaled, y_scaled), vzero_point); in xnn_qu8_requantize_precise__sse4() 94 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qu8_requantize_precise__sse4()
|
D | precise-ssse3.c | 41 const __m128i vzero_point = _mm_set1_epi16((short) (uint16_t) zero_point); in xnn_qu8_requantize_precise__ssse3() local 101 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_scaled, y_scaled), vzero_point); in xnn_qu8_requantize_precise__ssse3() 102 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qu8_requantize_precise__ssse3()
|
D | q31-sse4.c | 47 const __m128i vzero_point = _mm_set1_epi16((short) (uint16_t) zero_point); in xnn_qu8_requantize_q31__sse4() local 109 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_scaled, y_scaled), vzero_point); in xnn_qu8_requantize_q31__sse4() 110 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qu8_requantize_q31__sse4()
|
D | precise-sse2.c | 41 const __m128i vzero_point = _mm_set1_epi16((short) (uint16_t) zero_point); in xnn_qu8_requantize_precise__sse2() local 106 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_scaled, y_scaled), vzero_point); in xnn_qu8_requantize_precise__sse2() 107 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qu8_requantize_precise__sse2()
|
D | q31-wasmsimd.c | 49 const v128_t vzero_point = wasm_i16x8_splat((int16_t) (uint16_t) zero_point); in xnn_qu8_requantize_q31__wasmsimd() local 112 …28_t xy_packed = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(x_scaled, y_scaled), vzero_point); in xnn_qu8_requantize_q31__wasmsimd() 113 …28_t zw_packed = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(z_scaled, w_scaled), vzero_point); in xnn_qu8_requantize_q31__wasmsimd()
|
D | q31-ssse3.c | 47 const __m128i vzero_point = _mm_set1_epi16((short) (uint16_t) zero_point); in xnn_qu8_requantize_q31__ssse3() local 158 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_scaled, y_scaled), vzero_point); in xnn_qu8_requantize_q31__ssse3() 159 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qu8_requantize_q31__ssse3()
|
D | q31-sse2.c | 47 const __m128i vzero_point = _mm_set1_epi16((short) (uint16_t) zero_point); in xnn_qu8_requantize_q31__sse2() local 158 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_scaled, y_scaled), vzero_point); in xnn_qu8_requantize_q31__sse2() 159 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qu8_requantize_q31__sse2()
|
/external/XNNPACK/src/qs8-requantization/ |
D | q31-neon.c | 47 const int16x8_t vzero_point = vdupq_n_s16((int16_t) zero_point); in xnn_qs8_requantize_q31__neon() local 83 …nst int16x8_t xy_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(x_scaled), y_scaled), vzero_point); in xnn_qs8_requantize_q31__neon() 84 …nst int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_point); in xnn_qs8_requantize_q31__neon() 87 …x8_t xy_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(x_scaled), vqmovn_s32(y_scaled)), vzero_point); in xnn_qs8_requantize_q31__neon() 88 …x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), vzero_point); in xnn_qs8_requantize_q31__neon()
|
D | fp32-sse4.c | 32 const __m128i vzero_point = _mm_set1_epi16((short) zero_point); in xnn_qs8_requantize_fp32__sse4() local 74 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_rounded, y_rounded), vzero_point); in xnn_qs8_requantize_fp32__sse4() 75 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_rounded, w_rounded), vzero_point); in xnn_qs8_requantize_fp32__sse4()
|
D | fp32-sse2.c | 32 const __m128i vzero_point = _mm_set1_epi16((short) (uint16_t) zero_point); in xnn_qs8_requantize_fp32__sse2() local 74 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_rounded, y_rounded), vzero_point); in xnn_qs8_requantize_fp32__sse2() 75 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_rounded, w_rounded), vzero_point); in xnn_qs8_requantize_fp32__sse2()
|
D | precise-neon.c | 44 const int16x8_t vzero_point = vdupq_n_s16((int16_t) zero_point); in xnn_qs8_requantize_precise__neon() local 115 …nst int16x8_t xy_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(x_scaled), y_scaled), vzero_point); in xnn_qs8_requantize_precise__neon() 116 …nst int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_point); in xnn_qs8_requantize_precise__neon() 124 …x8_t xy_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(x_scaled), vqmovn_s32(y_scaled)), vzero_point); in xnn_qs8_requantize_precise__neon() 125 …x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), vzero_point); in xnn_qs8_requantize_precise__neon()
|
D | fp32-neon.c | 33 const int16x8_t vzero_point = vdupq_n_s16((int16_t) zero_point); in xnn_qs8_requantize_fp32__neon() local 74 …t int16x8_t xy_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(x_rounded), y_rounded), vzero_point); in xnn_qs8_requantize_fp32__neon() 75 …t int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_rounded), w_rounded), vzero_point); in xnn_qs8_requantize_fp32__neon()
|
D | precise-sse4.c | 41 const __m128i vzero_point = _mm_set1_epi16((short) zero_point); in xnn_qs8_requantize_precise__sse4() local 93 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_scaled, y_scaled), vzero_point); in xnn_qs8_requantize_precise__sse4() 94 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qs8_requantize_precise__sse4()
|
D | precise-ssse3.c | 41 const __m128i vzero_point = _mm_set1_epi16((short) zero_point); in xnn_qs8_requantize_precise__ssse3() local 101 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_scaled, y_scaled), vzero_point); in xnn_qs8_requantize_precise__ssse3() 102 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qs8_requantize_precise__ssse3()
|
D | q31-sse4.c | 47 const __m128i vzero_point = _mm_set1_epi16((short) zero_point); in xnn_qs8_requantize_q31__sse4() local 109 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_scaled, y_scaled), vzero_point); in xnn_qs8_requantize_q31__sse4() 110 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qs8_requantize_q31__sse4()
|
D | q31-wasmsimd.c | 49 const v128_t vzero_point = wasm_i16x8_splat((int16_t) zero_point); in xnn_qs8_requantize_q31__wasmsimd() local 112 …28_t xy_packed = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(x_scaled, y_scaled), vzero_point); in xnn_qs8_requantize_q31__wasmsimd() 113 …28_t zw_packed = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(z_scaled, w_scaled), vzero_point); in xnn_qs8_requantize_q31__wasmsimd()
|
D | precise-sse2.c | 41 const __m128i vzero_point = _mm_set1_epi16((short) zero_point); in xnn_qs8_requantize_precise__sse2() local 106 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_scaled, y_scaled), vzero_point); in xnn_qs8_requantize_precise__sse2() 107 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qs8_requantize_precise__sse2()
|
D | q31-ssse3.c | 47 const __m128i vzero_point = _mm_set1_epi16((short) zero_point); in xnn_qs8_requantize_q31__ssse3() local 158 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_scaled, y_scaled), vzero_point); in xnn_qs8_requantize_q31__ssse3() 159 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qs8_requantize_q31__ssse3()
|
D | q31-sse2.c | 47 const __m128i vzero_point = _mm_set1_epi16((short) zero_point); in xnn_qs8_requantize_q31__sse2() local 158 const __m128i xy_packed = _mm_adds_epi16(_mm_packs_epi32(x_scaled, y_scaled), vzero_point); in xnn_qs8_requantize_q31__sse2() 159 const __m128i zw_packed = _mm_adds_epi16(_mm_packs_epi32(z_scaled, w_scaled), vzero_point); in xnn_qs8_requantize_q31__sse2()
|