/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up16x9-minmax-wasmsimd-mul16.c | 244 const v128_t vq31prod89AB = wasm_v32x4_shuffle(vprod89, vprodAB, 1, 3, 5, 7); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() local 250 …v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vq31prod89AB, vremainder_mask), wasm_i32x4_shr(vq31… in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() 257 …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod89AB, vshift), wasm_i32x4_gt(vrem89AB, vthreshold… in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
|
D | up24x9-minmax-wasmsimd-mul16.c | 301 const v128_t vq31prod89AB = wasm_v32x4_shuffle(vprod89, vprodAB, 1, 3, 5, 7); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() local 309 …v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vq31prod89AB, vremainder_mask), wasm_i32x4_shr(vq31… in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() 318 …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod89AB, vshift), wasm_i32x4_gt(vrem89AB, vthreshold… in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
|
D | up16x9-minmax-sse41-mul16.c | 307 const __m128i vq31prod89AB = _mm_blend_epi16(vq31prod8A, vq31prod9B, 0xCC); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 316 …_mm_add_epi32(_mm_and_si128(vq31prod89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 327 …_mm_sub_epi32(_mm_sra_epi32(vq31prod89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 344 const __m128i vq31prod89AB = _mm_shuffle_epi32(vq31prod8A9B, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 353 …_mm_add_epi32(_mm_and_si128(vq31prod89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 364 …_mm_sub_epi32(_mm_sra_epi32(vq31prod89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 344 const __m128i vq31prod89AB = _mm_shuffle_epi32(vq31prod8A9B, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 353 …_mm_add_epi32(_mm_and_si128(vq31prod89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 364 …_mm_sub_epi32(_mm_sra_epi32(vq31prod89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up24x9-minmax-sse41-mul16.c | 391 const __m128i vq31prod89AB = _mm_blend_epi16(vq31prod8A, vq31prod9B, 0xCC); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 402 …_mm_add_epi32(_mm_and_si128(vq31prod89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 417 …_mm_sub_epi32(_mm_sra_epi32(vq31prod89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 444 const __m128i vq31prod89AB = _mm_shuffle_epi32(vq31prod8A9B, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 455 …_mm_add_epi32(_mm_and_si128(vq31prod89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 470 …_mm_sub_epi32(_mm_sra_epi32(vq31prod89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 444 const __m128i vq31prod89AB = _mm_shuffle_epi32(vq31prod8A9B, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 455 …_mm_add_epi32(_mm_and_si128(vq31prod89AB, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 470 …_mm_sub_epi32(_mm_sra_epi32(vq31prod89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|