/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-wasmsimd-mul16.c | 186 const v128_t vq31prod4567 = wasm_v32x4_shuffle(vprod45, vprod67, 1, 3, 5, 7); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() local 190 …v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31… in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() 195 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold… in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() 302 const v128_t vq31prod4567 = wasm_v32x4_shuffle(vprod45, vprod67, 1, 3, 5, 7); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() local 306 …v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31… in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() 311 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold… in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
|
D | up16x9-minmax-wasmsimd-mul16.c | 243 const v128_t vq31prod4567 = wasm_v32x4_shuffle(vprod45, vprod67, 1, 3, 5, 7); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() local 249 …v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31… in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() 256 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold… in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() 378 const v128_t vq31prod4567 = wasm_v32x4_shuffle(vprod45, vprod67, 1, 3, 5, 7); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() local 382 …v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31… in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() 387 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold… in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
|
D | up8x9-minmax-sse41-mul16.c | 222 const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 228 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 235 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 382 const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 388 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 395 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 243 const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 249 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 256 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 423 const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 429 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 436 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 243 const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 249 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 256 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 423 const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 429 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 436 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up24x9-minmax-wasmsimd-mul16.c | 300 const v128_t vq31prod4567 = wasm_v32x4_shuffle(vprod45, vprod67, 1, 3, 5, 7); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() local 308 …v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31… in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() 317 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold… in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() 444 const v128_t vq31prod4567 = wasm_v32x4_shuffle(vprod45, vprod67, 1, 3, 5, 7); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() local 448 …v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31… in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() 453 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold… in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
|
D | up16x9-minmax-sse41-mul16.c | 306 const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 314 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 325 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 490 const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 496 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 503 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 343 const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 351 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 362 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 547 const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 553 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 560 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 343 const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 351 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 362 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 547 const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 553 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 560 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up24x9-minmax-sse41-mul16.c | 390 const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 400 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 415 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 588 const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 594 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 601 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 443 const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 453 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 468 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 661 const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 667 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 674 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 443 const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 453 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 468 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 661 const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 667 …_mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), v… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 674 …_mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold)… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|