/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse41-mul16.c | 213 const __m128i vprod46 = _mm_add_epi64(_mm_mul_epi32(vacc4567, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 218 const __m128i vq31prod46 = _mm_srli_epi64(vprod46, 31); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 371 const __m128i vprod46 = _mm_add_epi64(_mm_mul_epi32(vacc4567, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 378 const __m128i vq31prod46 = _mm_srli_epi64(vprod46, 31); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 229 const __m128i vprod46 = _mm_sub_epi64(_mm_xor_si128(vabsprod46, vnmask46), vnmask46); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 234 const __m128i vq31prod46 = _mm_srli_epi64(_mm_add_epi64(vprod46, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 409 const __m128i vprod46 = _mm_sub_epi64(_mm_xor_si128(vabsprod46, vnmask46), vnmask46); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 414 const __m128i vq31prod46 = _mm_srli_epi64(_mm_add_epi64(vprod46, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 229 const __m128i vprod46 = _mm_sub_epi64(_mm_xor_si128(vabsprod46, vnmask46), vnmask46); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 234 const __m128i vq31prod46 = _mm_srli_epi64(_mm_add_epi64(vprod46, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 409 const __m128i vprod46 = _mm_sub_epi64(_mm_xor_si128(vabsprod46, vnmask46), vnmask46); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 414 const __m128i vq31prod46 = _mm_srli_epi64(_mm_add_epi64(vprod46, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up16x9-minmax-sse41-mul16.c | 287 const __m128i vprod46 = _mm_add_epi64(_mm_mul_epi32(vacc4567, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 298 const __m128i vq31prod46 = _mm_srli_epi64(vprod46, 31); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 479 const __m128i vprod46 = _mm_add_epi64(_mm_mul_epi32(vacc4567, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 486 const __m128i vq31prod46 = _mm_srli_epi64(vprod46, 31); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 317 const __m128i vprod46 = _mm_sub_epi64(_mm_xor_si128(vabsprod46, vnmask46), vnmask46); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 326 const __m128i vq31prod46 = _mm_srli_epi64(_mm_add_epi64(vprod46, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 533 const __m128i vprod46 = _mm_sub_epi64(_mm_xor_si128(vabsprod46, vnmask46), vnmask46); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 538 const __m128i vq31prod46 = _mm_srli_epi64(_mm_add_epi64(vprod46, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 317 const __m128i vprod46 = _mm_sub_epi64(_mm_xor_si128(vabsprod46, vnmask46), vnmask46); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 326 const __m128i vq31prod46 = _mm_srli_epi64(_mm_add_epi64(vprod46, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 533 const __m128i vprod46 = _mm_sub_epi64(_mm_xor_si128(vabsprod46, vnmask46), vnmask46); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 538 const __m128i vq31prod46 = _mm_srli_epi64(_mm_add_epi64(vprod46, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up24x9-minmax-sse41-mul16.c | 361 const __m128i vprod46 = _mm_add_epi64(_mm_mul_epi32(vacc4567, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 378 const __m128i vq31prod46 = _mm_srli_epi64(vprod46, 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 577 const __m128i vprod46 = _mm_add_epi64(_mm_mul_epi32(vacc4567, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 584 const __m128i vq31prod46 = _mm_srli_epi64(vprod46, 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 405 const __m128i vprod46 = _mm_sub_epi64(_mm_xor_si128(vabsprod46, vnmask46), vnmask46); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 418 const __m128i vq31prod46 = _mm_srli_epi64(_mm_add_epi64(vprod46, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 647 const __m128i vprod46 = _mm_sub_epi64(_mm_xor_si128(vabsprod46, vnmask46), vnmask46); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 652 const __m128i vq31prod46 = _mm_srli_epi64(_mm_add_epi64(vprod46, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 405 const __m128i vprod46 = _mm_sub_epi64(_mm_xor_si128(vabsprod46, vnmask46), vnmask46); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 418 const __m128i vq31prod46 = _mm_srli_epi64(_mm_add_epi64(vprod46, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 647 const __m128i vprod46 = _mm_sub_epi64(_mm_xor_si128(vabsprod46, vnmask46), vnmask46); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 652 const __m128i vq31prod46 = _mm_srli_epi64(_mm_add_epi64(vprod46, vrounding), 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|