/external/XNNPACK/src/qs8-dwconv/gen/

up8x9-minmax-sse41-mul16.c
  210: const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() [local]
  216: const __m128i vq31prod02 = _mm_srli_epi64(vprod02, 31);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
  370: const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() [local]
  376: const __m128i vq31prod02 = _mm_srli_epi64(vprod02, 31);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()

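In the sse41 files, the two indexed lines are the Q31 fixed-point requantization step: a signed 32x32->64-bit multiply of the even accumulator lanes (_mm_mul_epi32), plus a rounding constant, then a 64-bit shift right by 31 of which only the low 32 bits are kept downstream. A minimal scalar sketch of that arithmetic, assuming vrounding holds 1 << 30 per 64-bit lane; the function name below is illustrative, not taken from the kernel:

#include <stdint.h>

/* Scalar model of the vprod02 / vq31prod02 pair in the sse41 kernels:
 * a Q31 rounding multiply-high on one accumulator lane.  Assumes the
 * vrounding vector holds (1 << 30) in each 64-bit lane; the kernel's
 * _mm_srli_epi64 is a logical shift, but only the low 32 bits of the
 * result are used, so the arithmetic shift below matches those bits. */
static inline int32_t q31_rounding_mulhi(int32_t acc, int32_t multiplier) {
  const int64_t prod = (int64_t) acc * (int64_t) multiplier;  /* _mm_mul_epi32  */
  const int64_t rounded = prod + (INT64_C(1) << 30);          /* + vrounding    */
  return (int32_t) (rounded >> 31);                           /* >> 31, low 32b */
}
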
up8x9-minmax-sse2-mul16.c
  227: const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() [local]
  232: const __m128i vq31prod02 = _mm_srli_epi64(_mm_add_epi64(vprod02, vrounding), 31);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
  407: const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() [local]
  412: const __m128i vq31prod02 = _mm_srli_epi64(_mm_add_epi64(vprod02, vrounding), 31);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()

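The sse2 and ssse3 files compute the same Q31 product without _mm_mul_epi32, which is an SSE4.1 instruction: vabsprod02 is the unsigned product of the accumulator's magnitude and the multiplier (_mm_mul_epu32), and vnmask02 appears to be the accumulator's per-lane sign mask, so the xor/subtract pair restores the sign before rounding and shifting. A scalar sketch of that path, under the assumptions that the multiplier is non-negative (as it is in this requantization scheme) and that vrounding again holds 1 << 30:

#include <stdint.h>

/* Scalar model of the sse2/ssse3 lines: sign-restore after an unsigned
 * multiply.  Assumptions: the multiplier is non-negative, vnmask02 is 0
 * or all-ones per lane depending on the accumulator's sign, and the
 * rounding constant is (1 << 30). */
static inline int32_t q31_rounding_mulhi_sse2_style(int32_t acc, uint32_t multiplier) {
  const int64_t nmask = -(int64_t) (acc < 0);                        /* vnmask02   */
  const uint32_t absacc = acc < 0 ? 0u - (uint32_t) acc : (uint32_t) acc;
  const uint64_t absprod = (uint64_t) absacc * multiplier;           /* vabsprod02 */
  /* (x ^ nmask) - nmask negates x when nmask == -1 and is a no-op when
   * nmask == 0: the _mm_xor_si128 / _mm_sub_epi64 pair in the listing. */
  const int64_t prod = ((int64_t) absprod ^ nmask) - nmask;          /* vprod02    */
  return (int32_t) ((prod + (INT64_C(1) << 30)) >> 31);              /* vq31prod02 */
}
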
up8x9-minmax-ssse3-mul16.c
  227: const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() [local]
  232: const __m128i vq31prod02 = _mm_srli_epi64(_mm_add_epi64(vprod02, vrounding), 31);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
  407: const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() [local]
  412: const __m128i vq31prod02 = _mm_srli_epi64(_mm_add_epi64(vprod02, vrounding), 31);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()

up16x9-minmax-sse41-mul16.c
  284: const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() [local]
  296: const __m128i vq31prod02 = _mm_srli_epi64(vprod02, 31);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
  478: const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() [local]
  484: const __m128i vq31prod02 = _mm_srli_epi64(vprod02, 31);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()

up16x9-minmax-ssse3-mul16.c
  315: const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() [local]
  324: const __m128i vq31prod02 = _mm_srli_epi64(_mm_add_epi64(vprod02, vrounding), 31);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
  531: const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() [local]
  536: const __m128i vq31prod02 = _mm_srli_epi64(_mm_add_epi64(vprod02, vrounding), 31);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()

up16x9-minmax-sse2-mul16.c
  315: const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() [local]
  324: const __m128i vq31prod02 = _mm_srli_epi64(_mm_add_epi64(vprod02, vrounding), 31);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
  531: const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() [local]
  536: const __m128i vq31prod02 = _mm_srli_epi64(_mm_add_epi64(vprod02, vrounding), 31);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()

up24x9-minmax-sse41-mul16.c
  358: const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);  in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() [local]
  376: const __m128i vq31prod02 = _mm_srli_epi64(vprod02, 31);  in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
  576: const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);  in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() [local]
  582: const __m128i vq31prod02 = _mm_srli_epi64(vprod02, 31);  in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()

up24x9-minmax-ssse3-mul16.c
  403: const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02);  in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() [local]
  416: const __m128i vq31prod02 = _mm_srli_epi64(_mm_add_epi64(vprod02, vrounding), 31);  in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
  645: const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02);  in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() [local]
  650: const __m128i vq31prod02 = _mm_srli_epi64(_mm_add_epi64(vprod02, vrounding), 31);  in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()

up24x9-minmax-sse2-mul16.c
  403: const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02);  in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() [local]
  416: const __m128i vq31prod02 = _mm_srli_epi64(_mm_add_epi64(vprod02, vrounding), 31);  in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
  645: const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02);  in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() [local]
  650: const __m128i vq31prod02 = _mm_srli_epi64(_mm_add_epi64(vprod02, vrounding), 31);  in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()