/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse2-mul16.c | 210 const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 213 const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vnmask4567), vnmask4567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 224 const __m128i vnmask46 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 225 const __m128i vnmask57 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 389 const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 392 const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vnmask4567), vnmask4567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 404 const __m128i vnmask46 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 405 const __m128i vnmask57 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 284 const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 289 const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vnmask4567), vnmask4567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 308 const __m128i vnmask46 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 309 const __m128i vnmask57 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 513 const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 516 const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vnmask4567), vnmask4567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 528 const __m128i vnmask46 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 529 const __m128i vnmask57 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 210 const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 224 const __m128i vnmask46 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 225 const __m128i vnmask57 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 389 const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 404 const __m128i vnmask46 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 405 const __m128i vnmask57 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 358 const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 365 const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vnmask4567), vnmask4567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 392 const __m128i vnmask46 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 393 const __m128i vnmask57 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 627 const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 630 const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vnmask4567), vnmask4567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 642 const __m128i vnmask46 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 643 const __m128i vnmask57 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 284 const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 308 const __m128i vnmask46 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 309 const __m128i vnmask57 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 513 const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 528 const __m128i vnmask46 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 529 const __m128i vnmask57 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 358 const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 392 const __m128i vnmask46 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 393 const __m128i vnmask57 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 627 const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 642 const __m128i vnmask46 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 643 const __m128i vnmask57 = _mm_shuffle_epi32(vnmask4567, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|