/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse41-mul16.c | 155 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 159 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 160 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 318 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 321 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 322 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 157 …const __m128i vxk5x01234567 = _mm_unpacklo_epi8(vk5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 159 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 160 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 340 …const __m128i vxk5x01234567 = _mm_unpacklo_epi8(vk5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 342 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 343 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 157 …const __m128i vxk5x01234567 = _mm_unpacklo_epi8(vk5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 159 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 160 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 340 …const __m128i vxk5x01234567 = _mm_unpacklo_epi8(vk5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 342 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 343 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up16x9-minmax-sse41-mul16.c | 197 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 205 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 206 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 420 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 424 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 425 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 201 …const __m128i vxk5x01234567 = _mm_unpacklo_epi8(vk5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 205 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 206 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 459 …const __m128i vxk5x01234567 = _mm_unpacklo_epi8(vk5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 461 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 462 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 201 …const __m128i vxk5x01234567 = _mm_unpacklo_epi8(vk5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 205 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 206 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 459 …const __m128i vxk5x01234567 = _mm_unpacklo_epi8(vk5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 461 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 462 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up24x9-minmax-sse41-mul16.c | 239 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 251 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 252 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 518 const __m128i vxk5x01234567 = _mm_cvtepi8_epi16(vk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 522 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 523 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 245 …const __m128i vxk5x01234567 = _mm_unpacklo_epi8(vk5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 251 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 252 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 573 …const __m128i vxk5x01234567 = _mm_unpacklo_epi8(vk5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 575 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 576 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 245 …const __m128i vxk5x01234567 = _mm_unpacklo_epi8(vk5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 251 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 252 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 573 …const __m128i vxk5x01234567 = _mm_unpacklo_epi8(vk5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 575 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 576 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|