/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse41-mul16.c | 129 const __m128i vxk3x01234567 = _mm_cvtepi8_epi16(vk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 133 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 134 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 294 const __m128i vxk3x01234567 = _mm_cvtepi8_epi16(vk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 297 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 298 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 131 …const __m128i vxk3x01234567 = _mm_unpacklo_epi8(vk3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 133 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 134 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 316 …const __m128i vxk3x01234567 = _mm_unpacklo_epi8(vk3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 318 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 319 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 131 …const __m128i vxk3x01234567 = _mm_unpacklo_epi8(vk3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 133 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 134 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 316 …const __m128i vxk3x01234567 = _mm_unpacklo_epi8(vk3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 318 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 319 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up16x9-minmax-sse41-mul16.c | 155 const __m128i vxk3x01234567 = _mm_cvtepi8_epi16(vk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 163 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 164 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 394 const __m128i vxk3x01234567 = _mm_cvtepi8_epi16(vk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 398 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 399 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 159 …const __m128i vxk3x01234567 = _mm_unpacklo_epi8(vk3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 163 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 164 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 433 …const __m128i vxk3x01234567 = _mm_unpacklo_epi8(vk3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 435 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 436 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 159 …const __m128i vxk3x01234567 = _mm_unpacklo_epi8(vk3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 163 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 164 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 433 …const __m128i vxk3x01234567 = _mm_unpacklo_epi8(vk3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 435 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 436 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up24x9-minmax-sse41-mul16.c | 181 const __m128i vxk3x01234567 = _mm_cvtepi8_epi16(vk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 193 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 194 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 492 const __m128i vxk3x01234567 = _mm_cvtepi8_epi16(vk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 496 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 497 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 187 …const __m128i vxk3x01234567 = _mm_unpacklo_epi8(vk3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 193 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 194 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 547 …const __m128i vxk3x01234567 = _mm_unpacklo_epi8(vk3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 549 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 550 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 187 …const __m128i vxk3x01234567 = _mm_unpacklo_epi8(vk3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 193 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 194 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 547 …const __m128i vxk3x01234567 = _mm_unpacklo_epi8(vk3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 549 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 550 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|