/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse41-mul16.c | 179 const __m128i vxi7x01234567 = _mm_cvtepi8_epi16(vi7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 185 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 186 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 340 const __m128i vxi7x01234567 = _mm_cvtepi8_epi16(vi7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 345 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 346 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 182 …const __m128i vxi7x01234567 = _mm_unpacklo_epi8(vi7x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 185 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 186 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 363 …const __m128i vxi7x01234567 = _mm_unpacklo_epi8(vi7x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 366 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 367 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 182 …const __m128i vxi7x01234567 = _mm_unpacklo_epi8(vi7x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 185 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 186 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 363 …const __m128i vxi7x01234567 = _mm_unpacklo_epi8(vi7x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 366 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 367 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up16x9-minmax-sse41-mul16.c | 237 const __m128i vxi7x01234567 = _mm_cvtepi8_epi16(vi7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 247 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 248 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 444 const __m128i vxi7x01234567 = _mm_cvtepi8_epi16(vi7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 450 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 451 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 242 …const __m128i vxi7x01234567 = _mm_unpacklo_epi8(vi7x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 247 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 248 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 484 …const __m128i vxi7x01234567 = _mm_unpacklo_epi8(vi7x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 487 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 488 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 242 …const __m128i vxi7x01234567 = _mm_unpacklo_epi8(vi7x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 247 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 248 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 484 …const __m128i vxi7x01234567 = _mm_unpacklo_epi8(vi7x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 487 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 488 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up24x9-minmax-sse41-mul16.c | 295 const __m128i vxi7x01234567 = _mm_cvtepi8_epi16(vi7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 309 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 310 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 542 const __m128i vxi7x01234567 = _mm_cvtepi8_epi16(vi7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 548 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 549 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 302 …const __m128i vxi7x01234567 = _mm_unpacklo_epi8(vi7x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 309 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 310 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 598 …const __m128i vxi7x01234567 = _mm_unpacklo_epi8(vi7x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 601 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 602 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 302 …const __m128i vxi7x01234567 = _mm_unpacklo_epi8(vi7x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 309 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 310 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 598 …const __m128i vxi7x01234567 = _mm_unpacklo_epi8(vi7x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 601 const __m128i vp7x01234567lo = _mm_mullo_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 602 const __m128i vp7x01234567hi = _mm_mulhi_epi16(vxi7x01234567, vxk7x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|