/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse41-mul16.c | 120 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 123 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 124 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 285 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 288 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 289 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 120 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 123 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 124 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 306 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 309 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 310 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 120 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 123 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 124 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 306 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 309 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 310 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up16x9-minmax-sse41-mul16.c | 142 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 147 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 148 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 385 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() local 388 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 389 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 142 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 147 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 148 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 422 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 425 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 426 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 142 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 147 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 148 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 422 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 425 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 426 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up24x9-minmax-sse41-mul16.c | 164 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 171 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 172 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 483 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() local 486 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 487 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 164 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 171 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 172 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 536 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 539 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 540 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 164 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 171 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 172 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 536 const __m128i vp2x01234567lo = _mm_mullo_epi16(vxi2x01234567, vxk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 539 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 540 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|