/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7x-minmax-sse41-c8-acc2.c | 96 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() local 101 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() 163 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() local 168 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
|
D | 7x-minmax-ssse3-c8-acc2.c | 104 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local 109 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() 184 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local 189 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
|
D | 7x-minmax-sse2-c8-acc2.c | 107 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local 112 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() 190 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local 195 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
|
D | 7x-minmax-sse41-c16-acc2.c | 115 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() local 124 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() 196 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() local 201 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
|
D | 7p7x-minmax-sse41-c8-acc2.c | 191 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 196 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 259 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 264 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
|
D | 7x-minmax-ssse3-c16-acc2.c | 131 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() local 140 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() 229 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() local 234 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
|
D | 7x-minmax-sse41-c24-acc2.c | 134 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local 147 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 231 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local 236 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
|
D | 7x-minmax-sse2-c16-acc2.c | 136 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() local 145 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() 237 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() local 242 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
|
D | 7p7x-minmax-ssse3-c8-acc2.c | 215 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 220 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 296 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 301 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
|
D | 7p7x-minmax-sse2-c8-acc2.c | 218 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 223 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 302 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 307 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
|
D | 7p7x-minmax-sse41-c16-acc2.c | 244 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 253 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 326 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 331 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
|
D | 7x-minmax-sse2-c24-acc2.c | 165 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() local 178 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() 286 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() local 291 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
|
D | 7x-minmax-ssse3-c24-acc2.c | 158 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2() local 171 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2() 276 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2() local 281 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
|
D | 7p7x-minmax-ssse3-c16-acc2.c | 292 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 301 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 391 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 396 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
|
D | 7p7x-minmax-sse2-c16-acc2.c | 297 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 306 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 399 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 404 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
|
D | 7p7x-minmax-sse41-c24-acc2.c | 373 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 386 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 471 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 476 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
|
D | 7p7x-minmax-sse2-c24-acc2.c | 468 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 481 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 590 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 595 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
|
D | 7p7x-minmax-ssse3-c24-acc2.c | 461 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 474 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 580 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 585 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse2-mul16.c | 216 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 227 const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 397 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 407 const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 216 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 227 const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 397 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 407 const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 294 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 315 const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 521 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 531 const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
D | up16x9-minmax-sse2-mul16.c | 294 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 315 const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 521 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() local 531 const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
|
D | up24x9-minmax-ssse3-mul16.c | 372 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 403 const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 635 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 645 const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 372 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 403 const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 635 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 645 const __m128i vprod02 = _mm_sub_epi64(_mm_xor_si128(vabsprod02, vnmask02), vnmask02); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|