/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7x-minmax-wasmsimd-c24-acc2.c | 124 const v128_t vabsaccGHIJ = wasm_i32x4_abs(vaccGHIJ); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local 131 const v128_t vsgnaccGHIJ = wasm_i32x4_gt(vabsaccGHIJ, vaccGHIJ); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 142 const v128_t vabsaccGH = wasm_v32x4_shuffle(vabsaccGHIJ, vzero, 0, 4, 1, 5); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 143 const v128_t vabsaccIJ = wasm_v32x4_shuffle(vabsaccGHIJ, vzero, 2, 6, 3, 7); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
|
D | 7x-minmax-sse41-c24-acc2.c | 124 const __m128i vabsaccGHIJ = _mm_abs_epi32(vaccGHIJ); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local 131 const __m128i vabsaccHJ = _mm_shuffle_epi32(vabsaccGHIJ, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 142 const __m128i vabsprodGI = _mm_mul_epu32(vabsaccGHIJ, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
|
D | 7x-minmax-sse2-c24-acc2.c | 155 const __m128i vabsaccGHIJ = _mm_sub_epi32(_mm_xor_si128(vaccGHIJ, vsgnaccGHIJ), vsgnaccGHIJ); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() local 162 const __m128i vabsaccHJ = _mm_shuffle_epi32(vabsaccGHIJ, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() 173 const __m128i vabsprodGI = _mm_mul_epu32(vabsaccGHIJ, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
|
D | 7x-minmax-ssse3-c24-acc2.c | 148 const __m128i vabsaccGHIJ = _mm_abs_epi32(vaccGHIJ); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2() local 155 const __m128i vabsaccHJ = _mm_shuffle_epi32(vabsaccGHIJ, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2() 166 const __m128i vabsprodGI = _mm_mul_epu32(vabsaccGHIJ, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
|
D | 7p7x-minmax-wasmsimd-c24-acc2.c | 359 const v128_t vabsaccGHIJ = wasm_i32x4_abs(vaccGHIJ); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 366 const v128_t vsgnaccGHIJ = wasm_i32x4_gt(vabsaccGHIJ, vaccGHIJ); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 377 const v128_t vabsaccGH = wasm_v32x4_shuffle(vabsaccGHIJ, vzero, 0, 4, 1, 5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 378 const v128_t vabsaccIJ = wasm_v32x4_shuffle(vabsaccGHIJ, vzero, 2, 6, 3, 7); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
|
D | 7p7x-minmax-sse41-c24-acc2.c | 363 const __m128i vabsaccGHIJ = _mm_abs_epi32(vaccGHIJ); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 370 const __m128i vabsaccHJ = _mm_shuffle_epi32(vabsaccGHIJ, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 381 const __m128i vabsprodGI = _mm_mul_epu32(vabsaccGHIJ, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
|
D | 7p7x-minmax-sse2-c24-acc2.c | 458 const __m128i vabsaccGHIJ = _mm_sub_epi32(_mm_xor_si128(vaccGHIJ, vsgnaccGHIJ), vsgnaccGHIJ); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 465 const __m128i vabsaccHJ = _mm_shuffle_epi32(vabsaccGHIJ, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 476 const __m128i vabsprodGI = _mm_mul_epu32(vabsaccGHIJ, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
|
D | 7p7x-minmax-ssse3-c24-acc2.c | 451 const __m128i vabsaccGHIJ = _mm_abs_epi32(vaccGHIJ); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 458 const __m128i vabsaccHJ = _mm_shuffle_epi32(vabsaccGHIJ, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 469 const __m128i vabsprodGI = _mm_mul_epu32(vabsaccGHIJ, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up24x9-minmax-ssse3-mul16.c | 368 const __m128i vabsaccGHIJ = _mm_abs_epi32(vaccGHIJ); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() local 383 const __m128i vabsaccHJ = _mm_shuffle_epi32(vabsaccGHIJ, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 384 const __m128i vabsprodGI = _mm_mul_epu32(vabsaccGHIJ, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
|
D | up24x9-minmax-sse2-mul16.c | 368 const __m128i vabsaccGHIJ = _mm_sub_epi32(_mm_xor_si128(vaccGHIJ, vnmaskGHIJ), vnmaskGHIJ); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() local 383 const __m128i vabsaccHJ = _mm_shuffle_epi32(vabsaccGHIJ, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 384 const __m128i vabsprodGI = _mm_mul_epu32(vabsaccGHIJ, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
|