/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7x-minmax-wasmsimd-c8-acc2.c |
      91  const v128_t vabsacc4567 = wasm_i32x4_abs(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() local
      94  const v128_t vsgnacc4567 = wasm_i32x4_gt(vabsacc4567, vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
      98  const v128_t vabsacc45 = wasm_v32x4_shuffle(vabsacc4567, vzero, 0, 4, 1, 5);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
      99  const v128_t vabsacc67 = wasm_v32x4_shuffle(vabsacc4567, vzero, 2, 6, 3, 7);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
     160  const v128_t vabsacc4567 = wasm_i32x4_abs(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() local
     163  const v128_t vsgnacc4567 = wasm_i32x4_gt(vabsacc4567, vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
     167  const v128_t vabsacc45 = wasm_v32x4_shuffle(vabsacc4567, vzero, 0, 4, 1, 5);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
     168  const v128_t vabsacc67 = wasm_v32x4_shuffle(vabsacc4567, vzero, 2, 6, 3, 7);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
|
D | 7x-minmax-wasmsimd-c16-acc2.c |
     106  const v128_t vabsacc4567 = wasm_i32x4_abs(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() local
     111  const v128_t vsgnacc4567 = wasm_i32x4_gt(vabsacc4567, vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
     117  const v128_t vabsacc45 = wasm_v32x4_shuffle(vabsacc4567, vzero, 0, 4, 1, 5);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
     118  const v128_t vabsacc67 = wasm_v32x4_shuffle(vabsacc4567, vzero, 2, 6, 3, 7);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
     196  const v128_t vabsacc4567 = wasm_i32x4_abs(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() local
     199  const v128_t vsgnacc4567 = wasm_i32x4_gt(vabsacc4567, vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
     203  const v128_t vabsacc45 = wasm_v32x4_shuffle(vabsacc4567, vzero, 0, 4, 1, 5);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
     204  const v128_t vabsacc67 = wasm_v32x4_shuffle(vabsacc4567, vzero, 2, 6, 3, 7);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
|
D | 7x-minmax-sse41-c8-acc2.c |
      91  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() local
      94  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
      98  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
     158  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() local
     161  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
     165  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
|
D | 7p7x-minmax-wasmsimd-c8-acc2.c |
     184  const v128_t vabsacc4567 = wasm_i32x4_abs(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local
     187  const v128_t vsgnacc4567 = wasm_i32x4_gt(vabsacc4567, vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
     191  const v128_t vabsacc45 = wasm_v32x4_shuffle(vabsacc4567, vzero, 0, 4, 1, 5);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
     192  const v128_t vabsacc67 = wasm_v32x4_shuffle(vabsacc4567, vzero, 2, 6, 3, 7);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
     254  const v128_t vabsacc4567 = wasm_i32x4_abs(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local
     257  const v128_t vsgnacc4567 = wasm_i32x4_gt(vabsacc4567, vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
     261  const v128_t vabsacc45 = wasm_v32x4_shuffle(vabsacc4567, vzero, 0, 4, 1, 5);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
     262  const v128_t vabsacc67 = wasm_v32x4_shuffle(vabsacc4567, vzero, 2, 6, 3, 7);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
|
D | 7x-minmax-wasmsimd-c24-acc2.c |
     121  const v128_t vabsacc4567 = wasm_i32x4_abs(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local
     128  const v128_t vsgnacc4567 = wasm_i32x4_gt(vabsacc4567, vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
     136  const v128_t vabsacc45 = wasm_v32x4_shuffle(vabsacc4567, vzero, 0, 4, 1, 5);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
     137  const v128_t vabsacc67 = wasm_v32x4_shuffle(vabsacc4567, vzero, 2, 6, 3, 7);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
     234  const v128_t vabsacc4567 = wasm_i32x4_abs(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local
     237  const v128_t vsgnacc4567 = wasm_i32x4_gt(vabsacc4567, vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
     241  const v128_t vabsacc45 = wasm_v32x4_shuffle(vabsacc4567, vzero, 0, 4, 1, 5);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
     242  const v128_t vabsacc67 = wasm_v32x4_shuffle(vabsacc4567, vzero, 2, 6, 3, 7);  in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
|
D | 7x-minmax-ssse3-c8-acc2.c |
      99  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local
     102  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
     106  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
     179  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local
     182  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
     186  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
|
D | 7x-minmax-sse2-c8-acc2.c |
     102  const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vsgnacc4567), vsgnacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local
     105  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
     109  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
     185  const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vsgnacc4567), vsgnacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local
     188  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
     192  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
|
D | 7x-minmax-sse41-c16-acc2.c |
     106  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() local
     111  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
     117  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
     191  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() local
     194  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
     198  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
|
D | 7p7x-minmax-wasmsimd-c16-acc2.c |
     233  const v128_t vabsacc4567 = wasm_i32x4_abs(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local
     238  const v128_t vsgnacc4567 = wasm_i32x4_gt(vabsacc4567, vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
     244  const v128_t vabsacc45 = wasm_v32x4_shuffle(vabsacc4567, vzero, 0, 4, 1, 5);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
     245  const v128_t vabsacc67 = wasm_v32x4_shuffle(vabsacc4567, vzero, 2, 6, 3, 7);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
     324  const v128_t vabsacc4567 = wasm_i32x4_abs(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local
     327  const v128_t vsgnacc4567 = wasm_i32x4_gt(vabsacc4567, vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
     331  const v128_t vabsacc45 = wasm_v32x4_shuffle(vabsacc4567, vzero, 0, 4, 1, 5);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
     332  const v128_t vabsacc67 = wasm_v32x4_shuffle(vabsacc4567, vzero, 2, 6, 3, 7);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
|
D | 7p7x-minmax-sse41-c8-acc2.c |
     186  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local
     189  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
     193  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
     254  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local
     257  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
     261  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
|
D | 7x-minmax-ssse3-c16-acc2.c |
     122  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() local
     127  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
     133  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
     224  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() local
     227  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
     231  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
|
D | 7x-minmax-sse41-c24-acc2.c |
     121  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local
     128  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
     136  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
     226  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local
     229  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
     233  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
|
D | 7x-minmax-sse2-c16-acc2.c |
     127  const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vsgnacc4567), vsgnacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() local
     132  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
     138  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
     232  const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vsgnacc4567), vsgnacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() local
     235  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
     239  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
|
D | 7p7x-minmax-ssse3-c8-acc2.c |
     210  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local
     213  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
     217  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
     291  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local
     294  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
     298  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
|
D | 7p7x-minmax-sse2-c8-acc2.c |
     213  const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vsgnacc4567), vsgnacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local
     216  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
     220  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
     297  const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vsgnacc4567), vsgnacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local
     300  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
     304  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
|
D | 7p7x-minmax-sse41-c16-acc2.c |
     235  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local
     240  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
     246  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
     321  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local
     324  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
     328  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
|
D | 7x-minmax-sse2-c24-acc2.c |
     152  const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vsgnacc4567), vsgnacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() local
     159  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
     167  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
     281  const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vsgnacc4567), vsgnacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() local
     284  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
     288  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
|
D | 7x-minmax-ssse3-c24-acc2.c |
     145  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2() local
     152  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
     160  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
     271  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2() local
     274  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
     278  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
|
D | 7p7x-minmax-wasmsimd-c24-acc2.c |
     356  const v128_t vabsacc4567 = wasm_i32x4_abs(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local
     363  const v128_t vsgnacc4567 = wasm_i32x4_gt(vabsacc4567, vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
     371  const v128_t vabsacc45 = wasm_v32x4_shuffle(vabsacc4567, vzero, 0, 4, 1, 5);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
     372  const v128_t vabsacc67 = wasm_v32x4_shuffle(vabsacc4567, vzero, 2, 6, 3, 7);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
     470  const v128_t vabsacc4567 = wasm_i32x4_abs(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local
     473  const v128_t vsgnacc4567 = wasm_i32x4_gt(vabsacc4567, vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
     477  const v128_t vabsacc45 = wasm_v32x4_shuffle(vabsacc4567, vzero, 0, 4, 1, 5);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
     478  const v128_t vabsacc67 = wasm_v32x4_shuffle(vabsacc4567, vzero, 2, 6, 3, 7);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
|
D | 7p7x-minmax-ssse3-c16-acc2.c |
     283  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local
     288  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
     294  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
     386  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local
     389  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
     393  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
|
D | 7p7x-minmax-sse2-c16-acc2.c |
     288  const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vsgnacc4567), vsgnacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local
     293  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
     299  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
     394  const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vsgnacc4567), vsgnacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local
     397  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
     401  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
|
D | 7p7x-minmax-sse41-c24-acc2.c |
     360  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local
     367  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
     375  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
     466  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local
     469  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
     473  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
|
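All of the wasmsimd gavgpool matches above follow one requantization idiom: take the absolute value of the signed 32-bit accumulators, keep a sign mask, and interleave the absolute values with zero so each 32-bit lane becomes an unsigned 64-bit operand for the fixed-point multiply. A minimal sketch of that step, using the same intrinsics as the listed lines; the helper name and the vacc4567/output names are illustrative only, not XNNPACK API:

    #include <wasm_simd128.h>

    // Sketch only (not XNNPACK code): split |acc| of four signed 32-bit lanes
    // into two vectors whose 32-bit lanes are zero-extended to 64 bits.
    static inline void abs_split_u64_lanes(v128_t vacc4567,
                                           v128_t* vabsacc45, v128_t* vabsacc67,
                                           v128_t* vsgnacc4567) {
      const v128_t vzero = wasm_i32x4_const(0, 0, 0, 0);
      const v128_t vabsacc4567 = wasm_i32x4_abs(vacc4567);              // |acc| per 32-bit lane
      *vsgnacc4567 = wasm_i32x4_gt(vabsacc4567, vacc4567);              // all-ones where |acc| > acc, i.e. acc was negative
      *vabsacc45 = wasm_v32x4_shuffle(vabsacc4567, vzero, 0, 4, 1, 5);  // {|a4|, 0, |a5|, 0} -> two u64 operands
      *vabsacc67 = wasm_v32x4_shuffle(vabsacc4567, vzero, 2, 6, 3, 7);  // {|a6|, 0, |a7|, 0}
    }

The sign mask is kept so the 64-bit products can be negated again after rounding, which is why the kernels compute both vabsacc4567 and vsgnacc4567 from the same accumulator.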
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse2-mul16.c |
     213  const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vnmask4567), vnmask4567);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local
     218  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
     219  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
     392  const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vnmask4567), vnmask4567);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local
     395  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
     399  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c |
     213  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local
     218  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
     219  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
     392  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local
     395  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
     399  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up16x9-minmax-ssse3-mul16.c |
     289  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local
     296  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
     297  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
     516  const __m128i vabsacc4567 = _mm_abs_epi32(vacc4567);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local
     519  const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));  in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
     523  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|
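The x86 matches (both the gavgpool and dwconv kernels) share the analogous idiom: SSSE3/SSE4.1 builds get |acc| directly from _mm_abs_epi32, the SSE2 builds reconstruct it from a sign mask, and both then call _mm_mul_epu32 twice, once on the even lanes and once on the odd lanes shuffled into even position, to form unsigned 64-bit products with the multiplier. A minimal sketch of the SSE2 variant, assuming hypothetical helper and argument names (vacc4567, vmultiplier) rather than the exact surrounding kernel code:

    #include <emmintrin.h>  // SSE2

    // Sketch only (not XNNPACK code): two's-complement absolute value via a
    // sign mask, then pairwise 32x32 -> 64-bit products against the multiplier.
    static inline void abs_mul_pairs(__m128i vacc4567, __m128i vmultiplier,
                                     __m128i* vabsprod46, __m128i* vabsprod57) {
      // One way to form the sign mask: all-ones in lanes where acc < 0.
      const __m128i vsgnacc4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
      // (x ^ mask) - mask == x when mask is 0, and -x when mask is all-ones.
      const __m128i vabsacc4567 = _mm_sub_epi32(_mm_xor_si128(vacc4567, vsgnacc4567), vsgnacc4567);
      // Move the odd lanes (5 and 7) into even positions so _mm_mul_epu32 can see them.
      const __m128i vabsacc57 = _mm_shuffle_epi32(vabsacc4567, _MM_SHUFFLE(3, 3, 1, 1));
      *vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  // 64-bit products of lanes 4 and 6
      *vabsprod57 = _mm_mul_epu32(vabsacc57,   vmultiplier);  // 64-bit products of lanes 5 and 7
    }

The SSSE3/SSE4.1 variants listed above differ only in replacing the xor/sub pair with a single _mm_abs_epi32.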