/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7p7x-minmax-wasmsimd-c8-acc2.c | 54 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local 63 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() 99 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local 108 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() 164 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local 173 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() 234 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local 243 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
|
D | 7p7x-minmax-sse41-c8-acc2.c | 54 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 64 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 100 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 110 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 165 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 175 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 233 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 243 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
|
D | 7p7x-minmax-ssse3-c8-acc2.c | 64 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 71 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 118 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 125 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 191 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 198 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 272 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 279 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
|
D | 7p7x-minmax-sse2-c8-acc2.c | 64 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 71 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 118 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 125 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 191 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 198 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 275 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 282 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
|
D | 7p7x-minmax-wasmsimd-c24-acc2.c | 64 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 83 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 122 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 131 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 180 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 199 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 238 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 247 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 316 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 335 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() [all …]
|
D | 7p7x-minmax-sse41-c24-acc2.c | 64 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 84 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 123 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 133 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 182 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 202 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 241 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 251 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 319 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 339 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() [all …]
|
D | 7p7x-minmax-wasmsimd-c16-acc2.c | 59 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local 73 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() 121 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local 135 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() 203 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local 217 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() 304 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local 313 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
|
D | 7p7x-minmax-sse41-c16-acc2.c | 59 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 74 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 122 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 137 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 204 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 219 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 300 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 310 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
|
D | 7x-minmax-wasmsimd-c8-acc2.c | 72 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() local 81 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() 141 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() local 150 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
|
D | 7x-minmax-sse41-c8-acc2.c | 71 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() local 81 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() 138 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() local 148 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
|
D | 7p7x-minmax-sse2-c24-acc2.c | 88 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 105 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 157 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 164 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 238 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 255 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 307 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 314 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 407 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 424 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() [all …]
|
D | 7p7x-minmax-ssse3-c24-acc2.c | 88 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 105 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 157 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 164 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 238 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 255 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 307 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 314 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 407 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 424 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() [all …]
|
D | 7x-minmax-ssse3-c8-acc2.c | 81 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local 88 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() 161 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local 168 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
|
D | 7p7x-minmax-ssse3-c16-acc2.c | 76 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 88 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 155 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 167 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 253 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 265 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 367 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 374 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
|
D | 7x-minmax-sse2-c8-acc2.c | 81 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local 88 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() 164 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local 171 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
|
D | 7x-minmax-sse41-c16-acc2.c | 76 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() local 91 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() 171 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() local 181 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
|
D | 7x-minmax-wasmsimd-c16-acc2.c | 77 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() local 91 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() 177 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() local 186 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
|
D | 7p7x-minmax-sse2-c16-acc2.c | 76 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 88 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 155 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 167 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 253 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 265 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 372 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 379 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
|
D | 7x-minmax-ssse3-c16-acc2.c | 93 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() local 105 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() 206 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() local 213 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
|
D | 7x-minmax-sse41-c24-acc2.c | 81 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local 101 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 206 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local 216 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
|
D | 7x-minmax-wasmsimd-c24-acc2.c | 82 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local 101 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 215 const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local 224 vacc1x01234567 = wasm_i16x8_add(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
|
D | 7x-minmax-sse2-c16-acc2.c | 93 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() local 105 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() 211 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() local 218 vacc1x01234567 = _mm_add_epi16(vacc1x01234567, vxi5x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse41-mul16.c | 153 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(vi5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 159 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 160 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 316 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(vi5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 321 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 322 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 156 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 159 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 160 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 339 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 342 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 343 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 156 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 159 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 160 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 339 …const __m128i vxi5x01234567 = _mm_unpacklo_epi8(vi5x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 342 const __m128i vp5x01234567lo = _mm_mullo_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 343 const __m128i vp5x01234567hi = _mm_mulhi_epi16(vxi5x01234567, vxk5x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|