/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7p7x-minmax-wasmsimd-c8-acc2.c | 52 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local 62 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() 97 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local 107 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() 162 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local 172 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() 232 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local 242 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
|
D | 7p7x-minmax-sse41-c8-acc2.c | 52 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 63 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 98 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 109 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 163 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 174 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 231 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 242 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
|
D | 7p7x-minmax-ssse3-c8-acc2.c | 63 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 70 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 117 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 124 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 190 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 197 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 271 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 278 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
|
D | 7p7x-minmax-sse2-c8-acc2.c | 63 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 70 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 117 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 124 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 190 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 197 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 274 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 281 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
|
D | 7p7x-minmax-wasmsimd-c24-acc2.c | 60 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 80 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 120 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 130 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 176 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 196 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 236 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 246 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 312 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 332 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() [all …]
|
D | 7p7x-minmax-sse41-c24-acc2.c | 60 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 81 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 121 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 132 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 178 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 199 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 239 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 250 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 315 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 336 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() [all …]
|
D | 7p7x-minmax-wasmsimd-c16-acc2.c | 56 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local 71 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() 118 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local 133 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() 200 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local 215 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() 302 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local 312 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
|
D | 7p7x-minmax-sse41-c16-acc2.c | 56 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 72 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 119 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 135 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 201 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 217 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 298 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 309 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
|
D | 7x-minmax-wasmsimd-c8-acc2.c | 70 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() local 80 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() 139 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() local 149 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
|
D | 7x-minmax-sse41-c8-acc2.c | 69 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() local 80 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() 136 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() local 147 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
|
D | 7p7x-minmax-sse2-c24-acc2.c | 85 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 102 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 156 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 163 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 235 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 252 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 306 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 313 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 404 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 421 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() [all …]
|
D | 7p7x-minmax-ssse3-c24-acc2.c | 85 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 102 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 156 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 163 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 235 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 252 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 306 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 313 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 404 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 421 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() [all …]
|
D | 7x-minmax-ssse3-c8-acc2.c | 80 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local 87 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() 160 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local 167 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
|
D | 7p7x-minmax-ssse3-c16-acc2.c | 74 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 86 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 153 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 165 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 251 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 263 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 366 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 373 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
|
D | 7x-minmax-sse2-c8-acc2.c | 80 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local 87 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() 163 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local 170 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
|
D | 7x-minmax-sse41-c16-acc2.c | 73 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() local 89 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() 169 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() local 180 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
|
D | 7x-minmax-wasmsimd-c16-acc2.c | 74 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() local 89 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() 175 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() local 185 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
|
D | 7p7x-minmax-sse2-c16-acc2.c | 74 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 86 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 153 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 165 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 251 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 263 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 371 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 378 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
|
D | 7x-minmax-ssse3-c16-acc2.c | 91 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() local 103 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() 205 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() local 212 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
|
D | 7x-minmax-sse41-c24-acc2.c | 77 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local 98 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 204 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local 215 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
|
D | 7x-minmax-wasmsimd-c24-acc2.c | 78 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local 98 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 213 const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local 223 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
|
D | 7x-minmax-sse2-c16-acc2.c | 91 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() local 103 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() 210 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() local 217 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse41-mul16.c | 140 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(vi4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 146 const __m128i vp4x01234567lo = _mm_mullo_epi16(vxi4x01234567, vxk4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 147 const __m128i vp4x01234567hi = _mm_mulhi_epi16(vxi4x01234567, vxk4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 304 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(vi4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 309 const __m128i vp4x01234567lo = _mm_mullo_epi16(vxi4x01234567, vxk4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 310 const __m128i vp4x01234567hi = _mm_mulhi_epi16(vxi4x01234567, vxk4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 143 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 146 const __m128i vp4x01234567lo = _mm_mullo_epi16(vxi4x01234567, vxk4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 147 const __m128i vp4x01234567hi = _mm_mulhi_epi16(vxi4x01234567, vxk4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 327 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 330 const __m128i vp4x01234567lo = _mm_mullo_epi16(vxi4x01234567, vxk4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 331 const __m128i vp4x01234567hi = _mm_mulhi_epi16(vxi4x01234567, vxk4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 143 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 146 const __m128i vp4x01234567lo = _mm_mullo_epi16(vxi4x01234567, vxk4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 147 const __m128i vp4x01234567hi = _mm_mulhi_epi16(vxi4x01234567, vxk4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 327 …const __m128i vxi4x01234567 = _mm_unpacklo_epi8(vi4x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 330 const __m128i vp4x01234567lo = _mm_mullo_epi16(vxi4x01234567, vxk4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 331 const __m128i vp4x01234567hi = _mm_mulhi_epi16(vxi4x01234567, vxk4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|