Home
last modified time | relevance | path

Searched refs:vacc01234567p1 (Results 1 – 17 of 17) sorted by relevance

/external/XNNPACK/src/f32-dwconv/gen/
Dup8x25-minmax-fma3-acc2.c180 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() local
192 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
204 vacc01234567p1 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
216 vacc01234567p1 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
228 vacc01234567p1 = _mm256_fmadd_ps(vi9x01234567, vk9x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
240 vacc01234567p1 = _mm256_fmadd_ps(vi11x01234567, vk11x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
252 vacc01234567p1 = _mm256_fmadd_ps(vi13x01234567, vk13x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
264 vacc01234567p1 = _mm256_fmadd_ps(vi15x01234567, vk15x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
276 vacc01234567p1 = _mm256_fmadd_ps(vi17x01234567, vk17x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
288 vacc01234567p1 = _mm256_fmadd_ps(vi19x01234567, vk19x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
[all …]
Dup16x25-minmax-fma3-acc2.c186 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() local
204 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
222 vacc01234567p1 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
240 vacc01234567p1 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
258 vacc01234567p1 = _mm256_fmadd_ps(vi9x01234567, vk9x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
276 vacc01234567p1 = _mm256_fmadd_ps(vi11x01234567, vk11x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
294 vacc01234567p1 = _mm256_fmadd_ps(vi13x01234567, vk13x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
312 vacc01234567p1 = _mm256_fmadd_ps(vi15x01234567, vk15x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
330 vacc01234567p1 = _mm256_fmadd_ps(vi17x01234567, vk17x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
348 vacc01234567p1 = _mm256_fmadd_ps(vi19x01234567, vk19x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
[all …]
Dup8x25-minmax-avx-acc2.c180 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() local
192 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
204 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
216 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
228 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi9x01234567, vk9x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
240 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi11x01234567, vk11x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
252 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi13x01234567, vk13x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
264 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi15x01234567, vk15x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
276 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi17x01234567, vk17x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
288 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi19x01234567, vk19x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
[all …]
Dup16x25-minmax-avx-acc2.c186 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() local
204 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
222 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
240 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
258 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi9x01234567, vk9x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
276 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi11x01234567, vk11x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
294 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi13x01234567, vk13x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
312 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi15x01234567, vk15x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
330 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi17x01234567, vk17x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
348 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi19x01234567, vk19x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
[all …]
Dup16x9-minmax-fma3-acc2.c106 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local
124 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
142 vacc01234567p1 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
160 vacc01234567p1 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
175 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
200 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local
212 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
224 vacc01234567p1 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
236 vacc01234567p1 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
247 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
[all …]
Dup8x9-minmax-fma3-acc2.c100 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() local
112 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
124 vacc01234567p1 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
136 vacc01234567p1 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
147 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
168 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() local
176 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
184 vacc01234567p1 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
192 vacc01234567p1 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
199 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
Dup8x9-minmax-avx-acc2.c100 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() local
112 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2()
124 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2()
136 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2()
147 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2()
168 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() local
176 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2()
184 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2()
192 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2()
199 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2()
Dup16x4-minmax-fma3-acc2.c81 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local
99 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2()
105 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2()
130 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local
142 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2()
147 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2()
168 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local
176 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2()
179 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2()
Dup16x4-minmax-avx-acc2.c81 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local
99 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2()
105 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2()
130 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local
142 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2()
147 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2()
168 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local
176 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2()
179 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2()
Dup8x4-minmax-avx-acc2.c75 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2() local
87 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2()
92 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2()
113 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2() local
121 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2()
124 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2()
Dup8x4-minmax-fma3-acc2.c75 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2() local
87 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2()
92 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2()
113 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2() local
121 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2()
124 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2()
/external/XNNPACK/src/f16-dwconv/gen/
Dup8x25-minmax-neonfp16arith-acc2.c176 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() local
184 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
192 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
200 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
208 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi9x01234567, vk9x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
216 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi11x01234567, vk11x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
224 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi13x01234567, vk13x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
232 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi15x01234567, vk15x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
240 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi17x01234567, vk17x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
248 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi19x01234567, vk19x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
[all …]
Dup16x25-minmax-neonfp16arith-acc2.c182 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() local
196 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
210 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
224 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
238 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi9x01234567, vk9x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
252 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi11x01234567, vk11x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
266 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi13x01234567, vk13x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
280 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi15x01234567, vk15x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
294 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi17x01234567, vk17x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
308 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi19x01234567, vk19x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
[all …]
Dup16x9-minmax-neonfp16arith-acc2.c102 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local
116 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
130 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
144 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
155 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
176 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local
184 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
192 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
200 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
207 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
[all …]
Dup8x9-minmax-neonfp16arith-acc2.c96 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() local
104 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2()
112 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2()
120 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2()
127 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2()
144 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() local
152 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2()
160 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2()
168 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2()
175 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2()
Dup16x4-minmax-neonfp16arith-acc2.c77 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local
91 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2()
95 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2()
116 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local
124 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2()
127 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2()
144 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local
152 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2()
155 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2()
Dup8x4-minmax-neonfp16arith-acc2.c71 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() local
79 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2()
82 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2()
99 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() local
107 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2()
110 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2()