/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x25-minmax-fma3-acc2.c | 180 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() local 192 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() 204 vacc01234567p1 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() 216 vacc01234567p1 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() 228 vacc01234567p1 = _mm256_fmadd_ps(vi9x01234567, vk9x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() 240 vacc01234567p1 = _mm256_fmadd_ps(vi11x01234567, vk11x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() 252 vacc01234567p1 = _mm256_fmadd_ps(vi13x01234567, vk13x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() 264 vacc01234567p1 = _mm256_fmadd_ps(vi15x01234567, vk15x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() 276 vacc01234567p1 = _mm256_fmadd_ps(vi17x01234567, vk17x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() 288 vacc01234567p1 = _mm256_fmadd_ps(vi19x01234567, vk19x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() [all …]
|
D | up16x25-minmax-fma3-acc2.c | 186 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() local 204 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 222 vacc01234567p1 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 240 vacc01234567p1 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 258 vacc01234567p1 = _mm256_fmadd_ps(vi9x01234567, vk9x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 276 vacc01234567p1 = _mm256_fmadd_ps(vi11x01234567, vk11x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 294 vacc01234567p1 = _mm256_fmadd_ps(vi13x01234567, vk13x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 312 vacc01234567p1 = _mm256_fmadd_ps(vi15x01234567, vk15x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 330 vacc01234567p1 = _mm256_fmadd_ps(vi17x01234567, vk17x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 348 vacc01234567p1 = _mm256_fmadd_ps(vi19x01234567, vk19x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() [all …]
|
D | up8x25-minmax-avx-acc2.c | 180 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() local 192 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() 204 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() 216 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() 228 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi9x01234567, vk9x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() 240 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi11x01234567, vk11x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() 252 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi13x01234567, vk13x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() 264 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi15x01234567, vk15x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() 276 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi17x01234567, vk17x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() 288 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi19x01234567, vk19x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() [all …]
|
D | up16x25-minmax-avx-acc2.c | 186 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() local 204 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 222 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 240 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 258 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi9x01234567, vk9x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 276 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi11x01234567, vk11x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 294 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi13x01234567, vk13x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 312 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi15x01234567, vk15x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 330 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi17x01234567, vk17x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 348 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi19x01234567, vk19x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() [all …]
|
D | up16x9-minmax-fma3-acc2.c | 106 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local 124 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 142 vacc01234567p1 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 160 vacc01234567p1 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 175 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 200 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local 212 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 224 vacc01234567p1 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 236 vacc01234567p1 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 247 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() [all …]
|
D | up8x9-minmax-fma3-acc2.c | 100 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() local 112 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() 124 vacc01234567p1 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() 136 vacc01234567p1 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() 147 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() 168 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() local 176 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() 184 vacc01234567p1 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() 192 vacc01234567p1 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() 199 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
|
D | up8x9-minmax-avx-acc2.c | 100 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() local 112 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() 124 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() 136 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() 147 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() 168 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() local 176 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() 184 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() 192 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() 199 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2()
|
D | up16x4-minmax-fma3-acc2.c | 81 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local 99 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() 105 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() 130 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local 142 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() 147 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() 168 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local 176 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() 179 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2()
|
D | up16x4-minmax-avx-acc2.c | 81 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local 99 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() 105 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() 130 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local 142 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() 147 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() 168 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local 176 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() 179 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2()
|
D | up8x4-minmax-avx-acc2.c | 75 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2() local 87 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2() 92 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2() 113 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2() local 121 vacc01234567p1 = _mm256_add_ps(vacc01234567p1, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2() 124 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2()
|
D | up8x4-minmax-fma3-acc2.c | 75 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2() local 87 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2() 92 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2() 113 __m256 vacc01234567p1 = _mm256_mul_ps(vi1x01234567, vk1x01234567); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2() local 121 vacc01234567p1 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2() 124 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2()
|
/external/XNNPACK/src/f16-dwconv/gen/ |
D | up8x25-minmax-neonfp16arith-acc2.c | 176 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() local 184 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 192 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 200 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 208 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi9x01234567, vk9x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 216 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi11x01234567, vk11x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 224 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi13x01234567, vk13x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 232 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi15x01234567, vk15x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 240 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi17x01234567, vk17x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() 248 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi19x01234567, vk19x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() [all …]
|
D | up16x25-minmax-neonfp16arith-acc2.c | 182 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() local 196 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 210 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 224 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 238 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi9x01234567, vk9x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 252 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi11x01234567, vk11x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 266 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi13x01234567, vk13x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 280 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi15x01234567, vk15x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 294 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi17x01234567, vk17x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() 308 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi19x01234567, vk19x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() [all …]
|
D | up16x9-minmax-neonfp16arith-acc2.c | 102 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local 116 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 130 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 144 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 155 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 176 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local 184 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 192 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 200 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() 207 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() [all …]
|
D | up8x9-minmax-neonfp16arith-acc2.c | 96 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() local 104 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 112 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 120 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 127 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 144 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() local 152 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 160 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 168 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() 175 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2()
|
D | up16x4-minmax-neonfp16arith-acc2.c | 77 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local 91 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 95 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 116 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local 124 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 127 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 144 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local 152 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 155 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2()
|
D | up8x4-minmax-neonfp16arith-acc2.c | 71 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() local 79 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() 82 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() 99 float16x8_t vacc01234567p1 = vmulq_f16(vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() local 107 vacc01234567p1 = vfmaq_f16(vacc01234567p1, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() 110 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2()
|