Home
last modified time | relevance | path

Searched refs:vacc01234567p0 (Results 1 – 25 of 37) sorted by relevance

12

/external/XNNPACK/src/f32-dwconv/gen/
Dup8x25-minmax-fma3.c167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3() local
174 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3()
180 vacc01234567p0 = _mm256_fmadd_ps(vi1x01234567, vk1x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3()
186 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3()
192 vacc01234567p0 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3()
198 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3()
204 vacc01234567p0 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3()
210 vacc01234567p0 = _mm256_fmadd_ps(vi6x01234567, vk6x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3()
216 vacc01234567p0 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3()
222 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3()
[all …]
Dup8x25-minmax-avx.c167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx() local
174 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx()
180 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi1x01234567, vk1x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx()
186 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx()
192 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx()
198 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx()
204 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx()
210 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi6x01234567, vk6x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx()
216 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx()
222 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx()
[all …]
Dup16x25-minmax-fma3.c167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() local
177 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
186 vacc01234567p0 = _mm256_fmadd_ps(vi1x01234567, vk1x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
195 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
204 vacc01234567p0 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
213 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
222 vacc01234567p0 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
231 vacc01234567p0 = _mm256_fmadd_ps(vi6x01234567, vk6x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
240 vacc01234567p0 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
249 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
[all …]
Dup16x25-minmax-avx.c167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() local
177 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
186 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi1x01234567, vk1x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
195 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
204 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
213 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
222 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
231 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi6x01234567, vk6x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
240 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
249 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
[all …]
Dup16x9-minmax-fma3.c87 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() local
97 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
106 vacc01234567p0 = _mm256_fmadd_ps(vi1x01234567, vk1x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
115 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
124 vacc01234567p0 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
133 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
142 vacc01234567p0 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
151 vacc01234567p0 = _mm256_fmadd_ps(vi6x01234567, vk6x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
160 vacc01234567p0 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
169 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
[all …]
Dup16x9-minmax-avx.c87 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() local
97 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
106 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi1x01234567, vk1x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
115 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
124 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
133 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
142 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
151 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi6x01234567, vk6x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
160 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
169 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx()
[all …]
Dup8x9-minmax-avx.c87 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx() local
94 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx()
100 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi1x01234567, vk1x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx()
106 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx()
112 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx()
118 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx()
124 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi5x01234567, vk5x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx()
130 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi6x01234567, vk6x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx()
136 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi7x01234567, vk7x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx()
142 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx()
[all …]
Dup8x9-minmax-fma3.c87 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3() local
94 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3()
100 vacc01234567p0 = _mm256_fmadd_ps(vi1x01234567, vk1x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3()
106 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3()
112 vacc01234567p0 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3()
118 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3()
124 vacc01234567p0 = _mm256_fmadd_ps(vi5x01234567, vk5x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3()
130 vacc01234567p0 = _mm256_fmadd_ps(vi6x01234567, vk6x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3()
136 vacc01234567p0 = _mm256_fmadd_ps(vi7x01234567, vk7x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3()
142 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3()
[all …]
Dup16x4-minmax-fma3.c62 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() local
72 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3()
81 vacc01234567p0 = _mm256_fmadd_ps(vi1x01234567, vk1x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3()
90 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3()
99 vacc01234567p0 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3()
105 __m256 vacc01234567 = _mm256_max_ps(vacc01234567p0, vmin); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3()
115 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() local
121 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3()
127 vacc01234567p0 = _mm256_fmadd_ps(vi1x01234567, vk1x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3()
133 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3()
[all …]
Dup8x25-minmax-fma3-acc2.c167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2() local
174 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
186 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
198 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
210 vacc01234567p0 = _mm256_fmadd_ps(vi6x01234567, vk6x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
222 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
234 vacc01234567p0 = _mm256_fmadd_ps(vi10x01234567, vk10x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
246 vacc01234567p0 = _mm256_fmadd_ps(vi12x01234567, vk12x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
258 vacc01234567p0 = _mm256_fmadd_ps(vi14x01234567, vk14x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
270 vacc01234567p0 = _mm256_fmadd_ps(vi16x01234567, vk16x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2()
[all …]
Dup16x4-minmax-avx.c62 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() local
72 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx()
81 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi1x01234567, vk1x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx()
90 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx()
99 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi3x01234567, vk3x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx()
105 __m256 vacc01234567 = _mm256_max_ps(vacc01234567p0, vmin); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx()
115 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() local
121 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx()
127 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi1x01234567, vk1x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx()
133 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx()
[all …]
Dup16x9-minmax-fma3-acc2.c87 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local
97 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
115 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
133 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
151 vacc01234567p0 = _mm256_fmadd_ps(vi6x01234567, vk6x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
169 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
175 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
178 __m256 vacc01234567 = _mm256_max_ps(vacc01234567p0, vmin); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
188 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local
194 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
[all …]
Dup16x25-minmax-fma3-acc2.c167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() local
177 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
195 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
213 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
231 vacc01234567p0 = _mm256_fmadd_ps(vi6x01234567, vk6x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
249 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
267 vacc01234567p0 = _mm256_fmadd_ps(vi10x01234567, vk10x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
285 vacc01234567p0 = _mm256_fmadd_ps(vi12x01234567, vk12x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
303 vacc01234567p0 = _mm256_fmadd_ps(vi14x01234567, vk14x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
321 vacc01234567p0 = _mm256_fmadd_ps(vi16x01234567, vk16x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
[all …]
Dup8x25-minmax-avx-acc2.c167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2() local
174 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
186 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
198 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
210 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi6x01234567, vk6x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
222 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
234 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi10x01234567, vk10x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
246 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi12x01234567, vk12x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
258 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi14x01234567, vk14x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
270 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi16x01234567, vk16x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2()
[all …]
Dup16x25-minmax-avx-acc2.c167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() local
177 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
195 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi2x01234567, vk2x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
213 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi4x01234567, vk4x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
231 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi6x01234567, vk6x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
249 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi8x01234567, vk8x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
267 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi10x01234567, vk10x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
285 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi12x01234567, vk12x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
303 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi14x01234567, vk14x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
321 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi16x01234567, vk16x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
[all …]
Dup8x9-minmax-fma3-acc2.c87 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() local
94 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
106 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
118 vacc01234567p0 = _mm256_fmadd_ps(vi4x01234567, vk4x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
130 vacc01234567p0 = _mm256_fmadd_ps(vi6x01234567, vk6x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
142 vacc01234567p0 = _mm256_fmadd_ps(vi8x01234567, vk8x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
147 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, vacc01234567p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
149 __m256 vacc01234567 = _mm256_max_ps(vacc01234567p0, vmin); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
160 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() local
164 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2()
[all …]
Dup8x4-minmax-fma3.c62 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3() local
69 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3()
75 vacc01234567p0 = _mm256_fmadd_ps(vi1x01234567, vk1x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3()
81 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3()
87 vacc01234567p0 = _mm256_fmadd_ps(vi3x01234567, vk3x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3()
92 __m256 vacc01234567 = _mm256_max_ps(vacc01234567p0, vmin); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3()
103 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3() local
107 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3()
111 vacc01234567p0 = _mm256_fmadd_ps(vi1x01234567, vk1x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3()
115 vacc01234567p0 = _mm256_fmadd_ps(vi2x01234567, vk2x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3()
[all …]
/external/XNNPACK/src/f16-dwconv/gen/
Dup8x25-minmax-neonfp16arith.c167 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith() local
172 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith()
176 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith()
180 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith()
184 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith()
188 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith()
192 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith()
196 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi6x01234567, vk6x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith()
200 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith()
204 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith()
[all …]
Dup16x25-minmax-neonfp16arith.c167 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith() local
175 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith()
182 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith()
189 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith()
196 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith()
203 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith()
210 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith()
217 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi6x01234567, vk6x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith()
224 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith()
231 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith()
[all …]
Dup16x9-minmax-neonfp16arith.c87 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() local
95 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
102 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
109 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
116 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
123 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
130 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
137 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi6x01234567, vk6x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
144 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
151 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith()
[all …]
Dup8x9-minmax-neonfp16arith.c87 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() local
92 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith()
96 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith()
100 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith()
104 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith()
108 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith()
112 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi5x01234567, vk5x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith()
116 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi6x01234567, vk6x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith()
120 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi7x01234567, vk7x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith()
124 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith()
[all …]
Dup16x4-minmax-neonfp16arith.c62 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() local
70 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith()
77 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith()
84 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith()
91 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi3x01234567, vk3x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith()
95 float16x8_t vacc01234567 = vmaxq_f16(vacc01234567p0, vmin); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith()
104 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() local
109 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith()
113 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi1x01234567, vk1x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith()
117 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith()
[all …]
Dup8x25-minmax-neonfp16arith-acc2.c167 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2() local
172 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
180 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
188 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
196 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi6x01234567, vk6x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
204 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
212 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi10x01234567, vk10x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
220 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi12x01234567, vk12x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
228 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi14x01234567, vk14x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
236 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi16x01234567, vk16x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2()
[all …]
Dup16x25-minmax-neonfp16arith-acc2.c167 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2() local
175 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
189 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
203 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
217 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi6x01234567, vk6x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
231 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
245 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi10x01234567, vk10x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
259 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi12x01234567, vk12x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
273 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi14x01234567, vk14x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
287 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi16x01234567, vk16x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2()
[all …]
Dup16x9-minmax-neonfp16arith-acc2.c87 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local
95 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
109 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi2x01234567, vk2x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
123 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi4x01234567, vk4x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
137 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi6x01234567, vk6x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
151 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi8x01234567, vk8x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
155 vacc01234567p0 = vaddq_f16(vacc01234567p0, vacc01234567p1); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
158 float16x8_t vacc01234567 = vmaxq_f16(vacc01234567p0, vmin); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
167 float16x8_t vacc01234567p0 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local
172 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2()
[all …]

12