Home
last modified time | relevance | path

Searched refs:_mm512_add_ps (Results 1 – 25 of 147) sorted by relevance

123456

/external/XNNPACK/src/f32-raddexpminusmax/gen/
Davx512f-p5-scalef-x192-acc6.c200 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
201 vacc1 = _mm512_add_ps(vacc1, vf1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
202 vacc2 = _mm512_add_ps(vacc2, vf2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
203 vacc3 = _mm512_add_ps(vacc3, vf3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
204 vacc4 = _mm512_add_ps(vacc4, vf4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
205 vacc5 = _mm512_add_ps(vacc5, vf5); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
206 vacc0 = _mm512_add_ps(vacc0, vf6); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
207 vacc1 = _mm512_add_ps(vacc1, vf7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
208 vacc2 = _mm512_add_ps(vacc2, vf8); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
209 vacc3 = _mm512_add_ps(vacc3, vf9); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
[all …]
Davx512f-p5-scalef-x160-acc5.c177 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
178 vacc1 = _mm512_add_ps(vacc1, vf1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
179 vacc2 = _mm512_add_ps(vacc2, vf2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
180 vacc3 = _mm512_add_ps(vacc3, vf3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
181 vacc4 = _mm512_add_ps(vacc4, vf4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
182 vacc0 = _mm512_add_ps(vacc0, vf5); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
183 vacc1 = _mm512_add_ps(vacc1, vf6); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
184 vacc2 = _mm512_add_ps(vacc2, vf7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
185 vacc3 = _mm512_add_ps(vacc3, vf8); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
186 vacc4 = _mm512_add_ps(vacc4, vf9); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
[all …]
Davx512f-p5-scalef-x128-acc4.c154 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
155 vacc1 = _mm512_add_ps(vacc1, vf1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
156 vacc2 = _mm512_add_ps(vacc2, vf2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
157 vacc3 = _mm512_add_ps(vacc3, vf3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
158 vacc0 = _mm512_add_ps(vacc0, vf4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
159 vacc1 = _mm512_add_ps(vacc1, vf5); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
160 vacc2 = _mm512_add_ps(vacc2, vf6); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
161 vacc3 = _mm512_add_ps(vacc3, vf7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
164 vacc0 = _mm512_add_ps(vacc0, vacc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
165 vacc2 = _mm512_add_ps(vacc2, vacc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
[all …]
Davx512f-p5-scalef-x192-acc3.c197 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
198 vacc1 = _mm512_add_ps(vacc1, vf1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
199 vacc2 = _mm512_add_ps(vacc2, vf2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
200 vacc0 = _mm512_add_ps(vacc0, vf3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
201 vacc1 = _mm512_add_ps(vacc1, vf4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
202 vacc2 = _mm512_add_ps(vacc2, vf5); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
203 vacc0 = _mm512_add_ps(vacc0, vf6); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
204 vacc1 = _mm512_add_ps(vacc1, vf7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
205 vacc2 = _mm512_add_ps(vacc2, vf8); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
206 vacc0 = _mm512_add_ps(vacc0, vf9); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
[all …]
Davx512f-p5-scalef-x144-acc3.c164 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
165 vacc1 = _mm512_add_ps(vacc1, vf1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
166 vacc2 = _mm512_add_ps(vacc2, vf2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
167 vacc0 = _mm512_add_ps(vacc0, vf3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
168 vacc1 = _mm512_add_ps(vacc1, vf4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
169 vacc2 = _mm512_add_ps(vacc2, vf5); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
170 vacc0 = _mm512_add_ps(vacc0, vf6); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
171 vacc1 = _mm512_add_ps(vacc1, vf7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
172 vacc2 = _mm512_add_ps(vacc2, vf8); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
175 vacc0 = _mm512_add_ps(vacc0, vacc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
[all …]
Davx512f-p5-scalef-x192-acc2.c196 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
197 vacc1 = _mm512_add_ps(vacc1, vf1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
198 vacc0 = _mm512_add_ps(vacc0, vf2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
199 vacc1 = _mm512_add_ps(vacc1, vf3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
200 vacc0 = _mm512_add_ps(vacc0, vf4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
201 vacc1 = _mm512_add_ps(vacc1, vf5); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
202 vacc0 = _mm512_add_ps(vacc0, vf6); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
203 vacc1 = _mm512_add_ps(vacc1, vf7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
204 vacc0 = _mm512_add_ps(vacc0, vf8); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
205 vacc1 = _mm512_add_ps(vacc1, vf9); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
[all …]
Davx512f-p5-scalef-x160-acc2.c174 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
175 vacc1 = _mm512_add_ps(vacc1, vf1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
176 vacc0 = _mm512_add_ps(vacc0, vf2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
177 vacc1 = _mm512_add_ps(vacc1, vf3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
178 vacc0 = _mm512_add_ps(vacc0, vf4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
179 vacc1 = _mm512_add_ps(vacc1, vf5); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
180 vacc0 = _mm512_add_ps(vacc0, vf6); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
181 vacc1 = _mm512_add_ps(vacc1, vf7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
182 vacc0 = _mm512_add_ps(vacc0, vf8); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
183 vacc1 = _mm512_add_ps(vacc1, vf9); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
[all …]
Davx512f-p5-scalef-x192.c195 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
196 vacc0 = _mm512_add_ps(vacc0, vf1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
197 vacc0 = _mm512_add_ps(vacc0, vf2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
198 vacc0 = _mm512_add_ps(vacc0, vf3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
199 vacc0 = _mm512_add_ps(vacc0, vf4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
200 vacc0 = _mm512_add_ps(vacc0, vf5); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
201 vacc0 = _mm512_add_ps(vacc0, vf6); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
202 vacc0 = _mm512_add_ps(vacc0, vf7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
203 vacc0 = _mm512_add_ps(vacc0, vf8); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
204 vacc0 = _mm512_add_ps(vacc0, vf9); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
[all …]
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Davx512f-p5-scalef-x192-acc6.c216 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
217 vacc1 = _mm512_add_ps(vacc1, vf1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
218 vacc2 = _mm512_add_ps(vacc2, vf2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
219 vacc3 = _mm512_add_ps(vacc3, vf3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
220 vacc4 = _mm512_add_ps(vacc4, vf4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
221 vacc5 = _mm512_add_ps(vacc5, vf5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
222 vacc0 = _mm512_add_ps(vacc0, vf6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
223 vacc1 = _mm512_add_ps(vacc1, vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
224 vacc2 = _mm512_add_ps(vacc2, vf8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
225 vacc3 = _mm512_add_ps(vacc3, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
[all …]
Davx512f-p5-scalef-x160-acc5.c191 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
192 vacc1 = _mm512_add_ps(vacc1, vf1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
193 vacc2 = _mm512_add_ps(vacc2, vf2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
194 vacc3 = _mm512_add_ps(vacc3, vf3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
195 vacc4 = _mm512_add_ps(vacc4, vf4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
196 vacc0 = _mm512_add_ps(vacc0, vf5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
197 vacc1 = _mm512_add_ps(vacc1, vf6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
198 vacc2 = _mm512_add_ps(vacc2, vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
199 vacc3 = _mm512_add_ps(vacc3, vf8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
200 vacc4 = _mm512_add_ps(vacc4, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
[all …]
Davx512f-p5-scalef-x192-acc3.c213 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
214 vacc1 = _mm512_add_ps(vacc1, vf1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
215 vacc2 = _mm512_add_ps(vacc2, vf2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
216 vacc0 = _mm512_add_ps(vacc0, vf3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
217 vacc1 = _mm512_add_ps(vacc1, vf4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
218 vacc2 = _mm512_add_ps(vacc2, vf5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
219 vacc0 = _mm512_add_ps(vacc0, vf6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
220 vacc1 = _mm512_add_ps(vacc1, vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
221 vacc2 = _mm512_add_ps(vacc2, vf8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
222 vacc0 = _mm512_add_ps(vacc0, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
[all …]
Davx512f-p5-scalef-x144-acc3.c177 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
178 vacc1 = _mm512_add_ps(vacc1, vf1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
179 vacc2 = _mm512_add_ps(vacc2, vf2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
180 vacc0 = _mm512_add_ps(vacc0, vf3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
181 vacc1 = _mm512_add_ps(vacc1, vf4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
182 vacc2 = _mm512_add_ps(vacc2, vf5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
183 vacc0 = _mm512_add_ps(vacc0, vf6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
184 vacc1 = _mm512_add_ps(vacc1, vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
185 vacc2 = _mm512_add_ps(vacc2, vf8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
188 vacc0 = _mm512_add_ps(vacc0, vacc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
[all …]
Davx512f-p5-scalef-x128-acc4.c166 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
167 vacc1 = _mm512_add_ps(vacc1, vf1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
168 vacc2 = _mm512_add_ps(vacc2, vf2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
169 vacc3 = _mm512_add_ps(vacc3, vf3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
170 vacc0 = _mm512_add_ps(vacc0, vf4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
171 vacc1 = _mm512_add_ps(vacc1, vf5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
172 vacc2 = _mm512_add_ps(vacc2, vf6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
173 vacc3 = _mm512_add_ps(vacc3, vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
176 vacc0 = _mm512_add_ps(vacc0, vacc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
177 vacc2 = _mm512_add_ps(vacc2, vacc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
[all …]
Davx512f-p5-scalef-x192-acc2.c212 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
213 vacc1 = _mm512_add_ps(vacc1, vf1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
214 vacc0 = _mm512_add_ps(vacc0, vf2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
215 vacc1 = _mm512_add_ps(vacc1, vf3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
216 vacc0 = _mm512_add_ps(vacc0, vf4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
217 vacc1 = _mm512_add_ps(vacc1, vf5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
218 vacc0 = _mm512_add_ps(vacc0, vf6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
219 vacc1 = _mm512_add_ps(vacc1, vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
220 vacc0 = _mm512_add_ps(vacc0, vf8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
221 vacc1 = _mm512_add_ps(vacc1, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
[all …]
Davx512f-p5-scalef-x160-acc2.c188 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
189 vacc1 = _mm512_add_ps(vacc1, vf1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
190 vacc0 = _mm512_add_ps(vacc0, vf2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
191 vacc1 = _mm512_add_ps(vacc1, vf3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
192 vacc0 = _mm512_add_ps(vacc0, vf4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
193 vacc1 = _mm512_add_ps(vacc1, vf5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
194 vacc0 = _mm512_add_ps(vacc0, vf6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
195 vacc1 = _mm512_add_ps(vacc1, vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
196 vacc0 = _mm512_add_ps(vacc0, vf8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
197 vacc1 = _mm512_add_ps(vacc1, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
[all …]
Davx512f-p5-scalef-x192.c211 vacc0 = _mm512_add_ps(vacc0, vf0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
212 vacc0 = _mm512_add_ps(vacc0, vf1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
213 vacc0 = _mm512_add_ps(vacc0, vf2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
214 vacc0 = _mm512_add_ps(vacc0, vf3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
215 vacc0 = _mm512_add_ps(vacc0, vf4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
216 vacc0 = _mm512_add_ps(vacc0, vf5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
217 vacc0 = _mm512_add_ps(vacc0, vf6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
218 vacc0 = _mm512_add_ps(vacc0, vf7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
219 vacc0 = _mm512_add_ps(vacc0, vf8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
220 vacc0 = _mm512_add_ps(vacc0, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
[all …]
/external/XNNPACK/src/f32-raddextexp/gen/
Davx512f-p5-scalef-x192-acc6.c225 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
226 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
227 vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
228 vaccv3 = _mm512_add_ps(vaccv3, _mm512_scalef_ps(vp3, vdelta_e3)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
229 vaccv4 = _mm512_add_ps(vaccv4, _mm512_scalef_ps(vp4, vdelta_e4)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
230 vaccv5 = _mm512_add_ps(vaccv5, _mm512_scalef_ps(vp5, vdelta_e5)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
231 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
232 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp7, vdelta_e7)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
233 vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp8, vdelta_e8)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
234 vaccv3 = _mm512_add_ps(vaccv3, _mm512_scalef_ps(vp9, vdelta_e9)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
[all …]
Davx512f-p5-scalef-x160-acc5.c199 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
200 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
201 vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
202 vaccv3 = _mm512_add_ps(vaccv3, _mm512_scalef_ps(vp3, vdelta_e3)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
203 vaccv4 = _mm512_add_ps(vaccv4, _mm512_scalef_ps(vp4, vdelta_e4)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
204 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp5, vdelta_e5)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
205 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp6, vdelta_e6)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
206 vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp7, vdelta_e7)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
207 vaccv3 = _mm512_add_ps(vaccv3, _mm512_scalef_ps(vp8, vdelta_e8)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
208 vaccv4 = _mm512_add_ps(vaccv4, _mm512_scalef_ps(vp9, vdelta_e9)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
[all …]
Davx512f-p5-scalef-x192-acc3.c213 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
214 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
215 vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
216 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp3, vdelta_e3)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
217 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp4, vdelta_e4)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
218 vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp5, vdelta_e5)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
219 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
220 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp7, vdelta_e7)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
221 vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp8, vdelta_e8)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
222 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp9, vdelta_e9)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
[all …]
Davx512f-p5-scalef-x192-acc2.c209 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
210 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
211 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
212 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp3, vdelta_e3)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
213 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
214 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp5, vdelta_e5)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
215 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
216 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp7, vdelta_e7)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
217 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp8, vdelta_e8)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
218 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp9, vdelta_e9)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
[all …]
/external/XNNPACK/src/f32-vscaleextexp/gen/
Davx512f-p5-scalef-x192.c185 const __m512 ve0 = _mm512_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
186 const __m512 ve1 = _mm512_add_ps(vn1, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
187 const __m512 ve2 = _mm512_add_ps(vn2, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
188 const __m512 ve3 = _mm512_add_ps(vn3, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
189 const __m512 ve4 = _mm512_add_ps(vn4, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
190 const __m512 ve5 = _mm512_add_ps(vn5, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
191 const __m512 ve6 = _mm512_add_ps(vn6, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
192 const __m512 ve7 = _mm512_add_ps(vn7, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
193 const __m512 ve8 = _mm512_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
194 const __m512 ve9 = _mm512_add_ps(vn9, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
[all …]
Davx512f-p5-scalef-x176.c175 const __m512 ve0 = _mm512_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
176 const __m512 ve1 = _mm512_add_ps(vn1, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
177 const __m512 ve2 = _mm512_add_ps(vn2, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
178 const __m512 ve3 = _mm512_add_ps(vn3, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
179 const __m512 ve4 = _mm512_add_ps(vn4, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
180 const __m512 ve5 = _mm512_add_ps(vn5, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
181 const __m512 ve6 = _mm512_add_ps(vn6, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
182 const __m512 ve7 = _mm512_add_ps(vn7, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
183 const __m512 ve8 = _mm512_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
184 const __m512 ve9 = _mm512_add_ps(vn9, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
[all …]
Davx512f-p5-scalef-x160.c165 const __m512 ve0 = _mm512_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160()
166 const __m512 ve1 = _mm512_add_ps(vn1, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160()
167 const __m512 ve2 = _mm512_add_ps(vn2, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160()
168 const __m512 ve3 = _mm512_add_ps(vn3, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160()
169 const __m512 ve4 = _mm512_add_ps(vn4, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160()
170 const __m512 ve5 = _mm512_add_ps(vn5, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160()
171 const __m512 ve6 = _mm512_add_ps(vn6, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160()
172 const __m512 ve7 = _mm512_add_ps(vn7, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160()
173 const __m512 ve8 = _mm512_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160()
174 const __m512 ve9 = _mm512_add_ps(vn9, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160()
[all …]
Davx512f-p5-scalef-x144.c155 const __m512 ve0 = _mm512_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
156 const __m512 ve1 = _mm512_add_ps(vn1, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
157 const __m512 ve2 = _mm512_add_ps(vn2, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
158 const __m512 ve3 = _mm512_add_ps(vn3, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
159 const __m512 ve4 = _mm512_add_ps(vn4, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
160 const __m512 ve5 = _mm512_add_ps(vn5, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
161 const __m512 ve6 = _mm512_add_ps(vn6, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
162 const __m512 ve7 = _mm512_add_ps(vn7, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
163 const __m512 ve8 = _mm512_add_ps(vn8, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
212 const __m512 ve = _mm512_add_ps(vn, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
[all …]
Davx512f-p5-scalef-x128.c145 const __m512 ve0 = _mm512_add_ps(vn0, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
146 const __m512 ve1 = _mm512_add_ps(vn1, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
147 const __m512 ve2 = _mm512_add_ps(vn2, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
148 const __m512 ve3 = _mm512_add_ps(vn3, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
149 const __m512 ve4 = _mm512_add_ps(vn4, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
150 const __m512 ve5 = _mm512_add_ps(vn5, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
151 const __m512 ve6 = _mm512_add_ps(vn6, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
152 const __m512 ve7 = _mm512_add_ps(vn7, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
199 const __m512 ve = _mm512_add_ps(vn, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
233 const __m512 ve = _mm512_add_ps(vn, vscalee); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()

123456