Searched refs:vacce (Results 1 – 25 of 26) sorted by relevance
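
Every hit in this listing is one pattern. The f32-raddextexp kernels accumulate a sum of exponentials in an extended representation: a mantissa accumulator (not matched by this query) plus a separate running-maximum exponent vector, vacce, seeded with vminus_inf for the empty sum. Each update takes vmax_e = max(vacce, vn), rescales the old contribution by 2^(vacce - vmax_e) (the vdelta_acce lines), and commits vacce = vmax_e; the closing lines of each kernel reduce vacce to a single maximum and rescale once more. Below is a minimal scalar sketch of that scheme, assuming this reading of the code; raddextexp_scalar, acc_m, and acc_e are illustrative names, not XNNPACK identifiers.

#include <math.h>
#include <stddef.h>

/* Streaming sum of exp(x[i]), kept as acc_m * 2^acc_e so the running sum
 * cannot overflow. Scalar sketch of what the vectorized kernels do. */
static float raddextexp_scalar(const float* x, size_t n) {
  const float log2e = 1.44269504f; /* log2(e), rounded */
  const float ln2 = 0.693147181f;  /* ln(2), rounded */
  float acc_m = 0.0f;              /* mantissa accumulator */
  float acc_e = -INFINITY;         /* exponent accumulator (cf. vacce = vminus_inf) */
  for (size_t i = 0; i < n; i++) {
    /* Split exp(x[i]) into p * 2^e with p in [1, 2) (e plays the role of vn). */
    const float e = floorf(x[i] * log2e);
    const float p = expf(x[i] - e * ln2);
    const float max_e = fmaxf(acc_e, e);          /* cf. vmax_e */
    /* Rescale both terms onto the common exponent; exp2f(-INFINITY) == 0,
     * so the empty accumulator contributes nothing (cf. vdelta_acce). */
    acc_m = acc_m * exp2f(acc_e - max_e) + p * exp2f(e - max_e);
    acc_e = max_e;                                /* cf. vacce = vmax_e */
  }
  return acc_m * exp2f(acc_e); /* final single rescale back to a plain float */
}

Carrying the exponent separately is what lets these kernels sum exp(x[i]) over inputs whose magnitudes would overflow or underflow a plain float accumulator.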

/external/XNNPACK/src/f32-raddextexp/
avx512f-p5-scalef.c.in
43 __m512 vacce${K} = vminus_inf;
91 __m512 vmax_e${N} = _mm512_max_ps(vacce${N}, vn${N});
96 const __m512 vdelta_acce${K} = _mm512_sub_ps(vacce${K}, vmax_e${K});
107 vacce${K} = vmax_e${K};
114 const __m512 vmax_acce${ABC[A:A+2]} = _mm512_max_ps(vacce${A}, vacce${A+1});
116 const __m512 vmax_acce${ABC[A]} = vacce${A};
125 const __m512 vdelta_acce${K} = _mm512_sub_ps(vacce${K}, vmax_acce${ABC[0:ACCUMULATORS]});
130 __m512 vacce = vmax_acce${ABC[0:ACCUMULATORS]};
133 __m512 vacce = vacce0;
156 const __m512 vmax_e = _mm512_max_ps(vacce, vn);
[all …]
avx2-p5.c.in
47 __m256 vacce${K} = vminus_inf;
95 __m256 vmax_e${N} = _mm256_max_ps(vacce${N}, vn${N});
104 … const __m256 vdelta_acce${K} = _mm256_max_ps(_mm256_sub_ps(vacce${K}, vmax_e${K}), vmin_exponent);
125 vacce${K} = vmax_e${K};
132 const __m256 vmax_acce${ABC[A:A+2]} = _mm256_max_ps(vacce${A}, vacce${A+1});
134 const __m256 vmax_acce${ABC[A]} = vacce${A};
143 …const __m256 vdelta_acce${K} = _mm256_max_ps(_mm256_sub_ps(vacce${K}, vmax_acce${ABC[0:ACCUMULATOR…
151 __m256 vacce = vmax_acce${ABC[0:ACCUMULATORS]};
154 __m256 vacce = vacce0;
177 const __m256 vmax_e = _mm256_max_ps(vacce, vn);
[all …]
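
The two templates differ in how vdelta_acce is formed: the AVX-512 version subtracts the exponents directly, while the AVX2 version clamps the difference with vmin_exponent. A plausible reading is that the AVX-512 kernels rescale with scalef (per the file names), which maps a -inf delta to a zero scale factor, whereas the AVX2 kernels must construct 2^delta by hand and so need the delta kept finite. A scalar rendering of the two flavors, with an illustrative clamp value that is not XNNPACK's actual constant:

#include <math.h>

#define VMIN_EXPONENT -127.0f /* illustrative only */

/* AVX-512 style: the delta may be -INFINITY; scalef turns that into a zero scale. */
static float delta_scalef_style(float acc_e, float max_e) {
  return acc_e - max_e;
}

/* AVX2 style: clamp so 2^delta can be built from a finite exponent. */
static float delta_clamped_style(float acc_e, float max_e) {
  return fmaxf(acc_e - max_e, VMIN_EXPONENT);
}
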
/external/XNNPACK/src/f32-raddextexp/gen/
avx2-p5-x64.c
198 __m256 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx2_p5_x64() local
221 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
224 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
235 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
254 vn = _mm256_blendv_ps(vacce, vn, _mm256_castsi256_ps(vmask)); in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
265 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
269 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
279 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
283 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
286 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
avx512f-p5-scalef-x128.c
175 __m512 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128() local
198 const __m512 vmax_e = _mm512_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
199 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
204 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
230 const __m512 vmax_e = _mm512_mask_max_ps(vacce, vmask, vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
231 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
235 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
239 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
240 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
avx512f-p5-scalef-x128-acc2.c
186 __m512 vacce = vmax_acce01; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() local
209 const __m512 vmax_e = _mm512_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
210 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
215 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
241 const __m512 vmax_e = _mm512_mask_max_ps(vacce, vmask, vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
242 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
246 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
250 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
251 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
avx2-p5-x72.c
211 __m256 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx2_p5_x72() local
234 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
237 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
248 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
267 vn = _mm256_blendv_ps(vacce, vn, _mm256_castsi256_ps(vmask)); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
278 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
282 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
292 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
296 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
299 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
avx2-p5-x64-acc2.c
213 __m256 vacce = vmax_acce01; in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2() local
236 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
239 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
250 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
269 vn = _mm256_blendv_ps(vacce, vn, _mm256_castsi256_ps(vmask)); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
280 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
284 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
294 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
298 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
301 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
avx512f-p5-scalef-x144.c
187 __m512 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144() local
210 const __m512 vmax_e = _mm512_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
211 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
216 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
242 const __m512 vmax_e = _mm512_mask_max_ps(vacce, vmask, vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
243 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
247 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
251 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
252 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
avx2-p5-x64-acc4.c
233 __m256 vacce = vmax_acce0123; in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() local
256 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
259 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
270 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
289 vn = _mm256_blendv_ps(vacce, vn, _mm256_castsi256_ps(vmask)); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
300 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
304 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
314 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
318 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
321 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
avx2-p5-x80-acc2.c
239 __m256 vacce = vmax_acce01; in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() local
262 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
265 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
276 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
295 vn = _mm256_blendv_ps(vacce, vn, _mm256_castsi256_ps(vmask)); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
306 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
310 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
320 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
324 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
327 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
avx2-p5-x80.c
224 __m256 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx2_p5_x80() local
247 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
250 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
261 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
280 vn = _mm256_blendv_ps(vacce, vn, _mm256_castsi256_ps(vmask)); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
291 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
295 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
305 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
309 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
312 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
avx2-p5-x72-acc3.c
237 __m256 vacce = vmax_acce012; in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() local
260 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
263 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
274 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
293 vn = _mm256_blendv_ps(vacce, vn, _mm256_castsi256_ps(vmask)); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
304 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
308 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
318 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
322 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
325 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
avx512f-p5-scalef-x128-acc4.c
202 __m512 vacce = vmax_acce0123; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4() local
225 const __m512 vmax_e = _mm512_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4()
226 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4()
231 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4()
257 const __m512 vmax_e = _mm512_mask_max_ps(vacce, vmask, vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4()
258 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4()
262 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4()
266 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4()
267 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4()
avx512f-p5-scalef-x160-acc2.c
210 __m512 vacce = vmax_acce01; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2() local
233 const __m512 vmax_e = _mm512_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
234 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
239 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
265 const __m512 vmax_e = _mm512_mask_max_ps(vacce, vmask, vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
266 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
270 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
274 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
275 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
avx512f-p5-scalef-x160.c
199 __m512 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160() local
222 const __m512 vmax_e = _mm512_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
223 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
228 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
254 const __m512 vmax_e = _mm512_mask_max_ps(vacce, vmask, vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
255 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
259 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
263 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
264 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
avx512f-p5-scalef-x144-acc3.c
207 __m512 vacce = vmax_acce012; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3() local
230 const __m512 vmax_e = _mm512_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
231 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
236 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
262 const __m512 vmax_e = _mm512_mask_max_ps(vacce, vmask, vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
263 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
267 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
271 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
272 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
avx2-p5-x96.c
250 __m256 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx2_p5_x96() local
273 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
276 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
287 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
306 vn = _mm256_blendv_ps(vacce, vn, _mm256_castsi256_ps(vmask)); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
317 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
321 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
331 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
335 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
338 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
avx512f-p5-scalef-x192-acc2.c
234 __m512 vacce = vmax_acce01; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() local
257 const __m512 vmax_e = _mm512_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
258 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
263 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
289 const __m512 vmax_e = _mm512_mask_max_ps(vacce, vmask, vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
290 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
294 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
298 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
299 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
avx512f-p5-scalef-x160-acc5.c
235 __m512 vacce = vmax_acce01234; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() local
258 const __m512 vmax_e = _mm512_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
259 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
264 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
290 const __m512 vmax_e = _mm512_mask_max_ps(vacce, vmask, vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
291 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
295 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
299 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
300 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
avx512f-p5-scalef-x192.c
223 __m512 vacce = vacce0; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() local
246 const __m512 vmax_e = _mm512_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
247 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
252 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
278 const __m512 vmax_e = _mm512_mask_max_ps(vacce, vmask, vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
279 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
283 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
287 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
288 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
avx2-p5-x96-acc3.c
276 __m256 vacce = vmax_acce012; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() local
299 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
302 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
313 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
332 vn = _mm256_blendv_ps(vacce, vn, _mm256_castsi256_ps(vmask)); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
343 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
347 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
357 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
361 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
364 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
avx2-p5-x96-acc2.c
265 __m256 vacce = vmax_acce01; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() local
288 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
291 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
302 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
321 vn = _mm256_blendv_ps(vacce, vn, _mm256_castsi256_ps(vmask)); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
332 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
336 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
346 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
350 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
353 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
avx2-p5-x80-acc5.c
270 __m256 vacce = vmax_acce01234; in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() local
293 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
296 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
307 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
326 vn = _mm256_blendv_ps(vacce, vn, _mm256_castsi256_ps(vmask)); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
337 const __m256 vmax_e = _mm256_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
341 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_e), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
351 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
355 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
358 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
avx512f-p5-scalef-x192-acc3.c
243 __m512 vacce = vmax_acce012; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() local
266 const __m512 vmax_e = _mm512_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
267 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
272 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
298 const __m512 vmax_e = _mm512_mask_max_ps(vacce, vmask, vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
299 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
303 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
307 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
308 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
avx512f-p5-scalef-x192-acc6.c
266 __m512 vacce = vmax_acce012345; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() local
289 const __m512 vmax_e = _mm512_max_ps(vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
290 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
295 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
321 const __m512 vmax_e = _mm512_mask_max_ps(vacce, vmask, vacce, vn); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
322 const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
326 vacce = vmax_e; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
330 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
331 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
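
The final reductions also split by ISA: the AVX-512 kernels collapse vacce with _mm512_reduce_max_ps, while the AVX2 kernels begin a manual horizontal maximum with _mm256_permute2f128_ps, as in the hits above. The usual completion of that idiom, which the elided lines presumably mirror, finishes the reduction within each 128-bit lane; hmax_avx2 is an illustrative name.

#include <immintrin.h>

/* Broadcast the maximum of all 8 floats to every lane: first across the two
 * 128-bit halves (the step visible in the search hits), then within a half. */
static __m256 hmax_avx2(__m256 v) {
  __m256 m = _mm256_max_ps(v, _mm256_permute2f128_ps(v, v, 1));
  m = _mm256_max_ps(m, _mm256_shuffle_ps(m, m, _MM_SHUFFLE(1, 0, 3, 2)));
  m = _mm256_max_ps(m, _mm256_shuffle_ps(m, m, _MM_SHUFFLE(2, 3, 0, 1)));
  return m; /* every lane now holds the overall maximum */
}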
