
Searched refs:vmax_acce (Results 1 – 25 of 26) sorted by relevance


/external/XNNPACK/src/f32-raddextexp/
avx2-p5.c.in
132 const __m256 vmax_acce${ABC[A:A+2]} = _mm256_max_ps(vacce${A}, vacce${A+1});
134 const __m256 vmax_acce${ABC[A]} = vacce${A};
139 …const __m256 vmax_acce${ABC[A:min(A+ACC_SLICE*2, ACCUMULATORS)]} = _mm256_max_ps(vmax_acce${ABC[A:…
143 …const __m256 vdelta_acce${K} = _mm256_max_ps(_mm256_sub_ps(vacce${K}, vmax_acce${ABC[0:ACCUMULATOR…
151 __m256 vacce = vmax_acce${ABC[0:ACCUMULATORS]};
239 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1));
240 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(1, 0, 3, …
241 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(2, 3, 0, …
242 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent);
251 _mm_store_ss(&sum[1], _mm256_castps256_ps128(vmax_acce));
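Aside: taken together, the avx2-p5.c.in hits at lines 239-251 perform a horizontal maximum over the eight accumulator-exponent lanes before the result is stored to sum[1]. A minimal standalone sketch of that reduction (the helper name hmax_avx2 is ours, not part of XNNPACK; compile with AVX2 enabled):

#include <immintrin.h>

/* Horizontal max of an __m256, mirroring the permute/shuffle sequence above. */
static float hmax_avx2(__m256 v) {
  /* 1) max with the swapped 128-bit halves */
  __m256 m = _mm256_max_ps(v, _mm256_permute2f128_ps(v, v, 1));
  /* 2) max with the copy rotated by two lanes inside each 128-bit half */
  m = _mm256_max_ps(m, _mm256_shuffle_ps(m, m, _MM_SHUFFLE(1, 0, 3, 2)));
  /* 3) max with the copy rotated by one lane; every lane now holds the maximum */
  m = _mm256_max_ps(m, _mm256_shuffle_ps(m, m, _MM_SHUFFLE(2, 3, 0, 1)));
  float out;
  _mm_store_ss(&out, _mm256_castps256_ps128(m));
  return out;
}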
avx512f-p5-scalef.c.in
114 const __m512 vmax_acce${ABC[A:A+2]} = _mm512_max_ps(vacce${A}, vacce${A+1});
116 const __m512 vmax_acce${ABC[A]} = vacce${A};
121 …const __m512 vmax_acce${ABC[A:min(A+ACC_SLICE*2, ACCUMULATORS)]} = _mm512_max_ps(vmax_acce${ABC[A:…
125 const __m512 vdelta_acce${K} = _mm512_sub_ps(vacce${K}, vmax_acce${ABC[0:ACCUMULATORS]});
130 __m512 vacce = vmax_acce${ABC[0:ACCUMULATORS]};
197 const float vmax_acce = _mm512_reduce_max_ps(vacce);
198 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce));
201 sum[1] = vmax_acce;
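Aside: the AVX-512 template at lines 197-198 replaces the shuffle sequence with the compiler-provided _mm512_reduce_max_ps sequence intrinsic and then subtracts the broadcast maximum from each lane. A minimal sketch under that assumption (the helper name delta_from_max is ours; compile with AVX-512F enabled):

#include <immintrin.h>

/* Reduce to the scalar max, then form vacce - max per lane, as in lines 197-198. */
static __m512 delta_from_max(__m512 vacce, float *out_max) {
  const float vmax_acce = _mm512_reduce_max_ps(vacce);  /* horizontal max of 16 lanes */
  *out_max = vmax_acce;                                 /* later stored to sum[1] */
  return _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce));
}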
/external/XNNPACK/src/f32-raddextexp/gen/
avx2-p5-x64.c
283 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x64() local
284 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(1, 0, 3, … in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
285 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(2, 3, 0, … in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
286 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
295 _mm_store_ss(&sum[1], _mm256_castps256_ps128(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
avx2-p5-x72.c
296 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x72() local
297 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(1, 0, 3, … in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
298 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(2, 3, 0, … in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
299 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
308 _mm_store_ss(&sum[1], _mm256_castps256_ps128(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
avx2-p5-x64-acc2.c
298 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2() local
299 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(1, 0, 3, … in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
300 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(2, 3, 0, … in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
301 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
310 _mm_store_ss(&sum[1], _mm256_castps256_ps128(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
avx2-p5-x64-acc4.c
318 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() local
319 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(1, 0, 3, … in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
320 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(2, 3, 0, … in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
321 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
330 _mm_store_ss(&sum[1], _mm256_castps256_ps128(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
avx2-p5-x80-acc2.c
324 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() local
325 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(1, 0, 3, … in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
326 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(2, 3, 0, … in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
327 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
336 _mm_store_ss(&sum[1], _mm256_castps256_ps128(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
avx2-p5-x80.c
309 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x80() local
310 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(1, 0, 3, … in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
311 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(2, 3, 0, … in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
312 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
321 _mm_store_ss(&sum[1], _mm256_castps256_ps128(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
avx2-p5-x72-acc3.c
322 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() local
323 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(1, 0, 3, … in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
324 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(2, 3, 0, … in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
325 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
334 _mm_store_ss(&sum[1], _mm256_castps256_ps128(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
avx2-p5-x96.c
335 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() local
336 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(1, 0, 3, … in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
337 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(2, 3, 0, … in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
338 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
347 _mm_store_ss(&sum[1], _mm256_castps256_ps128(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
avx2-p5-x96-acc3.c
361 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() local
362 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(1, 0, 3, … in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
363 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(2, 3, 0, … in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
364 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
373 _mm_store_ss(&sum[1], _mm256_castps256_ps128(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
avx2-p5-x96-acc2.c
350 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() local
351 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(1, 0, 3, … in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
352 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(2, 3, 0, … in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
353 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
362 _mm_store_ss(&sum[1], _mm256_castps256_ps128(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
avx2-p5-x80-acc5.c
355 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() local
356 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(1, 0, 3, … in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
357 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(2, 3, 0, … in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
358 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
367 _mm_store_ss(&sum[1], _mm256_castps256_ps128(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
avx2-p5-x96-acc6.c
390 __m256 vmax_acce = _mm256_max_ps(vacce, _mm256_permute2f128_ps(vacce, vacce, 1)); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() local
391 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(1, 0, 3, … in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
392 vmax_acce = _mm256_max_ps(vmax_acce, _mm256_shuffle_ps(vmax_acce, vmax_acce, _MM_SHUFFLE(2, 3, 0, … in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
393 const __m256 vdelta_acce = _mm256_max_ps(_mm256_sub_ps(vacce, vmax_acce), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
402 _mm_store_ss(&sum[1], _mm256_castps256_ps128(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
avx512f-p5-scalef-x128.c
239 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128() local
240 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
243 sum[1] = vmax_acce; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
avx512f-p5-scalef-x128-acc2.c
250 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() local
251 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
254 sum[1] = vmax_acce; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
avx512f-p5-scalef-x144.c
251 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144() local
252 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
255 sum[1] = vmax_acce; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
avx512f-p5-scalef-x128-acc4.c
266 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4() local
267 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4()
270 sum[1] = vmax_acce; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4()
avx512f-p5-scalef-x160-acc2.c
274 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2() local
275 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
278 sum[1] = vmax_acce; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
avx512f-p5-scalef-x160.c
263 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160() local
264 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
267 sum[1] = vmax_acce; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
avx512f-p5-scalef-x144-acc3.c
271 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3() local
272 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
275 sum[1] = vmax_acce; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
avx512f-p5-scalef-x192-acc2.c
298 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() local
299 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
302 sum[1] = vmax_acce; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
avx512f-p5-scalef-x160-acc5.c
299 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() local
300 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
303 sum[1] = vmax_acce; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
avx512f-p5-scalef-x192.c
287 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() local
288 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
291 sum[1] = vmax_acce; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
avx512f-p5-scalef-x192-acc3.c
307 const float vmax_acce = _mm512_reduce_max_ps(vacce); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() local
308 const __m512 vdelta_acce = _mm512_sub_ps(vacce, _mm512_set1_ps(vmax_acce)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
311 sum[1] = vmax_acce; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
