/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx512f-p5-scalef-x192.c | 72 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 73 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 74 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 75 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 76 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 77 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 78 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 79 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 80 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 81 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x192-acc2.c | 73 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 74 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 75 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 76 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 77 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 78 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 79 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 80 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 81 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 82 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() [all …]
|
D | avx512f-p5-scalef-x192-acc3.c | 74 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 75 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 76 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 77 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 78 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 79 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 80 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 81 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 82 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 83 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() [all …]
|
D | avx512f-p5-scalef-x160.c | 68 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 69 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 70 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 71 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 72 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 73 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 74 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 75 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 76 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 77 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() [all …]
|
D | avx512f-p5-scalef-x160-acc2.c | 69 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 70 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 71 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 72 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 73 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 74 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 75 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 76 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 77 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 78 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() [all …]
|
D | avx512f-p5-scalef-x192-acc6.c | 77 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 78 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 79 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 80 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 81 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 82 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 83 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 84 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 85 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 86 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() [all …]
|
D | avx512f-p5-scalef-x144-acc3.c | 68 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 69 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 70 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 71 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 72 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 73 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 74 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 75 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 76 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 201 const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() [all …]
|
D | avx512f-p5-scalef-x144.c | 66 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 67 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 68 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 69 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 70 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 71 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 72 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 73 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 74 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 196 const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() [all …]
|
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx512f-p5-scalef-x192.c | 71 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 72 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 73 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 74 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 75 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 76 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 77 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 78 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 79 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 80 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x192-acc3.c | 73 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 74 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 75 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 76 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 77 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 78 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 79 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 80 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 81 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 82 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() [all …]
|
D | avx512f-p5-scalef-x192-acc6.c | 76 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 77 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 78 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 79 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 80 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 81 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 82 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 83 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 84 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 85 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() [all …]
|
D | avx512f-p5-scalef-x192-acc2.c | 72 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 73 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 74 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 75 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 76 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 77 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 78 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 79 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 80 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 81 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() [all …]
|
D | avx512f-p5-scalef-x160.c | 67 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 68 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 69 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 70 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 71 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 72 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 73 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 74 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 75 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 76 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() [all …]
|
D | avx512f-p5-scalef-x160-acc2.c | 68 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 69 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 70 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 71 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 72 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 73 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 74 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 75 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 76 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 77 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() [all …]
|
D | avx512f-p5-scalef-x160-acc5.c | 71 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 72 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 73 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 74 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 75 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 76 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 77 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 78 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 79 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 80 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() [all …]
|
D | avx512f-p5-scalef-x144.c | 65 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 66 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 67 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 68 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 69 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 70 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 71 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 72 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 73 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 183 const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() [all …]
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx512f-p5-scalef-x192.c | 72 __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 73 __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 74 __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 75 __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 76 __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 77 __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 78 __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 79 __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 80 __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 81 __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x176.c | 70 __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 71 __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 72 __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 73 __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 74 __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 75 __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 76 __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 77 __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 78 __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 79 __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() [all …]
|
D | avx512f-p5-scalef-x160.c | 68 __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 69 __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 70 __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 71 __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 72 __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 73 __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 74 __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 75 __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 76 __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 77 __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() [all …]
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x192.c | 59 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 60 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 61 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 62 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 63 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 64 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 65 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 66 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 67 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 68 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x176.c | 58 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 59 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 60 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 61 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 62 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 63 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 64 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 65 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 66 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 67 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() [all …]
|
D | avx512f-p5-scalef-x160.c | 57 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 58 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 59 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 60 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 61 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 62 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 63 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 64 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 65 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 66 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() [all …]
|
D | avx512f-p5-scalef-x144.c | 56 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 57 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 58 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 59 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 60 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 61 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 62 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 63 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 64 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 196 const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() [all …]
|
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x192.c | 59 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 60 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 61 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 62 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 63 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 64 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 65 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 66 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 67 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 68 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x192-acc2.c | 61 const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 62 const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 63 const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 64 const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 65 const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 66 const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 67 const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 68 const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 69 const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 70 const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() [all …]
|