/external/XNNPACK/src/f32-vsqrt/gen/ |
D | avx512f-nr1fma1adj-x128.c | in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128():
      49  __m512 vsqrtx0 = _mm512_mul_ps(vrsqrtx0, vx0);
      50  __m512 vhalfrsqrtx0 = _mm512_mul_ps(vrsqrtx0, vhalf);
      51  __m512 vsqrtx1 = _mm512_mul_ps(vrsqrtx1, vx1);
      52  __m512 vhalfrsqrtx1 = _mm512_mul_ps(vrsqrtx1, vhalf);
      53  __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2);
      54  __m512 vhalfrsqrtx2 = _mm512_mul_ps(vrsqrtx2, vhalf);
      55  __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3);
      56  __m512 vhalfrsqrtx3 = _mm512_mul_ps(vrsqrtx3, vhalf);
      57  __m512 vsqrtx4 = _mm512_mul_ps(vrsqrtx4, vx4);
      58  __m512 vhalfrsqrtx4 = _mm512_mul_ps(vrsqrtx4, vhalf);
      [all …]
|
D | avx512f-nr1fma1adj-x112.c | in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112():
      47  __m512 vsqrtx0 = _mm512_mul_ps(vrsqrtx0, vx0);
      48  __m512 vhalfrsqrtx0 = _mm512_mul_ps(vrsqrtx0, vhalf);
      49  __m512 vsqrtx1 = _mm512_mul_ps(vrsqrtx1, vx1);
      50  __m512 vhalfrsqrtx1 = _mm512_mul_ps(vrsqrtx1, vhalf);
      51  __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2);
      52  __m512 vhalfrsqrtx2 = _mm512_mul_ps(vrsqrtx2, vhalf);
      53  __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3);
      54  __m512 vhalfrsqrtx3 = _mm512_mul_ps(vrsqrtx3, vhalf);
      55  __m512 vsqrtx4 = _mm512_mul_ps(vrsqrtx4, vx4);
      56  __m512 vhalfrsqrtx4 = _mm512_mul_ps(vrsqrtx4, vhalf);
      [all …]
|
D | avx512f-nr1fma1adj-x96.c | in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96():
      45  __m512 vsqrtx0 = _mm512_mul_ps(vrsqrtx0, vx0);
      46  __m512 vhalfrsqrtx0 = _mm512_mul_ps(vrsqrtx0, vhalf);
      47  __m512 vsqrtx1 = _mm512_mul_ps(vrsqrtx1, vx1);
      48  __m512 vhalfrsqrtx1 = _mm512_mul_ps(vrsqrtx1, vhalf);
      49  __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2);
      50  __m512 vhalfrsqrtx2 = _mm512_mul_ps(vrsqrtx2, vhalf);
      51  __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3);
      52  __m512 vhalfrsqrtx3 = _mm512_mul_ps(vrsqrtx3, vhalf);
      53  __m512 vsqrtx4 = _mm512_mul_ps(vrsqrtx4, vx4);
      54  __m512 vhalfrsqrtx4 = _mm512_mul_ps(vrsqrtx4, vhalf);
      [all …]
|
D | avx512f-nr1fma1adj-x80.c | in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80():
      43  __m512 vsqrtx0 = _mm512_mul_ps(vrsqrtx0, vx0);
      44  __m512 vhalfrsqrtx0 = _mm512_mul_ps(vrsqrtx0, vhalf);
      45  __m512 vsqrtx1 = _mm512_mul_ps(vrsqrtx1, vx1);
      46  __m512 vhalfrsqrtx1 = _mm512_mul_ps(vrsqrtx1, vhalf);
      47  __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2);
      48  __m512 vhalfrsqrtx2 = _mm512_mul_ps(vrsqrtx2, vhalf);
      49  __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3);
      50  __m512 vhalfrsqrtx3 = _mm512_mul_ps(vrsqrtx3, vhalf);
      51  __m512 vsqrtx4 = _mm512_mul_ps(vrsqrtx4, vx4);
      52  __m512 vhalfrsqrtx4 = _mm512_mul_ps(vrsqrtx4, vhalf);
      [all …]
|
D | avx512f-nr1fma1adj-x64.c | in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64():
      41  __m512 vsqrtx0 = _mm512_mul_ps(vrsqrtx0, vx0);
      42  __m512 vhalfrsqrtx0 = _mm512_mul_ps(vrsqrtx0, vhalf);
      43  __m512 vsqrtx1 = _mm512_mul_ps(vrsqrtx1, vx1);
      44  __m512 vhalfrsqrtx1 = _mm512_mul_ps(vrsqrtx1, vhalf);
      45  __m512 vsqrtx2 = _mm512_mul_ps(vrsqrtx2, vx2);
      46  __m512 vhalfrsqrtx2 = _mm512_mul_ps(vrsqrtx2, vhalf);
      47  __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3);
      48  __m512 vhalfrsqrtx3 = _mm512_mul_ps(vrsqrtx3, vhalf);
      85  __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx);
      86  __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf);
      [all …]
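These five vsqrt kernels differ only in unroll factor: x64 processes four 16-float vectors per main-loop iteration, x128 eight, and the lines at 85-86 of the x64 file are its single-vector remainder path. The per-vector computation they share is the Newton-Raphson square root named by the suffix: seed sqrt(x) and 0.5/sqrt(x) from the hardware reciprocal-square-root estimate (the matched lines), refine once with FMAs ("nr1fma"), then apply a final residual adjustment ("1adj"). Below is a minimal single-vector sketch reconstructed from those lines rather than copied from the generated source; the function name is mine, and edge cases (zero, infinity, NaN) are not handled.

  #include <immintrin.h>

  static inline __m512 sqrt_nr1fma1adj(__m512 vx) {
    const __m512 vhalf = _mm512_set1_ps(0.5f);

    // Initial estimate with ~2^-14 relative error.
    const __m512 vrsqrtx = _mm512_rsqrt14_ps(vx);

    // The matched lines: sqrt(x) ~= x*rsqrt(x), plus 0.5*rsqrt(x).
    __m512 vsqrtx = _mm512_mul_ps(vrsqrtx, vx);
    __m512 vhalfrsqrtx = _mm512_mul_ps(vrsqrtx, vhalf);

    // One Newton-Raphson step in FMA form: the residual
    // 0.5 - sqrtx*halfrsqrtx vanishes when sqrtx == sqrt(x).
    const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf);
    vhalfrsqrtx = _mm512_fmadd_ps(vhalfrsqrtx, vresidual, vhalfrsqrtx);
    vsqrtx = _mm512_fmadd_ps(vsqrtx, vresidual, vsqrtx);

    // Final adjustment: fold the leftover error x - sqrtx^2 back in
    // through the refined 0.5/sqrt(x).
    const __m512 vadjustment = _mm512_fnmadd_ps(vsqrtx, vsqrtx, vx);
    return _mm512_fmadd_ps(vhalfrsqrtx, vadjustment, vsqrtx);
  }

The adjustment costs two extra FMAs per vector but trims most of the rounding error left by the single Newton-Raphson iteration, which is what distinguishes the nr1fma1adj variants within this family.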
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx512f-rr1-lut16-p3-perm-x128.c | in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128():
      53  const __m512 vz0 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx0, vprescale));
      54  const __m512 vz1 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx1, vprescale));
      55  const __m512 vz2 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx2, vprescale));
      56  const __m512 vz3 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx3, vprescale));
      57  const __m512 vz4 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx4, vprescale));
      58  const __m512 vz5 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx5, vprescale));
      59  const __m512 vz6 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx6, vprescale));
      60  const __m512 vz7 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx7, vprescale));
     123  vp0 = _mm512_mul_ps(vp0, vt0);
     124  vt0 = _mm512_mul_ps(vt0, vs0);
      [all …]
|
D | velu-avx512f-rr1-lut16-p3-perm-x112.c | in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112():
      52  const __m512 vz0 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx0, vprescale));
      53  const __m512 vz1 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx1, vprescale));
      54  const __m512 vz2 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx2, vprescale));
      55  const __m512 vz3 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx3, vprescale));
      56  const __m512 vz4 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx4, vprescale));
      57  const __m512 vz5 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx5, vprescale));
      58  const __m512 vz6 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx6, vprescale));
     114  vp0 = _mm512_mul_ps(vp0, vt0);
     115  vt0 = _mm512_mul_ps(vt0, vs0);
     116  vp1 = _mm512_mul_ps(vp1, vt1);
      [all …]
|
D | velu-avx512f-rr1-p6-x128.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128():
      53  const __m512 vz0 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx0, vprescale));
      54  const __m512 vz1 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx1, vprescale));
      55  const __m512 vz2 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx2, vprescale));
      56  const __m512 vz3 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx3, vprescale));
      57  const __m512 vz4 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx4, vprescale));
      58  const __m512 vz5 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx5, vprescale));
      59  const __m512 vz6 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx6, vprescale));
      60  const __m512 vz7 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx7, vprescale));
     133  vp0 = _mm512_mul_ps(vp0, vt0);
     134  vt0 = _mm512_mul_ps(vt0, vs0);
      [all …]
|
D | velu-avx512f-rr1-lut16-p3-perm-x80.c | in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80():
      50  const __m512 vz0 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx0, vprescale));
      51  const __m512 vz1 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx1, vprescale));
      52  const __m512 vz2 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx2, vprescale));
      53  const __m512 vz3 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx3, vprescale));
      54  const __m512 vz4 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx4, vprescale));
      96  vp0 = _mm512_mul_ps(vp0, vt0);
      97  vt0 = _mm512_mul_ps(vt0, vs0);
      98  vp1 = _mm512_mul_ps(vp1, vt1);
      99  vt1 = _mm512_mul_ps(vt1, vs1);
     100  vp2 = _mm512_mul_ps(vp2, vt2);
      [all …]
|
D | velu-avx512f-rr1-lut16-p3-perm-x96.c | in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96():
      51  const __m512 vz0 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx0, vprescale));
      52  const __m512 vz1 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx1, vprescale));
      53  const __m512 vz2 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx2, vprescale));
      54  const __m512 vz3 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx3, vprescale));
      55  const __m512 vz4 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx4, vprescale));
      56  const __m512 vz5 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx5, vprescale));
     105  vp0 = _mm512_mul_ps(vp0, vt0);
     106  vt0 = _mm512_mul_ps(vt0, vs0);
     107  vp1 = _mm512_mul_ps(vp1, vt1);
     108  vt1 = _mm512_mul_ps(vt1, vs1);
      [all …]
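The rr1-lut16-p3-perm variants get 2^z from a 16-entry in-register table of 2^(r/16) indexed with VPERMPS, so only a degree-3 polynomial in the small remainder t (|t| <= ln(2)/32) is needed afterwards; the matched vp/vt lines are the tail of that polynomial. The sketch below shows the table split in a plain roundscale/scalef formulation chosen for clarity; the generated kernels reach the same result with a magic-bias add and integer shifts on the float bits, the table values here are decimal approximations of 2^(r/16), and the function name is mine, so treat it as illustrative only.

  #include <immintrin.h>

  // Returns s ~= 2^(m/16) and writes t = z - (m/16)*ln(2), where
  // m = round(16*z*log2(e)). A degree-3 polynomial in t finishes expm1.
  static inline __m512 exp2_lut16(__m512 vz, __m512* vt_out) {
    const __m512 vlog2e_x16 = _mm512_set1_ps(0x1.715476p+4f);      // 16/ln(2)
    const __m512 vminus_ln2_o16 = _mm512_set1_ps(-0x1.62E43p-5f);  // -ln(2)/16
    // 2^(r/16) for r = 0..15 (decimal approximations of the table entries).
    const __m512 vtable = _mm512_setr_ps(
        1.0f,       1.0442737f, 1.0905077f, 1.1387886f,
        1.1892071f, 1.2418578f, 1.2968396f, 1.3542556f,
        1.4142136f, 1.4768262f, 1.5422108f, 1.6104903f,
        1.6817928f, 1.7562522f, 1.8340081f, 1.9152065f);

    // m = round(16 * z * log2(e)), then split m = 16*e + r with r in 0..15.
    const __m512 vm = _mm512_roundscale_ps(_mm512_mul_ps(vz, vlog2e_x16), 0);
    const __m512i vr = _mm512_and_si512(_mm512_cvtps_epi32(vm),
                                        _mm512_set1_epi32(15));
    const __m512 vl = _mm512_permutexvar_ps(vr, vtable);  // 2^(r/16)
    // e = (m - r)/16 is a small exact integer; apply 2^e with VSCALEFPS.
    const __m512 ve = _mm512_mul_ps(
        _mm512_sub_ps(vm, _mm512_cvtepi32_ps(vr)), _mm512_set1_ps(1.0f / 16.0f));
    *vt_out = _mm512_fmadd_ps(vm, vminus_ln2_o16, vz);
    return _mm512_scalef_ps(vl, ve);
  }

After this, the matched vp = vp*vt; vt = vt*vs; pair folds s back in so that a final FMA yields s*expm1(t), exactly as in the p6 variants below.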
|
D | velu-avx512f-rr1-p6-x96.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96():
      51  const __m512 vz0 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx0, vprescale));
      52  const __m512 vz1 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx1, vprescale));
      53  const __m512 vz2 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx2, vprescale));
      54  const __m512 vz3 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx3, vprescale));
      55  const __m512 vz4 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx4, vprescale));
      56  const __m512 vz5 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx5, vprescale));
     113  vp0 = _mm512_mul_ps(vp0, vt0);
     114  vt0 = _mm512_mul_ps(vt0, vs0);
     115  vp1 = _mm512_mul_ps(vp1, vt1);
     116  vt1 = _mm512_mul_ps(vt1, vs1);
      [all …]
|
D | velu-avx512f-rr1-p6-x112.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112():
      52  const __m512 vz0 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx0, vprescale));
      53  const __m512 vz1 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx1, vprescale));
      54  const __m512 vz2 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx2, vprescale));
      55  const __m512 vz3 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx3, vprescale));
      56  const __m512 vz4 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx4, vprescale));
      57  const __m512 vz5 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx5, vprescale));
      58  const __m512 vz6 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx6, vprescale));
     123  vp0 = _mm512_mul_ps(vp0, vt0);
     124  vt0 = _mm512_mul_ps(vt0, vs0);
     125  vp1 = _mm512_mul_ps(vp1, vt1);
      [all …]
|
D | velu-avx512f-rr1-p6-x80.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80():
      50  const __m512 vz0 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx0, vprescale));
      51  const __m512 vz1 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx1, vprescale));
      52  const __m512 vz2 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx2, vprescale));
      53  const __m512 vz3 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx3, vprescale));
      54  const __m512 vz4 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx4, vprescale));
     103  vp0 = _mm512_mul_ps(vp0, vt0);
     104  vt0 = _mm512_mul_ps(vt0, vs0);
     105  vp1 = _mm512_mul_ps(vp1, vt1);
     106  vt1 = _mm512_mul_ps(vt1, vs1);
     107  vp2 = _mm512_mul_ps(vp2, vt2);
      [all …]
|
D | velu-avx512f-rr1-p6-x64.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64():
      49  const __m512 vz0 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx0, vprescale));
      50  const __m512 vz1 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx1, vprescale));
      51  const __m512 vz2 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx2, vprescale));
      52  const __m512 vz3 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx3, vprescale));
      93  vp0 = _mm512_mul_ps(vp0, vt0);
      94  vt0 = _mm512_mul_ps(vt0, vs0);
      95  vp1 = _mm512_mul_ps(vp1, vt1);
      96  vt1 = _mm512_mul_ps(vt1, vs1);
      97  vp2 = _mm512_mul_ps(vp2, vt2);
      98  vt2 = _mm512_mul_ps(vt2, vs2);
      [all …]
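The rr1-p6 variants replace the table with a degree-6 polynomial. Each unrolled vector follows the pipeline visible in the matched lines: multiply by prescale and clamp at the saturation cutoff (below it expm1 is exactly -1 in float), reduce z to n*ln2 + t in a single round-to-nearest step (the "rr1"), evaluate the polynomial, then the vp = vp*vt; vt = vt*vs; pair sets up the FMAs that produce alpha*expm1(z). A single-vector sketch follows; the function name is mine and plain Taylor coefficients stand in for the minimax coefficients of the generated source, so it is structurally faithful but less accurate.

  #include <immintrin.h>

  static inline __m512 elu_rr1_p6(__m512 vx, float prescale, float alpha, float beta) {
    const __m512 vprescale = _mm512_set1_ps(prescale);
    const __m512 valpha = _mm512_set1_ps(alpha);
    const __m512 vbeta = _mm512_set1_ps(beta);
    // Below this cutoff expm1(z) == -1 in float, so clamping loses nothing
    // and keeps the exponent arithmetic in range.
    const __m512 vsat_cutoff = _mm512_set1_ps(-0x1.154246p+4f);
    // 1.5*2^23 with the 127 exponent bias baked into the low bits: after the
    // fmadd, vn's low mantissa bits hold round(z*log2(e)) + 127.
    const __m512 vmagic_bias = _mm512_set1_ps(0x1.8000FEp23f);
    const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f);
    const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E430p-1f);

    // The matched lines: prescale, then saturate.
    const __m512 vz = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx, vprescale));

    // rr1 reduction: z = n*ln2 + t, with s = 2^n built by shifting vn's bits.
    __m512 vn = _mm512_fmadd_ps(vz, vlog2e, vmagic_bias);
    const __m512 vs = _mm512_castsi512_ps(
        _mm512_slli_epi32(_mm512_castps_si512(vn), 23));
    vn = _mm512_sub_ps(vn, vmagic_bias);
    __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vz);

    // Degree-6 polynomial for (expm1(t) - t) / t^2 in Horner form
    // (Taylor coefficients; the kernels use minimax ones).
    __m512 vp = _mm512_fmadd_ps(_mm512_set1_ps(1.0f / 720.0f), vt,
                                _mm512_set1_ps(1.0f / 120.0f));
    vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f / 24.0f));
    vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f / 6.0f));
    vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(0.5f));

    // The matched tail: p *= t and t *= s, so that p*t + t == s*(exp(t) - 1).
    vp = _mm512_mul_ps(vp, vt);
    vt = _mm512_mul_ps(vt, vs);
    const __m512 vsm1 = _mm512_fmsub_ps(vs, valpha, valpha);  // alpha*(s - 1)
    vp = _mm512_fmadd_ps(vp, vt, vt);                         // s*(exp(t) - 1)
    const __m512 ve = _mm512_fmadd_ps(vp, valpha, vsm1);      // alpha*expm1(z)

    // Non-negative lanes take the linear path x*beta instead.
    const __mmask16 vpos =
        _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_NLT_US);
    return _mm512_mask_mul_ps(ve, vpos, vx, vbeta);
  }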
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx512f-p5-scalef-x192.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192():
      72  __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0);
      73  __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0);
      74  __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0);
      75  __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0);
      76  __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0);
      77  __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);
      78  __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);
      79  __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0);
      80  __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0);
      81  __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);
      [all …]
|
D | avx512f-p5-scalef-x176.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176():
      70  __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0);
      71  __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0);
      72  __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0);
      73  __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0);
      74  __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0);
      75  __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);
      76  __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);
      77  __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0);
      78  __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0);
      79  __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);
      [all …]
|
D | avx512f-p5-scalef-x160.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160():
      68  __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0);
      69  __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0);
      70  __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0);
      71  __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0);
      72  __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0);
      73  __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);
      74  __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);
      75  __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0);
      76  __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0);
      77  __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);
      [all …]
|
D | avx512f-p5-scalef-x144.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144():
      66  __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0);
      67  __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0);
      68  __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0);
      69  __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0);
      70  __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0);
      71  __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);
      72  __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);
      73  __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0);
      74  __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0);
     163  vf0 = _mm512_mul_ps(vf0, vscale);
      [all …]
|
D | avx512f-p5-scalef-x128.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128():
      64  __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0);
      65  __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0);
      66  __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0);
      67  __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0);
      68  __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0);
      69  __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);
      70  __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);
      71  __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0);
     152  vf0 = _mm512_mul_ps(vf0, vscale);
     153  vf1 = _mm512_mul_ps(vf1, vscale);
      [all …]
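These kernels compute the scaled softmax numerator exp(x - max) * scale. The matched vn lines are the start of the range reduction: _mm512_roundscale_ps(v, 0) rounds x*log2(e) to the nearest integer n, and the trailing vf lines apply the output scale. Reconstruction goes through VSCALEFPS, which multiplies by 2^n in hardware with correct overflow and underflow behavior, so this family needs no magic-bias bit tricks or saturation clamps. A single-vector sketch under those assumptions, with a hypothetical function name and Taylor coefficients standing in for the minimax ones in the generated source:

  #include <immintrin.h>

  static inline __m512 scaleexpminusmax_step(__m512 vi, __m512 vi_max, __m512 vscale) {
    const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f);
    // -ln(2) split into high and low parts (Cody-Waite) for an accurate t.
    const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f);
    const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);

    const __m512 vx = _mm512_sub_ps(vi, vi_max);
    // The matched lines: n = round(x * log2(e)).
    const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);
    // t = x - n*ln(2), in two fmadds.
    __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vx);
    vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt);
    // Degree-5 polynomial for e^t in Horner form (Taylor stand-ins).
    __m512 vp = _mm512_fmadd_ps(_mm512_set1_ps(1.0f / 120.0f), vt,
                                _mm512_set1_ps(1.0f / 24.0f));
    vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f / 6.0f));
    vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(0.5f));
    vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f));
    vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f));
    // f = e^t * 2^n via VSCALEFPS, then the matched scale multiply.
    const __m512 vf = _mm512_scalef_ps(vp, vn);
    return _mm512_mul_ps(vf, vscale);
  }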
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x192.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192():
      59  const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0);
      60  const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0);
      61  const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0);
      62  const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0);
      63  const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0);
      64  const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);
      65  const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);
      66  const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0);
      67  const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0);
      68  const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);
      [all …]
|
D | avx512f-p5-scalef-x176.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176():
      58  const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0);
      59  const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0);
      60  const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0);
      61  const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0);
      62  const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0);
      63  const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);
      64  const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);
      65  const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0);
      66  const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0);
      67  const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);
      [all …]
|
D | avx512f-p5-scalef-x160.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160():
      57  const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0);
      58  const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0);
      59  const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0);
      60  const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0);
      61  const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0);
      62  const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);
      63  const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);
      64  const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0);
      65  const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0);
      66  const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);
      [all …]
|
D | avx512f-p5-scalef-x144.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144():
      56  const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0);
      57  const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0);
      58  const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0);
      59  const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0);
      60  const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0);
      61  const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);
      62  const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);
      63  const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0);
      64  const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0);
     145  __m512 vf0 = _mm512_mul_ps(vp0, vscalev);
      [all …]
|
D | avx512f-p5-scalef-x128.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128():
      55  const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0);
      56  const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0);
      57  const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0);
      58  const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0);
      59  const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0);
      60  const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);
      61  const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);
      62  const __m512 vn7 = _mm512_roundscale_ps(_mm512_mul_ps(vx7, vlog2e), 0);
     136  __m512 vf0 = _mm512_mul_ps(vp0, vscalev);
     137  __m512 vf1 = _mm512_mul_ps(vp1, vscalev);
      [all …]
|
D | avx512f-p5-scalef-x112.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112():
      54  const __m512 vn0 = _mm512_roundscale_ps(_mm512_mul_ps(vx0, vlog2e), 0);
      55  const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0);
      56  const __m512 vn2 = _mm512_roundscale_ps(_mm512_mul_ps(vx2, vlog2e), 0);
      57  const __m512 vn3 = _mm512_roundscale_ps(_mm512_mul_ps(vx3, vlog2e), 0);
      58  const __m512 vn4 = _mm512_roundscale_ps(_mm512_mul_ps(vx4, vlog2e), 0);
      59  const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);
      60  const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);
     127  __m512 vf0 = _mm512_mul_ps(vp0, vscalev);
     128  __m512 vf1 = _mm512_mul_ps(vp1, vscalev);
     129  __m512 vf2 = _mm512_mul_ps(vp2, vscalev);
      [all …]
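f32-vscaleextexp is the extended-exponent relative of the family above: the scale is carried as a separate mantissa (vscalev) and exponent (vscalee), so exp(x) * scale cannot overflow in intermediate form even when the combined exponent is far outside float range. The matched vn lines are the same round-to-nearest reduction, and the vf = _mm512_mul_ps(vp, vscalev) lines multiply mantissas only; a single final VSCALEFPS applies the combined exponent n + scalee. A sketch along those lines; the function name, the -127 exponent clamp, and the parameter packaging are assumptions of mine, not copied from the source.

  #include <immintrin.h>

  static inline __m512 scaleextexp_step(__m512 vx, __m512 vscalev, __m512 vscalee) {
    const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f);
    const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f);
    const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);
    // Results with exponents this small underflow to zero after VSCALEFPS
    // anyway; the clamp keeps the exponent sum well-behaved (assumed value).
    const __m512 vmin_exponent = _mm512_set1_ps(-127.0f);

    // Same reduction and degree-5 polynomial as the p5-scalef sketch above.
    const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);
    __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vx);
    vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt);
    __m512 vp = _mm512_fmadd_ps(_mm512_set1_ps(1.0f / 120.0f), vt,
                                _mm512_set1_ps(1.0f / 24.0f));
    vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f / 6.0f));
    vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(0.5f));
    vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f));
    vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f));
    // Mantissa product first (the matched vf lines), one scalef at the end.
    const __m512 vf = _mm512_mul_ps(vp, vscalev);
    const __m512 ve = _mm512_max_ps(_mm512_add_ps(vn, vscalee), vmin_exponent);
    return _mm512_scalef_ps(vf, ve);
  }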
|