/external/XNNPACK/src/math/ |
D | exp-avx512f-rr2-p5-scalef.c | 20 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_math_f32_exp__avx512f_rr2_p5_scalef() 23 const __m512 vzero_cutoff = _mm512_set1_ps(-0x1.9FE368p+6f); in xnn_math_f32_exp__avx512f_rr2_p5_scalef() 25 const __m512 vinf_cutoff = _mm512_set1_ps(0x1.62E42Ep+6f); in xnn_math_f32_exp__avx512f_rr2_p5_scalef() 27 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_math_f32_exp__avx512f_rr2_p5_scalef() 28 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_math_f32_exp__avx512f_rr2_p5_scalef() 30 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_math_f32_exp__avx512f_rr2_p5_scalef() 31 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_math_f32_exp__avx512f_rr2_p5_scalef() 32 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_math_f32_exp__avx512f_rr2_p5_scalef() 33 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_math_f32_exp__avx512f_rr2_p5_scalef() 34 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_math_f32_exp__avx512f_rr2_p5_scalef() [all …]
|
D | exp-avx512f-rr2-p5.c | 21 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p+23f); in xnn_math_f32_exp__avx512f_rr2_p5() 23 const __m512 vzero_cutoff = _mm512_set1_ps(-0x1.9FE368p+6f); in xnn_math_f32_exp__avx512f_rr2_p5() 25 const __m512 vinf_cutoff = _mm512_set1_ps(0x1.62E42Ep+6f); in xnn_math_f32_exp__avx512f_rr2_p5() 26 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_math_f32_exp__avx512f_rr2_p5() 27 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_math_f32_exp__avx512f_rr2_p5() 28 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_math_f32_exp__avx512f_rr2_p5() 29 const __m512 vplus_inf = _mm512_set1_ps(INFINITY); in xnn_math_f32_exp__avx512f_rr2_p5() 31 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_math_f32_exp__avx512f_rr2_p5() 32 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_math_f32_exp__avx512f_rr2_p5() 33 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_math_f32_exp__avx512f_rr2_p5() [all …]
|
D | expm1minus-avx512f-rr1-p6.c | 22 const __m512 vsat_cutoff = _mm512_set1_ps(-0x1.154246p+4f); in xnn_math_f32_expm1minus__avx512f_rr1_p6() 24 const __m512 vmagic_bias = _mm512_set1_ps(0x1.8000FEp23f); in xnn_math_f32_expm1minus__avx512f_rr1_p6() 25 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_math_f32_expm1minus__avx512f_rr1_p6() 26 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_math_f32_expm1minus__avx512f_rr1_p6() 30 const __m512 vc6 = _mm512_set1_ps(0x1.6b7338p-10f); in xnn_math_f32_expm1minus__avx512f_rr1_p6() 31 const __m512 vc5 = _mm512_set1_ps(0x1.12278Ep-7f); in xnn_math_f32_expm1minus__avx512f_rr1_p6() 32 const __m512 vc4 = _mm512_set1_ps(0x1.555716p-5f); in xnn_math_f32_expm1minus__avx512f_rr1_p6() 33 const __m512 vc3 = _mm512_set1_ps(0x1.5554B0p-3f); in xnn_math_f32_expm1minus__avx512f_rr1_p6() 34 const __m512 vc2 = _mm512_set1_ps(0x1.FFFFFEp-2f); in xnn_math_f32_expm1minus__avx512f_rr1_p6() 35 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_math_f32_expm1minus__avx512f_rr1_p6()
|
D | extexp-avx512f-p5.c | 22 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_math_f32_extexp__avx512f_p5() 23 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_math_f32_extexp__avx512f_p5() 24 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_math_f32_extexp__avx512f_p5() 26 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_math_f32_extexp__avx512f_p5() 27 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_math_f32_extexp__avx512f_p5() 28 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_math_f32_extexp__avx512f_p5() 29 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_math_f32_extexp__avx512f_p5() 30 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_math_f32_extexp__avx512f_p5() 31 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_math_f32_extexp__avx512f_p5()
|
D | sigmoid-avx512f-rr2-p5-scalef-div.c | 23 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_div() 24 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_div() 25 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_div() 28 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_div() 29 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_div() 30 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_div() 31 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_div() 32 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_div() 33 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_div()
|
D | sigmoid-avx512f-rr2-p5-scalef-nr1fma.c | 23 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma() 24 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma() 25 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma() 28 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma() 29 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma() 30 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma() 31 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma() 32 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma() 33 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma()
|
D | sigmoid-avx512f-rr2-p5-scalef-nr1fma1adj.c | 23 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma1adj() 24 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma1adj() 25 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma1adj() 28 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma1adj() 29 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma1adj() 30 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma1adj() 31 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma1adj() 32 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma1adj() 33 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_math_f32_sigmoid__avx512f_rr2_p5_scalef_nr1fma1adj()
|
D | exp-avx512f-rr2-lut32-p2-perm2.c | 21 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p23f); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2() 22 const __m512 vlog2e_x32 = _mm512_set1_ps(0x1.715476p5f); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2() 24 const __m512 vzero_cutoff = _mm512_set1_ps(-0x1.9FE368p6f); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2() 26 const __m512 vinf_cutoff = _mm512_set1_ps(0x1.62E42Ep6f); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2() 27 const __m512 vminus_ln2_o32_hi = _mm512_set1_ps(-0x1.62e43p-6f); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2() 28 const __m512 vminus_ln2_o32_lo = _mm512_set1_ps(0x1.05c61p-34f); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2() 29 const __m512 vplus_inf = _mm512_set1_ps(INFINITY); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2() 31 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2() 32 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2()
|
D | exp-avx512f-rr2-lut16-p3-perm.c | 21 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p23f); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm() 22 const __m512 vlog2e_x16 = _mm512_set1_ps(0x1.715476p4f); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm() 24 const __m512 vzero_cutoff = _mm512_set1_ps(-0x1.9FE368p6f); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm() 26 const __m512 vinf_cutoff = _mm512_set1_ps(0x1.62E42Ep6f); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm() 27 const __m512 vminus_ln2_o16_hi = _mm512_set1_ps(-0x1.62e43p-5f); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm() 28 const __m512 vminus_ln2_o16_lo = _mm512_set1_ps(0x1.05c61p-33f); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm() 29 const __m512 vplus_inf = _mm512_set1_ps(INFINITY); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm() 31 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm() 32 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm()
|
D | sigmoid-avx512f-rr1-p5-scalef-div.c | 23 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_div() 24 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_div() 27 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_div() 28 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_div() 29 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_div() 30 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_div() 31 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_div() 32 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_div()
|
D | sigmoid-avx512f-rr1-p5-scalef-nr1fma.c | 23 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma() 24 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma() 27 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma() 28 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma() 29 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma() 30 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma() 31 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma() 32 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma()
|
D | sigmoid-avx512f-rr1-p5-scalef-nr1fma1adj.c | 23 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma1adj() 24 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma1adj() 27 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma1adj() 28 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma1adj() 29 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma1adj() 30 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma1adj() 31 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma1adj() 32 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_math_f32_sigmoid__avx512f_rr1_p5_scalef_nr1fma1adj()
|
D | expm1minus-avx512f-rr1-lut16-p3-perm.c | 24 const __m512 vsat_cutoff = _mm512_set1_ps(-0x1.154246p+4f); in xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm() 26 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm() 27 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm() 32 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm() 36 const __m512 vc3 = _mm512_set1_ps(0x1.55561Cp-3f); in xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm() 37 const __m512 vc2 = _mm512_set1_ps(0x1.0001ECp-1f); in xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm() 38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm()
|
D | sigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div.c | 24 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_div() 25 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_div() 37 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62e43p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_div() 38 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05c61p-29f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_div() 41 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_div() 42 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_div() 43 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_div()
|
D | sigmoid-avx512f-rr2-lut16-p3-perm-scalef-div.c | 24 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_div() 25 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_div() 32 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62e43p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_div() 33 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05c61p-29f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_div() 36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_div() 37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_div() 38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_div()
|
D | sigmoid-avx512f-rr2-lut16-p3-perm-scalef-nr1fma.c | 24 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_nr1fma() 25 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_nr1fma() 32 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62e43p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_nr1fma() 33 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05c61p-29f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_nr1fma() 36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_nr1fma() 37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_nr1fma() 38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_nr1fma()
|
D | sigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma.c | 24 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma() 25 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma() 37 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62e43p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma() 38 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05c61p-29f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma() 41 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma() 42 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma() 43 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma()
|
D | sigmoid-avx512f-rr2-lut16-p3-perm-scalef-nr1fma1adj.c | 24 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_nr1fma1adj() 25 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_nr1fma1adj() 32 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62e43p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_nr1fma1adj() 33 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05c61p-29f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_nr1fma1adj() 36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_nr1fma1adj() 37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_nr1fma1adj() 38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut16_p3_perm_scalef_nr1fma1adj()
|
D | sigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma1adj.c | 24 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma1adj() 25 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma1adj() 37 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62e43p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma1adj() 38 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05c61p-29f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma1adj() 41 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma1adj() 42 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma1adj() 43 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_math_f32_sigmoid__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma1adj()
|
D | exp-avx512f-rr2-lut16-p3-perm-scalef.c | 21 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef() 22 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef() 23 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62e43p-1f); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef() 24 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05c61p-29f); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef() 26 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef() 27 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_math_f32_exp__avx512f_rr2_lut16_p3_perm_scalef()
|
D | exp-avx512f-rr2-lut32-p2-perm2-scalef.c | 21 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef() 22 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef() 23 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62e43p-1f); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef() 24 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05c61p-29f); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef() 26 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef() 27 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_math_f32_exp__avx512f_rr2_lut32_p2_perm2_scalef()
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx512f-p5-scalef-x16.c | 27 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16() 28 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16() 29 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16() 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16() 32 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16() 33 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16() 34 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16() 35 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16() 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16() 38 const __m512 vscale = _mm512_set1_ps(scale); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16() [all …]
|
D | avx512f-p5-scalef-x32.c | 27 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() 28 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() 29 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() 32 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() 33 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() 34 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() 35 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() 38 const __m512 vscale = _mm512_set1_ps(scale); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() [all …]
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x16.c | 28 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() 29 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() 30 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() 32 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() 33 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() 34 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() 35 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() 36 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() 37 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() 39 const __m512 vscalev = _mm512_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() [all …]
|
D | avx512f-p5-scalef-x32.c | 28 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() 29 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() 30 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() 32 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() 33 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() 34 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() 35 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() 36 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() 37 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() 39 const __m512 vscalev = _mm512_set1_ps(scale_value); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() [all …]
|