Searched refs:vn8 (Results 1 – 25 of 90) sorted by relevance

/external/XNNPACK/src/f32-raddexpminusmax/gen/
avx2-p5-x72-acc3.c:78  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3() local
90 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
101 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
113 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
123 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
avx2-p5-x72.c:76  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72() local
88 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
99 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
111 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
121 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
avx2-p5-x80-acc2.c:79  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2() local
92 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
104 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
117 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
128 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
avx2-p5-x80.c:78  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80() local
91 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
103 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
116 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
127 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
avx2-p5-x80-acc5.c:82  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5() local
95 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
107 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
120 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
131 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
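All of the exp-minus-max hits above follow the same AVX2 scheme: vn8 is round(x*log2(e)) obtained with the magic-bias trick, shifting its integer bits left by 23 builds vs8 = 2^n directly in the float exponent field, and the two fused multiply-adds with vminus_ln2_hi/vminus_ln2_lo perform a Cody-Waite reduction to vt8 = x - n*ln2. Below is a minimal, self-contained sketch of that scheme for one __m256 register; it is not the XNNPACK source, the constants are the values commonly used by these p5 variants, and the flush-to-zero handling for very negative inputs is omitted. Compile with -mavx2 -mfma.

  #include <immintrin.h>

  /* Sketch of the magic-bias exp approximation mirrored by the vn8/vs8/vt8
   * lines above, for inputs x <= 0 (as produced by x - max). */
  static __m256 exp_p5_sketch(__m256 vx) {
    const __m256 vmagic_bias   = _mm256_set1_ps(0x1.8000FEp23f);
    const __m256 vlog2e        = _mm256_set1_ps(0x1.715476p+0f);
    const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f);
    const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f);
    const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f);
    const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f);
    const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f);
    const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f);
    const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f);

    /* n := round(x * log2(e)), kept in the low mantissa bits of vn */
    __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);
    /* s := 2^n, built by shifting those bits into the exponent field */
    const __m256 vs = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn), 23));
    /* recover n as a float, then reduce: t := x - n*ln2 (hi/lo split) */
    vn = _mm256_sub_ps(vn, vmagic_bias);
    __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);
    vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);
    /* degree-5 polynomial: exp(t) ~= 1 + t*(c1 + t*(c2 + ...)) */
    __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);
    vp = _mm256_fmadd_ps(vp, vt, vc3);
    vp = _mm256_fmadd_ps(vp, vt, vc2);
    vp = _mm256_fmadd_ps(vp, vt, vc1);
    /* exp(x) ~= s + (t*s)*p */
    vt = _mm256_mul_ps(vt, vs);
    return _mm256_fmadd_ps(vt, vp, vs);
  }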
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
avx2-p5-x72.c:78  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72() local
90 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
101 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
113 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
123 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
avx2-p5-x80.c:80  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80() local
93 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
105 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
118 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
129 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
avx2-p5-x88.c:82  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() local
96 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
109 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
123 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
135 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
avx2-p5-x96.c:84  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() local
99 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
113 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
128 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
141 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
avx2-p5-x72-acc3.c:79  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72_acc3() local
91 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72_acc3()
102 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72_acc3()
114 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72_acc3()
124 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72_acc3()
avx2-p5-x72.c:77  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72() local
89 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72()
100 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72()
112 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72()
122 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72()
avx2-p5-x80-acc5.c:83  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5() local
96 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5()
108 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5()
121 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5()
132 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5()
avx2-p5-x80.c:79  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80() local
92 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80()
104 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80()
117 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80()
128 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80()
avx2-p5-x80-acc2.c:80  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2() local
93 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2()
105 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2()
118 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2()
129 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2()
avx2-p5-x96-acc2.c:84  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() local
99 const __m256 vs8 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn8), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
113 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
128 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
141 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
/external/XNNPACK/src/f32-raddextexp/gen/
avx512f-p5-scalef-x144.c:64  const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144() local
76 __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
86 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
157 vmax_e0 = _mm512_max_ps(vmax_e0, vn8); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
168 const __m512 vdelta_e8 = _mm512_sub_ps(vn8, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
avx512f-p5-scalef-x160-acc2.c:67  const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2() local
80 __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
91 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
168 vmax_e0 = _mm512_max_ps(vmax_e0, vn8); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
181 const __m512 vdelta_e8 = _mm512_sub_ps(vn8, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
avx512f-p5-scalef-x160.c:65  const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160() local
78 __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
89 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
166 vmax_e0 = _mm512_max_ps(vmax_e0, vn8); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
178 const __m512 vdelta_e8 = _mm512_sub_ps(vn8, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
avx512f-p5-scalef-x144-acc3.c:68  const __m512 vn8 = _mm512_roundscale_ps(_mm512_mul_ps(vx8, vlog2e), 0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3() local
80 __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
90 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
161 vmax_e2 = _mm512_max_ps(vmax_e2, vn8); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
174 const __m512 vdelta_e8 = _mm512_sub_ps(vn8, vmax_e2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
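The raddextexp kernels avoid a separate max pass by keeping the exponent explicit: vn8 = roundscale(x*log2(e)) stays a separate float exponent, vmax_e tracks the largest exponent seen so far, and vdelta_e8 = vn8 - vmax_e is later fed to _mm512_scalef_ps to bring each term onto a common scale. A minimal per-term sketch of that accumulation step follows; it is a simplification under assumptions, not the XNNPACK source, which batches many vectors per step. Compile with -mavx512f.

  #include <immintrin.h>

  /* Sketch: accumulate one (mantissa vp, integer exponent vn) pair into a
   * running sum vacc kept relative to the running maximum exponent vmax_e. */
  static void raddextexp_step(__m512 vp, __m512 vn, __m512* vacc, __m512* vmax_e) {
    const __m512 vmax = _mm512_max_ps(*vmax_e, vn);          /* new max exponent  */
    const __m512 vdelta_acc = _mm512_sub_ps(*vmax_e, vmax);  /* rescale old sum   */
    const __m512 vdelta_e   = _mm512_sub_ps(vn, vmax);       /* rescale new term  */
    *vacc = _mm512_scalef_ps(*vacc, vdelta_acc);
    *vacc = _mm512_add_ps(*vacc, _mm512_scalef_ps(vp, vdelta_e));
    *vmax_e = vmax;
  }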
/external/XNNPACK/src/f32-velu/gen/
velu-avx2-rr1-lut8-p4-perm-x72.c:72  __m256 vn8 = _mm256_fmadd_ps(vz8, vlog2e, vmagic_bias); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72() local
98 const __m256i ven8 = _mm256_slli_epi32(_mm256_castps_si256(vn8), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
99 const __m256i vl8 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn8)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
100 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
119 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vz8); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
velu-avx2-rr1-lut4-p4-perm-x72.c:73  __m256 vn8 = _mm256_fmadd_ps(vz8, vlog2e, vmagic_bias); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() local
99 const __m256i ven8 = _mm256_slli_epi32(_mm256_castps_si256(vn8), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
100 const __m256i vl8 = _mm256_castps_si256(_mm256_permutevar_ps(vtable, _mm256_castps_si256(vn8))); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
101 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
120 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vz8); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
velu-avx2-rr1-lut16-p3-gather-x72.c:72  __m256 vn8 = _mm256_fmadd_ps(vz8, vlog2e, vmagic_bias); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() local
90 const __m256i vidx8 = _mm256_and_si256(_mm256_castps_si256(vn8), vindex_mask); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
109 const __m256i ven8 = _mm256_slli_epi32(_mm256_castps_si256(vn8), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
110 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
129 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vz8); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
velu-avx2-rr1-lut4-p4-perm-x80.c:75  __m256 vn8 = _mm256_fmadd_ps(vz8, vlog2e, vmagic_bias); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80() local
102 const __m256i ven8 = _mm256_slli_epi32(_mm256_castps_si256(vn8), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80()
103 const __m256i vl8 = _mm256_castps_si256(_mm256_permutevar_ps(vtable, _mm256_castps_si256(vn8))); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80()
104 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80()
126 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vz8); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80()
velu-avx2-rr1-lut16-p3-gather-x80.c:74  __m256 vn8 = _mm256_fmadd_ps(vz8, vlog2e, vmagic_bias); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80() local
93 const __m256i vidx8 = _mm256_and_si256(_mm256_castps_si256(vn8), vindex_mask); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
114 const __m256i ven8 = _mm256_slli_epi32(_mm256_castps_si256(vn8), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
115 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
136 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vz8); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
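The velu LUT variants split n differently: the low index bits of vn8 select a 2^(i/LUT_SIZE) mantissa from vtable (via permutevar or gather), while the remaining integer bits are shifted straight into the exponent field, which is why the shift amount is 20 for lut8, 21 for lut4, and 19 for lut16 (23 minus the number of index bits). A minimal sketch for the lut8 perm case follows; it is illustrative only, not the XNNPACK source, and vtable is assumed to hold the eight 2^(i/8) entries. Compile with -mavx2.

  #include <immintrin.h>

  /* Sketch: rebuild s = 2^n from the magic-biased vn using an 8-entry LUT. */
  static __m256 velu_lut8_scale_sketch(__m256 vn, __m256i vtable) {
    /* integer part of n moved into the float exponent field (23 - 3 = 20) */
    const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 20);
    /* fractional part of n selects a 2^(i/8) mantissa from the table */
    const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn));
    /* s = 2^n: exponent bits added onto the table entry */
    return _mm256_castsi256_ps(_mm256_add_epi32(vl, ven));
  }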
/external/XNNPACK/src/f32-sigmoid/gen/
avx-rr2-p5-div-x72.c:71  __m256 vn8 = _mm256_add_ps(_mm256_mul_ps(vz8, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72() local
97 …_m128 vs_lo8 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn8)), 23)); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
98 …128 vs_hi8 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn8, 1)), 23)); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
109 vn8 = _mm256_sub_ps(vn8, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
119 __m256 vt8 = _mm256_add_ps(_mm256_mul_ps(vn8, vminus_ln2_hi), vz8); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
129 vt8 = _mm256_add_ps(_mm256_mul_ps(vn8, vminus_ln2_lo), vt8); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
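The sigmoid hit targets plain AVX (no FMA, no 256-bit integer shifts), which is why vn8 is built with separate _mm256_mul_ps/_mm256_add_ps, the rr2 reduction applies the two-constant ln2 split as add/mul pairs, and 2^n is assembled from two 128-bit halves (vs_lo8/vs_hi8). A minimal sketch of that half-by-half exponent reconstruction is below (illustrative, not the XNNPACK source). Compile with -mavx.

  #include <immintrin.h>

  /* Sketch: build 2^n from a magic-biased vn on AVX1, where 256-bit
   * integer shifts are unavailable, by shifting each 128-bit half. */
  static __m256 exp2_halves_sketch(__m256 vn) {
    const __m128 vs_lo = _mm_castsi128_ps(
        _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 23));
    const __m128 vs_hi = _mm_castsi128_ps(
        _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 23));
    return _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1);
  }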
