
Search for refs:vr2 (results 1 – 25 of 55), sorted by relevance


/external/XNNPACK/src/f32-sigmoid/gen/
avx2-rr1-p5-nr2fma-x24.c  (in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24())
   96  __m256 vr2 = _mm256_rcp_ps(vd2);   [local]
  100  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  104  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  108  __m256 vf2 = _mm256_mul_ps(ve2, vr2);

avx2-rr1-p5-nr2fma-x32.c  (in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32())
  109  __m256 vr2 = _mm256_rcp_ps(vd2);   [local]
  114  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  119  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  124  __m256 vf2 = _mm256_mul_ps(ve2, vr2);

avx2-rr1-p5-nr2fma-x40.c  (in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40())
  122  __m256 vr2 = _mm256_rcp_ps(vd2);   [local]
  128  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  134  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  140  __m256 vf2 = _mm256_mul_ps(ve2, vr2);

avx2-rr1-p5-nr2fma-x48.c  (in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48())
  135  __m256 vr2 = _mm256_rcp_ps(vd2);   [local]
  142  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  149  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  156  __m256 vf2 = _mm256_mul_ps(ve2, vr2);

avx-rr2-p5-nr2-x24.c  (in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24())
  108  __m256 vr2 = _mm256_rcp_ps(vd2);   [local]
  114  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
  115  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
  119  __m256 vf2 = _mm256_mul_ps(ve2, vr2);

avx512f-rr1-p5-scalef-nr1fma-x48.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48())
   89  __m512 vr2 = _mm512_rcp14_ps(vd2);   [local]
   93  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
   97  __m512 vf2 = _mm512_mul_ps(ve2, vr2);

avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x48.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48())
   92  __m512 vr2 = _mm512_rcp14_ps(vd2);   [local]
   96  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  100  __m512 vf2 = _mm512_mul_ps(ve2, vr2);

avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x48.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48())
   98  __m512 vr2 = _mm512_rcp14_ps(vd2);   [local]
  102  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  106  __m512 vf2 = _mm512_mul_ps(ve2, vr2);

avx2-rr1-p5-nr2fma-x56.c  (in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56())
  148  __m256 vr2 = _mm256_rcp_ps(vd2);   [local]
  156  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  164  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  172  __m256 vf2 = _mm256_mul_ps(ve2, vr2);

avx512f-rr1-p5-scalef-nr1fma-x64.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64())
  101  __m512 vr2 = _mm512_rcp14_ps(vd2);   [local]
  106  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  111  __m512 vf2 = _mm512_mul_ps(ve2, vr2);

avx2-rr1-p5-nr1fma-x24.c  (in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24())
   96  __m256 vr2 = _mm256_rcp_ps(vd2);   [local]
  100  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  105  __m256 vf2 = _mm256_mul_ps(ve2, vr2);

avx-rr2-p5-nr2-x32.c  (in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32())
  124  __m256 vr2 = _mm256_rcp_ps(vd2);   [local]
  131  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
  132  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
  138  __m256 vf2 = _mm256_mul_ps(ve2, vr2);

avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x64.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64())
  110  __m512 vr2 = _mm512_rcp14_ps(vd2);   [local]
  115  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  120  __m512 vf2 = _mm512_mul_ps(ve2, vr2);

avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x64.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64())
  104  __m512 vr2 = _mm512_rcp14_ps(vd2);   [local]
  109  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  114  __m512 vf2 = _mm512_mul_ps(ve2, vr2);

avx2-rr1-p5-nr2fma-x64.c  (in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64())
  161  __m256 vr2 = _mm256_rcp_ps(vd2);   [local]
  170  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  179  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  188  __m256 vf2 = _mm256_mul_ps(ve2, vr2);

avx2-rr1-p5-nr2fma-x72.c  (in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72())
  174  __m256 vr2 = _mm256_rcp_ps(vd2);   [local]
  184  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  194  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  204  __m256 vf2 = _mm256_mul_ps(ve2, vr2);

avx2-rr1-p5-nr1fma-x32.c  (in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32())
  109  __m256 vr2 = _mm256_rcp_ps(vd2);   [local]
  114  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  120  __m256 vf2 = _mm256_mul_ps(ve2, vr2);

avx512f-rr1-p5-scalef-nr1fma-x80.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80())
  113  __m512 vr2 = _mm512_rcp14_ps(vd2);   [local]
  119  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  125  __m512 vf2 = _mm512_mul_ps(ve2, vr2);

avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x80.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80())
  122  __m512 vr2 = _mm512_rcp14_ps(vd2);   [local]
  128  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  134  __m512 vf2 = _mm512_mul_ps(ve2, vr2);

avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x80.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80())
  116  __m512 vr2 = _mm512_rcp14_ps(vd2);   [local]
  122  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  128  __m512 vf2 = _mm512_mul_ps(ve2, vr2);

avx-rr2-p5-nr2-x40.c  (in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40())
  140  __m256 vr2 = _mm256_rcp_ps(vd2);   [local]
  148  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
  149  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
  157  __m256 vf2 = _mm256_mul_ps(ve2, vr2);

avx2-rr1-p5-nr2fma-x80.c  (in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80())
  187  __m256 vr2 = _mm256_rcp_ps(vd2);   [local]
  198  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  209  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  220  __m256 vf2 = _mm256_mul_ps(ve2, vr2);

avx512f-rr1-p5-scalef-nr1fma-x96.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96())
  125  __m512 vr2 = _mm512_rcp14_ps(vd2);   [local]
  132  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  139  __m512 vf2 = _mm512_mul_ps(ve2, vr2);

avx2-rr1-p5-nr1fma-x40.c  (in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40())
  122  __m256 vr2 = _mm256_rcp_ps(vd2);   [local]
  128  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  135  __m256 vf2 = _mm256_mul_ps(ve2, vr2);

avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x96.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96())
  128  __m512 vr2 = _mm512_rcp14_ps(vd2);   [local]
  135  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
  142  __m512 vf2 = _mm512_mul_ps(ve2, vr2);
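Note: every match above is the same reciprocal-refinement idiom. Each sigmoid ukernel has an exponential term ve2 and a denominator vd2 (formed from ve2 outside the matched lines), takes a coarse reciprocal estimate with _mm256_rcp_ps or _mm512_rcp14_ps, sharpens it with one (nr1fma) or two (nr2fma, nr2) Newton-Raphson steps, and finally forms vf2 = ve2 * vr2. The FMA form _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2) evaluates r + r*(1 - r*d) = r*(2 - r*d), the same update the plain-AVX nr2 files spell with _mm256_mul_ps/_mm256_sub_ps because that target cannot assume FMA. The step count tracks the estimate accuracy: _mm256_rcp_ps is roughly 12-bit accurate, while _mm512_rcp14_ps starts from about 14 bits, so one FMA step is enough for the AVX512F kernels. Below is a minimal scalar sketch of one lane; the helper names (rcp_estimate, sigmoid_via_nr) are illustrative only and not part of XNNPACK, and the assumption vd2 = ve2 + 1 is not shown in the matches themselves.

#include <math.h>
#include <stdio.h>

/* Stand-in for _mm256_rcp_ps / _mm512_rcp14_ps: a deliberately coarse
 * reciprocal estimate (the real instructions are ~12- / ~14-bit accurate). */
static float rcp_estimate(float d) {
  return (1.0f / d) * (1.0f + 0x1.0p-12f);
}

/* One lane of the kernels above: refine r ~= 1/d with nr_steps
 * Newton-Raphson iterations, then multiply by the exponential term. */
static float sigmoid_via_nr(float x, int nr_steps) {
  const float e = expf(x);     /* plays the role of ve2 */
  const float d = e + 1.0f;    /* assumed form of vd2 (not shown in the matches) */
  float r = rcp_estimate(d);   /* vr2 = _mm256_rcp_ps(vd2) */
  for (int i = 0; i < nr_steps; i++) {
    /* FMA form: fmadd(fnmadd(r, d, 1), r, r) = r + r*(1 - r*d) = r*(2 - r*d) */
    r = (1.0f - r * d) * r + r;
  }
  return e * r;                /* vf2 = _mm256_mul_ps(ve2, vr2) */
}

int main(void) {
  const float x = 0.5f;
  printf("nr1: %.9f\n", sigmoid_via_nr(x, 1));      /* ~ the nr1fma kernels */
  printf("nr2: %.9f\n", sigmoid_via_nr(x, 2));      /* ~ the nr2fma / nr2 kernels */
  printf("ref: %.9f\n", 1.0f / (1.0f + expf(-x)));  /* libm reference */
  return 0;
}

Compiled with, e.g., cc sketch.c -lm, the nr2 result should agree with the libm reference to roughly single-precision accuracy, with nr1 only slightly looser, which is consistent with XNNPACK shipping both variants.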
