Home
last modified time | relevance | path

Searched refs:vr0 (Results 1 – 25 of 56) sorted by relevance

123

/external/XNNPACK/src/f32-sigmoid/gen/
Davx2-rr1-p5-nr2fma-x16.c81 __m256 vr0 = _mm256_rcp_ps(vd0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16() local
84 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16()
87 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16()
90 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16()
Davx2-rr1-p5-nr2fma-x24.c94 __m256 vr0 = _mm256_rcp_ps(vd0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24() local
98 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24()
102 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24()
106 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24()
Davx2-rr1-p5-nr2fma-x32.c107 __m256 vr0 = _mm256_rcp_ps(vd0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32() local
112 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
117 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
122 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
Davx2-rr1-p5-nr2fma-x40.c120 __m256 vr0 = _mm256_rcp_ps(vd0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40() local
126 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
132 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
138 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
Davx-rr2-p5-nr2-x16.c90 __m256 vr0 = _mm256_rcp_ps(vd0); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16() local
93 vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16()
94 vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16()
98 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16()
Davx512f-rr1-p5-scalef-nr1fma-x32.c75 __m512 vr0 = _mm512_rcp14_ps(vd0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32() local
78 vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32()
81 __m512 vf0 = _mm512_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x32.c84 __m512 vr0 = _mm512_rcp14_ps(vd0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32() local
87 vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32()
90 __m512 vf0 = _mm512_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x32.c78 __m512 vr0 = _mm512_rcp14_ps(vd0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32() local
81 vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32()
84 __m512 vf0 = _mm512_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32()
Davx2-rr1-p5-nr2fma-x48.c133 __m256 vr0 = _mm256_rcp_ps(vd0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48() local
140 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
147 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
154 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
Davx2-rr1-p5-nr1fma-x16.c81 __m256 vr0 = _mm256_rcp_ps(vd0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16() local
84 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16()
88 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16()
Davx-rr2-p5-nr2-x24.c106 __m256 vr0 = _mm256_rcp_ps(vd0); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24() local
110 vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24()
111 vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24()
117 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24()
Davx512f-rr1-p5-scalef-nr1fma-x48.c87 __m512 vr0 = _mm512_rcp14_ps(vd0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48() local
91 vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48()
95 __m512 vf0 = _mm512_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x48.c90 __m512 vr0 = _mm512_rcp14_ps(vd0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48() local
94 vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48()
98 __m512 vf0 = _mm512_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x48.c96 __m512 vr0 = _mm512_rcp14_ps(vd0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48() local
100 vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48()
104 __m512 vf0 = _mm512_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48()
Davx2-rr1-p5-nr2fma-x56.c146 __m256 vr0 = _mm256_rcp_ps(vd0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56() local
154 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
162 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
170 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
Davx512f-rr1-p5-scalef-nr1fma-x64.c99 __m512 vr0 = _mm512_rcp14_ps(vd0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64() local
104 vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64()
109 __m512 vf0 = _mm512_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64()
Davx2-rr1-p5-nr1fma-x24.c94 __m256 vr0 = _mm256_rcp_ps(vd0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24() local
98 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24()
103 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24()
Davx-rr2-p5-nr2-x32.c122 __m256 vr0 = _mm256_rcp_ps(vd0); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32() local
127 vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32()
128 vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32()
136 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x64.c108 __m512 vr0 = _mm512_rcp14_ps(vd0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64() local
113 vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64()
118 __m512 vf0 = _mm512_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x64.c102 __m512 vr0 = _mm512_rcp14_ps(vd0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64() local
107 vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64()
112 __m512 vf0 = _mm512_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64()
Davx2-rr1-p5-nr2fma-x64.c159 __m256 vr0 = _mm256_rcp_ps(vd0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() local
168 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
177 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
186 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
Davx2-rr1-p5-nr2fma-x72.c172 __m256 vr0 = _mm256_rcp_ps(vd0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() local
182 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
192 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
202 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
Davx2-rr1-p5-nr1fma-x32.c107 __m256 vr0 = _mm256_rcp_ps(vd0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32() local
112 vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32()
118 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32()
Davx512f-rr1-p5-scalef-nr1fma-x80.c111 __m512 vr0 = _mm512_rcp14_ps(vd0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80() local
117 vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80()
123 __m512 vf0 = _mm512_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x80.c120 __m512 vr0 = _mm512_rcp14_ps(vd0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() local
126 vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
132 __m512 vf0 = _mm512_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()

123