/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx2-rr1-p5-nr2fma-x64.c | 157 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() local 166 __m256 vr7 = _mm256_rcp_ps(vd7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() 175 vr7 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr7, vd7, vone), vr7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() 184 vr7 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr7, vd7, vone), vr7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
|
D | avx2-rr1-p5-nr2fma-x72.c | 169 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() local 179 __m256 vr7 = _mm256_rcp_ps(vd7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 189 vr7 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr7, vd7, vone), vr7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 199 vr7 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr7, vd7, vone), vr7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
|
D | avx2-rr1-p5-nr2fma-x80.c | 181 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() local 192 __m256 vr7 = _mm256_rcp_ps(vd7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() 203 vr7 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr7, vd7, vone), vr7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() 214 vr7 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr7, vd7, vone), vr7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
|
D | avx512f-rr1-p5-scalef-nr1fma-x128.c | 145 const __m512 vd7 = _mm512_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() local 154 __m512 vr7 = _mm512_rcp14_ps(vd7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() 163 vr7 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr7, vd7, vone), vr7, vr7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128()
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x128.c | 148 const __m512 vd7 = _mm512_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() local 157 __m512 vr7 = _mm512_rcp14_ps(vd7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 166 vr7 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr7, vd7, vone), vr7, vr7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c | 154 const __m512 vd7 = _mm512_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() local 163 __m512 vr7 = _mm512_rcp14_ps(vd7); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 172 vr7 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr7, vd7, vone), vr7, vr7); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
|
D | avx-rr2-p5-nr2-x64.c | 184 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64() local 193 __m256 vr7 = _mm256_rcp_ps(vd7); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64() 209 vr7 = _mm256_mul_ps(vr7, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr7, vd7))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64() 210 vr7 = _mm256_mul_ps(vr7, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr7, vd7))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
|
D | avx2-rr1-p5-nr1fma-x64.c | 157 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() local 166 __m256 vr7 = _mm256_rcp_ps(vd7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() 175 vr7 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr7, vd7, vone), vr7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
|
D | avx-rr2-p5-nr2-x72.c | 199 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72() local 209 __m256 vr7 = _mm256_rcp_ps(vd7); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72() 226 vr7 = _mm256_mul_ps(vr7, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr7, vd7))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72() 227 vr7 = _mm256_mul_ps(vr7, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr7, vd7))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
|
D | avx2-rr1-p5-nr1fma-x72.c | 169 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() local 179 __m256 vr7 = _mm256_rcp_ps(vd7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() 189 vr7 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr7, vd7, vone), vr7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
|
D | avx2-rr1-p5-nr1fma-x80.c | 181 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() local 192 __m256 vr7 = _mm256_rcp_ps(vd7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() 203 vr7 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr7, vd7, vone), vr7, vr7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
|
D | avx-rr2-p5-nr2-x80.c | 214 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80() local 225 __m256 vr7 = _mm256_rcp_ps(vd7); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80() 243 vr7 = _mm256_mul_ps(vr7, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr7, vd7))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80() 244 vr7 = _mm256_mul_ps(vr7, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr7, vd7))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
|
D | avx512f-rr1-p5-scalef-div-x128.c | 145 const __m512 vd7 = _mm512_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() local 154 __m512 vf7 = _mm512_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x128.c | 148 const __m512 vd7 = _mm512_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() local 157 __m512 vf7 = _mm512_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x128.c | 154 const __m512 vd7 = _mm512_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() local 163 __m512 vf7 = _mm512_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
|
D | avx2-rr1-p5-div-x64.c | 157 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64() local 166 __m256 vf7 = _mm256_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
|
D | avx2-rr1-p5-div-x72.c | 169 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72() local 179 __m256 vf7 = _mm256_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
|
D | avx2-rr1-p5-div-x80.c | 181 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() local 192 __m256 vf7 = _mm256_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
|
D | avx-rr2-p5-div-x64.c | 183 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64() local 192 __m256 vf7 = _mm256_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
|
D | avx-rr2-p5-div-x72.c | 198 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72() local 208 __m256 vf7 = _mm256_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
|
D | avx-rr2-p5-div-x80.c | 213 const __m256 vd7 = _mm256_add_ps(ve7, vone); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80() local 224 __m256 vf7 = _mm256_div_ps(ve7, vd7); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
|