/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx512f-rr1-p5-scalef-div-x128.c | 48 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 49 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 50 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 51 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 52 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 53 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 54 const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 55 const __m512 vz7 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx7), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 179 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 210 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
|
D | avx512f-rr1-p5-scalef-div-x112.c | 47 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112() 48 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112() 49 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112() 50 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112() 51 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112() 52 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112() 53 const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112() 164 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112() 195 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112()
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x112.c | 50 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 51 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 52 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 53 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 54 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 55 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 56 const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 167 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 199 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x128.c | 57 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 58 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 59 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 60 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 61 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 62 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 63 const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 64 const __m512 vz7 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx7), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 188 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 220 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x128.c | 51 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 52 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 53 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 54 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 55 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 56 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 57 const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 58 const __m512 vz7 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx7), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 182 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 214 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x96.c | 55 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 56 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 57 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 58 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 59 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 60 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 158 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 190 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96()
|
D | avx512f-rr1-p5-scalef-div-x96.c | 46 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96() 47 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96() 48 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96() 49 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96() 50 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96() 51 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96() 149 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96() 180 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96()
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x96.c | 49 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 50 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 51 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 52 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 53 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 54 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 152 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 184 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x112.c | 56 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 57 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 58 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 59 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 60 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 61 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 62 const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 173 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 205 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x128.c | 51 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 52 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 53 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 54 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 55 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 56 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 57 const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 58 const __m512 vz7 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx7), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 201 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 236 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
|
D | avx512f-rr1-p5-scalef-nr1fma-x128.c | 48 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() 49 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() 50 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() 51 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() 52 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() 53 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() 54 const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() 55 const __m512 vz7 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx7), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() 198 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() 232 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x80.c | 54 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 55 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 56 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 57 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 58 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 143 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 175 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80()
|
D | avx512f-rr1-p5-scalef-div-x80.c | 45 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80() 46 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80() 47 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80() 48 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80() 49 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80() 134 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80() 165 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80()
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x80.c | 48 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80() 49 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80() 50 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80() 51 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80() 52 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80() 137 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80() 169 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80()
|
D | avx512f-rr1-p5-scalef-nr1fma-x96.c | 46 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96() 47 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96() 48 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96() 49 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96() 50 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96() 51 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96() 164 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96() 198 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96()
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c | 50 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 51 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 52 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 53 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 54 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 55 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 56 const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 184 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 219 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c | 56 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 57 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 58 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 59 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 60 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 61 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 62 const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 190 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 225 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
|
D | avx512f-rr1-p5-scalef-nr1fma-x112.c | 47 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112() 48 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112() 49 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112() 50 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112() 51 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112() 52 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112() 53 const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112() 181 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112() 215 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c | 57 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 58 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 59 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 60 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 61 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 62 const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 63 const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 64 const __m512 vz7 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx7), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 207 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 242 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
|
D | avx512f-rr1-p5-scalef-div-x64.c | 44 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64() 45 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64() 46 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64() 47 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64() 119 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64() 150 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64()
|
D | avx512f-rr1-p5-scalef-nr1fma-x64.c | 44 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64() 45 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64() 46 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64() 47 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64() 130 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64() 164 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64()
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x64.c | 47 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64() 48 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64() 49 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64() 50 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64() 122 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64() 154 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x64.c | 53 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64() 54 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64() 55 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64() 56 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64() 128 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64() 160 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64()
|
D | avx512f-rr1-p5-scalef-nr1fma-x80.c | 45 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80() 46 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80() 47 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80() 48 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80() 49 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80() 147 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80() 181 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x80.c | 54 const __m512 vz0 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx0), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 55 const __m512 vz1 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx1), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 56 const __m512 vz2 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx2), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 57 const __m512 vz3 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx3), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 58 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 156 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 191 const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
|