/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x128.c | 150 const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128() local 161 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
|
D | avx512f-p5-scalef-x128-acc2.c | 153 const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() local 165 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
|
D | avx512f-p5-scalef-x144.c | 160 const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144() local 172 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
|
D | avx512f-p5-scalef-x128-acc4.c | 159 const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4() local 173 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4()
|
D | avx512f-p5-scalef-x160-acc2.c | 173 const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2() local 187 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
|
D | avx512f-p5-scalef-x160.c | 170 const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160() local 183 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160()
|
D | avx512f-p5-scalef-x144-acc3.c | 166 const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3() local 180 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
|
D | avx2-p5-x64.c | 158 const __m256 vdelta_e0 = _mm256_max_ps(_mm256_sub_ps(vn0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64() local 173 …_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_e0, vmagic_bias)), … in xnn_f32_raddextexp_ukernel__avx2_p5_x64()
|
D | avx512f-p5-scalef-x192-acc2.c | 193 const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() local 209 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx2-p5-x72.c | 168 const __m256 vdelta_e0 = _mm256_max_ps(_mm256_sub_ps(vn0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72() local 184 …_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_e0, vmagic_bias)), … in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
|
D | avx512f-p5-scalef-x160-acc5.c | 182 const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() local 199 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
|
D | avx2-p5-x64-acc2.c | 161 const __m256 vdelta_e0 = _mm256_max_ps(_mm256_sub_ps(vn0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2() local 177 …_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_e0, vmagic_bias)), … in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
|
D | avx512f-p5-scalef-x192.c | 190 const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() local 205 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
|
D | avx2-p5-x64-acc4.c | 167 const __m256 vdelta_e0 = _mm256_max_ps(_mm256_sub_ps(vn0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() local 185 …_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_e0, vmagic_bias)), … in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
|
D | avx512f-p5-scalef-x192-acc3.c | 196 const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() local 213 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx2-p5-x80-acc2.c | 181 const __m256 vdelta_e0 = _mm256_max_ps(_mm256_sub_ps(vn0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() local 199 …_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_e0, vmagic_bias)), … in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
|
D | avx512f-p5-scalef-x192-acc6.c | 205 const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() local 225 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
|
D | avx2-p5-x80.c | 178 const __m256 vdelta_e0 = _mm256_max_ps(_mm256_sub_ps(vn0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80() local 195 …_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_e0, vmagic_bias)), … in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
|
D | avx2-p5-x72-acc3.c | 174 const __m256 vdelta_e0 = _mm256_max_ps(_mm256_sub_ps(vn0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() local 192 …_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_e0, vmagic_bias)), … in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
|
D | avx2-p5-x96.c | 198 const __m256 vdelta_e0 = _mm256_max_ps(_mm256_sub_ps(vn0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() local 217 …_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_e0, vmagic_bias)), … in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
|
D | avx2-p5-x96-acc3.c | 204 const __m256 vdelta_e0 = _mm256_max_ps(_mm256_sub_ps(vn0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() local 225 …_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_e0, vmagic_bias)), … in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
|
D | avx2-p5-x96-acc2.c | 201 const __m256 vdelta_e0 = _mm256_max_ps(_mm256_sub_ps(vn0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() local 221 …_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_e0, vmagic_bias)), … in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
|
D | avx2-p5-x80-acc5.c | 190 const __m256 vdelta_e0 = _mm256_max_ps(_mm256_sub_ps(vn0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() local 211 …_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_e0, vmagic_bias)), … in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
|
D | avx2-p5-x96-acc6.c | 213 const __m256 vdelta_e0 = _mm256_max_ps(_mm256_sub_ps(vn0, vmax_e0), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() local 237 …_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(vdelta_e0, vmagic_bias)), … in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
|