Home
last modified time | relevance | path

Searched refs: vf9 (Results 1 – 25 of 46) sorted by relevance

1 2

/external/XNNPACK/src/f32-sigmoid/gen/
Davx2-rr1-p5-div-x80.c194 __m256 vf9 = _mm256_div_ps(ve9, vd9); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() local
205 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vz9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
216 vf9 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf9), vf9, vx9); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
227 _mm256_storeu_ps(y + 72, vf9); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
Davx2-rr1-p5-nr1fma-x80.c217 __m256 vf9 = _mm256_mul_ps(ve9, vr9); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() local
228 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vz9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
239 vf9 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf9), vf9, vx9); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
250 _mm256_storeu_ps(y + 72, vf9); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
Davx2-rr1-p5-nr2fma-x80.c227 __m256 vf9 = _mm256_mul_ps(ve9, vr9); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() local
238 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vz9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
249 vf9 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf9), vf9, vx9); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
260 _mm256_storeu_ps(y + 72, vf9); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
Davx-rr2-p5-div-x80.c226 __m256 vf9 = _mm256_div_ps(ve9, vd9); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80() local
237 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vz9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
248 vf9 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf9), vf9, vx9); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
259 _mm256_storeu_ps(y + 72, vf9); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
Davx-rr2-p5-nr2-x80.c259 __m256 vf9 = _mm256_mul_ps(ve9, vr9); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80() local
270 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vz9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
281 vf9 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf9), vf9, vx9); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
292 _mm256_storeu_ps(y + 72, vf9); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
Davx2-p5-x80.c201 __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80() local
214 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
226 vf9 = _mm256_mul_ps(vf9, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
238 _mm256_storeu_ps(output + 72, vf9); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
Davx2-p5-x88.c213 __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() local
227 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
240 vf9 = _mm256_mul_ps(vf9, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
253 _mm256_storeu_ps(output + 72, vf9); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
Davx2-p5-x96.c225 __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() local
240 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
254 vf9 = _mm256_mul_ps(vf9, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
268 _mm256_storeu_ps(output + 72, vf9); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
Davx512f-p5-scalef-x160.c171 __m512 vf9 = _mm512_scalef_ps(vp9, vn9); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() local
183 vf9 = _mm512_mul_ps(vf9, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160()
196 _mm512_storeu_ps(output + 144, vf9); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160()
Davx512f-p5-scalef-x176.c181 __m512 vf9 = _mm512_scalef_ps(vp9, vn9); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() local
194 vf9 = _mm512_mul_ps(vf9, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176()
208 _mm512_storeu_ps(output + 144, vf9); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176()
Davx512f-p5-scalef-x192.c191 __m512 vf9 = _mm512_scalef_ps(vp9, vn9); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() local
205 vf9 = _mm512_mul_ps(vf9, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
220 _mm512_storeu_ps(output + 144, vf9); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Davx2-p5-x80-acc5.c204 __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5() local
217 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5()
229 _mm256_storeu_ps(output + 72, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5()
242 vacc4 = _mm256_add_ps(vacc4, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5()
Davx2-p5-x80.c200 __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80() local
213 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80()
225 _mm256_storeu_ps(output + 72, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80()
238 vacc0 = _mm256_add_ps(vacc0, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80()
Davx2-p5-x80-acc2.c201 __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2() local
214 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2()
226 _mm256_storeu_ps(output + 72, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2()
239 vacc1 = _mm256_add_ps(vacc1, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2()
Davx2-p5-x96-acc2.c225 __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() local
240 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
254 _mm256_storeu_ps(output + 72, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
269 vacc1 = _mm256_add_ps(vacc1, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
Davx2-p5-x96.c224 __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() local
239 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
253 _mm256_storeu_ps(output + 72, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
268 vacc0 = _mm256_add_ps(vacc0, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
Davx2-p5-x96-acc3.c226 __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3() local
241 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
255 _mm256_storeu_ps(output + 72, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
270 vacc0 = _mm256_add_ps(vacc0, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
Davx2-p5-x96-acc6.c229 __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() local
244 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
258 _mm256_storeu_ps(output + 72, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
273 vacc3 = _mm256_add_ps(vacc3, vf9); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
/external/XNNPACK/src/f32-vscaleextexp/gen/
Davx512f-p5-scalef-x160.c163 __m512 vf9 = _mm512_mul_ps(vp9, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() local
186 vf9 = _mm512_scalef_ps(vf9, ve9); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160()
199 _mm512_storeu_ps(y + 144, vf9); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160()
Davx512f-p5-scalef-x176.c172 __m512 vf9 = _mm512_mul_ps(vp9, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() local
197 vf9 = _mm512_scalef_ps(vf9, ve9); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
211 _mm512_storeu_ps(y + 144, vf9); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
Davx512f-p5-scalef-x192.c181 __m512 vf9 = _mm512_mul_ps(vp9, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() local
208 vf9 = _mm512_scalef_ps(vf9, ve9); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
223 _mm512_storeu_ps(y + 144, vf9); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
Davx2-p5-x80.c169 __m256 vf9 = _mm256_mul_ps(vp9, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() local
221 vf9 = _mm256_mul_ps(vf9, vs9); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80()
233 _mm256_storeu_ps(y + 72, vf9); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80()
Davx2-p5-x88.c178 __m256 vf9 = _mm256_mul_ps(vp9, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() local
234 vf9 = _mm256_mul_ps(vf9, vs9); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
247 _mm256_storeu_ps(y + 72, vf9); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
/external/XNNPACK/src/f32-raddexpminusmax/gen/
Davx2-p5-x80-acc2.c200 __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2() local
213 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
225 vacc1 = _mm256_add_ps(vacc1, vf9); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
Davx2-p5-x80.c199 __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80() local
212 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
224 vacc0 = _mm256_add_ps(vacc0, vf9); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()

1 2