/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx512f-p5-scalef-x192-acc6.c | 129 __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local 142 vp11 = _mm512_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 155 vp11 = _mm512_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 168 vp11 = _mm512_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 181 vp11 = _mm512_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 197 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
|
D | avx512f-p5-scalef-x192-acc2.c | 125 __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local 138 vp11 = _mm512_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 151 vp11 = _mm512_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 164 vp11 = _mm512_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 177 vp11 = _mm512_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 193 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x192-acc3.c | 126 __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local 139 vp11 = _mm512_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 152 vp11 = _mm512_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 165 vp11 = _mm512_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 178 vp11 = _mm512_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 194 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx512f-p5-scalef-x192.c | 124 __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() local 137 vp11 = _mm512_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 150 vp11 = _mm512_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 163 vp11 = _mm512_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 176 vp11 = _mm512_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 192 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
D | avx2-p5-x96-acc2.c | 157 __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() local 170 vp11 = _mm256_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 183 vp11 = _mm256_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 196 vp11 = _mm256_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 226 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
|
D | avx2-p5-x96-acc6.c | 161 __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() local 174 vp11 = _mm256_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 187 vp11 = _mm256_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 200 vp11 = _mm256_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 230 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
|
D | avx2-p5-x96.c | 156 __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() local 169 vp11 = _mm256_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 182 vp11 = _mm256_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 195 vp11 = _mm256_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 225 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx512f-p5-scalef-x192-acc3.c | 127 __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local 140 vp11 = _mm512_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 153 vp11 = _mm512_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 166 vp11 = _mm512_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 179 vp11 = _mm512_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 195 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx512f-p5-scalef-x192.c | 125 __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() local 138 vp11 = _mm512_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 151 vp11 = _mm512_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 164 vp11 = _mm512_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 177 vp11 = _mm512_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 193 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc2.c | 126 __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local 139 vp11 = _mm512_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 152 vp11 = _mm512_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 165 vp11 = _mm512_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 178 vp11 = _mm512_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 194 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x192-acc6.c | 130 __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local 143 vp11 = _mm512_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 156 vp11 = _mm512_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 169 vp11 = _mm512_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 182 vp11 = _mm512_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 198 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
|
D | avx2-p5-x96-acc2.c | 158 __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() local 171 vp11 = _mm256_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() 184 vp11 = _mm256_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() 197 vp11 = _mm256_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() 227 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
|
D | avx2-p5-x96.c | 157 __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() local 170 vp11 = _mm256_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() 183 vp11 = _mm256_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() 196 vp11 = _mm256_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() 226 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x192.c | 112 __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() local 125 vp11 = _mm512_fmadd_ps(vp11, vt11, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 138 vp11 = _mm512_fmadd_ps(vp11, vt11, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 151 vp11 = _mm512_fmadd_ps(vp11, vt11, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 164 vp11 = _mm512_fmadd_ps(vp11, vt11, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 183 __m512 vf11 = _mm512_mul_ps(vp11, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
|
D | avx2-p5-x96.c | 118 __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() local 131 vp11 = _mm256_fmadd_ps(vp11, vt11, vc3); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 144 vp11 = _mm256_fmadd_ps(vp11, vt11, vc2); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 157 vp11 = _mm256_fmadd_ps(vp11, vt11, vc1); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 170 vp11 = _mm256_fmadd_ps(vp11, vt11, vc0); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 189 __m256 vf11 = _mm256_mul_ps(vp11, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx512f-p5-scalef-x192.c | 125 __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() local 138 vp11 = _mm512_fmadd_ps(vp11, vt11, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 151 vp11 = _mm512_fmadd_ps(vp11, vt11, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 164 vp11 = _mm512_fmadd_ps(vp11, vt11, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 177 vp11 = _mm512_fmadd_ps(vp11, vt11, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 193 __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
D | avx2-p5-x96.c | 158 __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() local 171 vp11 = _mm256_fmadd_ps(vp11, vt11, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 184 vp11 = _mm256_fmadd_ps(vp11, vt11, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 197 vp11 = _mm256_fmadd_ps(vp11, vt11, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 227 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
|
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x192-acc2.c | 114 __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() local 127 vp11 = _mm512_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 140 vp11 = _mm512_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 153 vp11 = _mm512_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 166 vp11 = _mm512_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 220 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp11, vdelta_e11)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x192.c | 112 __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() local 125 vp11 = _mm512_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 138 vp11 = _mm512_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 151 vp11 = _mm512_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 164 vp11 = _mm512_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 216 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp11, vdelta_e11)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc3.c | 116 __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() local 129 vp11 = _mm512_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 142 vp11 = _mm512_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 155 vp11 = _mm512_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 168 vp11 = _mm512_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 224 vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp11, vdelta_e11)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx512f-p5-scalef-x192-acc6.c | 122 __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() local 135 vp11 = _mm512_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 148 vp11 = _mm512_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 161 vp11 = _mm512_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 174 vp11 = _mm512_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 236 vaccv5 = _mm512_add_ps(vaccv5, _mm512_scalef_ps(vp11, vdelta_e11)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
|
D | avx2-p5-x96.c | 116 __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() local 129 vp11 = _mm256_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 142 vp11 = _mm256_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 155 vp11 = _mm256_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 168 vp11 = _mm256_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 243 vaccv0 = _mm256_fmadd_ps(vp11, vs11, vaccv0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
|
D | avx2-p5-x96-acc3.c | 120 __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() local 133 vp11 = _mm256_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 146 vp11 = _mm256_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 159 vp11 = _mm256_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 172 vp11 = _mm256_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 253 vaccv2 = _mm256_fmadd_ps(vp11, vs11, vaccv2); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
|
D | avx2-p5-x96-acc2.c | 118 __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() local 131 vp11 = _mm256_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 144 vp11 = _mm256_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 157 vp11 = _mm256_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 170 vp11 = _mm256_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 248 vaccv1 = _mm256_fmadd_ps(vp11, vs11, vaccv1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
|
D | avx2-p5-x96-acc6.c | 126 __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() local 139 vp11 = _mm256_fmadd_ps(vp11, vt11, vc3); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 152 vp11 = _mm256_fmadd_ps(vp11, vt11, vc2); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 165 vp11 = _mm256_fmadd_ps(vp11, vt11, vc1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 178 vp11 = _mm256_fmadd_ps(vp11, vt11, vc0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 268 vaccv5 = _mm256_fmadd_ps(vp11, vs11, vaccv5); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
|