/external/XNNPACK/src/f32-raddextexp/gen/

avx512f-p5-scalef-x192-acc6.c (matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6):
  199  const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0);
  200  const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1);
  201  const __m512 vdelta_acce2 = _mm512_sub_ps(vacce2, vmax_e2);
  202  const __m512 vdelta_acce3 = _mm512_sub_ps(vacce3, vmax_e3);
  203  const __m512 vdelta_acce4 = _mm512_sub_ps(vacce4, vmax_e4);
  204  const __m512 vdelta_acce5 = _mm512_sub_ps(vacce5, vmax_e5);
  205  const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0);
  206  const __m512 vdelta_e1 = _mm512_sub_ps(vn1, vmax_e1);
  207  const __m512 vdelta_e2 = _mm512_sub_ps(vn2, vmax_e2);
  208  const __m512 vdelta_e3 = _mm512_sub_ps(vn3, vmax_e3);
  [all …]

avx512f-p5-scalef-x160-acc5.c (matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5):
  177  const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0);
  178  const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1);
  179  const __m512 vdelta_acce2 = _mm512_sub_ps(vacce2, vmax_e2);
  180  const __m512 vdelta_acce3 = _mm512_sub_ps(vacce3, vmax_e3);
  181  const __m512 vdelta_acce4 = _mm512_sub_ps(vacce4, vmax_e4);
  182  const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0);
  183  const __m512 vdelta_e1 = _mm512_sub_ps(vn1, vmax_e1);
  184  const __m512 vdelta_e2 = _mm512_sub_ps(vn2, vmax_e2);
  185  const __m512 vdelta_e3 = _mm512_sub_ps(vn3, vmax_e3);
  186  const __m512 vdelta_e4 = _mm512_sub_ps(vn4, vmax_e4);
  [all …]

avx512f-p5-scalef-x128-acc4.c (matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4):
  155  const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0);
  156  const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1);
  157  const __m512 vdelta_acce2 = _mm512_sub_ps(vacce2, vmax_e2);
  158  const __m512 vdelta_acce3 = _mm512_sub_ps(vacce3, vmax_e3);
  159  const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0);
  160  const __m512 vdelta_e1 = _mm512_sub_ps(vn1, vmax_e1);
  161  const __m512 vdelta_e2 = _mm512_sub_ps(vn2, vmax_e2);
  162  const __m512 vdelta_e3 = _mm512_sub_ps(vn3, vmax_e3);
  163  const __m512 vdelta_e4 = _mm512_sub_ps(vn4, vmax_e0);
  164  const __m512 vdelta_e5 = _mm512_sub_ps(vn5, vmax_e1);
  [all …]

avx512f-p5-scalef-x144-acc3.c (matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3):
  163  const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0);
  164  const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1);
  165  const __m512 vdelta_acce2 = _mm512_sub_ps(vacce2, vmax_e2);
  166  const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0);
  167  const __m512 vdelta_e1 = _mm512_sub_ps(vn1, vmax_e1);
  168  const __m512 vdelta_e2 = _mm512_sub_ps(vn2, vmax_e2);
  169  const __m512 vdelta_e3 = _mm512_sub_ps(vn3, vmax_e0);
  170  const __m512 vdelta_e4 = _mm512_sub_ps(vn4, vmax_e1);
  171  const __m512 vdelta_e5 = _mm512_sub_ps(vn5, vmax_e2);
  172  const __m512 vdelta_e6 = _mm512_sub_ps(vn6, vmax_e0);
  [all …]

avx512f-p5-scalef-x192-acc3.c (matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3):
  193  const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0);
  194  const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1);
  195  const __m512 vdelta_acce2 = _mm512_sub_ps(vacce2, vmax_e2);
  196  const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0);
  197  const __m512 vdelta_e1 = _mm512_sub_ps(vn1, vmax_e1);
  198  const __m512 vdelta_e2 = _mm512_sub_ps(vn2, vmax_e2);
  199  const __m512 vdelta_e3 = _mm512_sub_ps(vn3, vmax_e0);
  200  const __m512 vdelta_e4 = _mm512_sub_ps(vn4, vmax_e1);
  201  const __m512 vdelta_e5 = _mm512_sub_ps(vn5, vmax_e2);
  202  const __m512 vdelta_e6 = _mm512_sub_ps(vn6, vmax_e0);
  [all …]

avx512f-p5-scalef-x128-acc2.c (matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2):
  151  const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0);
  152  const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1);
  153  const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0);
  154  const __m512 vdelta_e1 = _mm512_sub_ps(vn1, vmax_e1);
  155  const __m512 vdelta_e2 = _mm512_sub_ps(vn2, vmax_e0);
  156  const __m512 vdelta_e3 = _mm512_sub_ps(vn3, vmax_e1);
  157  const __m512 vdelta_e4 = _mm512_sub_ps(vn4, vmax_e0);
  158  const __m512 vdelta_e5 = _mm512_sub_ps(vn5, vmax_e1);
  159  const __m512 vdelta_e6 = _mm512_sub_ps(vn6, vmax_e0);
  160  const __m512 vdelta_e7 = _mm512_sub_ps(vn7, vmax_e1);
  [all …]

avx512f-p5-scalef-x160-acc2.c (matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2):
  171  const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0);
  172  const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1);
  173  const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0);
  174  const __m512 vdelta_e1 = _mm512_sub_ps(vn1, vmax_e1);
  175  const __m512 vdelta_e2 = _mm512_sub_ps(vn2, vmax_e0);
  176  const __m512 vdelta_e3 = _mm512_sub_ps(vn3, vmax_e1);
  177  const __m512 vdelta_e4 = _mm512_sub_ps(vn4, vmax_e0);
  178  const __m512 vdelta_e5 = _mm512_sub_ps(vn5, vmax_e1);
  179  const __m512 vdelta_e6 = _mm512_sub_ps(vn6, vmax_e0);
  180  const __m512 vdelta_e7 = _mm512_sub_ps(vn7, vmax_e1);
  [all …]

avx512f-p5-scalef-x192-acc2.c (matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2):
  191  const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0);
  192  const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1);
  193  const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0);
  194  const __m512 vdelta_e1 = _mm512_sub_ps(vn1, vmax_e1);
  195  const __m512 vdelta_e2 = _mm512_sub_ps(vn2, vmax_e0);
  196  const __m512 vdelta_e3 = _mm512_sub_ps(vn3, vmax_e1);
  197  const __m512 vdelta_e4 = _mm512_sub_ps(vn4, vmax_e0);
  198  const __m512 vdelta_e5 = _mm512_sub_ps(vn5, vmax_e1);
  199  const __m512 vdelta_e6 = _mm512_sub_ps(vn6, vmax_e0);
  200  const __m512 vdelta_e7 = _mm512_sub_ps(vn7, vmax_e1);
  [all …]

avx512f-p5-scalef-x192.c (matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192):
  189  const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0);
  190  const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0);
  191  const __m512 vdelta_e1 = _mm512_sub_ps(vn1, vmax_e0);
  192  const __m512 vdelta_e2 = _mm512_sub_ps(vn2, vmax_e0);
  193  const __m512 vdelta_e3 = _mm512_sub_ps(vn3, vmax_e0);
  194  const __m512 vdelta_e4 = _mm512_sub_ps(vn4, vmax_e0);
  195  const __m512 vdelta_e5 = _mm512_sub_ps(vn5, vmax_e0);
  196  const __m512 vdelta_e6 = _mm512_sub_ps(vn6, vmax_e0);
  197  const __m512 vdelta_e7 = _mm512_sub_ps(vn7, vmax_e0);
  198  const __m512 vdelta_e8 = _mm512_sub_ps(vn8, vmax_e0);
  [all …]

avx512f-p5-scalef-x160.c (matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160):
  169  const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0);
  170  const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0);
  171  const __m512 vdelta_e1 = _mm512_sub_ps(vn1, vmax_e0);
  172  const __m512 vdelta_e2 = _mm512_sub_ps(vn2, vmax_e0);
  173  const __m512 vdelta_e3 = _mm512_sub_ps(vn3, vmax_e0);
  174  const __m512 vdelta_e4 = _mm512_sub_ps(vn4, vmax_e0);
  175  const __m512 vdelta_e5 = _mm512_sub_ps(vn5, vmax_e0);
  176  const __m512 vdelta_e6 = _mm512_sub_ps(vn6, vmax_e0);
  177  const __m512 vdelta_e7 = _mm512_sub_ps(vn7, vmax_e0);
  178  const __m512 vdelta_e8 = _mm512_sub_ps(vn8, vmax_e0);
  [all …]

avx512f-p5-scalef-x144.c (matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144):
  159  const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0);
  160  const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0);
  161  const __m512 vdelta_e1 = _mm512_sub_ps(vn1, vmax_e0);
  162  const __m512 vdelta_e2 = _mm512_sub_ps(vn2, vmax_e0);
  163  const __m512 vdelta_e3 = _mm512_sub_ps(vn3, vmax_e0);
  164  const __m512 vdelta_e4 = _mm512_sub_ps(vn4, vmax_e0);
  165  const __m512 vdelta_e5 = _mm512_sub_ps(vn5, vmax_e0);
  166  const __m512 vdelta_e6 = _mm512_sub_ps(vn6, vmax_e0);
  167  const __m512 vdelta_e7 = _mm512_sub_ps(vn7, vmax_e0);
  168  const __m512 vdelta_e8 = _mm512_sub_ps(vn8, vmax_e0);
  [all …]

avx512f-p5-scalef-x128.c (matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128):
  149  const __m512 vdelta_acce0 = _mm512_sub_ps(vacce0, vmax_e0);
  150  const __m512 vdelta_e0 = _mm512_sub_ps(vn0, vmax_e0);
  151  const __m512 vdelta_e1 = _mm512_sub_ps(vn1, vmax_e0);
  152  const __m512 vdelta_e2 = _mm512_sub_ps(vn2, vmax_e0);
  153  const __m512 vdelta_e3 = _mm512_sub_ps(vn3, vmax_e0);
  154  const __m512 vdelta_e4 = _mm512_sub_ps(vn4, vmax_e0);
  155  const __m512 vdelta_e5 = _mm512_sub_ps(vn5, vmax_e0);
  156  const __m512 vdelta_e6 = _mm512_sub_ps(vn6, vmax_e0);
  157  const __m512 vdelta_e7 = _mm512_sub_ps(vn7, vmax_e0);
  199  const __m512 vdelta_acce = _mm512_sub_ps(vacce, vmax_e);
  [all …]

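The raddextexp kernels above keep each running sum of exponentials in an extended (mantissa, exponent) pair, and the matched _mm512_sub_ps calls compute exponent deltas against the running maximum exponent before both operands are rescaled with _mm512_scalef_ps and added. A minimal sketch of one such accumulation step follows; the helper name is hypothetical (not an XNNPACK entry point) and no claim is made that it matches the generated kernels line for line.

  #include <immintrin.h>

  // One accumulation step for a sum represented as vacc * 2**vacce.
  // vp is the new term's mantissa, vn its integer-valued exponent.
  static inline void raddextexp_step(__m512* vacc, __m512* vacce, __m512 vp, __m512 vn) {
    const __m512 vmax_e = _mm512_max_ps(*vacce, vn);           // running maximum exponent
    const __m512 vdelta_acce = _mm512_sub_ps(*vacce, vmax_e);  // <= 0 in every lane
    const __m512 vdelta_e = _mm512_sub_ps(vn, vmax_e);         // <= 0 in every lane
    // Rescale both mantissas to the common exponent, then add.
    *vacc = _mm512_add_ps(_mm512_scalef_ps(*vacc, vdelta_acce),
                          _mm512_scalef_ps(vp, vdelta_e));
    *vacce = vmax_e;
  }

Because both deltas are non-positive, the scalef calls only ever scale down, so the accumulated mantissa stays finite even when the true sum of exponentials would overflow a single-precision float.
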
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

avx512f-p5-scalef-x192.c (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192):
  58  const __m512 vx0 = _mm512_sub_ps(vi0, vi_max);
  59  const __m512 vx1 = _mm512_sub_ps(vi1, vi_max);
  60  const __m512 vx2 = _mm512_sub_ps(vi2, vi_max);
  61  const __m512 vx3 = _mm512_sub_ps(vi3, vi_max);
  62  const __m512 vx4 = _mm512_sub_ps(vi4, vi_max);
  63  const __m512 vx5 = _mm512_sub_ps(vi5, vi_max);
  64  const __m512 vx6 = _mm512_sub_ps(vi6, vi_max);
  65  const __m512 vx7 = _mm512_sub_ps(vi7, vi_max);
  66  const __m512 vx8 = _mm512_sub_ps(vi8, vi_max);
  67  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);
  [all …]

avx512f-p5-scalef-x192-acc2.c (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2):
  59  const __m512 vx0 = _mm512_sub_ps(vi0, vi_max);
  60  const __m512 vx1 = _mm512_sub_ps(vi1, vi_max);
  61  const __m512 vx2 = _mm512_sub_ps(vi2, vi_max);
  62  const __m512 vx3 = _mm512_sub_ps(vi3, vi_max);
  63  const __m512 vx4 = _mm512_sub_ps(vi4, vi_max);
  64  const __m512 vx5 = _mm512_sub_ps(vi5, vi_max);
  65  const __m512 vx6 = _mm512_sub_ps(vi6, vi_max);
  66  const __m512 vx7 = _mm512_sub_ps(vi7, vi_max);
  67  const __m512 vx8 = _mm512_sub_ps(vi8, vi_max);
  68  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);
  [all …]

avx512f-p5-scalef-x192-acc3.c (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3):
  60  const __m512 vx0 = _mm512_sub_ps(vi0, vi_max);
  61  const __m512 vx1 = _mm512_sub_ps(vi1, vi_max);
  62  const __m512 vx2 = _mm512_sub_ps(vi2, vi_max);
  63  const __m512 vx3 = _mm512_sub_ps(vi3, vi_max);
  64  const __m512 vx4 = _mm512_sub_ps(vi4, vi_max);
  65  const __m512 vx5 = _mm512_sub_ps(vi5, vi_max);
  66  const __m512 vx6 = _mm512_sub_ps(vi6, vi_max);
  67  const __m512 vx7 = _mm512_sub_ps(vi7, vi_max);
  68  const __m512 vx8 = _mm512_sub_ps(vi8, vi_max);
  69  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);
  [all …]

avx512f-p5-scalef-x160.c (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160):
  56  const __m512 vx0 = _mm512_sub_ps(vi0, vi_max);
  57  const __m512 vx1 = _mm512_sub_ps(vi1, vi_max);
  58  const __m512 vx2 = _mm512_sub_ps(vi2, vi_max);
  59  const __m512 vx3 = _mm512_sub_ps(vi3, vi_max);
  60  const __m512 vx4 = _mm512_sub_ps(vi4, vi_max);
  61  const __m512 vx5 = _mm512_sub_ps(vi5, vi_max);
  62  const __m512 vx6 = _mm512_sub_ps(vi6, vi_max);
  63  const __m512 vx7 = _mm512_sub_ps(vi7, vi_max);
  64  const __m512 vx8 = _mm512_sub_ps(vi8, vi_max);
  65  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);
  [all …]

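In these raddstoreexpminusmax kernels (and in the raddexpminusmax and vscaleexpminusmax kernels listed below), the matched vi - vi_max subtractions implement the usual numerically stable form of a sum of exponentials: sum_i exp(vi[i]) = exp(vi_max) * sum_i exp(vi[i] - vi_max), where vi_max is the row maximum, so every argument passed to exp() is non-positive and cannot overflow. The sketch below shows that step; the helper name is illustrative rather than XNNPACK API, and a scalar expf() stands in where the real kernels evaluate a degree-5 polynomial ("p5") combined with _mm512_scalef_ps.

  #include <immintrin.h>
  #include <math.h>

  // exp(vi - vi_max) for one 16-lane vector; vi_max holds the row maximum
  // broadcast to all lanes, so vx is non-positive in every lane.
  static inline __m512 exp_minus_max(__m512 vi, __m512 vi_max) {
    const __m512 vx = _mm512_sub_ps(vi, vi_max);
    float x[16], y[16];
    _mm512_storeu_ps(x, vx);
    for (int i = 0; i < 16; i++) {
      y[i] = expf(x[i]);  // placeholder for the vectorized p5 + scalef evaluation
    }
    return _mm512_loadu_ps(y);
  }
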
/external/XNNPACK/src/f32-raddexpminusmax/gen/

avx512f-p5-scalef-x192.c (matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192):
  57  const __m512 vx0 = _mm512_sub_ps(vi0, vi_max);
  58  const __m512 vx1 = _mm512_sub_ps(vi1, vi_max);
  59  const __m512 vx2 = _mm512_sub_ps(vi2, vi_max);
  60  const __m512 vx3 = _mm512_sub_ps(vi3, vi_max);
  61  const __m512 vx4 = _mm512_sub_ps(vi4, vi_max);
  62  const __m512 vx5 = _mm512_sub_ps(vi5, vi_max);
  63  const __m512 vx6 = _mm512_sub_ps(vi6, vi_max);
  64  const __m512 vx7 = _mm512_sub_ps(vi7, vi_max);
  65  const __m512 vx8 = _mm512_sub_ps(vi8, vi_max);
  66  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);
  [all …]

avx512f-p5-scalef-x192-acc3.c (matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3):
  59  const __m512 vx0 = _mm512_sub_ps(vi0, vi_max);
  60  const __m512 vx1 = _mm512_sub_ps(vi1, vi_max);
  61  const __m512 vx2 = _mm512_sub_ps(vi2, vi_max);
  62  const __m512 vx3 = _mm512_sub_ps(vi3, vi_max);
  63  const __m512 vx4 = _mm512_sub_ps(vi4, vi_max);
  64  const __m512 vx5 = _mm512_sub_ps(vi5, vi_max);
  65  const __m512 vx6 = _mm512_sub_ps(vi6, vi_max);
  66  const __m512 vx7 = _mm512_sub_ps(vi7, vi_max);
  67  const __m512 vx8 = _mm512_sub_ps(vi8, vi_max);
  68  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);
  [all …]

avx512f-p5-scalef-x192-acc6.c (matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6):
  62  const __m512 vx0 = _mm512_sub_ps(vi0, vi_max);
  63  const __m512 vx1 = _mm512_sub_ps(vi1, vi_max);
  64  const __m512 vx2 = _mm512_sub_ps(vi2, vi_max);
  65  const __m512 vx3 = _mm512_sub_ps(vi3, vi_max);
  66  const __m512 vx4 = _mm512_sub_ps(vi4, vi_max);
  67  const __m512 vx5 = _mm512_sub_ps(vi5, vi_max);
  68  const __m512 vx6 = _mm512_sub_ps(vi6, vi_max);
  69  const __m512 vx7 = _mm512_sub_ps(vi7, vi_max);
  70  const __m512 vx8 = _mm512_sub_ps(vi8, vi_max);
  71  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);
  [all …]

avx512f-p5-scalef-x192-acc2.c (matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2):
  58  const __m512 vx0 = _mm512_sub_ps(vi0, vi_max);
  59  const __m512 vx1 = _mm512_sub_ps(vi1, vi_max);
  60  const __m512 vx2 = _mm512_sub_ps(vi2, vi_max);
  61  const __m512 vx3 = _mm512_sub_ps(vi3, vi_max);
  62  const __m512 vx4 = _mm512_sub_ps(vi4, vi_max);
  63  const __m512 vx5 = _mm512_sub_ps(vi5, vi_max);
  64  const __m512 vx6 = _mm512_sub_ps(vi6, vi_max);
  65  const __m512 vx7 = _mm512_sub_ps(vi7, vi_max);
  66  const __m512 vx8 = _mm512_sub_ps(vi8, vi_max);
  67  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);
  [all …]

avx512f-p5-scalef-x160.c (matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160):
  55  const __m512 vx0 = _mm512_sub_ps(vi0, vi_max);
  56  const __m512 vx1 = _mm512_sub_ps(vi1, vi_max);
  57  const __m512 vx2 = _mm512_sub_ps(vi2, vi_max);
  58  const __m512 vx3 = _mm512_sub_ps(vi3, vi_max);
  59  const __m512 vx4 = _mm512_sub_ps(vi4, vi_max);
  60  const __m512 vx5 = _mm512_sub_ps(vi5, vi_max);
  61  const __m512 vx6 = _mm512_sub_ps(vi6, vi_max);
  62  const __m512 vx7 = _mm512_sub_ps(vi7, vi_max);
  63  const __m512 vx8 = _mm512_sub_ps(vi8, vi_max);
  64  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);
  [all …]

avx512f-p5-scalef-x160-acc2.c (matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2):
  56  const __m512 vx0 = _mm512_sub_ps(vi0, vi_max);
  57  const __m512 vx1 = _mm512_sub_ps(vi1, vi_max);
  58  const __m512 vx2 = _mm512_sub_ps(vi2, vi_max);
  59  const __m512 vx3 = _mm512_sub_ps(vi3, vi_max);
  60  const __m512 vx4 = _mm512_sub_ps(vi4, vi_max);
  61  const __m512 vx5 = _mm512_sub_ps(vi5, vi_max);
  62  const __m512 vx6 = _mm512_sub_ps(vi6, vi_max);
  63  const __m512 vx7 = _mm512_sub_ps(vi7, vi_max);
  64  const __m512 vx8 = _mm512_sub_ps(vi8, vi_max);
  65  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);
  [all …]

/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

avx512f-p5-scalef-x192.c (matches in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192):
  58  const __m512 vx0 = _mm512_sub_ps(vi0, vi_max);
  59  const __m512 vx1 = _mm512_sub_ps(vi1, vi_max);
  60  const __m512 vx2 = _mm512_sub_ps(vi2, vi_max);
  61  const __m512 vx3 = _mm512_sub_ps(vi3, vi_max);
  62  const __m512 vx4 = _mm512_sub_ps(vi4, vi_max);
  63  const __m512 vx5 = _mm512_sub_ps(vi5, vi_max);
  64  const __m512 vx6 = _mm512_sub_ps(vi6, vi_max);
  65  const __m512 vx7 = _mm512_sub_ps(vi7, vi_max);
  66  const __m512 vx8 = _mm512_sub_ps(vi8, vi_max);
  67  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);
  [all …]

avx512f-p5-scalef-x176.c (matches in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176):
  57  const __m512 vx0 = _mm512_sub_ps(vi0, vi_max);
  58  const __m512 vx1 = _mm512_sub_ps(vi1, vi_max);
  59  const __m512 vx2 = _mm512_sub_ps(vi2, vi_max);
  60  const __m512 vx3 = _mm512_sub_ps(vi3, vi_max);
  61  const __m512 vx4 = _mm512_sub_ps(vi4, vi_max);
  62  const __m512 vx5 = _mm512_sub_ps(vi5, vi_max);
  63  const __m512 vx6 = _mm512_sub_ps(vi6, vi_max);
  64  const __m512 vx7 = _mm512_sub_ps(vi7, vi_max);
  65  const __m512 vx8 = _mm512_sub_ps(vi8, vi_max);
  66  const __m512 vx9 = _mm512_sub_ps(vi9, vi_max);
  [all …]

/external/XNNPACK/src/f32-vbinary/gen/

vsqrdiffc-avx512f-x32.c (matches in xnn_f32_vsqrdiffc_ukernel__avx512f_x32):
  39  __m512 vy0123456789ABCDEF = _mm512_sub_ps(va0123456789ABCDEF, vb);
  40  __m512 vyGHIJKLMNOPQRSTUV = _mm512_sub_ps(vaGHIJKLMNOPQRSTUV, vb);
  54  __m512 vy = _mm512_sub_ps(va, vb);
  68  __m512 vy = _mm512_sub_ps(va, vb);

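For the vsqrdiffc ("squared difference with a broadcast constant") kernel, the matched subtractions are only the first half of each step; the difference is then squared. A minimal per-vector sketch of that operation, with an illustrative helper name rather than the actual XNNPACK entry point:

  #include <immintrin.h>

  // y = (a - b)^2 for one 16-lane vector; vb holds the broadcast operand.
  static inline __m512 sqrdiffc_step(__m512 va, __m512 vb) {
    const __m512 vy = _mm512_sub_ps(va, vb);
    return _mm512_mul_ps(vy, vy);
  }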