/external/XNNPACK/src/f32-raddexpminusmax/gen/
D | avx512f-p5-scalef-x192-acc6.c | all matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6():
    200  vacc0 = _mm512_add_ps(vacc0, vf0);
    201  vacc1 = _mm512_add_ps(vacc1, vf1);
    202  vacc2 = _mm512_add_ps(vacc2, vf2);
    203  vacc3 = _mm512_add_ps(vacc3, vf3);
    204  vacc4 = _mm512_add_ps(vacc4, vf4);
    205  vacc5 = _mm512_add_ps(vacc5, vf5);
    206  vacc0 = _mm512_add_ps(vacc0, vf6);
    207  vacc1 = _mm512_add_ps(vacc1, vf7);
    208  vacc2 = _mm512_add_ps(vacc2, vf8);
    209  vacc3 = _mm512_add_ps(vacc3, vf9);
    [all …]

D | avx512f-p5-scalef-x160-acc5.c | all matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5():
    177  vacc0 = _mm512_add_ps(vacc0, vf0);
    178  vacc1 = _mm512_add_ps(vacc1, vf1);
    179  vacc2 = _mm512_add_ps(vacc2, vf2);
    180  vacc3 = _mm512_add_ps(vacc3, vf3);
    181  vacc4 = _mm512_add_ps(vacc4, vf4);
    182  vacc0 = _mm512_add_ps(vacc0, vf5);
    183  vacc1 = _mm512_add_ps(vacc1, vf6);
    184  vacc2 = _mm512_add_ps(vacc2, vf7);
    185  vacc3 = _mm512_add_ps(vacc3, vf8);
    186  vacc4 = _mm512_add_ps(vacc4, vf9);
    [all …]

D | avx512f-p5-scalef-x128-acc4.c | all matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4():
    154  vacc0 = _mm512_add_ps(vacc0, vf0);
    155  vacc1 = _mm512_add_ps(vacc1, vf1);
    156  vacc2 = _mm512_add_ps(vacc2, vf2);
    157  vacc3 = _mm512_add_ps(vacc3, vf3);
    158  vacc0 = _mm512_add_ps(vacc0, vf4);
    159  vacc1 = _mm512_add_ps(vacc1, vf5);
    160  vacc2 = _mm512_add_ps(vacc2, vf6);
    161  vacc3 = _mm512_add_ps(vacc3, vf7);
    164  vacc0 = _mm512_add_ps(vacc0, vacc1);
    165  vacc2 = _mm512_add_ps(vacc2, vacc3);
    [all …]

D | avx512f-p5-scalef-x192-acc3.c | all matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3():
    197  vacc0 = _mm512_add_ps(vacc0, vf0);
    198  vacc1 = _mm512_add_ps(vacc1, vf1);
    199  vacc2 = _mm512_add_ps(vacc2, vf2);
    200  vacc0 = _mm512_add_ps(vacc0, vf3);
    201  vacc1 = _mm512_add_ps(vacc1, vf4);
    202  vacc2 = _mm512_add_ps(vacc2, vf5);
    203  vacc0 = _mm512_add_ps(vacc0, vf6);
    204  vacc1 = _mm512_add_ps(vacc1, vf7);
    205  vacc2 = _mm512_add_ps(vacc2, vf8);
    206  vacc0 = _mm512_add_ps(vacc0, vf9);
    [all …]

D | avx512f-p5-scalef-x144-acc3.c | all matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3():
    164  vacc0 = _mm512_add_ps(vacc0, vf0);
    165  vacc1 = _mm512_add_ps(vacc1, vf1);
    166  vacc2 = _mm512_add_ps(vacc2, vf2);
    167  vacc0 = _mm512_add_ps(vacc0, vf3);
    168  vacc1 = _mm512_add_ps(vacc1, vf4);
    169  vacc2 = _mm512_add_ps(vacc2, vf5);
    170  vacc0 = _mm512_add_ps(vacc0, vf6);
    171  vacc1 = _mm512_add_ps(vacc1, vf7);
    172  vacc2 = _mm512_add_ps(vacc2, vf8);
    175  vacc0 = _mm512_add_ps(vacc0, vacc1);
    [all …]

D | avx512f-p5-scalef-x192-acc2.c | all matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2():
    196  vacc0 = _mm512_add_ps(vacc0, vf0);
    197  vacc1 = _mm512_add_ps(vacc1, vf1);
    198  vacc0 = _mm512_add_ps(vacc0, vf2);
    199  vacc1 = _mm512_add_ps(vacc1, vf3);
    200  vacc0 = _mm512_add_ps(vacc0, vf4);
    201  vacc1 = _mm512_add_ps(vacc1, vf5);
    202  vacc0 = _mm512_add_ps(vacc0, vf6);
    203  vacc1 = _mm512_add_ps(vacc1, vf7);
    204  vacc0 = _mm512_add_ps(vacc0, vf8);
    205  vacc1 = _mm512_add_ps(vacc1, vf9);
    [all …]

D | avx512f-p5-scalef-x160-acc2.c | all matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2():
    174  vacc0 = _mm512_add_ps(vacc0, vf0);
    175  vacc1 = _mm512_add_ps(vacc1, vf1);
    176  vacc0 = _mm512_add_ps(vacc0, vf2);
    177  vacc1 = _mm512_add_ps(vacc1, vf3);
    178  vacc0 = _mm512_add_ps(vacc0, vf4);
    179  vacc1 = _mm512_add_ps(vacc1, vf5);
    180  vacc0 = _mm512_add_ps(vacc0, vf6);
    181  vacc1 = _mm512_add_ps(vacc1, vf7);
    182  vacc0 = _mm512_add_ps(vacc0, vf8);
    183  vacc1 = _mm512_add_ps(vacc1, vf9);
    [all …]

D | avx512f-p5-scalef-x192.c | all matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192():
    195  vacc0 = _mm512_add_ps(vacc0, vf0);
    196  vacc0 = _mm512_add_ps(vacc0, vf1);
    197  vacc0 = _mm512_add_ps(vacc0, vf2);
    198  vacc0 = _mm512_add_ps(vacc0, vf3);
    199  vacc0 = _mm512_add_ps(vacc0, vf4);
    200  vacc0 = _mm512_add_ps(vacc0, vf5);
    201  vacc0 = _mm512_add_ps(vacc0, vf6);
    202  vacc0 = _mm512_add_ps(vacc0, vf7);
    203  vacc0 = _mm512_add_ps(vacc0, vf8);
    204  vacc0 = _mm512_add_ps(vacc0, vf9);
    [all …]

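The hits above are the accumulation step of the f32-raddexpminusmax microkernels, which sum exp(x - max) across a row. Each variant round-robins the freshly computed vf vectors into accN independent accumulators so the vaddps ops do not form one serial dependency chain, then folds the accumulators together pairwise before the horizontal reduction. A minimal sketch of that pattern, not the XNNPACK kernel itself, assuming a plain input array of already-computed values and illustrative names (sum_multi_acc, n a multiple of 64 floats):

    #include <immintrin.h>
    #include <stddef.h>

    /* Illustrative acc4-style reduction: `input` already holds the
     * per-element values to sum; `n` is a multiple of 64 floats. */
    static float sum_multi_acc(const float* input, size_t n) {
      __m512 vacc0 = _mm512_setzero_ps();
      __m512 vacc1 = _mm512_setzero_ps();
      __m512 vacc2 = _mm512_setzero_ps();
      __m512 vacc3 = _mm512_setzero_ps();
      for (size_t i = 0; i < n; i += 64) {
        /* Round-robin four loads into four independent accumulators. */
        vacc0 = _mm512_add_ps(vacc0, _mm512_loadu_ps(input + i));
        vacc1 = _mm512_add_ps(vacc1, _mm512_loadu_ps(input + i + 16));
        vacc2 = _mm512_add_ps(vacc2, _mm512_loadu_ps(input + i + 32));
        vacc3 = _mm512_add_ps(vacc3, _mm512_loadu_ps(input + i + 48));
      }
      /* Pairwise accumulator reduction, as in the acc4 hits above. */
      vacc0 = _mm512_add_ps(vacc0, vacc1);
      vacc2 = _mm512_add_ps(vacc2, vacc3);
      vacc0 = _mm512_add_ps(vacc0, vacc2);
      return _mm512_reduce_add_ps(vacc0);
    }

The acc2/acc3/acc5/acc6 files differ from this sketch only in how many accumulators the vf vectors rotate through.
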
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
D | avx512f-p5-scalef-x192-acc6.c | all matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6():
    216  vacc0 = _mm512_add_ps(vacc0, vf0);
    217  vacc1 = _mm512_add_ps(vacc1, vf1);
    218  vacc2 = _mm512_add_ps(vacc2, vf2);
    219  vacc3 = _mm512_add_ps(vacc3, vf3);
    220  vacc4 = _mm512_add_ps(vacc4, vf4);
    221  vacc5 = _mm512_add_ps(vacc5, vf5);
    222  vacc0 = _mm512_add_ps(vacc0, vf6);
    223  vacc1 = _mm512_add_ps(vacc1, vf7);
    224  vacc2 = _mm512_add_ps(vacc2, vf8);
    225  vacc3 = _mm512_add_ps(vacc3, vf9);
    [all …]

D | avx512f-p5-scalef-x160-acc5.c | all matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5():
    191  vacc0 = _mm512_add_ps(vacc0, vf0);
    192  vacc1 = _mm512_add_ps(vacc1, vf1);
    193  vacc2 = _mm512_add_ps(vacc2, vf2);
    194  vacc3 = _mm512_add_ps(vacc3, vf3);
    195  vacc4 = _mm512_add_ps(vacc4, vf4);
    196  vacc0 = _mm512_add_ps(vacc0, vf5);
    197  vacc1 = _mm512_add_ps(vacc1, vf6);
    198  vacc2 = _mm512_add_ps(vacc2, vf7);
    199  vacc3 = _mm512_add_ps(vacc3, vf8);
    200  vacc4 = _mm512_add_ps(vacc4, vf9);
    [all …]

D | avx512f-p5-scalef-x192-acc3.c | all matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3():
    213  vacc0 = _mm512_add_ps(vacc0, vf0);
    214  vacc1 = _mm512_add_ps(vacc1, vf1);
    215  vacc2 = _mm512_add_ps(vacc2, vf2);
    216  vacc0 = _mm512_add_ps(vacc0, vf3);
    217  vacc1 = _mm512_add_ps(vacc1, vf4);
    218  vacc2 = _mm512_add_ps(vacc2, vf5);
    219  vacc0 = _mm512_add_ps(vacc0, vf6);
    220  vacc1 = _mm512_add_ps(vacc1, vf7);
    221  vacc2 = _mm512_add_ps(vacc2, vf8);
    222  vacc0 = _mm512_add_ps(vacc0, vf9);
    [all …]

D | avx512f-p5-scalef-x144-acc3.c | all matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3():
    177  vacc0 = _mm512_add_ps(vacc0, vf0);
    178  vacc1 = _mm512_add_ps(vacc1, vf1);
    179  vacc2 = _mm512_add_ps(vacc2, vf2);
    180  vacc0 = _mm512_add_ps(vacc0, vf3);
    181  vacc1 = _mm512_add_ps(vacc1, vf4);
    182  vacc2 = _mm512_add_ps(vacc2, vf5);
    183  vacc0 = _mm512_add_ps(vacc0, vf6);
    184  vacc1 = _mm512_add_ps(vacc1, vf7);
    185  vacc2 = _mm512_add_ps(vacc2, vf8);
    188  vacc0 = _mm512_add_ps(vacc0, vacc1);
    [all …]

D | avx512f-p5-scalef-x128-acc4.c | all matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4():
    166  vacc0 = _mm512_add_ps(vacc0, vf0);
    167  vacc1 = _mm512_add_ps(vacc1, vf1);
    168  vacc2 = _mm512_add_ps(vacc2, vf2);
    169  vacc3 = _mm512_add_ps(vacc3, vf3);
    170  vacc0 = _mm512_add_ps(vacc0, vf4);
    171  vacc1 = _mm512_add_ps(vacc1, vf5);
    172  vacc2 = _mm512_add_ps(vacc2, vf6);
    173  vacc3 = _mm512_add_ps(vacc3, vf7);
    176  vacc0 = _mm512_add_ps(vacc0, vacc1);
    177  vacc2 = _mm512_add_ps(vacc2, vacc3);
    [all …]

D | avx512f-p5-scalef-x192-acc2.c | all matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2():
    212  vacc0 = _mm512_add_ps(vacc0, vf0);
    213  vacc1 = _mm512_add_ps(vacc1, vf1);
    214  vacc0 = _mm512_add_ps(vacc0, vf2);
    215  vacc1 = _mm512_add_ps(vacc1, vf3);
    216  vacc0 = _mm512_add_ps(vacc0, vf4);
    217  vacc1 = _mm512_add_ps(vacc1, vf5);
    218  vacc0 = _mm512_add_ps(vacc0, vf6);
    219  vacc1 = _mm512_add_ps(vacc1, vf7);
    220  vacc0 = _mm512_add_ps(vacc0, vf8);
    221  vacc1 = _mm512_add_ps(vacc1, vf9);
    [all …]

D | avx512f-p5-scalef-x160-acc2.c | all matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2():
    188  vacc0 = _mm512_add_ps(vacc0, vf0);
    189  vacc1 = _mm512_add_ps(vacc1, vf1);
    190  vacc0 = _mm512_add_ps(vacc0, vf2);
    191  vacc1 = _mm512_add_ps(vacc1, vf3);
    192  vacc0 = _mm512_add_ps(vacc0, vf4);
    193  vacc1 = _mm512_add_ps(vacc1, vf5);
    194  vacc0 = _mm512_add_ps(vacc0, vf6);
    195  vacc1 = _mm512_add_ps(vacc1, vf7);
    196  vacc0 = _mm512_add_ps(vacc0, vf8);
    197  vacc1 = _mm512_add_ps(vacc1, vf9);
    [all …]

D | avx512f-p5-scalef-x192.c | all matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192():
    211  vacc0 = _mm512_add_ps(vacc0, vf0);
    212  vacc0 = _mm512_add_ps(vacc0, vf1);
    213  vacc0 = _mm512_add_ps(vacc0, vf2);
    214  vacc0 = _mm512_add_ps(vacc0, vf3);
    215  vacc0 = _mm512_add_ps(vacc0, vf4);
    216  vacc0 = _mm512_add_ps(vacc0, vf5);
    217  vacc0 = _mm512_add_ps(vacc0, vf6);
    218  vacc0 = _mm512_add_ps(vacc0, vf7);
    219  vacc0 = _mm512_add_ps(vacc0, vf8);
    220  vacc0 = _mm512_add_ps(vacc0, vf9);
    [all …]

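The f32-raddstoreexpminusmax variants perform the same accumulation but also write each exp(x - max) vector back to memory so a later pass can reuse it (softmax needs both the row sum and the per-element values). A minimal sketch of that store-then-accumulate step, with illustrative names and `vf` standing in for one already-computed vector:

    #include <immintrin.h>

    /* Illustrative: store the exp(x - max) vector for later reuse,
     * then fold it into the running sum in one step. */
    static __m512 store_and_accumulate(float* output, __m512 vacc, __m512 vf) {
      _mm512_storeu_ps(output, vf);
      return _mm512_add_ps(vacc, vf);
    }
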
/external/XNNPACK/src/f32-raddextexp/gen/
D | avx512f-p5-scalef-x192-acc6.c | all matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6():
    225  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    226  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
    227  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2));
    228  vaccv3 = _mm512_add_ps(vaccv3, _mm512_scalef_ps(vp3, vdelta_e3));
    229  vaccv4 = _mm512_add_ps(vaccv4, _mm512_scalef_ps(vp4, vdelta_e4));
    230  vaccv5 = _mm512_add_ps(vaccv5, _mm512_scalef_ps(vp5, vdelta_e5));
    231  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
    232  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp7, vdelta_e7));
    233  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp8, vdelta_e8));
    234  vaccv3 = _mm512_add_ps(vaccv3, _mm512_scalef_ps(vp9, vdelta_e9));
    [all …]

D | avx512f-p5-scalef-x160-acc5.c | all matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5():
    199  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    200  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
    201  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2));
    202  vaccv3 = _mm512_add_ps(vaccv3, _mm512_scalef_ps(vp3, vdelta_e3));
    203  vaccv4 = _mm512_add_ps(vaccv4, _mm512_scalef_ps(vp4, vdelta_e4));
    204  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp5, vdelta_e5));
    205  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp6, vdelta_e6));
    206  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp7, vdelta_e7));
    207  vaccv3 = _mm512_add_ps(vaccv3, _mm512_scalef_ps(vp8, vdelta_e8));
    208  vaccv4 = _mm512_add_ps(vaccv4, _mm512_scalef_ps(vp9, vdelta_e9));
    [all …]

D | avx512f-p5-scalef-x192-acc3.c | all matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3():
    213  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    214  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
    215  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2));
    216  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp3, vdelta_e3));
    217  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp4, vdelta_e4));
    218  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp5, vdelta_e5));
    219  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
    220  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp7, vdelta_e7));
    221  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp8, vdelta_e8));
    222  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp9, vdelta_e9));
    [all …]

D | avx512f-p5-scalef-x192-acc2.c | all matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2():
    209  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    210  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
    211  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
    212  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp3, vdelta_e3));
    213  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
    214  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp5, vdelta_e5));
    215  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
    216  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp7, vdelta_e7));
    217  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp8, vdelta_e8));
    218  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp9, vdelta_e9));
    [all …]

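In f32-raddextexp, each accumulator carries a value part (vaccv) and a separately tracked exponent part, so terms far outside normal float range can still be summed without overflow. Before a new term vp is added, it is rescaled to the accumulator's exponent with _mm512_scalef_ps, which computes a * 2^floor(b) per lane; that is the scalef seen in the hits above. A sketch of one such step, reconstructed from this pattern rather than copied from the kernel (vacce, vmax_e, and the exact rescaling and clamping details are assumptions):

    #include <immintrin.h>

    /* Illustrative extended-exponent accumulation: the accumulator is the
     * pair (vaccv, vacce). A new term (vp, ve) is folded in by rescaling
     * both sides to the larger of the two exponents. */
    static void extexp_accumulate(__m512* vaccv, __m512* vacce, __m512 vp, __m512 ve) {
      const __m512 vmax_e       = _mm512_max_ps(*vacce, ve);
      const __m512 vdelta_acc_e = _mm512_sub_ps(*vacce, vmax_e);  /* <= 0 */
      const __m512 vdelta_e     = _mm512_sub_ps(ve, vmax_e);      /* <= 0 */
      *vaccv = _mm512_scalef_ps(*vaccv, vdelta_acc_e);            /* rescale accumulator */
      *vaccv = _mm512_add_ps(*vaccv, _mm512_scalef_ps(vp, vdelta_e)); /* add rescaled term */
      *vacce = vmax_e;
    }
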
/external/XNNPACK/src/f32-vscaleextexp/gen/
D | avx512f-p5-scalef-x192.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192():
    185  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
    186  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
    187  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
    188  const __m512 ve3 = _mm512_add_ps(vn3, vscalee);
    189  const __m512 ve4 = _mm512_add_ps(vn4, vscalee);
    190  const __m512 ve5 = _mm512_add_ps(vn5, vscalee);
    191  const __m512 ve6 = _mm512_add_ps(vn6, vscalee);
    192  const __m512 ve7 = _mm512_add_ps(vn7, vscalee);
    193  const __m512 ve8 = _mm512_add_ps(vn8, vscalee);
    194  const __m512 ve9 = _mm512_add_ps(vn9, vscalee);
    [all …]

D | avx512f-p5-scalef-x176.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176():
    175  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
    176  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
    177  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
    178  const __m512 ve3 = _mm512_add_ps(vn3, vscalee);
    179  const __m512 ve4 = _mm512_add_ps(vn4, vscalee);
    180  const __m512 ve5 = _mm512_add_ps(vn5, vscalee);
    181  const __m512 ve6 = _mm512_add_ps(vn6, vscalee);
    182  const __m512 ve7 = _mm512_add_ps(vn7, vscalee);
    183  const __m512 ve8 = _mm512_add_ps(vn8, vscalee);
    184  const __m512 ve9 = _mm512_add_ps(vn9, vscalee);
    [all …]

D | avx512f-p5-scalef-x160.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160():
    165  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
    166  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
    167  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
    168  const __m512 ve3 = _mm512_add_ps(vn3, vscalee);
    169  const __m512 ve4 = _mm512_add_ps(vn4, vscalee);
    170  const __m512 ve5 = _mm512_add_ps(vn5, vscalee);
    171  const __m512 ve6 = _mm512_add_ps(vn6, vscalee);
    172  const __m512 ve7 = _mm512_add_ps(vn7, vscalee);
    173  const __m512 ve8 = _mm512_add_ps(vn8, vscalee);
    174  const __m512 ve9 = _mm512_add_ps(vn9, vscalee);
    [all …]

D | avx512f-p5-scalef-x144.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144():
    155  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
    156  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
    157  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
    158  const __m512 ve3 = _mm512_add_ps(vn3, vscalee);
    159  const __m512 ve4 = _mm512_add_ps(vn4, vscalee);
    160  const __m512 ve5 = _mm512_add_ps(vn5, vscalee);
    161  const __m512 ve6 = _mm512_add_ps(vn6, vscalee);
    162  const __m512 ve7 = _mm512_add_ps(vn7, vscalee);
    163  const __m512 ve8 = _mm512_add_ps(vn8, vscalee);
    212  const __m512 ve  = _mm512_add_ps(vn, vscalee);
    [all …]

D | avx512f-p5-scalef-x128.c | all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128():
    145  const __m512 ve0 = _mm512_add_ps(vn0, vscalee);
    146  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);
    147  const __m512 ve2 = _mm512_add_ps(vn2, vscalee);
    148  const __m512 ve3 = _mm512_add_ps(vn3, vscalee);
    149  const __m512 ve4 = _mm512_add_ps(vn4, vscalee);
    150  const __m512 ve5 = _mm512_add_ps(vn5, vscalee);
    151  const __m512 ve6 = _mm512_add_ps(vn6, vscalee);
    152  const __m512 ve7 = _mm512_add_ps(vn7, vscalee);
    199  const __m512 ve  = _mm512_add_ps(vn, vscalee);
    233  const __m512 ve  = _mm512_add_ps(vn, vscalee);

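f32-vscaleextexp applies a caller-supplied scale that is itself split into a value part (vscalev) and an exponent part (vscalee); the hits above add vscalee to each vector of computed exponents vn, and the combined exponent is later applied with _mm512_scalef_ps when the output is written. A sketch of that output step under the same naming assumptions (vp standing in for the evaluated polynomial):

    #include <immintrin.h>

    /* Illustrative: combine value and exponent parts, then apply the
     * exponent as a power of two via scalef (vf * 2^floor(ve)). */
    static __m512 scale_ext_exp(__m512 vp, __m512 vn, __m512 vscalev, __m512 vscalee) {
      const __m512 ve = _mm512_add_ps(vn, vscalee);  /* combine exponents */
      const __m512 vf = _mm512_mul_ps(vp, vscalev);  /* combine values    */
      return _mm512_scalef_ps(vf, ve);
    }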