Home
last modified time | relevance | path

Searched refs:vadd_s32 (Results 1 – 25 of 71) sorted by relevance

123

/external/XNNPACK/src/qs8-igemm/gen/
D4x16c8-minmax-neon-mull-padal.c354 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
355 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
356 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
357 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
361 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
362 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
363 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
364 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
368 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
369 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
[all …]
D3x16c8-minmax-neon-mull-padal.c285 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
286 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
287 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
288 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
292 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
293 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
294 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
295 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
299 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
300 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
[all …]
D4x16c16-minmax-neon-mlal-padal.c418 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
419 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
420 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
421 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
425 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
426 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
427 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
428 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
432 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
433 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
[all …]
D4x8c8-minmax-neon-mull-padal.c226 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
227 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
228 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
229 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
233 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
234 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
235 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
236 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
240 const int32x2_t vpsum1x0 = vadd_s32(vget_low_s32(vacc1x0), vget_high_s32(vacc1x0)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
241 const int32x2_t vpsum1x1 = vadd_s32(vget_low_s32(vacc1x1), vget_high_s32(vacc1x1)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
[all …]
D2x16c8-minmax-neon-mull-padal.c216 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
217 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
218 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
219 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
223 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
224 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
225 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
226 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
230 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
231 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
[all …]
D3x16c16-minmax-neon-mlal-padal.c333 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
334 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
335 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
336 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
340 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
341 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
342 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
343 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
347 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
348 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
[all …]
D3x8c8-minmax-neon-mull-padal.c187 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
188 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
189 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
190 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
194 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
195 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
196 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
197 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
201 const int32x2_t vpsum1x0 = vadd_s32(vget_low_s32(vacc1x0), vget_high_s32(vacc1x0)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
202 const int32x2_t vpsum1x1 = vadd_s32(vget_low_s32(vacc1x1), vget_high_s32(vacc1x1)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
[all …]
D4x8c16-minmax-neon-mlal-padal.c258 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
259 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
260 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
261 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
265 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
266 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
267 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
268 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
272 const int32x2_t vpsum1x0 = vadd_s32(vget_low_s32(vacc1x0), vget_high_s32(vacc1x0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
273 const int32x2_t vpsum1x1 = vadd_s32(vget_low_s32(vacc1x1), vget_high_s32(vacc1x1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
[all …]
D4x16c8-minmax-neon-mlal-padal.c593 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
594 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
595 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
596 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
600 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
601 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
602 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
603 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
607 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
608 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
[all …]
D2x16c16-minmax-neon-mlal-padal.c248 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
249 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
250 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
251 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
255 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
256 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
257 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
258 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
262 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
263 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
[all …]
D3x8c16-minmax-neon-mlal-padal.c211 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
212 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
213 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
214 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
218 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
219 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
220 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
221 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
225 const int32x2_t vpsum1x0 = vadd_s32(vget_low_s32(vacc1x0), vget_high_s32(vacc1x0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
226 const int32x2_t vpsum1x1 = vadd_s32(vget_low_s32(vacc1x1), vget_high_s32(vacc1x1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
D4x16c8-minmax-neon-mull-padal.c334 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
335 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
336 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
337 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
341 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
342 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
343 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
344 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
348 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
349 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
[all …]
D3x16c8-minmax-neon-mull-padal.c267 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
268 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
269 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
270 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
274 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
275 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
276 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
277 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
281 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
282 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
[all …]
D4x16c16-minmax-neon-mlal-padal.c398 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
399 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
400 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
401 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
405 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
406 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
407 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
408 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
412 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
413 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
[all …]
D4x8c8-minmax-neon-mull-padal.c206 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
207 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
208 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
209 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
213 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
214 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
215 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
216 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
220 const int32x2_t vpsum1x0 = vadd_s32(vget_low_s32(vacc1x0), vget_high_s32(vacc1x0)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
221 const int32x2_t vpsum1x1 = vadd_s32(vget_low_s32(vacc1x1), vget_high_s32(vacc1x1)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
[all …]
D2x16c8-minmax-neon-mull-padal.c200 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
201 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
202 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
203 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
207 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
208 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
209 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
210 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
214 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
215 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
[all …]
D3x16c16-minmax-neon-mlal-padal.c315 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
316 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
317 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
318 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
322 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
323 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
324 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
325 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
329 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
330 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
[all …]
D3x8c8-minmax-neon-mull-padal.c169 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
170 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
171 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
172 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
176 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
177 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
178 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
179 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
183 const int32x2_t vpsum1x0 = vadd_s32(vget_low_s32(vacc1x0), vget_high_s32(vacc1x0)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
184 const int32x2_t vpsum1x1 = vadd_s32(vget_low_s32(vacc1x1), vget_high_s32(vacc1x1)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
[all …]
D4x8c16-minmax-neon-mlal-padal.c238 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
239 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
240 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
241 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
245 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
246 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
247 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
248 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
252 const int32x2_t vpsum1x0 = vadd_s32(vget_low_s32(vacc1x0), vget_high_s32(vacc1x0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
253 const int32x2_t vpsum1x1 = vadd_s32(vget_low_s32(vacc1x1), vget_high_s32(vacc1x1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
[all …]
D2x16c16-minmax-neon-mlal-padal.c232 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
233 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
234 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
235 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
239 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
240 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
241 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
242 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
246 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
247 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
[all …]
D4x16c8-minmax-neon-mlal-padal.c573 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
574 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
575 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
576 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
580 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
581 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
582 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
583 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
587 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
588 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
[all …]
D3x16c8-minmax-neon-mlal-padal.c456 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
457 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
458 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
459 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
463 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
464 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
465 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
466 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
470 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
471 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
[all …]
D3x8c16-minmax-neon-mlal-padal.c193 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
194 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
195 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
196 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
200 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
201 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
202 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
203 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
207 const int32x2_t vpsum1x0 = vadd_s32(vget_low_s32(vacc1x0), vget_high_s32(vacc1x0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
208 const int32x2_t vpsum1x1 = vadd_s32(vget_low_s32(vacc1x1), vget_high_s32(vacc1x1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
[all …]
D2x8c8-minmax-neon-mull-padal.c132 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
133 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
134 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
135 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
139 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
140 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
141 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
142 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
146 const int32x2_t vpsum1x0 = vadd_s32(vget_low_s32(vacc1x0), vget_high_s32(vacc1x0)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
147 const int32x2_t vpsum1x1 = vadd_s32(vget_low_s32(vacc1x1), vget_high_s32(vacc1x1)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
[all …]
D1x16c8-minmax-neon-mull-padal.c133 const int32x2_t vpsum0x0 = vadd_s32(vget_low_s32(vacc0x0), vget_high_s32(vacc0x0)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
134 const int32x2_t vpsum0x1 = vadd_s32(vget_low_s32(vacc0x1), vget_high_s32(vacc0x1)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
135 const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
136 const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
140 const int32x2_t vpsum0x4 = vadd_s32(vget_low_s32(vacc0x4), vget_high_s32(vacc0x4)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
141 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
142 const int32x2_t vpsum0x6 = vadd_s32(vget_low_s32(vacc0x6), vget_high_s32(vacc0x6)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
143 const int32x2_t vpsum0x7 = vadd_s32(vget_low_s32(vacc0x7), vget_high_s32(vacc0x7)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
147 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
148 const int32x2_t vpsum0x9 = vadd_s32(vget_low_s32(vacc0x9), vget_high_s32(vacc0x9)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
[all …]

123