/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x8c8-minmax-neon-mull-padal.c | 89 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local 183 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 216 const int32x4_t vsum3x67 = vpaddq_s32(vacc3x6, vacc3x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() 277 const int32x2_t vpsum3x6 = vadd_s32(vget_low_s32(vacc3x6), vget_high_s32(vacc3x6)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 89 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 223 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 310 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 343 const int32x4_t vsum3x67 = vpaddq_s32(vacc3x6, vacc3x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 404 const int32x2_t vpsum3x6 = vadd_s32(vget_low_s32(vacc3x6), vget_high_s32(vacc3x6)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 89 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 212 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 248 const int32x4_t vsum3x67 = vpaddq_s32(vacc3x6, vacc3x7); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 309 const int32x2_t vpsum3x6 = vadd_s32(vget_low_s32(vacc3x6), vget_high_s32(vacc3x6)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 113 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 215 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 332 const int32x4_t vsum3x67 = vpaddq_s32(vacc3x6, vacc3x7); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 447 const int32x2_t vpsum3x6 = vadd_s32(vget_low_s32(vacc3x6), vget_high_s32(vacc3x6)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 113 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 263 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 454 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 571 const int32x4_t vsum3x67 = vpaddq_s32(vacc3x6, vacc3x7); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 686 const int32x2_t vpsum3x6 = vadd_s32(vget_low_s32(vacc3x6), vget_high_s32(vacc3x6)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 113 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 252 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 396 const int32x4_t vsum3x67 = vpaddq_s32(vacc3x6, vacc3x7); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 511 const int32x2_t vpsum3x6 = vadd_s32(vget_low_s32(vacc3x6), vget_high_s32(vacc3x6)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x8c8-minmax-neon-mull-padal.c | 92 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local 166 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 196 const int32x4_t vsum3x67 = vpaddq_s32(vacc3x6, vacc3x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() 257 const int32x2_t vpsum3x6 = vadd_s32(vget_low_s32(vacc3x6), vget_high_s32(vacc3x6)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 92 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 206 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 293 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 323 const int32x4_t vsum3x67 = vpaddq_s32(vacc3x6, vacc3x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 384 const int32x2_t vpsum3x6 = vadd_s32(vget_low_s32(vacc3x6), vget_high_s32(vacc3x6)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 92 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 195 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() 228 const int32x4_t vsum3x67 = vpaddq_s32(vacc3x6, vacc3x7); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() 289 const int32x2_t vpsum3x6 = vadd_s32(vget_low_s32(vacc3x6), vget_high_s32(vacc3x6)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 116 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 198 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 312 const int32x4_t vsum3x67 = vpaddq_s32(vacc3x6, vacc3x7); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 427 const int32x2_t vpsum3x6 = vadd_s32(vget_low_s32(vacc3x6), vget_high_s32(vacc3x6)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 116 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 246 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 437 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 551 const int32x4_t vsum3x67 = vpaddq_s32(vacc3x6, vacc3x7); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 666 const int32x2_t vpsum3x6 = vadd_s32(vget_low_s32(vacc3x6), vget_high_s32(vacc3x6)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 116 int32x4_t vacc3x6 = vacc0x6; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 235 vacc3x6 = vpadalq_s16(vacc3x6, vprod3x6); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 376 const int32x4_t vsum3x67 = vpaddq_s32(vacc3x6, vacc3x7); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 491 const int32x2_t vpsum3x6 = vadd_s32(vget_low_s32(vacc3x6), vget_high_s32(vacc3x6)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|