/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x16c8-minmax-neon-mull-padal.c | 206 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 214 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 270 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 271 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 238 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 246 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 302 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 303 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 263 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 279 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 339 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 340 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 345 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 353 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 409 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 410 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 311 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 327 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 387 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 388 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 320 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 344 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 408 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 409 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 452 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 468 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 528 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 529 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 384 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 408 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 472 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 473 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 559 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 583 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 647 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 648 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x16c8-minmax-neon-mull-padal.c | 190 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 198 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 254 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 255 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 222 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 230 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 286 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 287 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 245 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 261 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 321 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 322 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 329 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 337 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 393 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 394 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 293 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 309 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 369 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 370 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 300 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 324 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 388 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 389 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 434 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 450 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 510 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 511 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 364 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 388 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 452 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 453 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 539 const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 563 int32x4_t vacc1xCDEF = vpaddq_s32(vsum1xCD, vsum1xEF); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 627 const int32x2_t vsum1xEF = vpadd_s32(vpsum1xE, vpsum1xF); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 628 int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF ); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|