/external/XNNPACK/src/f32-conv-hwc/ |
D | 3x3s2p1c3x4-scalar-1x1.c | 413 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1() local 415 o0_tmp[0] = voc0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1() 416 o0_tmp[1] = voc1; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1() 417 o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1() 421 *o0_tmp++ = voc0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1() 652 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1() local 654 o0_tmp[0] = voc0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1() 655 o0_tmp[1] = voc1; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1() 656 o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1() 660 *o0_tmp++ = voc0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1()
|
D | 3x3s2p0p1c3x4-scalar-1x1.c | 413 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1() local 415 o0_tmp[0] = voc0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1() 416 o0_tmp[1] = voc1; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1() 417 o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1() 421 *o0_tmp++ = voc0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1() 652 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1() local 654 o0_tmp[0] = voc0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1() 655 o0_tmp[1] = voc1; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1() 656 o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1() 660 *o0_tmp++ = voc0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1()
|
/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p0p1c3x4-neonfma-2x2.c | 365 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 374 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 379 vst1_f32(o0_tmp, vo0x0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 384 vst1_lane_f32(o0_tmp, vo0x0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 387 vst1_lane_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 567 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 574 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 579 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 700 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 707 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x4-neon-2x2.c | 365 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 374 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 379 vst1_f32(o0_tmp, vo0x0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 384 vst1_lane_f32(o0_tmp, vo0x0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 387 vst1_lane_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 569 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 576 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 581 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 704 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 711 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x2.c | 365 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 374 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 379 vst1_f32(o0_tmp, vo0x0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 384 vst1_lane_f32(o0_tmp, vo0x0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 387 vst1_lane_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 569 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 576 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 581 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 704 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 711 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p1c3x4-neonfma-2x2.c | 365 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 374 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 379 vst1_f32(o0_tmp, vo0x0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 384 vst1_lane_f32(o0_tmp, vo0x0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 387 vst1_lane_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 567 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 574 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 579 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 700 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 707 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x1.c | 264 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 271 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 276 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 400 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 407 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 412 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
|
D | 3x3s2p1c3x4-neonfma-2x1.c | 264 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 271 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 276 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 398 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 405 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 410 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neon-2x1.c | 264 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 271 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 276 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 400 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 407 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 412 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
|
D | 3x3s2p0p1c3x4-neonfma-2x1.c | 264 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 271 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 276 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 398 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 405 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 410 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x8-neon-2x1.c | 353 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 358 vst1q_f32(o0_tmp, vo0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 366 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 371 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 557 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 562 vst1q_f32(o0_tmp, vo0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 570 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 575 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
|
D | 3x3s2p0p1c3x8-neonfma-2x1.c | 353 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 358 vst1q_f32(o0_tmp, vo0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 366 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 371 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 555 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 560 vst1q_f32(o0_tmp, vo0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 568 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 573 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
|
D | 3x3s2p1c3x8-neonfma-2x1.c | 353 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 358 vst1q_f32(o0_tmp, vo0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 366 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 371 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 555 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 560 vst1q_f32(o0_tmp, vo0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 568 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 573 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
|
D | 3x3s2p0p1c3x8-neon-2x1.c | 353 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 358 vst1q_f32(o0_tmp, vo0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 366 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 371 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 557 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 562 vst1q_f32(o0_tmp, vo0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 570 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 575 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
|
D | 3x3s2p0p1c3x8-neon-2x2.c | 516 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 521 vst1q_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 526 vst1q_f32(o0_tmp, vo0x0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 536 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 541 vst1_f32(o0_tmp, vo0x0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 546 vst1_lane_f32(o0_tmp, vo0x0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 549 vst1_lane_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 820 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 825 vst1q_f32(o0_tmp, vo0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 833 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p1c3x8-neonfma-2x2.c | 516 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 521 vst1q_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 526 vst1q_f32(o0_tmp, vo0x0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 536 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 541 vst1_f32(o0_tmp, vo0x0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 546 vst1_lane_f32(o0_tmp, vo0x0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 549 vst1_lane_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 818 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 823 vst1q_f32(o0_tmp, vo0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 831 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x2.c | 516 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 521 vst1q_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 526 vst1q_f32(o0_tmp, vo0x0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 536 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 541 vst1_f32(o0_tmp, vo0x0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 546 vst1_lane_f32(o0_tmp, vo0x0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 549 vst1_lane_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 818 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 823 vst1q_f32(o0_tmp, vo0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 831 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x8-neon-2x2.c | 516 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 521 vst1q_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 526 vst1q_f32(o0_tmp, vo0x0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 536 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 541 vst1_f32(o0_tmp, vo0x0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 546 vst1_lane_f32(o0_tmp, vo0x0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 549 vst1_lane_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 820 float* o0_tmp = o0; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 825 vst1q_f32(o0_tmp, vo0c0123); o0_tmp += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 833 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() [all …]
|