/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p1c3x8-neon-2x1.c | 92 float32x4_t vo0c4567 = vld1q_f32(w + 4); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 94 float32x4_t vo1c4567 = vo0c4567; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 101 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk00c0x4567, vget_low_f32(vi0x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 109 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c0x4567, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 117 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk20c0x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 125 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk00c1x4567, vget_high_f32(vi0x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 133 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c1x4567, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 141 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk20c1x4567, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 149 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk00c2x4567, vget_high_f32(vi0x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 157 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c2x4567, vget_high_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x1.c | 94 float32x4_t vo0c4567 = vld1q_f32(w + 4); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 96 float32x4_t vo1c4567 = vo0c4567; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 103 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk00c0x4567, vget_low_f32(vi0x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 111 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c0x4567, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 119 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk20c0x4567, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 127 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk00c1x4567, vget_low_f32(vi0x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 135 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c1x4567, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 143 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk20c1x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 151 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk00c2x4567, vget_high_f32(vi0x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 159 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c2x4567, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() [all …]
|
D | 3x3s2p1c3x8-neonfma-2x1.c | 94 float32x4_t vo0c4567 = vld1q_f32(w + 4); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 96 float32x4_t vo1c4567 = vo0c4567; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 103 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk00c0x4567, vget_low_f32(vi0x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 111 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c0x4567, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 119 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk20c0x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 127 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk00c1x4567, vget_high_f32(vi0x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 135 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c1x4567, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 143 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk20c1x4567, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 151 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk00c2x4567, vget_high_f32(vi0x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 159 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c2x4567, vget_high_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() [all …]
|
D | 3x3s2p0p1c3x8-neon-2x1.c | 92 float32x4_t vo0c4567 = vld1q_f32(w + 4); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 94 float32x4_t vo1c4567 = vo0c4567; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 101 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk00c0x4567, vget_low_f32(vi0x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 109 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c0x4567, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 117 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk20c0x4567, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 125 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk00c1x4567, vget_low_f32(vi0x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 133 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c1x4567, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 141 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk20c1x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 149 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk00c2x4567, vget_high_f32(vi0x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 157 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c2x4567, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() [all …]
|
D | 3x3s2p0p1c3x8-neon-2x2.c | 559 float32x4_t vo0c4567 = vld1q_f32(w + 4); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 561 float32x4_t vo1c4567 = vo0c4567; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 568 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk00c0x4567, vget_low_f32(vi0x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 576 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c0x4567, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 584 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk20c0x4567, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 592 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk00c1x4567, vget_low_f32(vi0x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 600 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c1x4567, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 608 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk20c1x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 616 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk00c2x4567, vget_high_f32(vi0x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 624 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c2x4567, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p1c3x8-neonfma-2x2.c | 559 float32x4_t vo0c4567 = vld1q_f32(w + 4); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 561 float32x4_t vo1c4567 = vo0c4567; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 568 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk00c0x4567, vget_low_f32(vi0x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 576 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c0x4567, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 584 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk20c0x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 592 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk00c1x4567, vget_high_f32(vi0x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 600 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c1x4567, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 608 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk20c1x4567, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 616 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk00c2x4567, vget_high_f32(vi0x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 624 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c2x4567, vget_high_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x2.c | 559 float32x4_t vo0c4567 = vld1q_f32(w + 4); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 561 float32x4_t vo1c4567 = vo0c4567; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 568 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk00c0x4567, vget_low_f32(vi0x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 576 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c0x4567, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 584 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk20c0x4567, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 592 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk00c1x4567, vget_low_f32(vi0x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 600 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c1x4567, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 608 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk20c1x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 616 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk00c2x4567, vget_high_f32(vi0x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 624 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk10c2x4567, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x8-neon-2x2.c | 559 float32x4_t vo0c4567 = vld1q_f32(w + 4); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 561 float32x4_t vo1c4567 = vo0c4567; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 568 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk00c0x4567, vget_low_f32(vi0x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 576 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c0x4567, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 584 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk20c0x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 592 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk00c1x4567, vget_high_f32(vi0x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 600 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c1x4567, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 608 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk20c1x4567, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 616 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk00c2x4567, vget_high_f32(vi0x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 624 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk10c2x4567, vget_high_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() [all …]
|