/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p0p1c3x4-neon-2x1.c | 92 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 97 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 102 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 107 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 112 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 117 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 122 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 127 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 132 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 137 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() [all …]
|
D | 3x3s2p1c3x4-neonfma-2x1.c | 94 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 99 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 104 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 109 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 114 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 119 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 124 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 129 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 134 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 139 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() [all …]
|
D | 3x3s2p1c3x4-neon-2x1.c | 92 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 97 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 102 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 107 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 112 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 117 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 122 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 127 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 132 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 137 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() [all …]
|
D | 3x3s2p0p1c3x4-neonfma-2x1.c | 94 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 99 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 104 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 109 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 114 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 119 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 124 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 129 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 134 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 139 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() [all …]
|
D | 3x3s2p1c3x8-neon-2x1.c | 93 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 100 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 108 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 116 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 124 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 132 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 140 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 148 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 156 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 164 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x1.c | 95 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 102 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 110 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 118 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 126 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 134 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 142 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 150 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 158 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 166 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() [all …]
|
D | 3x3s2p1c3x8-neonfma-2x1.c | 95 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 102 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 110 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 118 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 126 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 134 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 142 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 150 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 158 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 166 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() [all …]
|
D | 3x3s2p0p1c3x8-neon-2x1.c | 93 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 100 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 108 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 116 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 124 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 132 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 140 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 148 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 156 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 164 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() [all …]
|
D | 3x3s2p0p1c3x4-neonfma-2x2.c | 397 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 402 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 407 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 412 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 417 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 422 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 427 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 432 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 437 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 442 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x4-neon-2x2.c | 397 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 402 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 407 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 412 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 417 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 422 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 427 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 432 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 437 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 442 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x2.c | 397 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 402 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 407 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 412 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 417 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 422 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 427 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 432 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 437 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 442 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p1c3x4-neonfma-2x2.c | 397 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 402 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 407 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 412 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 417 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 422 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 427 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 432 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 437 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 442 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neon-2x2.c | 560 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 567 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 575 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 583 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 591 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 599 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 607 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 615 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 623 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 631 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p1c3x8-neonfma-2x2.c | 560 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 567 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 575 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 583 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 591 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 599 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 607 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 615 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 623 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 631 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x2.c | 560 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 567 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 575 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 583 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 591 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 599 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 607 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 615 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 623 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 631 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x8-neon-2x2.c | 560 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 567 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 575 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 583 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 591 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 599 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 607 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 615 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 623 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 631 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() [all …]
|
/external/XNNPACK/src/f32-conv-hwc2chw/ |
D | 3x3s2p1c3x4-neon-2x2.c | 336 const float32x4x2_t vo1c0123 = vzipq_f32(vo1x0, vo1x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local 339 vst1_f32(o1c0, vget_low_f32(vo1c0123.val[0])); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 340 vst1_f32(o1c1, vget_high_f32(vo1c0123.val[0])); o1c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 341 vst1_f32(o1c2, vget_low_f32(vo1c0123.val[1])); o1c2 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 342 vst1_f32(o1c3, vget_high_f32(vo1c0123.val[1])); o1c3 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 595 const float32x4x2_t vo1c0123 = vzipq_f32(vo1x0, vo1x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local 597 vst1_f32(o1c0, vget_low_f32(vo1c0123.val[0])); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 598 vst1_f32(o1c1, vget_high_f32(vo1c0123.val[0])); o1c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 599 vst1_f32(o1c2, vget_low_f32(vo1c0123.val[1])); o1c2 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 600 vst1_f32(o1c3, vget_high_f32(vo1c0123.val[1])); o1c3 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
|