/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p0p1c3x4-neonfma-2x2.c | 96 float32x4_t vo1x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 111 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c0x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 119 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 127 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk20c0x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 135 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 143 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 151 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk20c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 166 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c2x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 174 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c2x0123, vget_low_f32(vi3x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 182 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk20c2x0123, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x4-neon-2x2.c | 94 float32x4_t vo1x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 109 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c0x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 117 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 125 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk20c0x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 140 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 148 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_low_f32(vi3x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 156 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk20c1x0123, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 164 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c2x0123, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 172 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c2x0123, vget_low_f32(vi3x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 180 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk20c2x0123, vget_low_f32(vi4x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x2.c | 94 float32x4_t vo1x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 109 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c0x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 117 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 125 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk20c0x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 133 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 141 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 149 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk20c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 164 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c2x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 172 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c2x0123, vget_low_f32(vi3x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 180 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk20c2x0123, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p1c3x4-neonfma-2x2.c | 96 float32x4_t vo1x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 111 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c0x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 119 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 127 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk20c0x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 142 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 150 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_low_f32(vi3x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 158 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk20c1x0123, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 166 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c2x0123, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 174 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c2x0123, vget_low_f32(vi3x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 182 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk20c2x0123, vget_low_f32(vi4x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neon-2x2.c | 97 float32x4_t vo1x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 116 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c0x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 129 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 142 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk20c0x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 155 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 168 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 181 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk20c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 201 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c2x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 214 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c2x0123, vget_low_f32(vi3x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 227 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk20c2x0123, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p1c3x8-neonfma-2x2.c | 99 float32x4_t vo1x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 118 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c0x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 131 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 144 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk20c0x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 164 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 177 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_low_f32(vi3x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 190 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk20c1x0123, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 203 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c2x0123, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 216 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c2x0123, vget_low_f32(vi3x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 229 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk20c2x0123, vget_low_f32(vi4x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x2.c | 99 float32x4_t vo1x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 118 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c0x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 131 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 144 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk20c0x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 157 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 170 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 183 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk20c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 203 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk00c2x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 216 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c2x0123, vget_low_f32(vi3x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 229 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk20c2x0123, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x8-neon-2x2.c | 97 float32x4_t vo1x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 116 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c0x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 129 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 142 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk20c0x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 162 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 175 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_low_f32(vi3x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 188 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk20c1x0123, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 201 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk00c2x0123, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 214 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c2x0123, vget_low_f32(vi3x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 227 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk20c2x0123, vget_low_f32(vi4x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() [all …]
|