/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p0p1c3x4-neonfma-2x2.c | 95 float32x4_t vo0x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 110 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk00c0x0123, vget_high_f32(vi0x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 118 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 126 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c0x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 134 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk00c1x0123, vget_high_f32(vi0x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 142 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 150 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 165 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk00c2x0123, vget_low_f32(vi0x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 173 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c2x0123, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 181 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c2x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x4-neon-2x2.c | 93 float32x4_t vo0x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 108 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk00c0x0123, vget_high_f32(vi0x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 116 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 124 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c0x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 139 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk00c1x0123, vget_low_f32(vi0x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 147 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 155 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 163 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk00c2x0123, vget_low_f32(vi0x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 171 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c2x0123, vget_low_f32(vi1x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 179 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c2x0123, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x2.c | 93 float32x4_t vo0x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 108 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk00c0x0123, vget_high_f32(vi0x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 116 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 124 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c0x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 132 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk00c1x0123, vget_high_f32(vi0x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 140 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 148 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 163 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk00c2x0123, vget_low_f32(vi0x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 171 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c2x0123, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 179 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c2x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p1c3x4-neonfma-2x2.c | 95 float32x4_t vo0x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 110 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk00c0x0123, vget_high_f32(vi0x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 118 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 126 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c0x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 141 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk00c1x0123, vget_low_f32(vi0x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 149 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 157 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 165 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk00c2x0123, vget_low_f32(vi0x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 173 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c2x0123, vget_low_f32(vi1x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 181 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c2x0123, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neon-2x2.c | 95 float32x4_t vo0x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 115 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk00c0x0123, vget_high_f32(vi0x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 128 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 141 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c0x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 154 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk00c1x0123, vget_high_f32(vi0x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 167 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 180 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 200 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk00c2x0123, vget_low_f32(vi0x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 213 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c2x0123, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 226 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c2x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p1c3x8-neonfma-2x2.c | 97 float32x4_t vo0x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 117 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk00c0x0123, vget_high_f32(vi0x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 130 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 143 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c0x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 163 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk00c1x0123, vget_low_f32(vi0x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 176 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 189 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 202 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk00c2x0123, vget_low_f32(vi0x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 215 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c2x0123, vget_low_f32(vi1x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 228 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c2x0123, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x2.c | 97 float32x4_t vo0x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 117 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk00c0x0123, vget_high_f32(vi0x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 130 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 143 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c0x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 156 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk00c1x0123, vget_high_f32(vi0x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 169 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 182 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 202 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk00c2x0123, vget_low_f32(vi0x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 215 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c2x0123, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 228 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk20c2x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x8-neon-2x2.c | 95 float32x4_t vo0x1c0123 = vo0x0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 115 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk00c0x0123, vget_high_f32(vi0x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 128 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 141 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c0x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 161 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk00c1x0123, vget_low_f32(vi0x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 174 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 187 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 200 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk00c2x0123, vget_low_f32(vi0x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 213 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c2x0123, vget_low_f32(vi1x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 226 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk20c2x0123, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() [all …]
|