Home
last modified time | relevance | path

Searched refs:vo1c0123 (Results 1 – 17 of 17) sorted by relevance

/external/XNNPACK/src/f32-conv-hwc/gen/
D3x3s2p0p1c3x4-neon-2x1.c92 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local
97 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
102 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
107 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
112 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
117 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
122 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
127 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
132 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
137 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
[all …]
D3x3s2p1c3x4-neonfma-2x1.c94 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local
99 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
104 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
109 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
114 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
119 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
124 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
129 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
134 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
139 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
[all …]
D3x3s2p1c3x4-neon-2x1.c92 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local
97 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
102 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
107 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
112 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
117 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
122 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
127 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
132 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
137 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
[all …]
D3x3s2p0p1c3x4-neonfma-2x1.c94 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local
99 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
104 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
109 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
114 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
119 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
124 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
129 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
134 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
139 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
[all …]
D3x3s2p1c3x8-neon-2x1.c93 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local
100 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
108 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
116 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
124 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
132 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
140 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
148 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
156 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
164 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
[all …]
D3x3s2p0p1c3x8-neonfma-2x1.c95 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local
102 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
110 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
118 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
126 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
134 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
142 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
150 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
158 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
166 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
[all …]
D3x3s2p1c3x8-neonfma-2x1.c95 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local
102 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
110 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
118 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
126 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
134 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
142 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
150 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
158 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
166 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
[all …]
D3x3s2p0p1c3x8-neon-2x1.c93 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local
100 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
108 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
116 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
124 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
132 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
140 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
148 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
156 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
164 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
[all …]
D3x3s2p0p1c3x4-neonfma-2x2.c397 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local
402 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
407 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
412 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
417 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
422 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
427 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
432 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
437 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
442 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
[all …]
D3x3s2p1c3x4-neon-2x2.c397 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local
402 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
407 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
412 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
417 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
422 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
427 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
432 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
437 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
442 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
[all …]
D3x3s2p0p1c3x4-neon-2x2.c397 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local
402 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
407 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
412 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
417 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
422 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
427 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
432 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
437 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
442 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
[all …]
D3x3s2p1c3x4-neonfma-2x2.c397 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local
402 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
407 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
412 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
417 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
422 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
427 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
432 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
437 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
442 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
[all …]
D3x3s2p0p1c3x8-neon-2x2.c560 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local
567 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
575 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
583 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
591 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
599 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
607 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
615 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
623 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
631 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
[all …]
D3x3s2p1c3x8-neonfma-2x2.c560 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local
567 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
575 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
583 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
591 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
599 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
607 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
615 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
623 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
631 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
[all …]
D3x3s2p0p1c3x8-neonfma-2x2.c560 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local
567 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
575 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
583 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
591 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
599 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
607 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
615 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
623 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
631 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
[all …]
D3x3s2p1c3x8-neon-2x2.c560 float32x4_t vo1c0123 = vo0c0123; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local
567 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
575 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
583 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
591 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
599 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
607 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
615 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
623 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
631 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
[all …]
/external/XNNPACK/src/f32-conv-hwc2chw/
D3x3s2p1c3x4-neon-2x2.c336 const float32x4x2_t vo1c0123 = vzipq_f32(vo1x0, vo1x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local
339 vst1_f32(o1c0, vget_low_f32(vo1c0123.val[0])); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
340 vst1_f32(o1c1, vget_high_f32(vo1c0123.val[0])); o1c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
341 vst1_f32(o1c2, vget_low_f32(vo1c0123.val[1])); o1c2 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
342 vst1_f32(o1c3, vget_high_f32(vo1c0123.val[1])); o1c3 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
595 const float32x4x2_t vo1c0123 = vzipq_f32(vo1x0, vo1x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local
597 vst1_f32(o1c0, vget_low_f32(vo1c0123.val[0])); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
598 vst1_f32(o1c1, vget_high_f32(vo1c0123.val[0])); o1c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
599 vst1_f32(o1c2, vget_low_f32(vo1c0123.val[1])); o1c2 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
600 vst1_f32(o1c3, vget_high_f32(vo1c0123.val[1])); o1c3 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()