Home
last modified time | relevance | path

Searched refs:vk21c2x0123 (Results 1 – 17 of 17) sorted by relevance

/external/XNNPACK/src/f32-conv-hwc/gen/
D3x3s2p0p1c3x4-neonfma-2x2.c248 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local
250 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
251 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
253 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_high_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
254 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_high_f32(vi4x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
491 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local
493 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
494 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
682 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local
684 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
[all …]
D3x3s2p1c3x4-neon-2x2.c253 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local
255 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
256 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
258 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_low_f32(vi2x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
259 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_low_f32(vi4x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
491 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local
493 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
494 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
684 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local
686 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
[all …]
D3x3s2p0p1c3x4-neon-2x2.c246 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local
248 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
249 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
251 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_high_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
252 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_high_f32(vi4x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
491 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local
493 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
494 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
684 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local
686 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
[all …]
D3x3s2p1c3x4-neonfma-2x2.c255 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local
257 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
258 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
260 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_low_f32(vi2x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
261 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_low_f32(vi4x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
491 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local
493 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
494 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
682 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local
684 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
[all …]
D3x3s2p0p1c3x4-neon-2x1.c186 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local
188 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
189 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
380 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local
382 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
383 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vi4x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
D3x3s2p1c3x4-neonfma-2x1.c188 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local
190 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
191 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
380 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local
382 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
383 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
D3x3s2p1c3x4-neon-2x1.c186 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local
188 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
189 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
380 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local
382 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
383 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
D3x3s2p0p1c3x4-neonfma-2x1.c188 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local
190 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
191 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
380 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local
382 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
383 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vi4x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
D3x3s2p0p1c3x8-neon-2x2.c335 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local
338 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
339 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
343 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_high_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
344 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_high_f32(vi4x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
706 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local
709 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
710 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
994 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local
997 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
[all …]
D3x3s2p1c3x8-neonfma-2x2.c344 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local
347 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
348 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
352 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_low_f32(vi2x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
353 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_low_f32(vi4x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
706 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local
709 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
710 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
992 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local
995 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
[all …]
D3x3s2p0p1c3x8-neonfma-2x2.c337 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local
340 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
341 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
345 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_high_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
346 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_high_f32(vi4x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
706 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local
709 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
710 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
992 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local
995 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
[all …]
D3x3s2p1c3x8-neon-2x2.c342 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local
345 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
346 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
350 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_low_f32(vi2x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
351 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_low_f32(vi4x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
706 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local
709 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
710 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
994 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local
997 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
[all …]
D3x3s2p1c3x8-neon-2x1.c239 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local
242 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
243 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
528 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local
531 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
532 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
D3x3s2p0p1c3x8-neonfma-2x1.c241 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local
244 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
245 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
528 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local
531 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
532 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vi4x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
D3x3s2p1c3x8-neonfma-2x1.c241 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local
244 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
245 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
528 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local
531 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
532 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
D3x3s2p0p1c3x8-neon-2x1.c239 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local
242 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
243 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
528 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local
531 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
532 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vi4x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
/external/XNNPACK/src/f32-conv-hwc2chw/
D3x3s2p1c3x4-sse-1x1.c149 const __m128 vk21c2x0123 = _mm_load_ps(w + 72); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() local
151 voc0123 = _mm_add_ps(voc0123, _mm_mul_ps(vk21c2x0123, vi21c2)); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1()
276 const __m128 vk21c2x0123 = _mm_load_ps(w + 72); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() local
278 voc0123 = _mm_add_ps(voc0123, _mm_mul_ps(vk21c2x0123, vi21c2)); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1()