/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p0p1c3x4-neonfma-2x2.c | 248 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 250 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 251 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 253 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_high_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 254 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_high_f32(vi4x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 491 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 493 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 494 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 682 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 684 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x4-neon-2x2.c | 253 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 255 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 256 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 258 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_low_f32(vi2x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 259 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_low_f32(vi4x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 491 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 493 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 494 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 684 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 686 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x2.c | 246 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 248 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 249 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 251 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_high_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 252 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_high_f32(vi4x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 491 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 493 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 494 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 684 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 686 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p1c3x4-neonfma-2x2.c | 255 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 257 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 258 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 260 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_low_f32(vi2x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 261 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_low_f32(vi4x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 491 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 493 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 494 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 682 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 684 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x1.c | 186 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 188 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 189 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 380 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 382 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 383 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vi4x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
|
D | 3x3s2p1c3x4-neonfma-2x1.c | 188 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 190 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 191 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 380 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 382 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 383 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neon-2x1.c | 186 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 188 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 189 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 380 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 382 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 383 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
|
D | 3x3s2p0p1c3x4-neonfma-2x1.c | 188 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 190 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 191 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 380 const float32x4_t vk21c2x0123 = vld1q_f32(w + 72); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 382 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 383 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vi4x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
|
D | 3x3s2p0p1c3x8-neon-2x2.c | 335 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 338 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 339 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 343 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_high_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 344 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_high_f32(vi4x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 706 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 709 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 710 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 994 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 997 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p1c3x8-neonfma-2x2.c | 344 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 347 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 348 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 352 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_low_f32(vi2x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 353 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_low_f32(vi4x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 706 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 709 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 710 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 992 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 995 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x2.c | 337 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 340 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 341 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 345 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_high_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 346 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_high_f32(vi4x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 706 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 709 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 710 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 992 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 995 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x8-neon-2x2.c | 342 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 345 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 346 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 350 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk21c2x0123, vget_low_f32(vi2x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 351 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_low_f32(vi4x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 706 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 709 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 710 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 994 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 997 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p1c3x8-neon-2x1.c | 239 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 242 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 243 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 528 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 531 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 532 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
|
D | 3x3s2p0p1c3x8-neonfma-2x1.c | 241 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 244 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 245 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 528 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 531 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 532 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vi4x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
|
D | 3x3s2p1c3x8-neonfma-2x1.c | 241 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 244 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 245 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 528 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 531 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c2x0123, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 532 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
|
D | 3x3s2p0p1c3x8-neon-2x1.c | 239 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 242 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 243 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 528 const float32x4_t vk21c2x0123 = vld1q_f32(w + 144); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 531 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c2x0123, vi2x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 532 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vi4x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
|
/external/XNNPACK/src/f32-conv-hwc2chw/ |
D | 3x3s2p1c3x4-sse-1x1.c | 149 const __m128 vk21c2x0123 = _mm_load_ps(w + 72); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() local 151 voc0123 = _mm_add_ps(voc0123, _mm_mul_ps(vk21c2x0123, vi21c2)); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() 276 const __m128 vk21c2x0123 = _mm_load_ps(w + 72); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() local 278 voc0123 = _mm_add_ps(voc0123, _mm_mul_ps(vk21c2x0123, vi21c2)); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1()
|