Lines Matching refs:vo1x0

105         float32x4_t vo1x0 = vo0x0;  in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()  local
119 vo1x0 = vmlaq_lane_f32(vo1x0, vk00c0, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
126 vo1x0 = vmlaq_lane_f32(vo1x0, vk10c0, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
133 vo1x0 = vmlaq_lane_f32(vo1x0, vk20c0, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
147 vo1x0 = vmlaq_lane_f32(vo1x0, vk00c1, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
154 vo1x0 = vmlaq_lane_f32(vo1x0, vk10c1, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
161 vo1x0 = vmlaq_lane_f32(vo1x0, vk20c1, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
168 vo1x0 = vmlaq_lane_f32(vo1x0, vk00c2, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
175 vo1x0 = vmlaq_lane_f32(vo1x0, vk10c2, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
182 vo1x0 = vmlaq_lane_f32(vo1x0, vk20c2, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
189 vo1x0 = vmlaq_lane_f32(vo1x0, vk01c0, vget_low_f32(vi2x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
196 vo1x0 = vmlaq_lane_f32(vo1x0, vk11c0, vget_low_f32(vi3x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
203 vo1x0 = vmlaq_lane_f32(vo1x0, vk21c0, vget_low_f32(vi4x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
210 vo1x0 = vmlaq_lane_f32(vo1x0, vk01c1, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
217 vo1x0 = vmlaq_lane_f32(vo1x0, vk11c1, vget_low_f32(vi3x1), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
224 vo1x0 = vmlaq_lane_f32(vo1x0, vk21c1, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
238 vo1x0 = vmlaq_lane_f32(vo1x0, vk01c2, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
245 vo1x0 = vmlaq_lane_f32(vo1x0, vk11c2, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
252 vo1x0 = vmlaq_lane_f32(vo1x0, vk21c2, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
259 vo1x0 = vmlaq_lane_f32(vo1x0, vk02c0, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
266 vo1x0 = vmlaq_lane_f32(vo1x0, vk12c0, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
273 vo1x0 = vmlaq_lane_f32(vo1x0, vk22c0, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
280 vo1x0 = vmlaq_lane_f32(vo1x0, vk02c1, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
287 vo1x0 = vmlaq_lane_f32(vo1x0, vk12c1, vget_low_f32(vi3x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
294 vo1x0 = vmlaq_lane_f32(vo1x0, vk22c1, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
301 vo1x0 = vmlaq_lane_f32(vo1x0, vk02c2, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
308 vo1x0 = vmlaq_lane_f32(vo1x0, vk12c2, vget_low_f32(vi3x2), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
315 vo1x0 = vmlaq_lane_f32(vo1x0, vk22c2, vget_low_f32(vi4x2), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
326 vo1x0 = vmaxq_f32(vo1x0, vmin); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
331 vo1x0 = vminq_f32(vo1x0, vmax); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
336 const float32x4x2_t vo1c0123 = vzipq_f32(vo1x0, vo1x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
352 float32x4_t vo1x0 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local
366 vo1x0 = vmlaq_lane_f32(vo1x0, vk00c0, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
375 vo1x0 = vmlaq_lane_f32(vo1x0, vk10c0, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
384 vo1x0 = vmlaq_lane_f32(vo1x0, vk20c0, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
407 vo1x0 = vmlaq_lane_f32(vo1x0, vk00c1, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
414 vo1x0 = vmlaq_lane_f32(vo1x0, vk10c1, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
421 vo1x0 = vmlaq_lane_f32(vo1x0, vk20c1, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
428 vo1x0 = vmlaq_lane_f32(vo1x0, vk00c2, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
435 vo1x0 = vmlaq_lane_f32(vo1x0, vk10c2, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
442 vo1x0 = vmlaq_lane_f32(vo1x0, vk20c2, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
449 vo1x0 = vmlaq_lane_f32(vo1x0, vk01c0, vget_low_f32(vi2x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
458 vo1x0 = vmlaq_lane_f32(vo1x0, vk11c0, vget_low_f32(vi3x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
467 vo1x0 = vmlaq_lane_f32(vo1x0, vk21c0, vget_low_f32(vi4x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
476 vo1x0 = vmlaq_lane_f32(vo1x0, vk01c1, vget_low_f32(vi2x1), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
485 vo1x0 = vmlaq_lane_f32(vo1x0, vk11c1, vget_low_f32(vi3x1), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
494 vo1x0 = vmlaq_lane_f32(vo1x0, vk21c1, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
517 vo1x0 = vmlaq_lane_f32(vo1x0, vk01c2, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
524 vo1x0 = vmlaq_lane_f32(vo1x0, vk11c2, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
531 vo1x0 = vmlaq_lane_f32(vo1x0, vk21c2, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
539 vo1x0 = vmlaq_lane_f32(vo1x0, vk02c0, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
544 vo1x0 = vmlaq_lane_f32(vo1x0, vk12c0, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
549 vo1x0 = vmlaq_lane_f32(vo1x0, vk22c0, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
554 vo1x0 = vmlaq_lane_f32(vo1x0, vk02c1, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
559 vo1x0 = vmlaq_lane_f32(vo1x0, vk12c1, vget_low_f32(vi3x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
564 vo1x0 = vmlaq_lane_f32(vo1x0, vk22c1, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
569 vo1x0 = vmlaq_lane_f32(vo1x0, vk02c2, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
574 vo1x0 = vmlaq_lane_f32(vo1x0, vk12c2, vget_low_f32(vi3x2), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
579 vo1x0 = vmlaq_lane_f32(vo1x0, vk22c2, vget_low_f32(vi4x2), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
583 vo1x0 = vmaxq_f32(vo1x0, vmin); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
588 vo1x0 = vminq_f32(vo1x0, vmax); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
595 const float32x4x2_t vo1c0123 = vzipq_f32(vo1x0, vo1x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
609 vst1q_lane_f32(o1c0, vo1x0, 0); o1c0 += 1; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
610 vst1q_lane_f32(o1c1, vo1x0, 1); o1c1 += 1; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
611 vst1q_lane_f32(o1c2, vo1x0, 2); o1c2 += 1; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
612 vst1q_lane_f32(o1c3, vo1x0, 3); o1c3 += 1; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()