Home
last modified time | relevance | path

Searched refs:vi1x8ACE9BDF (Results 1 – 25 of 35) sorted by relevance

12

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D3x3s2p1-minmax-neonfma-1x4.c70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() local
75 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
94 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
111 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() local
116 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
117 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
D3x3s2p1-minmax-neonfma-1x4-acc2.c70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() local
75 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
94 vo0p1 = vfmaq_lane_f32(vo0p1, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
112 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() local
117 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
118 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
D3x3s2p1-minmax-neonfma-1x4-acc4.c70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() local
75 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
94 vo0p1 = vfmaq_lane_f32(vo0p1, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
114 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() local
119 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
120 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
D3x3s2p1-minmax-neon-1x4-acc2.c70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() local
75 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
94 vo0p1 = vmlaq_lane_f32(vo0p1, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
112 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() local
117 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
118 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
D3x3s2p1-minmax-neon-1x4.c70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() local
75 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
94 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
111 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() local
116 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
117 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
D3x3s2p1-minmax-neon-1x4-acc3.c70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() local
75 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
94 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
113 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() local
118 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
119 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
D3x3s2p1-minmax-neonfma-1x4-acc3.c70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() local
75 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
94 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
113 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() local
118 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
119 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
D3x3s2p1-minmax-neon-1x4-acc4.c70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() local
75 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
94 vo0p1 = vmlaq_lane_f32(vo0p1, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
114 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() local
119 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
120 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
D5x5s2p2-minmax-neon-1x4-acc4.c85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4() local
96 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4()
106 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4()
116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4()
117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4()
137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4()
138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4()
164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4()
165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4()
200 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4()
[all …]
D5x5s2p2-minmax-neon-1x4-acc3.c85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3() local
96 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3()
106 vo0p2 = vmlaq_lane_f32(vo0p2, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3()
116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3()
117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3()
137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3()
138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3()
164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3()
165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3()
199 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3()
[all …]
D5x5s2p2-minmax-neonfma-1x4-acc4.c85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4() local
96 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4()
106 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4()
116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4()
117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4()
137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4()
138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4()
164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4()
165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4()
200 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4()
[all …]
D5x5s2p2-minmax-neonfma-1x4-acc2.c85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2() local
96 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2()
106 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2()
116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2()
117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2()
137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2()
138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2()
164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2()
165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2()
198 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2()
[all …]
D5x5s2p2-minmax-neonfma-1x4-acc3.c85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3() local
96 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3()
106 vo0p2 = vfmaq_lane_f32(vo0p2, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3()
116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3()
117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3()
137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3()
138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3()
164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3()
165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3()
199 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3()
[all …]
D5x5s2p2-minmax-neon-1x4.c85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4() local
96 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4()
106 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4()
116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4()
117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4()
137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4()
138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4()
164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4()
165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4()
197 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4()
[all …]
D5x5s2p2-minmax-neonfma-1x4.c85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4() local
96 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4()
106 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4()
116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4()
117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4()
137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4()
138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4()
164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4()
165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4()
197 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4()
[all …]
D5x5s2p2-minmax-neon-1x4-acc2.c85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2() local
96 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2()
106 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2()
116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2()
117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2()
137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2()
138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2()
164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2()
165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2()
198 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2()
[all …]
D5x5s2p2-minmax-neon-1x4-acc5.c85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() local
96 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5()
106 vo0p3 = vmlaq_lane_f32(vo0p3, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5()
116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5()
117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5()
137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5()
138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5()
164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5()
165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5()
201 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5()
[all …]
D5x5s2p2-minmax-neonfma-1x4-acc5.c85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() local
96 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5()
106 vo0p3 = vfmaq_lane_f32(vo0p3, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5()
116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5()
117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5()
137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5()
138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5()
164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5()
165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5()
201 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5()
[all …]
D3x3s2p1-minmax-neonfma-2x4-acc2.c84 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() local
92 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
100 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
101 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
121 vo0p1 = vfmaq_lane_f32(vo0p1, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
146 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() local
153 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
154 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
D3x3s2p1-minmax-neon-2x4.c84 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() local
92 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
100 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
101 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
121 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
144 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() local
151 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
152 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
D3x3s2p1-minmax-neonfma-2x4.c84 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() local
92 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
100 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
101 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
121 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
144 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() local
151 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
152 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
D3x3s2p1-minmax-neonfma-3x4.c97 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() local
108 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
118 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
119 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
147 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
176 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() local
185 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
186 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
D3x3s2p1-minmax-neon-3x4.c97 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() local
108 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
118 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
119 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
147 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
176 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() local
185 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
186 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
D5x5s2p2-minmax-neon-2x4-acc2.c100 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() local
115 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
130 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
144 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
145 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
174 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
175 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
212 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
213 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
260 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
[all …]
D5x5s2p2-minmax-neonfma-2x4-acc2.c100 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() local
115 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
130 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
144 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
145 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
174 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
175 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
212 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
213 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
260 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
[all …]

12