Home
last modified time | relevance | path

Searched refs:vi5x8ACE9BDF (Results 1 – 14 of 14) sorted by relevance

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D3x3s2p1-minmax-neonfma-3x4.c101 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() local
110 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
126 const float32x4_t vi5x7BDF = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
127 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
149 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
180 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() local
193 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
194 …i5x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
D3x3s2p1-minmax-neon-3x4.c101 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() local
110 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
126 const float32x4_t vi5x7BDF = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
127 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
149 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
180 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() local
193 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
194 …i5x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
D5x5s2p2-minmax-neon-2x4-acc2.c104 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() local
122 vo1p1 = vmlaq_lane_f32(vo1p1, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
137 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
152 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
153 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
183 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
220 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
221 vi5x8ACE9BDF = vi5xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
264 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
[all …]
D5x5s2p2-minmax-neonfma-2x4-acc2.c104 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() local
122 vo1p1 = vfmaq_lane_f32(vo1p1, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
137 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
152 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
153 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
183 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
220 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
221 vi5x8ACE9BDF = vi5xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
264 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
[all …]
D5x5s2p2-minmax-neon-2x4.c104 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() local
122 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
137 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
152 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
153 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
183 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
220 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
221 vi5x8ACE9BDF = vi5xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
262 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
[all …]
D5x5s2p2-minmax-neon-2x4-acc3.c104 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() local
122 vo1p2 = vmlaq_lane_f32(vo1p2, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
137 vo1p1 = vmlaq_lane_f32(vo1p1, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
152 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
153 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
183 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
220 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
221 vi5x8ACE9BDF = vi5xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
266 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
[all …]
D5x5s2p2-minmax-neonfma-2x4-acc3.c104 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() local
122 vo1p2 = vfmaq_lane_f32(vo1p2, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
137 vo1p1 = vfmaq_lane_f32(vo1p1, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
152 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
153 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
183 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
220 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
221 vi5x8ACE9BDF = vi5xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
266 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
[all …]
D5x5s2p2-minmax-neonfma-2x4.c104 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() local
122 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
137 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
152 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
153 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
183 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
220 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
221 vi5x8ACE9BDF = vi5xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
262 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
[all …]
D5x5s2p2-minmax-neonfma-3x4-acc2.c118 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() local
135 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
142 vo1p1 = vfmaq_lane_f32(vo1p1, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
155 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
162 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
179 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
180 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
218 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
219 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
267 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
[all …]
D5x5s2p2-minmax-neon-3x4.c118 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() local
135 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
142 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
155 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
162 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
179 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
180 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
218 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
219 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
267 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
[all …]
D5x5s2p2-minmax-neonfma-3x4.c118 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() local
135 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
142 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
155 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
162 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
179 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
180 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
218 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
219 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
267 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
[all …]
D5x5s2p2-minmax-neon-3x4-acc2.c118 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() local
135 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
142 vo1p1 = vmlaq_lane_f32(vo1p1, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
155 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
162 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
179 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
180 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
218 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
219 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
267 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
[all …]
D3x3s2p1-minmax-neonfma-4x4.c114 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() local
126 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
144 const float32x4_t vi5x7BDF = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
145 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
175 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
212 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() local
227 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
228 …i5x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
D3x3s2p1-minmax-neon-4x4.c114 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() local
126 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
144 const float32x4_t vi5x7BDF = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
145 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
175 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
212 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() local
227 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
228 …i5x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()