/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3s2p1-minmax-neonfma-3x4.c | 101 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() local 110 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() 126 const float32x4_t vi5x7BDF = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() 127 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() 149 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() 180 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() local 193 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() 194 …i5x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
|
D | 3x3s2p1-minmax-neon-3x4.c | 101 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() local 110 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() 126 const float32x4_t vi5x7BDF = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() 127 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() 149 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() 180 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() local 193 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() 194 …i5x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
|
D | 5x5s2p2-minmax-neon-2x4-acc2.c | 104 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() local 122 vo1p1 = vmlaq_lane_f32(vo1p1, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 137 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 152 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 153 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 183 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 220 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 221 vi5x8ACE9BDF = vi5xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 264 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() [all …]
|
D | 5x5s2p2-minmax-neonfma-2x4-acc2.c | 104 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() local 122 vo1p1 = vfmaq_lane_f32(vo1p1, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 137 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 152 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 153 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 183 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 220 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 221 vi5x8ACE9BDF = vi5xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 264 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() [all …]
|
D | 5x5s2p2-minmax-neon-2x4.c | 104 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() local 122 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() 137 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() 152 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() 153 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() 182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() 183 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() 220 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() 221 vi5x8ACE9BDF = vi5xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() 262 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() [all …]
|
D | 5x5s2p2-minmax-neon-2x4-acc3.c | 104 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() local 122 vo1p2 = vmlaq_lane_f32(vo1p2, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() 137 vo1p1 = vmlaq_lane_f32(vo1p1, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() 152 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() 153 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() 182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() 183 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() 220 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() 221 vi5x8ACE9BDF = vi5xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() 266 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() [all …]
|
D | 5x5s2p2-minmax-neonfma-2x4-acc3.c | 104 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() local 122 vo1p2 = vfmaq_lane_f32(vo1p2, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() 137 vo1p1 = vfmaq_lane_f32(vo1p1, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() 152 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() 153 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() 182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() 183 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() 220 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() 221 vi5x8ACE9BDF = vi5xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() 266 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() [all …]
|
D | 5x5s2p2-minmax-neonfma-2x4.c | 104 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() local 122 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() 137 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() 152 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() 153 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() 182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() 183 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() 220 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() 221 vi5x8ACE9BDF = vi5xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() 262 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() [all …]
|
D | 5x5s2p2-minmax-neonfma-3x4-acc2.c | 118 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() local 135 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 142 vo1p1 = vfmaq_lane_f32(vo1p1, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 155 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 162 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 179 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 180 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 218 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 219 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 267 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() [all …]
|
D | 5x5s2p2-minmax-neon-3x4.c | 118 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() local 135 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 142 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 155 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 162 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 179 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 180 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 218 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 219 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 267 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() [all …]
|
D | 5x5s2p2-minmax-neonfma-3x4.c | 118 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() local 135 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 142 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 155 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 162 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 179 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 180 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 218 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 219 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 267 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() [all …]
|
D | 5x5s2p2-minmax-neon-3x4-acc2.c | 118 float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() local 135 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 142 vo1p1 = vmlaq_lane_f32(vo1p1, vi5x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 155 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 162 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 179 const float32x4_t vi5x68AC = vextq_f32(vi5x0246, vi5x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 180 vi5x0246 = vi5x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 218 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 219 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 267 const float32x4_t vi5xACEG = vextq_f32(vi5x8ACE9BDF.val[0], vi5xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() [all …]
|
D | 3x3s2p1-minmax-neonfma-4x4.c | 114 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() local 126 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() 144 const float32x4_t vi5x7BDF = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() 145 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() 175 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() 212 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() local 227 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() 228 …i5x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
|
D | 3x3s2p1-minmax-neon-4x4.c | 114 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); i5 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() local 126 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() 144 const float32x4_t vi5x7BDF = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() 145 vi5x1357 = vi5x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() 175 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() 212 const float32x4x2_t vi5x8ACE9BDF = vld2q_f32(i5); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() local 227 …i5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() 228 …i5x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi5x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
|