/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3s2p1-minmax-neonfma-4x4.c | 116 const float32x4x2_t vi7x8ACE9BDF = vld2q_f32(i7); i7 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() local 127 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() 148 const float32x4_t vi7x7BDF = vextq_f32(vi7x1357, vi7x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() 149 vi7x1357 = vi7x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() 176 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() 214 const float32x4x2_t vi7x8ACE9BDF = vld2q_f32(i7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() local 231 …i7x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi7x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() 232 …i7x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi7x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
|
D | 3x3s2p1-minmax-neon-4x4.c | 116 const float32x4x2_t vi7x8ACE9BDF = vld2q_f32(i7); i7 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() local 127 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() 148 const float32x4_t vi7x7BDF = vextq_f32(vi7x1357, vi7x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() 149 vi7x1357 = vi7x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() 176 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() 214 const float32x4x2_t vi7x8ACE9BDF = vld2q_f32(i7); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() local 231 …i7x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi7x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() 232 …i7x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi7x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
|
D | 5x5s2p2-minmax-neonfma-3x4-acc2.c | 120 float32x4x2_t vi7x8ACE9BDF = vld2q_f32(i7); i7 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() local 143 vo2p1 = vfmaq_lane_f32(vo2p1, vi7x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 163 vo2p0 = vfmaq_lane_f32(vo2p0, vi7x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 183 const float32x4_t vi7x68AC = vextq_f32(vi7x0246, vi7x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 184 vi7x0246 = vi7x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 222 const float32x4_t vi7x79BD = vextq_f32(vi7x1357, vi7x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 223 vi7x1357 = vi7x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 271 const float32x4_t vi7xACEG = vextq_f32(vi7x8ACE9BDF.val[0], vi7xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 272 vi7x8ACE9BDF = vi7xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 327 …i7x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi7x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() [all …]
|
D | 5x5s2p2-minmax-neon-3x4.c | 120 float32x4x2_t vi7x8ACE9BDF = vld2q_f32(i7); i7 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() local 143 vo2p0 = vmlaq_lane_f32(vo2p0, vi7x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 163 vo2p0 = vmlaq_lane_f32(vo2p0, vi7x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 183 const float32x4_t vi7x68AC = vextq_f32(vi7x0246, vi7x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 184 vi7x0246 = vi7x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 222 const float32x4_t vi7x79BD = vextq_f32(vi7x1357, vi7x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 223 vi7x1357 = vi7x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 271 const float32x4_t vi7xACEG = vextq_f32(vi7x8ACE9BDF.val[0], vi7xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 272 vi7x8ACE9BDF = vi7xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 324 …i7x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi7x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() [all …]
|
D | 5x5s2p2-minmax-neonfma-3x4.c | 120 float32x4x2_t vi7x8ACE9BDF = vld2q_f32(i7); i7 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() local 143 vo2p0 = vfmaq_lane_f32(vo2p0, vi7x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 163 vo2p0 = vfmaq_lane_f32(vo2p0, vi7x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 183 const float32x4_t vi7x68AC = vextq_f32(vi7x0246, vi7x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 184 vi7x0246 = vi7x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 222 const float32x4_t vi7x79BD = vextq_f32(vi7x1357, vi7x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 223 vi7x1357 = vi7x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 271 const float32x4_t vi7xACEG = vextq_f32(vi7x8ACE9BDF.val[0], vi7xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 272 vi7x8ACE9BDF = vi7xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 324 …i7x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi7x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() [all …]
|
D | 5x5s2p2-minmax-neon-3x4-acc2.c | 120 float32x4x2_t vi7x8ACE9BDF = vld2q_f32(i7); i7 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() local 143 vo2p1 = vmlaq_lane_f32(vo2p1, vi7x8ACE9BDF.val[0], vget_high_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 163 vo2p0 = vmlaq_lane_f32(vo2p0, vi7x8ACE9BDF.val[1], vget_high_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 183 const float32x4_t vi7x68AC = vextq_f32(vi7x0246, vi7x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 184 vi7x0246 = vi7x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 222 const float32x4_t vi7x79BD = vextq_f32(vi7x1357, vi7x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 223 vi7x1357 = vi7x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 271 const float32x4_t vi7xACEG = vextq_f32(vi7x8ACE9BDF.val[0], vi7xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 272 vi7x8ACE9BDF = vi7xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 327 …i7x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi7x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() [all …]
|