/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3s2p1-minmax-neonfma-1x4.c | 70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() local 75 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() 81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() 82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() 94 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() 111 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() local 116 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() 117 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc2.c | 70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() local 75 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() 81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() 82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() 94 vo0p1 = vfmaq_lane_f32(vo0p1, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() 112 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() local 117 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() 118 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc4.c | 70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() local 75 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() 81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() 82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() 94 vo0p1 = vfmaq_lane_f32(vo0p1, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() 114 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() local 119 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() 120 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
|
D | 3x3s2p1-minmax-neon-1x4-acc2.c | 70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() local 75 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() 81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() 82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() 94 vo0p1 = vmlaq_lane_f32(vo0p1, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() 112 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() local 117 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() 118 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
|
D | 3x3s2p1-minmax-neon-1x4.c | 70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() local 75 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() 81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() 82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() 94 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() 111 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() local 116 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() 117 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
|
D | 3x3s2p1-minmax-neon-1x4-acc3.c | 70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() local 75 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() 81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() 82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() 94 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() 113 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() local 118 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() 119 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc3.c | 70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() local 75 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() 81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() 82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() 94 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() 113 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() local 118 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() 119 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
|
D | 3x3s2p1-minmax-neon-1x4-acc4.c | 70 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() local 75 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() 81 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() 82 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() 94 vo0p1 = vmlaq_lane_f32(vo0p1, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() 114 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() local 119 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() 120 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
|
D | 5x5s2p2-minmax-neon-1x4-acc4.c | 85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4() local 96 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4() 106 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4() 116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4() 117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4() 137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4() 138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4() 164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4() 165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4() 200 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc4() [all …]
|
D | 5x5s2p2-minmax-neon-1x4-acc3.c | 85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3() local 96 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3() 106 vo0p2 = vmlaq_lane_f32(vo0p2, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3() 116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3() 117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3() 137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3() 138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3() 164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3() 165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3() 199 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc3() [all …]
|
D | 5x5s2p2-minmax-neonfma-1x4-acc4.c | 85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4() local 96 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4() 106 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4() 116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4() 117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4() 137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4() 138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4() 164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4() 165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4() 200 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc4() [all …]
|
D | 5x5s2p2-minmax-neonfma-1x4-acc2.c | 85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2() local 96 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2() 106 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2() 116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2() 117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2() 137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2() 138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2() 164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2() 165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2() 198 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2() [all …]
|
D | 5x5s2p2-minmax-neonfma-1x4-acc3.c | 85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3() local 96 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3() 106 vo0p2 = vfmaq_lane_f32(vo0p2, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3() 116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3() 117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3() 137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3() 138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3() 164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3() 165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3() 199 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc3() [all …]
|
D | 5x5s2p2-minmax-neon-1x4.c | 85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4() local 96 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4() 106 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4() 116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4() 117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4() 137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4() 138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4() 164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4() 165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4() 197 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4() [all …]
|
D | 5x5s2p2-minmax-neonfma-1x4.c | 85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4() local 96 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4() 106 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4() 116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4() 117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4() 137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4() 138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4() 164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4() 165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4() 197 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4() [all …]
|
D | 5x5s2p2-minmax-neon-1x4-acc2.c | 85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2() local 96 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2() 106 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2() 116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2() 117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2() 137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2() 138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2() 164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2() 165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2() 198 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc2() [all …]
|
D | 5x5s2p2-minmax-neon-1x4-acc5.c | 85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() local 96 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() 106 vo0p3 = vmlaq_lane_f32(vo0p3, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() 116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() 117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() 137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() 138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() 164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() 165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() 201 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4_acc5() [all …]
|
D | 5x5s2p2-minmax-neonfma-1x4-acc5.c | 85 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() local 96 float32x4_t vo0p2 = vmulq_lane_f32(vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() 106 vo0p3 = vfmaq_lane_f32(vo0p3, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() 116 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() 117 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() 137 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() 138 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() 164 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() 165 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() 201 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc5() [all …]
|
D | 3x3s2p1-minmax-neonfma-2x4-acc2.c | 84 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() local 92 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() 100 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() 101 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() 121 vo0p1 = vfmaq_lane_f32(vo0p1, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() 146 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() local 153 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() 154 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
|
D | 3x3s2p1-minmax-neon-2x4.c | 84 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() local 92 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() 100 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() 101 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() 121 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() 144 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() local 151 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() 152 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
|
D | 3x3s2p1-minmax-neonfma-2x4.c | 84 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() local 92 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() 100 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() 101 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() 121 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() 144 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() local 151 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() 152 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
|
D | 3x3s2p1-minmax-neonfma-3x4.c | 97 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() local 108 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() 118 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() 119 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() 147 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() 176 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() local 185 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() 186 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
|
D | 3x3s2p1-minmax-neon-3x4.c | 97 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() local 108 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() 118 const float32x4_t vi1x7BDF = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() 119 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() 147 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() 176 const float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() local 185 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() 186 …i1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[1]))); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
|
D | 5x5s2p2-minmax-neon-2x4-acc2.c | 100 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() local 115 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 130 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 144 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 145 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 174 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 175 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 212 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 213 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 260 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() [all …]
|
D | 5x5s2p2-minmax-neonfma-2x4-acc2.c | 100 float32x4x2_t vi1x8ACE9BDF = vld2q_f32(i1); i1 += 8; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() local 115 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 130 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x8ACE9BDF.val[1], vget_low_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 144 const float32x4_t vi1x68AC = vextq_f32(vi1x0246, vi1x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 145 vi1x0246 = vi1x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 174 const float32x4_t vi1x79BD = vextq_f32(vi1x1357, vi1x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 175 vi1x1357 = vi1x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 212 const float32x4_t vi1xACEG = vextq_f32(vi1x8ACE9BDF.val[0], vi1xGIKMHJLN.val[0], 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 213 vi1x8ACE9BDF = vi1xGIKMHJLN; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 260 …i1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1x8ACE9BDF.val[0]))); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() [all …]
|