; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -S | FileCheck %s

; TODO: For now test for the `-epilogue-vectorization-minimum-VF` option. In
; the future we need to replace this with a more meaningful test of the
; epilogue vectorization cost-model.
; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -epilogue-vectorization-minimum-VF=4 -S | FileCheck %s --check-prefix=CHECK-MIN-4
; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -S | FileCheck %s --check-prefix=CHECK-MIN-D

; Every function below contains the same loop, guarded by N > 0:
;   for (i = 0; i < N; ++i) aa[i] = bb[i] + cc[i];
; The functions differ only in the attribute group attached to them (see the
; end of the file). Each group of expectations starts with a label directive so
; that it is matched against the intended function and nothing else.

target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"

; Do not vectorize epilogues for loops with minsize attribute
; CHECK-LABEL: @f1
; CHECK-NOT: vector.main.loop.iter.check
; CHECK-NOT: vec.epilog.iter.check
; CHECK-NOT: vec.epilog.ph
; CHECK-NOT: vec.epilog.vector.body
; CHECK-NOT: vec.epilog.middle.block
; CHECK: ret void

define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias %cc, i32 signext %N) #0 {
entry:
  %cmp1 = icmp sgt i32 %N, 0
  br i1 %cmp1, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %bb, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, float* %cc, i64 %indvars.iv
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd fast float %0, %1
  %arrayidx4 = getelementptr inbounds float, float* %aa, i64 %indvars.iv
  store float %add, float* %arrayidx4, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; Do not vectorize epilogues for loops with optsize attribute
; CHECK-LABEL: @f2
; CHECK-NOT: vector.main.loop.iter.check
; CHECK-NOT: vec.epilog.iter.check
; CHECK-NOT: vec.epilog.ph
; CHECK-NOT: vec.epilog.vector.body
; CHECK-NOT: vec.epilog.middle.block
; CHECK: ret void

define dso_local void @f2(float* noalias %aa, float* noalias %bb, float* noalias %cc, i32 signext %N) #1 {
entry:
  %cmp1 = icmp sgt i32 %N, 0
  br i1 %cmp1, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %bb, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, float* %cc, i64 %indvars.iv
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd fast float %0, %1
  %arrayidx4 = getelementptr inbounds float, float* %aa, i64 %indvars.iv
  store float %add, float* %arrayidx4, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; Do not vectorize the epilogue for loops with VF less than the default -epilogue-vectorization-minimum-VF of 16.
; CHECK-MIN-D-LABEL: @f3
; CHECK-MIN-D-NOT: vector.main.loop.iter.check
; CHECK-MIN-D-NOT: vec.epilog.iter.check
; CHECK-MIN-D-NOT: vec.epilog.ph
; CHECK-MIN-D-NOT: vec.epilog.vector.body
; CHECK-MIN-D-NOT: vec.epilog.middle.block
; CHECK-MIN-D: ret void

; Specify a smaller minimum VF (via `-epilogue-vectorization-minimum-VF=4`) and
; make sure the epilogue gets vectorized in that case.
; CHECK-MIN-4-LABEL: @f3
; CHECK-MIN-4: vector.main.loop.iter.check
; CHECK-MIN-4: vec.epilog.iter.check
; CHECK-MIN-4: vec.epilog.ph
; CHECK-MIN-4: vec.epilog.vector.body
; CHECK-MIN-4: vec.epilog.middle.block
; CHECK-MIN-4: ret void

define dso_local void @f3(float* noalias %aa, float* noalias %bb, float* noalias %cc, i32 signext %N) {
entry:
  %cmp1 = icmp sgt i32 %N, 0
  br i1 %cmp1, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %bb, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, float* %cc, i64 %indvars.iv
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd fast float %0, %1
  %arrayidx4 = getelementptr inbounds float, float* %aa, i64 %indvars.iv
  store float %add, float* %arrayidx4, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; #0 marks @f1 as minsize and #1 marks @f2 as optsize; @f3 carries no
; attribute group.
attributes #0 = { minsize }
attributes #1 = { optsize }