; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -S | FileCheck %s

; TODO: For now this only tests the `-epilogue-vectorization-minimum-VF` option.
; In the future we need to replace this with a more meaningful test of the
; epilogue vectorization cost-model.
; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -epilogue-vectorization-minimum-VF=4 -S | FileCheck %s --check-prefix=CHECK-MIN-4
; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -S | FileCheck %s --check-prefix=CHECK-MIN-D

; Module-level target description: little-endian data layout ("e") and a
; 64-bit little-endian PowerPC Linux triple.
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"

; Do not vectorize epilogues for loops with the minsize attribute.
; @f1 uses attribute group #0 (minsize). The label directive below pins the
; negative checks to the region between @f1 and its return.
; CHECK-LABEL: @f1
; CHECK-NOT: vector.main.loop.iter.check
; CHECK-NOT: vec.epilog.iter.check
; CHECK-NOT: vec.epilog.ph
; CHECK-NOT: vec.epilog.vector.body
; CHECK-NOT: vec.epilog.middle.block
; CHECK: ret void

; Element-wise float add: aa[i] = bb[i] + cc[i] for i in [0, N).
; The loop is skipped entirely when N <= 0.
define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias %cc, i32 signext %N) #0 {
entry:
  ; Guard: only enter the loop for a positive trip count.
  %cmp1 = icmp sgt i32 %N, 0
  br i1 %cmp1, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  ; Widen N to i64 so it can be compared against the 64-bit induction variable.
  %wide.trip.count = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %bb, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, float* %cc, i64 %indvars.iv
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd fast float %0, %1
  %arrayidx4 = getelementptr inbounds float, float* %aa, i64 %indvars.iv
  store float %add, float* %arrayidx4, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Keep iterating until the incremented index reaches the trip count.
  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; Do not vectorize epilogues for loops with the optsize attribute.
; @f2 uses attribute group #1 (optsize). The label directive below pins the
; negative checks to the region between @f2 and its return.
; CHECK-LABEL: @f2
; CHECK-NOT: vector.main.loop.iter.check
; CHECK-NOT: vec.epilog.iter.check
; CHECK-NOT: vec.epilog.ph
; CHECK-NOT: vec.epilog.vector.body
; CHECK-NOT: vec.epilog.middle.block
; CHECK: ret void

; Element-wise float add: aa[i] = bb[i] + cc[i] for i in [0, N).
; The loop is skipped entirely when N <= 0.
define dso_local void @f2(float* noalias %aa, float* noalias %bb, float* noalias %cc, i32 signext %N) #1 {
entry:
  ; Guard: only enter the loop for a positive trip count.
  %cmp1 = icmp sgt i32 %N, 0
  br i1 %cmp1, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  ; Widen N to i64 so it can be compared against the 64-bit induction variable.
  %wide.trip.count = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %bb, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, float* %cc, i64 %indvars.iv
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd fast float %0, %1
  %arrayidx4 = getelementptr inbounds float, float* %aa, i64 %indvars.iv
  store float %add, float* %arrayidx4, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Keep iterating until the incremented index reaches the trip count.
  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; Do not vectorize the epilogue for loops with VF less than the default
; -epilogue-vectorization-minimum-VF of 16.
; CHECK-MIN-D-LABEL: @f3
; CHECK-MIN-D-NOT: vector.main.loop.iter.check
; CHECK-MIN-D-NOT: vec.epilog.iter.check
; CHECK-MIN-D-NOT: vec.epilog.ph
; CHECK-MIN-D-NOT: vec.epilog.vector.body
; CHECK-MIN-D-NOT: vec.epilog.middle.block
; CHECK-MIN-D: ret void

; Specify a smaller minimum VF (via `-epilogue-vectorization-minimum-VF=4`) and
; make sure the epilogue gets vectorized in that case.
; CHECK-MIN-4-LABEL: @f3
; CHECK-MIN-4: vector.main.loop.iter.check
; CHECK-MIN-4: vec.epilog.iter.check
; CHECK-MIN-4: vec.epilog.ph
; CHECK-MIN-4: vec.epilog.vector.body
; CHECK-MIN-4: vec.epilog.middle.block
; CHECK-MIN-4: ret void

; Element-wise float add: aa[i] = bb[i] + cc[i] for i in [0, N).
; The loop is skipped entirely when N <= 0. Unlike @f1 and @f2, this function
; has no attribute group attached.
define dso_local void @f3(float* noalias %aa, float* noalias %bb, float* noalias %cc, i32 signext %N) {
entry:
  ; Guard: only enter the loop for a positive trip count.
  %cmp1 = icmp sgt i32 %N, 0
  br i1 %cmp1, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  ; Widen N to i64 so it can be compared against the 64-bit induction variable.
  %wide.trip.count = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %bb, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, float* %cc, i64 %indvars.iv
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd fast float %0, %1
  %arrayidx4 = getelementptr inbounds float, float* %aa, i64 %indvars.iv
  store float %add, float* %arrayidx4, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Keep iterating until the incremented index reaches the trip count.
  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; Attribute groups referenced by the functions in this file:
; #0 is attached to @f1 and #1 is attached to @f2.
attributes #0 = { minsize }
attributes #1 = { optsize }