; RUN: opt -da-disable-delinearization-checks -basic-aa -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s
; RUN: opt -da-disable-delinearization-checks -aa-pipeline=basic-aa -passes='loop-unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s
;
; Exercises loop-unroll-and-jam on loop nests whose stores address a
; multi-dimensional array with constant offsets in more than one subscript.
; Both invocations above request a count of 4 and pass
; -da-disable-delinearization-checks; they differ only in how the pass and the
; alias analysis are named on the command line.

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

; CHECK-LABEL: sub_sub_less
; CHECK: %j = phi
; CHECK-NOT: %j.1 = phi
;
; Two-deep nest over (%i, %j), both running from 0 up to %N. Each inner
; iteration stores 1 to A[i][j] and the running sum of B[j] * i to
; A[i+1][j-1]. The FileCheck directives above expect the inner induction phi
; to appear exactly once, i.e. no jammed copies %j.1.. are created.
define void @sub_sub_less([100 x i32]* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {
entry:
  ; Bypass the whole nest when %N is not positive.
  %cmp = icmp sgt i32 %N, 0
  br i1 %cmp, label %for.outer, label %cleanup

for.outer:
  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
  br label %for.inner

for.inner:
  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ]
  ; sum += B[j] * i
  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
  %0 = load i32, i32* %arrayidx5, align 4
  %mul = mul nsw i32 %0, %i
  %add = add nsw i32 %mul, %sum
  %add6 = add nuw nsw i32 %j, 1
  ; A[i][j] = 1
  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* %A, i32 %i, i32 %j
  store i32 1, i32* %arrayidx, align 4
  %add72 = add nuw nsw i32 %i, 1
  ; j - 1. Only nsw is valid here: interpreted as an unsigned add,
  ; %j + 0xFFFFFFFF wraps for every %j >= 1, so a nuw flag would turn the
  ; index into poison and the store below into undefined behaviour.
  %add73 = add nsw i32 %j, -1
  ; A[i+1][j-1] = sum
  %arrayidx8 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i32 %add72, i32 %add73
  store i32 %add, i32* %arrayidx8, align 4
  %exitcond = icmp eq i32 %add6, %N
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add7 = add nuw nsw i32 %i, 1
  %exitcond29 = icmp eq i32 %add7, %N
  br i1 %exitcond29, label %cleanup, label %for.outer

cleanup:
  ret void
}


; CHECK-LABEL: sub_sub_eq
; CHECK: %j = phi
; CHECK: %j.1 = phi
; CHECK: %j.2 = phi
; CHECK: %j.3 = phi
;
; Same nest shape as @sub_sub_less, but the second store targets A[i+1][j]
; (inner offset 0). The FileCheck directives above expect four inner induction
; phis, %j and %j.1 through %j.3, matching the requested count of 4.
define void @sub_sub_eq([100 x i32]* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {
entry:
  ; Bypass the whole nest when %N is not positive.
  %cmp = icmp sgt i32 %N, 0
  br i1 %cmp, label %for.outer, label %cleanup

for.outer:
  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
  br label %for.inner

for.inner:
  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ]
  ; sum += B[j] * i
  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
  %0 = load i32, i32* %arrayidx5, align 4
  %mul = mul nsw i32 %0, %i
  %add = add nsw i32 %mul, %sum
  %add6 = add nuw nsw i32 %j, 1
  ; A[i][j] = 1
  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* %A, i32 %i, i32 %j
  store i32 1, i32* %arrayidx, align 4
  %add72 = add nuw nsw i32 %i, 1
  ; Explicit "+ 0" keeps this function structurally parallel to its siblings.
  %add73 = add nuw nsw i32 %j, 0
  ; A[i+1][j] = sum
  %arrayidx8 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i32 %add72, i32 %add73
  store i32 %add, i32* %arrayidx8, align 4
  %exitcond = icmp eq i32 %add6, %N
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add7 = add nuw nsw i32 %i, 1
  %exitcond29 = icmp eq i32 %add7, %N
  br i1 %exitcond29, label %cleanup, label %for.outer

cleanup:
  ret void
}


; CHECK-LABEL: sub_sub_more
; CHECK: %j = phi
; CHECK: %j.1 = phi
; CHECK: %j.2 = phi
; CHECK: %j.3 = phi
;
; Same nest shape as @sub_sub_less, but the second store targets A[i+1][j+1]
; (inner offset +1). The FileCheck directives above expect four inner
; induction phis, %j and %j.1 through %j.3, matching the requested count of 4.
define void @sub_sub_more([100 x i32]* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {
entry:
  ; Bypass the whole nest when %N is not positive.
  %cmp = icmp sgt i32 %N, 0
  br i1 %cmp, label %for.outer, label %cleanup

for.outer:
  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
  br label %for.inner

for.inner:
  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ]
  ; sum += B[j] * i
  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
  %0 = load i32, i32* %arrayidx5, align 4
  %mul = mul nsw i32 %0, %i
  %add = add nsw i32 %mul, %sum
  %add6 = add nuw nsw i32 %j, 1
  ; A[i][j] = 1
  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* %A, i32 %i, i32 %j
  store i32 1, i32* %arrayidx, align 4
  %add72 = add nuw nsw i32 %i, 1
  %add73 = add nuw nsw i32 %j, 1
  ; A[i+1][j+1] = sum
  %arrayidx8 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i32 %add72, i32 %add73
  store i32 %add, i32* %arrayidx8, align 4
  %exitcond = icmp eq i32 %add6, %N
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add7 = add nuw nsw i32 %i, 1
  %exitcond29 = icmp eq i32 %add7, %N
  br i1 %exitcond29, label %cleanup, label %for.outer

cleanup:
  ret void
}

; CHECK-LABEL: sub_sub_less_3d
; CHECK: %k = phi
; CHECK-NOT: %k.1 = phi

; for (long i = 0; i < 100; ++i)
;   for (long j = 0; j < 100; ++j)
;     for (long k = 0; k < 100; ++k) {
;       A[i][j][k] = 0;
;       A[i+1][j][k-1] = 0;
;     }
;
; Three-deep variant of the "less" case: the outermost subscript is offset by
; +1 and the innermost by -1. The FileCheck directives above expect the
; innermost induction phi %k to appear exactly once. The j-loop latch carries
; loop metadata !1, which (see the end of this file) holds
; llvm.loop.unroll_and_jam.disable.
; NOTE(review): presumably that metadata is there so the test exercises the
; outer i-loop rather than the j/k pair -- confirm against the pass.

define void @sub_sub_less_3d([100 x [100 x i32]]* noalias %A) {
entry:
  br label %for.i

for.i:
  %i = phi i32 [ 0, %entry ], [ %inc.i, %for.i.latch ]
  br label %for.j

for.j:
  %j = phi i32 [ 0, %for.i ], [ %inc.j, %for.j.latch ]
  br label %for.k

for.k:
  %k = phi i32 [ 0, %for.j ], [ %inc.k, %for.k ]
  ; A[i][j][k] = 0
  %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %A, i32 %i, i32 %j, i32 %k
  store i32 0, i32* %arrayidx, align 4
  %add.i = add nsw i32 %i, 1
  %sub.k = add nsw i32 %k, -1
  ; A[i+1][j][k-1] = 0
  %arrayidx2 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %A, i32 %add.i, i32 %j, i32 %sub.k
  store i32 0, i32* %arrayidx2, align 4
  %inc.k = add nsw i32 %k, 1
  %cmp.k = icmp slt i32 %inc.k, 100
  br i1 %cmp.k, label %for.k, label %for.j.latch

for.j.latch:
  %inc.j = add nsw i32 %j, 1
  %cmp.j = icmp slt i32 %inc.j, 100
  ; Back edge of the j-loop; tagged with the unroll_and_jam.disable metadata.
  br i1 %cmp.j, label %for.j, label %for.i.latch, !llvm.loop !1

for.i.latch:
  %inc.i = add nsw i32 %i, 1
  %cmp.i = icmp slt i32 %inc.i, 100
  br i1 %cmp.i, label %for.i, label %for.end

for.end:
  ret void
}

; CHECK-LABEL: sub_sub_outer_scalar
; CHECK: %k = phi
; CHECK-NOT: %k.1 = phi
;
; Three-deep nest over (%i, %j, %k), each counting from 0 to 99. The body
; copies A[j][k] into A[j-1][k]; the outermost induction variable %i does not
; appear in either address. Unlike the other functions in this file, %A is not
; marked noalias. The FileCheck directives above expect the innermost
; induction phi %k to appear exactly once.

define void @sub_sub_outer_scalar([100 x i32]* %A) {
entry:
  br label %for.i

for.i:
  %i = phi i64 [ 0, %entry ], [ %inc.i, %for.i.latch ]
  br label %for.j

for.j:
  %j = phi i64 [ 0, %for.i ], [ %inc.j, %for.j.latch ]
  br label %for.k

for.k:
  %k = phi i64 [ 0, %for.j ], [ %inc.k, %for.k ]
  ; tmp = A[j][k], addressed as row pointer &A[j] followed by element k.
  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %j
  %arrayidx7 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i64 0, i64 %k
  %0 = load i32, i32* %arrayidx7, align 4
  %sub.j = sub nsw i64 %j, 1
  ; A[j-1][k] = tmp
  %arrayidx8 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %sub.j
  %arrayidx9 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx8, i64 0, i64 %k
  store i32 %0, i32* %arrayidx9, align 4
  %inc.k = add nsw i64 %k, 1
  %cmp.k = icmp slt i64 %inc.k, 100
  br i1 %cmp.k, label %for.k, label %for.j.latch

for.j.latch:
  %inc.j = add nsw i64 %j, 1
  %cmp.j = icmp slt i64 %inc.j, 100
  br i1 %cmp.j, label %for.j, label %for.i.latch

for.i.latch:
  %inc.i = add nsw i64 %i, 1
  %cmp.i = icmp slt i64 %inc.i, 100
  br i1 %cmp.i, label %for.i, label %for.end

for.end:
  ret void
}

; Loop metadata referenced from the j-loop latch of @sub_sub_less_3d.
!1 = distinct !{!1, !2}
!2 = !{!"llvm.loop.unroll_and_jam.disable"}
