; Checks that the loop-unroll-and-jam pass follows its default heuristic, the
; -unroll-and-jam-threshold option and the llvm.loop.* metadata attached to the
; outer loop of a two-deep loop nest.
;
; The first invocation uses the default threshold; the second lowers it to 15
; and is checked with the CHECK-LOWTHRES prefix.
; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime < %s -S | FileCheck %s
; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime -unroll-and-jam-threshold=15 < %s -S | FileCheck %s --check-prefix=CHECK-LOWTHRES

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

; Basic check that these loops are by default UnJ'd.
;
; No loop metadata is attached here. With the default threshold the outer-loop
; phi is expected to be fed by a renamed %add8.us.N and a new preheader; with
; -unroll-and-jam-threshold=15 the original phi is expected to survive.
;
; Loop nest: for each i in [0, I): sum = 0; for each j in [0, J): sum += B[j];
; then A[i] = sum. The entry block skips the nest when I or J is zero.
; CHECK-LABEL: define void @test1(
; CHECK-LOWTHRES-LABEL: define void @test1(
define void @test1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us.{{[0-9]+}}, %for.latch ], [ 0, %for.outer.preheader.new ]
; CHECK-LOWTHRES: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
entry:
  ; Guard: only enter the nest when both trip counts are non-zero.
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  ; Outer induction variable i; this is the phi the check lines inspect.
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  ; Inner induction variable j and the running sum, both reset per outer iteration.
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  ; Store the inner-loop sum to A[i] and advance the outer loop.
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; #pragma nounroll_and_jam
;
; Same loop nest as the other tests in this file (A[i] = sum of B[0..J) for each
; i in [0, I)), but the outer latch branch carries !1, which holds
; llvm.loop.unroll_and_jam.disable. The outer-loop phi is expected to be
; unchanged, i.e. no unroll and jam.
; CHECK-LABEL: define void @nounroll_and_jam(
define void @nounroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
entry:
  ; Guard: only enter the nest when both trip counts are non-zero.
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  ; Outer induction variable i; this is the phi the check line inspects.
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  ; Inner induction variable j and the running sum, both reset per outer iteration.
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  ; Store the inner-loop sum to A[i]; the back edge carries the loop metadata.
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !1

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; #pragma unroll_and_jam(8)
;
; Same loop nest as the other tests in this file (A[i] = sum of B[0..J) for each
; i in [0, I)), with the outer latch branch carrying !3, which holds
; llvm.loop.unroll_and_jam.count = 8. The outer-loop phi is expected to be fed
; by %add8.us.7 (the last of the copies) and by a new preheader.
; CHECK-LABEL: define void @unroll_and_jam_count(
define void @unroll_and_jam_count(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us.7, %for.latch ], [ 0, %for.outer.preheader.new ]
entry:
  ; Guard: only enter the nest when both trip counts are non-zero.
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  ; Outer induction variable i; this is the phi the check line inspects.
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  ; Inner induction variable j and the running sum, both reset per outer iteration.
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  ; Store the inner-loop sum to A[i]; the back edge carries the loop metadata.
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !3

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; #pragma unroll_and_jam
;
; Same loop nest as the other tests in this file (A[i] = sum of B[0..J) for each
; i in [0, I)), with the outer latch branch carrying !5, which holds
; llvm.loop.unroll_and_jam.enable. Both the default and the low-threshold run
; expect the outer-loop phi to be fed by a renamed %add8.us.N and a new
; preheader, i.e. the loop is unrolled and jammed in both configurations.
; CHECK-LABEL: define void @unroll_and_jam(
; CHECK-LOWTHRES-LABEL: define void @unroll_and_jam(
define void @unroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us.{{[0-9]+}}, %for.latch ], [ 0, %for.outer.preheader.new ]
; CHECK-LOWTHRES: %i.us = phi i32 [ %add8.us.{{[0-9]+}}, %for.latch ], [ 0, %for.outer.preheader.new ]
entry:
  ; Guard: only enter the nest when both trip counts are non-zero.
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  ; Outer induction variable i; this is the phi the check lines inspect.
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  ; Inner induction variable j and the running sum, both reset per outer iteration.
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  ; Store the inner-loop sum to A[i]; the back edge carries the loop metadata.
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !5

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; #pragma nounroll (which we take to mean disable unroll and jam too)
;
; Same loop nest as the other tests in this file (A[i] = sum of B[0..J) for each
; i in [0, I)), with the outer latch branch carrying !7, which holds
; llvm.loop.unroll.disable. The outer-loop phi is expected to be unchanged.
; CHECK-LABEL: define void @nounroll(
define void @nounroll(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
entry:
  ; Guard: only enter the nest when both trip counts are non-zero.
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  ; Outer induction variable i; this is the phi the check line inspects.
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  ; Inner induction variable j and the running sum, both reset per outer iteration.
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  ; Store the inner-loop sum to A[i]; the back edge carries the loop metadata.
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !7

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; #pragma unroll (which we take to mean disable unroll and jam)
;
; Same loop nest as the other tests in this file (A[i] = sum of B[0..J) for each
; i in [0, I)), with the outer latch branch carrying !9, which holds
; llvm.loop.unroll.enable. The outer-loop phi is expected to be unchanged by
; this pass.
; CHECK-LABEL: define void @unroll(
define void @unroll(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
entry:
  ; Guard: only enter the nest when both trip counts are non-zero.
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  ; Outer induction variable i; this is the phi the check line inspects.
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  ; Inner induction variable j and the running sum, both reset per outer iteration.
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  ; Store the inner-loop sum to A[i]; the back edge carries the loop metadata.
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !9

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; #pragma clang loop nounroll, unroll_and_jam (which we take to mean do unroll_and_jam)
;
; Same loop nest as the other tests in this file (A[i] = sum of B[0..J) for each
; i in [0, I)), with the outer latch branch carrying !11, which combines
; llvm.loop.unroll.disable with llvm.loop.unroll_and_jam.enable. The outer-loop
; phi is expected to be fed by a renamed %add8.us.N and a new preheader, i.e.
; unroll and jam still happens.
; CHECK-LABEL: define void @nounroll_plus_unroll_and_jam(
define void @nounroll_plus_unroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us.{{[0-9]+}}, %for.latch ], [ 0, %for.outer.preheader.new ]
entry:
  ; Guard: only enter the nest when both trip counts are non-zero.
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  ; Outer induction variable i; this is the phi the check line inspects.
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  ; Inner induction variable j and the running sum, both reset per outer iteration.
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  ; Store the inner-loop sum to A[i]; the back edge carries the loop metadata.
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !11

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; Loop metadata referenced by the !llvm.loop attachments on the outer-loop latch
; branches. Each odd-numbered node is a self-referential loop ID; the node(s)
; it lists carry the actual hint.

; @nounroll_and_jam: unroll and jam disabled.
!1 = distinct !{!1, !2}
!2 = distinct !{!"llvm.loop.unroll_and_jam.disable"}
; @unroll_and_jam_count: unroll and jam with an explicit count of 8.
!3 = distinct !{!3, !4}
!4 = distinct !{!"llvm.loop.unroll_and_jam.count", i32 8}
; @unroll_and_jam: unroll and jam enabled.
!5 = distinct !{!5, !6}
!6 = distinct !{!"llvm.loop.unroll_and_jam.enable"}
; @nounroll: plain unrolling disabled.
!7 = distinct !{!7, !8}
!8 = distinct !{!"llvm.loop.unroll.disable"}
; @unroll: plain unrolling enabled.
!9 = distinct !{!9, !10}
!10 = distinct !{!"llvm.loop.unroll.enable"}
; @nounroll_plus_unroll_and_jam: unroll disabled (!8) plus unroll and jam enabled (!6).
!11 = distinct !{!11, !8, !6}