; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
;
; The second invocation runs loop unrolling twice to verify that loop unrolling
; metadata is properly removed and further unrolling is disabled after the pass
; is run once.  Both invocations set the pragma unroll threshold to 1024.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; loop4 contains a small loop which should be completely unrolled by
; the default unrolling heuristics.  It serves as a control for the
; unroll(disable) pragma test loop4_with_disable.
;
; The checks require that no conditional branch remains in the function.
;
; CHECK-LABEL: @loop4(
; CHECK-NOT: br i1
define void @loop4(i32* nocapture %a) {
entry:
  br label %for.body

; Loop body: a[i] += 1, with i counting up from 0.  The latch branch carries
; no loop metadata.
for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Exit once the incremented index reaches the constant 4.
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(disable)
;
; Same loop as loop4, but the latch branch carries the unroll.disable hint.
; The checks require exactly one store followed by a conditional branch,
; i.e. the loop must be left as it is.
;
; CHECK-LABEL: @loop4_with_disable(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop4_with_disable(i32* nocapture %a) {
entry:
  br label %for.body

; Loop body: a[i] += 1, with i counting up from 0.
for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Exit once the incremented index reaches the constant 4.
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1

for.end:                                          ; preds = %for.body
  ret void
}
; !1 is the loop ID attached to the latch branch above; its first operand
; refers to itself and its second operand is the unroll.disable hint.
!1 = !{!1, !2}
!2 = !{!"llvm.loop.unroll.disable"}

; loop64 has a high enough count that it should *not* be unrolled by
; the default unrolling heuristic.  It serves as the control for the
; loop64_with_* pragma tests below.
;
; The checks require exactly one store followed by a conditional branch.
;
; CHECK-LABEL: @loop64(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64(i32* nocapture %a) {
entry:
  br label %for.body

; Loop body: a[i] += 1, with i counting up from 0.  The latch branch carries
; no loop metadata.
for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Exit once the incremented index reaches the constant 64.
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(full)
; Loop should be fully unrolled.
;
; Same loop as loop64, but the latch branch carries the unroll.full hint.
; The checks require that no conditional branch remains in the function.
;
; CHECK-LABEL: @loop64_with_full(
; CHECK-NOT: br i1
define void @loop64_with_full(i32* nocapture %a) {
entry:
  br label %for.body

; Loop body: a[i] += 1, with i counting up from 0.
for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Exit once the incremented index reaches the constant 64.
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3

for.end:                                          ; preds = %for.body
  ret void
}
; !3 is the loop ID attached to the latch branch above; its first operand
; refers to itself and its second operand is the unroll.full hint.
!3 = !{!3, !4}
!4 = !{!"llvm.loop.unroll.full"}

; #pragma clang loop unroll_count(4)
; Loop should be unrolled 4 times.
;
; Same loop as loop64, but the latch branch carries an unroll.count hint of 4.
; The checks require exactly four stores followed by a conditional branch.
;
; CHECK-LABEL: @loop64_with_count4(
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64_with_count4(i32* nocapture %a) {
entry:
  br label %for.body

; Loop body: a[i] += 1, with i counting up from 0.
for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Exit once the incremented index reaches the constant 64.
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5

for.end:                                          ; preds = %for.body
  ret void
}
; !5 is the loop ID attached to the latch branch above; its first operand
; refers to itself and its second operand is the unroll.count hint (count 4).
!5 = !{!5, !6}
!6 = !{!"llvm.loop.unroll.count", i32 4}

; #pragma clang loop unroll(full)
; Full unrolling is requested, but loop has a runtime trip count so
; no unrolling should occur.
;
; The checks require exactly one store in the whole function.
;
; CHECK-LABEL: @runtime_loop_with_full(
; CHECK: store i32
; CHECK-NOT: store i32
define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: skip the loop entirely unless b > 0.
  ; NOTE(review): this guard branch also carries the loop ID although entry is
  ; not a predecessor-in-loop block; presumably the unroller ignores it there.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8

; Loop body: a[i] += 1, with i counting up from 0.
for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Exit when the truncated next index equals the argument b, so the trip
  ; count is not a compile-time constant.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; !8 is the loop ID for this test; its first operand refers to itself and its
; second operand is node !4 (defined elsewhere in this file).
!8 = !{!8, !4}

; #pragma clang loop unroll_count(4)
; Loop has a runtime trip count.  Runtime unrolling should occur and loop
; should be duplicated (original and 4x unrolled).
;
; The checks expect a block for.body.prol containing exactly one store before
; its conditional branch, followed by the block for.body containing exactly
; four stores before its conditional branch.  The for.body check includes the
; trailing colon so that it matches the block label itself rather than a
; branch operand such as %for.body.prol.
;
; CHECK-LABEL: @runtime_loop_with_count4(
; CHECK: for.body.prol:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body:
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: skip the loop entirely unless b > 0.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9

; Loop body: a[i] += 1, with i counting up from 0.
for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Exit when the truncated next index equals the argument b, so the trip
  ; count is not a compile-time constant.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; !9 is the loop ID for this test; its first operand refers to itself and its
; second operand is node !6 (defined elsewhere in this file).
!9 = !{!9, !6}

; #pragma clang loop unroll_count(1)
; Loop should not be unrolled
;
; The loop has the same constant bound of 4 as loop4, but its latch branch
; carries an unroll.count hint of 1.  The checks require exactly one store
; followed by a conditional branch.  The argument %b is not used.
;
; CHECK-LABEL: @unroll_1(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @unroll_1(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

; Loop body: a[i] += 1, with i counting up from 0.
for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Exit once the incremented index reaches the constant 4.
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10

for.end:                                          ; preds = %for.body
  ret void
}
; !10 is the loop ID attached to the latch branch above; its first operand
; refers to itself and its second operand is the unroll.count hint (count 1).
!10 = !{!10, !11}
!11 = !{!"llvm.loop.unroll.count", i32 1}

; #pragma clang loop unroll(full)
; Loop has very high loop count (1 million) and full unrolling was requested.
; Loop should be unrolled up to the pragma threshold, but not completely.
;
; The checks require at least two stores followed by a conditional branch:
; some unrolling happened, yet a loop remains.  The argument %b is not used.
;
; CHECK-LABEL: @unroll_1M(
; CHECK: store i32
; CHECK: store i32
; CHECK: br i1
define void @unroll_1M(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

; Loop body: a[i] += 1, with i counting up from 0.
for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Exit once the incremented index reaches the constant 1000000.
  %exitcond = icmp eq i64 %indvars.iv.next, 1000000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12

for.end:                                          ; preds = %for.body
  ret void
}
; !12 is the loop ID attached to the latch branch above; its first operand
; refers to itself and its second operand is node !4 (defined elsewhere in
; this file).
!12 = !{!12, !4}

; #pragma clang loop unroll(enable)
; Loop should be fully unrolled.
;
; Same loop as loop64, but the latch branch carries the unroll.enable hint.
; The checks require that no conditional branch remains in the function.
;
; CHECK-LABEL: @loop64_with_enable(
; CHECK-NOT: br i1
define void @loop64_with_enable(i32* nocapture %a) {
entry:
  br label %for.body

; Loop body: a[i] += 1, with i counting up from 0.
for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Exit once the incremented index reaches the constant 64.
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13

for.end:                                          ; preds = %for.body
  ret void
}
; !13 is the loop ID attached to the latch branch above; its first operand
; refers to itself and its second operand is the unroll.enable hint.
!13 = !{!13, !14}
!14 = !{!"llvm.loop.unroll.enable"}

; #pragma clang loop unroll(enable)
; Loop has a runtime trip count and should be runtime unrolled and duplicated
; (original and 8x).
;
; The checks expect a block for.body.prol containing exactly one store before
; its conditional branch, followed by the block for.body containing exactly
; eight stores before its conditional branch.
;
; CHECK-LABEL: @runtime_loop_with_enable(
; CHECK: for.body.prol:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body:
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: skip the loop entirely unless b > 0.  The guard carries the same
  ; loop ID as the latch branch below, so this test refers only to its own
  ; unroll.enable metadata.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !15

; Loop body: a[i] += 1, with i counting up from 0.
for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Exit when the truncated next index equals the argument b, so the trip
  ; count is not a compile-time constant.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; !15 is the loop ID for this test; its first operand refers to itself and its
; second operand is node !14 (defined elsewhere in this file).
!15 = !{!15, !14}
