1; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
2; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
3; RUN: opt < %s -loop-unroll -unroll-allow-remainder=0 -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,NOREM %s
4;
5; Run loop unrolling twice to verify that loop unrolling metadata is properly
6; removed and further unrolling is disabled after the pass is run once.
7
8target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; The module is pinned to the x86_64-unknown-linux-gnu triple; the data layout
; string above is the one the functions in this file are compiled against.
9target triple = "x86_64-unknown-linux-gnu"
10
11; loop4 contains a small loop which should be completely unrolled by
12; the default unrolling heuristics.  It serves as a control for the
13; unroll(disable) pragma test loop4_with_disable.
14;
15; CHECK-LABEL: @loop4(
16; CHECK-NOT: br i1
17define void @loop4(i32* nocapture %a) {
; Adds 1 to each of a[0] .. a[3] in a single-block counted loop.
; The latch branch carries no llvm.loop metadata, so no unroll hint is attached.
18entry:
19  br label %for.body
20
21for.body:                                         ; preds = %for.body, %entry
22  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i: 0 from entry, i+1 from the back edge
23  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv  ; &a[i]
24  %0 = load i32, i32* %arrayidx, align 4
25  %inc = add nsw i32 %0, 1
26  store i32 %inc, i32* %arrayidx, align 4  ; a[i] = a[i] + 1
27  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
28  %exitcond = icmp eq i64 %indvars.iv.next, 4  ; constant trip count of 4
29  br i1 %exitcond, label %for.end, label %for.body
30
31for.end:                                          ; preds = %for.body
32  ret void
33}
34
35; #pragma clang loop unroll(disable)
36;
37; CHECK-LABEL: @loop4_with_disable(
38; CHECK: store i32
39; CHECK-NOT: store i32
40; CHECK: br i1
41define void @loop4_with_disable(i32* nocapture %a) {
; Same body as a 4-iteration "a[i] += 1" loop, but the latch branch is tagged
; with loop ID !1, which carries the llvm.loop.unroll.disable hint.
42entry:
43  br label %for.body
44
45for.body:                                         ; preds = %for.body, %entry
46  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i: 0 from entry, i+1 from the back edge
47  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv  ; &a[i]
48  %0 = load i32, i32* %arrayidx, align 4
49  %inc = add nsw i32 %0, 1
50  store i32 %inc, i32* %arrayidx, align 4  ; a[i] = a[i] + 1
51  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
52  %exitcond = icmp eq i64 %indvars.iv.next, 4  ; constant trip count of 4
53  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1  ; latch: loop ID !1
54
55for.end:                                          ; preds = %for.body
56  ret void
57}
; !1 is the loop ID: its first operand refers to itself, the second is the hint node !2.
58!1 = !{!1, !2}
59!2 = !{!"llvm.loop.unroll.disable"}
60
61; loop64 has a high enough count that it should *not* be unrolled by
62; the default unrolling heuristic.  It serves as the control for the
63; loop64_with_.* pragma tests below.
64;
65; CHECK-LABEL: @loop64(
66; CHECK: store i32
67; CHECK-NOT: store i32
68; CHECK: br i1
69define void @loop64(i32* nocapture %a) {
; Adds 1 to each of a[0] .. a[63] in a single-block counted loop.
; The latch branch carries no llvm.loop metadata, so no unroll hint is attached.
70entry:
71  br label %for.body
72
73for.body:                                         ; preds = %for.body, %entry
74  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i: 0 from entry, i+1 from the back edge
75  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv  ; &a[i]
76  %0 = load i32, i32* %arrayidx, align 4
77  %inc = add nsw i32 %0, 1
78  store i32 %inc, i32* %arrayidx, align 4  ; a[i] = a[i] + 1
79  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
80  %exitcond = icmp eq i64 %indvars.iv.next, 64  ; constant trip count of 64
81  br i1 %exitcond, label %for.end, label %for.body
82
83for.end:                                          ; preds = %for.body
84  ret void
85}
86
87; #pragma clang loop unroll(full)
88; Loop should be fully unrolled.
89;
90; CHECK-LABEL: @loop64_with_full(
91; CHECK-NOT: br i1
92define void @loop64_with_full(i32* nocapture %a) {
; 64-iteration "a[i] += 1" loop whose latch branch is tagged with loop ID !3,
; which carries the llvm.loop.unroll.full hint.
93entry:
94  br label %for.body
95
96for.body:                                         ; preds = %for.body, %entry
97  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i: 0 from entry, i+1 from the back edge
98  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv  ; &a[i]
99  %0 = load i32, i32* %arrayidx, align 4
100  %inc = add nsw i32 %0, 1
101  store i32 %inc, i32* %arrayidx, align 4  ; a[i] = a[i] + 1
102  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
103  %exitcond = icmp eq i64 %indvars.iv.next, 64  ; constant trip count of 64
104  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3  ; latch: loop ID !3
105
106for.end:                                          ; preds = %for.body
107  ret void
108}
; !3 is the loop ID (self-referential first operand); !4 is the unroll.full hint
; node, which is also referenced by loop IDs !8 and !12 elsewhere in this file.
109!3 = !{!3, !4}
110!4 = !{!"llvm.loop.unroll.full"}
111
112; #pragma clang loop unroll_count(4)
113; Loop should be unrolled 4 times.
114;
115; CHECK-LABEL: @loop64_with_count4(
116; CHECK: store i32
117; CHECK: store i32
118; CHECK: store i32
119; CHECK: store i32
120; CHECK-NOT: store i32
121; CHECK: br i1
122define void @loop64_with_count4(i32* nocapture %a) {
; 64-iteration "a[i] += 1" loop whose latch branch is tagged with loop ID !5,
; which carries the llvm.loop.unroll.count hint with a count of 4.
123entry:
124  br label %for.body
125
126for.body:                                         ; preds = %for.body, %entry
127  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i: 0 from entry, i+1 from the back edge
128  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv  ; &a[i]
129  %0 = load i32, i32* %arrayidx, align 4
130  %inc = add nsw i32 %0, 1
131  store i32 %inc, i32* %arrayidx, align 4  ; a[i] = a[i] + 1
132  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
133  %exitcond = icmp eq i64 %indvars.iv.next, 64  ; constant trip count of 64
134  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5  ; latch: loop ID !5
135
136for.end:                                          ; preds = %for.body
137  ret void
138}
; !5 is the loop ID (self-referential first operand); !6 is the count-4 hint
; node, which is also referenced by loop ID !9 elsewhere in this file.
139!5 = !{!5, !6}
140!6 = !{!"llvm.loop.unroll.count", i32 4}
141
142; #pragma clang loop unroll(full)
143; Full unrolling is requested, but loop has a runtime trip count so
144; no unrolling should occur.
145;
146; CHECK-LABEL: @runtime_loop_with_full(
147; CHECK: store i32
148; CHECK-NOT: store i32
149define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) {
; "a[i] += 1" loop whose bound is the runtime argument %b. The entry block
; skips the loop entirely unless %b > 0. The latch is tagged with loop ID !8,
; which references the llvm.loop.unroll.full hint node !4.
150entry:
151  %cmp3 = icmp sgt i32 %b, 0  ; guard: enter the loop only when b > 0
; NOTE(review): this branch sits outside the loop yet also carries !llvm.loop;
; presumably only the latch attachment matters to the unroller - confirm.
152  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8
153
154for.body:                                         ; preds = %entry, %for.body
155  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]  ; i: 0 from entry, i+1 from the back edge
156  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv  ; &a[i]
157  %0 = load i32, i32* %arrayidx, align 4
158  %inc = add nsw i32 %0, 1
159  store i32 %inc, i32* %arrayidx, align 4  ; a[i] = a[i] + 1
160  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
161  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow i+1 to i32 for the compare with %b
162  %exitcond = icmp eq i32 %lftr.wideiv, %b  ; leave once i+1 == b
163  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8  ; latch: loop ID !8
164
165for.end:                                          ; preds = %for.body, %entry
166  ret void
167}
; Loop ID !8: self-referential first operand plus the unroll.full hint node !4.
168!8 = !{!8, !4}
169
170; #pragma clang loop unroll_count(4)
171; Loop has a runtime trip count.  Runtime unrolling should occur and loop
172; should be duplicated (original and 4x unrolled) if remainder is allowed,
173; otherwise loop should not be unrolled.
174;
175; CHECK-LABEL: @runtime_loop_with_count4(
176; CHECK: for.body
177; CHECK: store
178; REM: store
179; REM: store
180; REM: store
181; CHECK-NOT: store
182; CHECK: br i1
183; REM: for.body.epil:
184; REM: store
185; NOREM-NOT: for.body.epil:
186; NOREM-NOT: store
187; CHECK-NOT: store
188; REM: br i1
189; NOREM-NOT: br i1
190define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
; "a[i] += 1" loop whose bound is the runtime argument %b. The entry block
; skips the loop entirely unless %b > 0. The latch is tagged with loop ID !9,
; which references the llvm.loop.unroll.count (count 4) hint node !6.
191entry:
192  %cmp3 = icmp sgt i32 %b, 0  ; guard: enter the loop only when b > 0
; NOTE(review): this branch sits outside the loop yet also carries !llvm.loop;
; presumably only the latch attachment matters to the unroller - confirm.
193  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9
194
195for.body:                                         ; preds = %entry, %for.body
196  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]  ; i: 0 from entry, i+1 from the back edge
197  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv  ; &a[i]
198  %0 = load i32, i32* %arrayidx, align 4
199  %inc = add nsw i32 %0, 1
200  store i32 %inc, i32* %arrayidx, align 4  ; a[i] = a[i] + 1
201  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
202  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow i+1 to i32 for the compare with %b
203  %exitcond = icmp eq i32 %lftr.wideiv, %b  ; leave once i+1 == b
204  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9  ; latch: loop ID !9
205
206for.end:                                          ; preds = %for.body, %entry
207  ret void
208}
; Loop ID !9: self-referential first operand plus the count-4 hint node !6.
209!9 = !{!9, !6}
210
211; #pragma clang loop unroll_count(1)
212; Loop should not be unrolled
213;
214; CHECK-LABEL: @unroll_1(
215; CHECK: store i32
216; CHECK-NOT: store i32
217; CHECK: br i1
218define void @unroll_1(i32* nocapture %a, i32 %b) {
; 4-iteration "a[i] += 1" loop whose latch branch is tagged with loop ID !10,
; which carries the llvm.loop.unroll.count hint with a count of 1.
; The %b parameter is not referenced anywhere in the body.
219entry:
220  br label %for.body
221
222for.body:                                         ; preds = %for.body, %entry
223  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i: 0 from entry, i+1 from the back edge
224  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv  ; &a[i]
225  %0 = load i32, i32* %arrayidx, align 4
226  %inc = add nsw i32 %0, 1
227  store i32 %inc, i32* %arrayidx, align 4  ; a[i] = a[i] + 1
228  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
229  %exitcond = icmp eq i64 %indvars.iv.next, 4  ; constant trip count of 4
230  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10  ; latch: loop ID !10
231
232for.end:                                          ; preds = %for.body
233  ret void
234}
; !10 is the loop ID (self-referential first operand); !11 is the count-1 hint node.
235!10 = !{!10, !11}
236!11 = !{!"llvm.loop.unroll.count", i32 1}
237
238; #pragma clang loop unroll(full)
239; Loop has very high loop count (1 million) and full unrolling was requested.
240; Loop should be unrolled up to the pragma threshold, but not completely.
241;
242; CHECK-LABEL: @unroll_1M(
243; CHECK: store i32
244; CHECK: store i32
245; CHECK: br i1
246define void @unroll_1M(i32* nocapture %a, i32 %b) {
; "a[i] += 1" loop with a constant trip count of 1000000. The latch branch is
; tagged with loop ID !12, which references the llvm.loop.unroll.full hint
; node !4. The %b parameter is not referenced anywhere in the body.
247entry:
248  br label %for.body
249
250for.body:                                         ; preds = %for.body, %entry
251  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i: 0 from entry, i+1 from the back edge
252  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv  ; &a[i]
253  %0 = load i32, i32* %arrayidx, align 4
254  %inc = add nsw i32 %0, 1
255  store i32 %inc, i32* %arrayidx, align 4  ; a[i] = a[i] + 1
256  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
257  %exitcond = icmp eq i64 %indvars.iv.next, 1000000  ; constant trip count of one million
258  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12  ; latch: loop ID !12
259
260for.end:                                          ; preds = %for.body
261  ret void
262}
; Loop ID !12: self-referential first operand plus the unroll.full hint node !4.
263!12 = !{!12, !4}
264
265; #pragma clang loop unroll(enable)
266; Loop should be fully unrolled.
267;
268; CHECK-LABEL: @loop64_with_enable(
269; CHECK-NOT: br i1
270define void @loop64_with_enable(i32* nocapture %a) {
; 64-iteration "a[i] += 1" loop whose latch branch is tagged with loop ID !13,
; which carries the llvm.loop.unroll.enable hint.
271entry:
272  br label %for.body
273
274for.body:                                         ; preds = %for.body, %entry
275  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i: 0 from entry, i+1 from the back edge
276  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv  ; &a[i]
277  %0 = load i32, i32* %arrayidx, align 4
278  %inc = add nsw i32 %0, 1
279  store i32 %inc, i32* %arrayidx, align 4  ; a[i] = a[i] + 1
280  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
281  %exitcond = icmp eq i64 %indvars.iv.next, 64  ; constant trip count of 64
282  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13  ; latch: loop ID !13
283
284for.end:                                          ; preds = %for.body
285  ret void
286}
; !13 is the loop ID (self-referential first operand); !14 is the unroll.enable
; hint node, which is also referenced by loop ID !15 elsewhere in this file.
287!13 = !{!13, !14}
288!14 = !{!"llvm.loop.unroll.enable"}
289
290; #pragma clang loop unroll(enable)
291; Loop has a runtime trip count and should be runtime unrolled and duplicated
292; (original and 8x) if remainder is allowed, otherwise it should not be
293; unrolled.
294;
295; CHECK-LABEL: @runtime_loop_with_enable(
296; CHECK: for.body:
297; CHECK: store i32
298; REM: store i32
299; REM: store i32
300; REM: store i32
301; REM: store i32
302; REM: store i32
303; REM: store i32
304; REM: store i32
305; CHECK-NOT: store i32
306; CHECK: br i1
307; REM: for.body.epil:
308; NOREM-NOT: for.body.epil:
309; REM: store
310; CHECK-NOT: store
311; REM: br i1
312; NOREM-NOT: br i1
313define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
; "a[i] += 1" loop whose bound is the runtime argument %b. The entry block
; skips the loop entirely unless %b > 0. The latch is tagged with loop ID !15,
; which references the llvm.loop.unroll.enable hint node !14.
314entry:
315  %cmp3 = icmp sgt i32 %b, 0  ; guard: enter the loop only when b > 0
; The guard now names this function's own loop ID (!15) instead of !8, which is
; the unroll.full loop ID belonging to @runtime_loop_with_full. That keeps the
; test self-contained and consistent with the latch attachment below.
316  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !15
317
318for.body:                                         ; preds = %entry, %for.body
319  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]  ; i: 0 from entry, i+1 from the back edge
320  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv  ; &a[i]
321  %0 = load i32, i32* %arrayidx, align 4
322  %inc = add nsw i32 %0, 1
323  store i32 %inc, i32* %arrayidx, align 4  ; a[i] = a[i] + 1
324  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
325  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow i+1 to i32 for the compare with %b
326  %exitcond = icmp eq i32 %lftr.wideiv, %b  ; leave once i+1 == b
327  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15  ; latch: loop ID !15
328
329for.end:                                          ; preds = %for.body, %entry
330  ret void
331}
; Loop ID !15: self-referential first operand plus the unroll.enable hint node !14.
332!15 = !{!15, !14}
333
334; #pragma clang loop unroll_count(3)
335; Loop has a runtime trip count.  Runtime unrolling should occur and loop
336; should be duplicated (original and 3x unrolled) if remainder is allowed,
337; otherwise it should not be unrolled.
338;
339; CHECK-LABEL: @runtime_loop_with_count3(
340; CHECK: for.body
341; CHECK: store
342; REM: store
343; REM: store
344; CHECK-NOT: store
345; CHECK: br i1
346; REM: for.body.epil:
347; REM: store
348; NOREM-NOT: for.body.epil:
349; NOREM-NOT: store
350; CHECK-NOT: store
351; REM: br i1
352define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
; "a[i] += 1" loop whose bound is the runtime argument %b. The entry block
; skips the loop entirely unless %b > 0. The latch is tagged with loop ID !16,
; which carries the llvm.loop.unroll.count hint with a count of 3.
353entry:
354  %cmp3 = icmp sgt i32 %b, 0  ; guard: enter the loop only when b > 0
; NOTE(review): this branch sits outside the loop yet also carries !llvm.loop;
; presumably only the latch attachment matters to the unroller - confirm.
355  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16
356
357for.body:                                         ; preds = %entry, %for.body
358  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]  ; i: 0 from entry, i+1 from the back edge
359  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv  ; &a[i]
360  %0 = load i32, i32* %arrayidx, align 4
361  %inc = add nsw i32 %0, 1
362  store i32 %inc, i32* %arrayidx, align 4  ; a[i] = a[i] + 1
363  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
364  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow i+1 to i32 for the compare with %b
365  %exitcond = icmp eq i32 %lftr.wideiv, %b  ; leave once i+1 == b
366  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !16  ; latch: loop ID !16
367
368for.end:                                          ; preds = %for.body, %entry
369  ret void
370}
; !16 is the loop ID (self-referential first operand); !17 is the count-3 hint node.
371!16 = !{!16, !17}
372!17 = !{!"llvm.loop.unroll.count", i32 3}
373