1; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false | FileCheck %s
2
3; These tests check for loop branching structure, and that the loop align
4; directive is placed in the expected place.
5
6; The expected layout keeps the loop in source order: the aligned header, then
7; the latch with its backward jmp, and the exit block placed after the loop.
8
9; CHECK-LABEL: simple:
10;      CHECK:   align
11; CHECK-NEXT: .LBB0_1:
12; CHECK-NEXT:   callq loop_header
13;      CHECK:   js .LBB0_3
14; CHECK-NEXT:   callq loop_latch
15; CHECK-NEXT:   jmp .LBB0_1
16; CHECK-NEXT: .LBB0_3:
17; CHECK-NEXT:   callq exit
18
; @simple: a single loop. The header (%loop) tests a value returned by @get and
; either leaves the loop (%done) or runs the latch (%bb), which branches back.
19define void @simple() nounwind {
20entry:
21  br label %loop
22
23loop:
24  call void @loop_header()
25  %t0 = tail call i32 @get()
26  %t1 = icmp slt i32 %t0, 0                  ; signed test: %t0 < 0
27  br i1 %t1, label %done, label %bb          ; negative -> leave the loop
28
29bb:
30  call void @loop_latch()
31  br label %loop                             ; back edge to the header
32
33done:
34  call void @exit()
35  ret void
36}
37
38; CodeGen should move block_a to the top of the loop so that it
39; falls through into the loop, avoiding a branch within the loop.
40
41; CHECK-LABEL: slightly_more_involved:
42;      CHECK:   jmp .LBB1_1
43; CHECK-NEXT:   align
44; CHECK-NEXT: .LBB1_4:
45; CHECK-NEXT:   callq bar99
46; CHECK-NEXT: .LBB1_1:
47; CHECK-NEXT:   callq body
48
; @slightly_more_involved: a loop with two back edges. The header (%loop) either
; detours through %block_a or continues to %bb; %bb either exits or loops again.
49define void @slightly_more_involved() nounwind {
50entry:
51  br label %loop
52
53loop:
54  call void @body()
55  %t0 = call i32 @get()
56  %t1 = icmp slt i32 %t0, 2
57  br i1 %t1, label %block_a, label %bb       ; %t0 < 2 -> side block, else continue
58
59bb:
60  %t2 = call i32 @get()
61  %t3 = icmp slt i32 %t2, 99
62  br i1 %t3, label %exit, label %loop        ; %t2 < 99 -> exit, else back edge
63
64block_a:
65  call void @bar99()
66  br label %loop                             ; second back edge to the header
67
68exit:
69  call void @exit()
70  ret void
71}
72
73; Same as slightly_more_involved, but block_a is now a CFG diamond with
74; fallthrough edges which should be preserved.
75; "callq block_a_merge_func" is tail duped.
76
77; CHECK-LABEL: yet_more_involved:
78;      CHECK:   jmp .LBB2_1
79; CHECK-NEXT:   align
80
81;      CHECK: .LBB2_1:
82; CHECK-NEXT:   callq body
83; CHECK-NEXT:   callq get
84; CHECK-NEXT:   cmpl $2, %eax
85; CHECK-NEXT:   jge .LBB2_2
86; CHECK-NEXT:   callq bar99
87; CHECK-NEXT:   callq get
88; CHECK-NEXT:   cmpl $2999, %eax
89; CHECK-NEXT:   jg .LBB2_6
90; CHECK-NEXT:   callq block_a_true_func
91; CHECK-NEXT:   callq block_a_merge_func
92; CHECK-NEXT:   jmp .LBB2_1
93; CHECK-NEXT:   align
94; CHECK-NEXT: .LBB2_6:
95; CHECK-NEXT:   callq block_a_false_func
96; CHECK-NEXT:   callq block_a_merge_func
97; CHECK-NEXT:   jmp .LBB2_1
98
; @yet_more_involved: same outer loop shape as @slightly_more_involved, but
; %block_a now opens a diamond (%block_a_true / %block_a_false) that rejoins in
; %block_a_merge before branching back to the loop header.
99define void @yet_more_involved() nounwind {
100entry:
101  br label %loop
102
103loop:
104  call void @body()
105  %t0 = call i32 @get()
106  %t1 = icmp slt i32 %t0, 2
107  br i1 %t1, label %block_a, label %bb      ; %t0 < 2 -> enter the diamond
108
109bb:
110  %t2 = call i32 @get()
111  %t3 = icmp slt i32 %t2, 99
112  br i1 %t3, label %exit, label %loop       ; %t2 < 99 -> exit, else back edge
113
114block_a:
115  call void @bar99()
116  %z0 = call i32 @get()
117  %z1 = icmp slt i32 %z0, 3000
118  br i1 %z1, label %block_a_true, label %block_a_false  ; diamond split
119
120block_a_true:
121  call void @block_a_true_func()
122  br label %block_a_merge
123
124block_a_false:
125  call void @block_a_false_func()
126  br label %block_a_merge
127
128block_a_merge:
129  call void @block_a_merge_func()           ; join point of the diamond
130  br label %loop                            ; back edge to the header
131
132exit:
133  call void @exit()
134  ret void
135}
136
137; CodeGen should move the CFG islands that are part of the loop but don't
138; conveniently fit anywhere so that they are at least contiguous with the
139; loop.
140
141; CHECK-LABEL: cfg_islands:
142;      CHECK:   jmp     .LBB3_1
143; CHECK-NEXT:   align
144; CHECK-NEXT: .LBB3_7:
145; CHECK-NEXT:   callq   bar100
146; CHECK-NEXT: .LBB3_1:
147; CHECK-NEXT:   callq   loop_header
148;      CHECK:   jl .LBB3_7
149;      CHECK:   jge .LBB3_3
150; CHECK-NEXT:   callq   bar101
151; CHECK-NEXT:   jmp     .LBB3_1
152; CHECK-NEXT:   align
153; CHECK-NEXT: .LBB3_3:
154;      CHECK:   jge .LBB3_4
155; CHECK-NEXT:   callq   bar102
156; CHECK-NEXT:   jmp     .LBB3_1
157; CHECK-NEXT: .LBB3_4:
158;      CHECK:   jl .LBB3_6
159; CHECK-NEXT:   callq   loop_latch
160; CHECK-NEXT:   jmp     .LBB3_1
161; CHECK-NEXT: .LBB3_6:
162
; @cfg_islands: a loop whose header is followed by a chain of tests (%bb, %bb1,
; %bb2). Three of the tests detour through one-call blocks (%block100,
; %block101, %block102) that branch straight back to the header. In the IR
; those three blocks are written after %exit, i.e. textually outside the run
; of loop blocks.
163define void @cfg_islands() nounwind {
164entry:
165  br label %loop
166
167loop:
168  call void @loop_header()
169  %t0 = call i32 @get()
170  %t1 = icmp slt i32 %t0, 100
171  br i1 %t1, label %block100, label %bb     ; %t0 < 100 -> %block100
172
173bb:
174  %t2 = call i32 @get()
175  %t3 = icmp slt i32 %t2, 101
176  br i1 %t3, label %block101, label %bb1    ; %t2 < 101 -> %block101
177
178bb1:
179  %t4 = call i32 @get()
180  %t5 = icmp slt i32 %t4, 102
181  br i1 %t5, label %block102, label %bb2    ; %t4 < 102 -> %block102
182
183bb2:
184  %t6 = call i32 @get()
185  %t7 = icmp slt i32 %t6, 103
186  br i1 %t7, label %exit, label %bb3        ; %t6 < 103 -> leave the loop
187
188bb3:
189  call void @loop_latch()
190  br label %loop                            ; back edge from the latch
191
192exit:
193  call void @exit()
194  ret void
195
; The blocks below belong to the loop (each branches back to %loop) even though
; they appear after the function's ret.
196block100:
197  call void @bar100()
198  br label %loop
199
200block101:
201  call void @bar101()
202  br label %loop
203
204block102:
205  call void @bar102()
206  br label %loop
207}
208
209; CHECK-LABEL: check_minsize:
210; CHECK-NOT:   align
211; CHECK:      .LBB4_1:
212; CHECK-NEXT:   callq loop_header
213; CHECK:        callq loop_latch
214; CHECK:      .LBB4_3:
215; CHECK:        callq exit
216
217
; @check_minsize: the same IR body as @simple; the only difference is the
; minsize function attribute. The FileCheck expectations preceding this
; function require that no align directive appears before the loop label.
218define void @check_minsize() minsize nounwind {
219entry:
220  br label %loop
221
222loop:
223  call void @loop_header()
224  %t0 = tail call i32 @get()
225  %t1 = icmp slt i32 %t0, 0
226  br i1 %t1, label %done, label %bb         ; negative -> leave the loop
227
228bb:
229  call void @loop_latch()
230  br label %loop                            ; back edge to the header
231
232done:
233  call void @exit()
234  ret void
235}
236
237; This is exactly the same function as slightly_more_involved.
238; The difference is that when optimising for size, we do not want
239; to see this reordering.
240
241; CHECK-LABEL: slightly_more_involved_2:
242; CHECK-NOT:      jmp .LBB5_1
243; CHECK:          .LBB5_1:
244; CHECK-NEXT:     callq body
245
; @slightly_more_involved_2: same IR body as @slightly_more_involved, but the
; function uses attribute group #0 (defined after this function), which adds
; minsize and optsize among others.
246define void @slightly_more_involved_2() #0 {
247entry:
248  br label %loop
249
250loop:
251  call void @body()
252  %t0 = call i32 @get()
253  %t1 = icmp slt i32 %t0, 2
254  br i1 %t1, label %block_a, label %bb      ; %t0 < 2 -> side block, else continue
255
256bb:
257  %t2 = call i32 @get()
258  %t3 = icmp slt i32 %t2, 99
259  br i1 %t3, label %exit, label %loop       ; %t2 < 99 -> exit, else back edge
260
261block_a:
262  call void @bar99()
263  br label %loop                            ; second back edge to the header
264
265exit:
266  call void @exit()
267  ret void
268}
269
; Attribute group referenced by @slightly_more_involved_2; it carries both
; size-optimization attributes (minsize and optsize).
270attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }
271
272; CHECK-LABEL: slightly_more_involved_2_pgso:
273; CHECK-NOT:      jmp .LBB6_1
274; CHECK:          .LBB6_1:
275; CHECK-NEXT:     callq body
276
; @slightly_more_involved_2_pgso: same IR body as @slightly_more_involved_2, but
; without minsize/optsize. Instead it carries !prof !14, a function entry count
; of 0 (see the metadata at the end of the file).
; NOTE(review): "pgso" presumably stands for profile-guided size optimization,
; i.e. the zero entry count plus the module profile summary is expected to make
; llc treat the function like a size-optimized one -- confirm.
277define void @slightly_more_involved_2_pgso() norecurse nounwind readnone uwtable !prof !14 {
278entry:
279  br label %loop
280
281loop:
282  call void @body()
283  %t0 = call i32 @get()
284  %t1 = icmp slt i32 %t0, 2
285  br i1 %t1, label %block_a, label %bb      ; %t0 < 2 -> side block, else continue
286
287bb:
288  %t2 = call i32 @get()
289  %t3 = icmp slt i32 %t2, 99
290  br i1 %t3, label %exit, label %loop       ; %t2 < 99 -> exit, else back edge
291
292block_a:
293  call void @bar99()
294  br label %loop                            ; second back edge to the header
295
296exit:
297  call void @exit()
298  ret void
299}
300
; External functions called by the test functions. They are only declared here
; (no bodies), all are nounwind, and @get is the only one that returns a value.
301declare void @bar99() nounwind
302declare void @bar100() nounwind
303declare void @bar101() nounwind
304declare void @bar102() nounwind
305declare void @body() nounwind
306declare void @exit() nounwind
307declare void @loop_header() nounwind
308declare void @loop_latch() nounwind
309declare i32 @get() nounwind                 ; source of every loop-condition value
310declare void @block_a_true_func() nounwind
311declare void @block_a_false_func() nounwind
312declare void @block_a_merge_func() nounwind
313
; Module flag !0 attaches a "ProfileSummary" node (!1) to the module.
314!llvm.module.flags = !{!0}
315!0 = !{i32 1, !"ProfileSummary", !1}
; !1 lists the summary fields defined by !2 through !9.
316!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
317!2 = !{!"ProfileFormat", !"InstrProf"}
318!3 = !{!"TotalCount", i64 10000}
319!4 = !{!"MaxCount", i64 10}
320!5 = !{!"MaxInternalCount", i64 1}
321!6 = !{!"MaxFunctionCount", i64 1000}
322!7 = !{!"NumCounts", i64 3}
323!8 = !{!"NumFunctions", i64 3}
324!9 = !{!"DetailedSummary", !10}
; NOTE(review): each detailed-summary entry below is presumably
; {cutoff, minimum count, number of counts} -- confirm against the LangRef.
325!10 = !{!11, !12, !13}
326!11 = !{i32 10000, i64 100, i32 1}
327!12 = !{i32 999000, i64 100, i32 1}
328!13 = !{i32 999999, i64 1, i32 2}
; Function entry count of 0, attached via !prof to @slightly_more_involved_2_pgso.
329!14 = !{!"function_entry_count", i64 0}
330