; RUN: opt -S -loop-sink < %s | FileCheck %s
; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s

; Mutable global. Most functions in this file load it in their loop preheader
; and the FileCheck directives verify where that load ends up.
@g = global i32 0, align 4

; t1: the preheader load of @g is used only in .b3 and .b4, which are both
; reached through .b2; .b1 branches to .b2 with weights 1:2000 (!prof !1).
;
; Loop CFG:
;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; Block frequencies as annotated by the test author:
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; Sink load to b2
; CHECK: t1
; CHECK: .b2:
; CHECK: load i32, i32* @g
; CHECK: .b3:
; CHECK-NOT:  load i32, i32* @g
define i32 @t1(i32, i32) #0 !prof !0 {
  ; Bypass the loop entirely when the second argument is zero.
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  ; Loop-invariant load under test.
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  ; No profile data on this branch.
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  ; First user of %invariant.
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  ; Second user of %invariant.
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  ; This side of the diamond does not use %invariant.
  %t6 = add nsw i32 %iv, 100
  br label %.b7

.b7:
  ; Loop latch: back edge to .b1, exit edge to .exit.
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

; t2: the preheader load of @g is used in .b3 and in .b6, which lie on opposite
; sides of the outer diamond. .b1 -> .b6 and .b2 -> .b3 are both the
; low-weight successors of their branches (!prof !2 and !prof !1).
;
; Loop CFG:
;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; Block frequencies as annotated by the test author:
; preheader: 500
; b1: 16016
; b3: 8
; b6: 8
; Sink load to b3 and b6
; CHECK: t2
; CHECK: .preheader:
; CHECK-NOT: load i32, i32* @g
; CHECK: .b3:
; CHECK: load i32, i32* @g
; CHECK: .b4:
; CHECK: .b6:
; CHECK: load i32, i32* @g
; CHECK: .b7:
define i32 @t2(i32, i32) #0 !prof !0 {
  ; Bypass the loop entirely when the second argument is zero.
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  ; Loop-invariant load under test.
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !2

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4, !prof !1

.b3:
  ; First user of %invariant.
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  ; Does not use %invariant.
  %t4 = add nsw i32 5, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  ; Second user of %invariant.
  %t6 = add nsw i32 %iv, %invariant
  br label %.b7

.b7:
  ; Loop latch: back edge to .b1, exit edge to .exit.
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

; t3: the preheader load of @g is used in .b3 and also in .b5. .b5 is reached
; from both .b3 and .b4, and .b1 -> .b2 is the high-weight successor
; (!prof !2).
;
; Loop CFG:
;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; Block frequencies as annotated by the test author:
; preheader: 500
; b3: 8
; b5: 16008
; Do not sink load from preheader.
; CHECK: t3
; CHECK: .preheader:
; CHECK: load i32, i32* @g
; CHECK: .b1:
; CHECK-NOT: load i32, i32* @g
define i32 @t3(i32, i32) #0 !prof !0 {
  ; Bypass the loop entirely when the second argument is zero.
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  ; Loop-invariant load under test; expected to stay here.
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !2

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4, !prof !1

.b3:
  ; First user of %invariant.
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 5, %iv
  br label %.b5

.b5:
  ; Second user of %invariant.
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, %invariant
  br label %.b7

.b6:
  ; Does not use %invariant.
  %t6 = add nsw i32 %iv, 5
  br label %.b7

.b7:
  ; Loop latch: back edge to .b1, exit edge to .exit.
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

; t4: the loop consists of the single block .b1, whose back edge carries the
; low weight of !prof !1 (1:2000). The function's entry block is the preheader.
;
; For single-BB loop with <=1 avg trip count, sink load to b1
; CHECK: t4
; CHECK: .preheader:
; CHECK-NOT: load i32, i32* @g
; CHECK: .b1:
; CHECK: load i32, i32* @g
; CHECK: .exit:
define i32 @t4(i32, i32) #0 !prof !0 {
.preheader:
  ; Loop-invariant load under test.
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  ; Header, body and latch in one block; the only user of %invariant is %t1.
  %iv = phi i32 [ %t1, %.b1 ], [ 0, %.preheader ]
  %t1 = add nsw i32 %invariant, %iv
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b1, label %.exit, !prof !1

.exit:
  ret i32 10
}

; t5: same shape and profile as t1, but the loop writes memory: .b3 stores
; through the pointer argument (which carries no noalias attribute, so it is
; not known to be distinct from @g) and .b6 calls the external function @foo.
;
; Loop CFG:
;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; Block frequencies as annotated by the test author:
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; There is alias store in loop, do not sink load
; CHECK: t5
; CHECK: .preheader:
; CHECK: load i32, i32* @g
; CHECK: .b1:
; CHECK-NOT: load i32, i32* @g
define i32 @t5(i32, i32*) #0 !prof !0 {
  ; Bypass the loop entirely when the first argument is zero.
  %3 = icmp eq i32 %0, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  ; Loop-invariant load under test; expected to stay here.
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  ; The store the test description refers to: writes through the pointer
  ; argument inside the loop.
  store i32 %t3, i32* %1
  br label %.b5

.b4:
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  ; Call to an external function declared without attributes.
  %t6 = call i32 @foo()
  br label %.b7

.b7:
  ; Loop latch: back edge to .b1, exit edge to .exit.
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

; t5_const_memory: identical to t5 except that the preheader load reads
; @g_const, which is declared `constant`. The loop still stores through the
; pointer argument and calls @foo.
;
; Loop CFG:
;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; Block frequencies as annotated by the test author:
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; Regardless of aliasing store in loop this load from constant memory can be sunk.
; CHECK: t5_const_memory
; CHECK: .preheader:
; CHECK-NOT: load i32, i32* @g_const
; CHECK: .b2:
; CHECK: load i32, i32* @g_const
; CHECK: br i1 %c2, label %.b3, label %.b4
define i32 @t5_const_memory(i32, i32*) #0 !prof !0 {
  ; Bypass the loop entirely when the first argument is zero.
  %3 = icmp eq i32 %0, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  ; Loop-invariant load under test; reads a constant global.
  %invariant = load i32, i32* @g_const
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  ; The directives above expect the load to land here, ahead of this branch.
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  ; The store the test description refers to: writes through the pointer
  ; argument inside the loop.
  store i32 %t3, i32* %1
  br label %.b5

.b4:
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  ; Call to an external function declared without attributes.
  %t6 = call i32 @foo()
  br label %.b7

.b7:
  ; Loop latch: back edge to .b1, exit edge to .exit.
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

; t6: the preheader load of the mutable global @g is an unordered atomic load,
; and its only user is in .b2, the low-weight successor of .b1 (!prof !1).
;
; Loop CFG:
;     b1
;    /  \
;   b2  b3
;    \  /
;     b4
; Block frequencies as annotated by the test author:
; preheader: 1000
; b2: 15
; b3: 7
; Do not sink unordered atomic load to b2
; CHECK: t6
; CHECK: .preheader:
; CHECK:  load atomic i32, i32* @g unordered, align 4
; CHECK: .b2:
; CHECK-NOT: load atomic i32, i32* @g unordered, align 4
define i32 @t6(i32, i32) #0 !prof !0 {
  ; Bypass the loop entirely when the second argument is zero.
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  ; Atomic load under test; expected to stay here.
  %invariant = load atomic i32, i32* @g unordered, align 4
  br label %.b1

.b1:
  %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b3, !prof !1

.b2:
  ; Only user of %invariant.
  %t1 = add nsw i32 %invariant, %iv
  br label %.b4

.b3:
  %t2 = add nsw i32 %iv, 100
  br label %.b4

.b4:
  ; Loop latch: back edge to .b1, exit edge to .exit.
  %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
  %t3 = add nuw nsw i32 %iv, 1
  %c2 = icmp eq i32 %t3, %p1
  br i1 %c2, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

; Constant global. Loaded by the tests that check sinking of loads from
; constant memory.
@g_const = constant i32 0, align 4

; t7: same CFG and profile as t6, but the unordered atomic load reads the
; constant global @g_const instead of the mutable @g.
;
; Loop CFG:
;     b1
;    /  \
;   b2  b3
;    \  /
;     b4
; Block frequencies as annotated by the test author:
; preheader: 1000
; b2: 0.5
; b3: 999.5
; Sink unordered atomic load to b2. It is allowed to sink into loop unordered
; load from constant.
; CHECK: t7
; CHECK: .preheader:
; CHECK-NOT:  load atomic i32, i32* @g_const unordered, align 4
; CHECK: .b2:
; CHECK: load atomic i32, i32* @g_const unordered, align 4
define i32 @t7(i32, i32) #0 !prof !0 {
  ; Bypass the loop entirely when the second argument is zero.
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  ; Atomic load under test; reads a constant global.
  %invariant = load atomic i32, i32* @g_const unordered, align 4
  br label %.b1

.b1:
  %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b3, !prof !1

.b2:
  ; Only user of %invariant.
  %t1 = add nsw i32 %invariant, %iv
  br label %.b4

.b3:
  %t2 = add nsw i32 %iv, 100
  br label %.b4

.b4:
  ; Loop latch: back edge to .b1, exit edge to .exit.
  %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
  %t3 = add nuw nsw i32 %iv, 1
  %c2 = icmp eq i32 %t3, %p1
  br i1 %c2, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

; External function with no attributes; called inside the loops of t5 and
; t5_const_memory.
declare i32 @foo()

; Profile metadata shared by the functions in this file.
; !0: function entry count attached to every function via !prof.
!0 = !{!"function_entry_count", i64 1}
; !1: branch weights 1 (first successor) : 2000 (second successor).
!1 = !{!"branch_weights", i32 1, i32 2000}
; !2: branch weights 2000 (first successor) : 1 (second successor).
!2 = !{!"branch_weights", i32 2000, i32 1}
; !3: branch weights 100 : 1, used on the loop latches (back edge : exit edge).
!3 = !{!"branch_weights", i32 100, i32 1}
446