1; RUN: opt < %s -licm -S | FileCheck %s
2; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S | FileCheck %s
3; RUN: opt < %s -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s
4
5@X = global i32 0		; <i32*> [#uses=1]
6
7declare void @foo()
8
9declare i32 @llvm.bitreverse.i32(i32)
10
11; This testcase tests for a problem where LICM hoists
12; potentially trapping instructions when they are not guaranteed to execute.
13define i32 @test1(i1 %c) {
14; CHECK-LABEL: @test1(
15	%A = load i32, i32* @X		; <i32> [#uses=2]
16	br label %Loop
17Loop:		; preds = %LoopTail, %0
18	call void @foo( )
19	br i1 %c, label %LoopTail, label %IfUnEqual
20
21IfUnEqual:		; preds = %Loop
22; CHECK: IfUnEqual:
23; CHECK-NEXT: sdiv i32 4, %A
24	%B1 = sdiv i32 4, %A		; <i32> [#uses=1]
25	br label %LoopTail
26
27LoopTail:		; preds = %IfUnEqual, %Loop
28	%B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ]		; <i32> [#uses=1]
29	br i1 %c, label %Loop, label %Out
30Out:		; preds = %LoopTail
31	%C = sub i32 %A, %B		; <i32> [#uses=1]
32	ret i32 %C
33}
34
35
36declare void @foo2(i32) nounwind
37
38
39;; It is ok and desirable to hoist this potentially trapping instruction.
40define i32 @test2(i1 %c) {
41; CHECK-LABEL: @test2(
42; CHECK-NEXT: load i32, i32* @X
43; CHECK-NEXT: %B = sdiv i32 4, %A
44  %A = load i32, i32* @X
45  br label %Loop
46
47Loop:
48  ;; Should have hoisted this div!
49  %B = sdiv i32 4, %A
50  br label %loop2
51
52loop2:
53  call void @foo2( i32 %B )
54  br i1 %c, label %Loop, label %Out
55
56Out:
57  %C = sub i32 %A, %B
58  ret i32 %C
59}
60
61
62; This loop invariant instruction should be constant folded, not hoisted.
63define i32 @test3(i1 %c) {
64; CHECK-LABEL: define i32 @test3(
65; CHECK: call void @foo2(i32 6)
66	%A = load i32, i32* @X		; <i32> [#uses=2]
67	br label %Loop
68Loop:
69	%B = add i32 4, 2		; <i32> [#uses=2]
70	call void @foo2( i32 %B )
71	br i1 %c, label %Loop, label %Out
72Out:		; preds = %Loop
73	%C = sub i32 %A, %B		; <i32> [#uses=1]
74	ret i32 %C
75}
76
77; CHECK-LABEL: @test4(
78; CHECK: call
79; CHECK: sdiv
80; CHECK: ret
81define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp {
82entry:
83  br label %for.body
84
85for.body:                                         ; preds = %entry, %for.body
86  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
87  %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
88  call void @foo_may_call_exit(i32 0)
89  %div = sdiv i32 %x, %y
90  %add = add nsw i32 %n.01, %div
91  %inc = add nsw i32 %i.02, 1
92  %cmp = icmp slt i32 %inc, 10000
93  br i1 %cmp, label %for.body, label %for.end
94
95for.end:                                          ; preds = %for.body
96  %n.0.lcssa = phi i32 [ %add, %for.body ]
97  ret i32 %n.0.lcssa
98}
99
100declare void @foo_may_call_exit(i32)
101
102; PR14854
103; CHECK-LABEL: @test5(
104; CHECK: extractvalue
105; CHECK: br label %tailrecurse
106; CHECK: tailrecurse:
107; CHECK: ifend:
108; CHECK: insertvalue
109define { i32*, i32 } @test5(i32 %i, { i32*, i32 } %e) {
110entry:
111  br label %tailrecurse
112
113tailrecurse:                                      ; preds = %then, %entry
114  %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
115  %out = extractvalue { i32*, i32 } %e, 1
116  %d = insertvalue { i32*, i32 } %e, i32* null, 0
117  %cmp1 = icmp sgt i32 %out, %i.tr
118  br i1 %cmp1, label %then, label %ifend
119
120then:                                             ; preds = %tailrecurse
121  call void @foo()
122  %cmp2 = add i32 %i.tr, 1
123  br label %tailrecurse
124
125ifend:                                            ; preds = %tailrecurse
126  ret { i32*, i32 } %d
127}
128
129; CHECK: define void @test6(float %f)
130; CHECK: fneg
131; CHECK: br label %for.body
132define void @test6(float %f) #2 {
133entry:
134  br label %for.body
135
136for.body:                                         ; preds = %for.body, %entry
137  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
138  call void @foo_may_call_exit(i32 0)
139  %neg = fneg float %f
140  call void @use(float %neg)
141  %inc = add nsw i32 %i, 1
142  %cmp = icmp slt i32 %inc, 10000
143  br i1 %cmp, label %for.body, label %for.end
144
145for.end:                                          ; preds = %for.body
146  ret void
147}
148
149declare void @use(float)
150
151; CHECK: define i32 @hoist_bitreverse(i32 %0)
152; CHECK: bitreverse
153; CHECK: br label %header
154define i32 @hoist_bitreverse(i32 %0)  {
155  br label %header
156
157header:
158  %sum = phi i32 [ 0, %1 ], [ %5, %latch ]
159  %2 = phi i32 [ 0, %1 ], [ %6, %latch ]
160  %3 = icmp slt i32 %2, 1024
161  br i1 %3, label %body, label %return
162
163body:
164  %4 = call i32 @llvm.bitreverse.i32(i32 %0)
165  %5 = add i32 %sum, %4
166  br label %latch
167
168latch:
169  %6 = add nsw i32 %2, 1
170  br label %header
171
172return:
173  ret i32 %sum
174}
175
176; Can neither sink nor hoist
177define i32 @test_volatile(i1 %c) {
178; CHECK-LABEL: @test_volatile(
179; CHECK-LABEL: Loop:
180; CHECK: load volatile i32, i32* @X
181; CHECK-LABEL: Out:
182  br label %Loop
183
184Loop:
185  %A = load volatile i32, i32* @X
186  br i1 %c, label %Loop, label %Out
187
188Out:
189  ret i32 %A
190}
191
192
193declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
194declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind
195declare void @escaping.invariant.start({}*) nounwind
196; invariant.start dominates the load, and in this scope, the
197; load is invariant. So, we can hoist the `addrld` load out of the loop.
198define i32 @test_fence(i8* %addr, i32 %n, i8* %volatile) {
199; CHECK-LABEL: @test_fence
200; CHECK-LABEL: entry
201; CHECK: invariant.start
202; CHECK: %addrld = load atomic i32, i32* %addr.i unordered, align 8
203; CHECK: br label %loop
204entry:
205  %gep = getelementptr inbounds i8, i8* %addr, i64 8
206  %addr.i = bitcast i8* %gep to i32 *
207  store atomic i32 5, i32 * %addr.i unordered, align 8
208  fence release
209  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
210  br label %loop
211
212loop:
213  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
214  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
215  %volload = load atomic i8, i8* %volatile unordered, align 8
216  fence acquire
217  %volchk = icmp eq i8 %volload, 0
218  %addrld = load atomic i32, i32* %addr.i unordered, align 8
219  %sel = select i1 %volchk, i32 0, i32 %addrld
220  %sum.next = add i32 %sel, %sum
221  %indvar.next = add i32 %indvar, 1
222  %cond = icmp slt i32 %indvar.next, %n
223  br i1 %cond, label %loop, label %loopexit
224
225loopexit:
226  ret i32 %sum
227}
228
229
230
231; Same as test above, but the load is no longer invariant (presence of
232; invariant.end). We cannot hoist the addrld out of loop.
233define i32 @test_fence1(i8* %addr, i32 %n, i8* %volatile) {
234; CHECK-LABEL: @test_fence1
235; CHECK-LABEL: entry
236; CHECK: invariant.start
237; CHECK-NEXT: invariant.end
238; CHECK-NEXT: br label %loop
239entry:
240  %gep = getelementptr inbounds i8, i8* %addr, i64 8
241  %addr.i = bitcast i8* %gep to i32 *
242  store atomic i32 5, i32 * %addr.i unordered, align 8
243  fence release
244  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
245  call void @llvm.invariant.end.p0i8({}* %invst, i64 4, i8* %gep)
246  br label %loop
247
248loop:
249  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
250  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
251  %volload = load atomic i8, i8* %volatile unordered, align 8
252  fence acquire
253  %volchk = icmp eq i8 %volload, 0
254  %addrld = load atomic i32, i32* %addr.i unordered, align 8
255  %sel = select i1 %volchk, i32 0, i32 %addrld
256  %sum.next = add i32 %sel, %sum
257  %indvar.next = add i32 %indvar, 1
258  %cond = icmp slt i32 %indvar.next, %n
259  br i1 %cond, label %loop, label %loopexit
260
261loopexit:
262  ret i32 %sum
263}
264
265; same as test above, but instead of invariant.end, we have the result of
266; invariant.start escaping through a call. We cannot hoist the load.
267define i32 @test_fence2(i8* %addr, i32 %n, i8* %volatile) {
268; CHECK-LABEL: @test_fence2
269; CHECK-LABEL: entry
270; CHECK-NOT: load
271; CHECK: br label %loop
272entry:
273  %gep = getelementptr inbounds i8, i8* %addr, i64 8
274  %addr.i = bitcast i8* %gep to i32 *
275  store atomic i32 5, i32 * %addr.i unordered, align 8
276  fence release
277  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
278  call void @escaping.invariant.start({}* %invst)
279  br label %loop
280
281loop:
282  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
283  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
284  %volload = load atomic i8, i8* %volatile unordered, align 8
285  fence acquire
286  %volchk = icmp eq i8 %volload, 0
287  %addrld = load atomic i32, i32* %addr.i unordered, align 8
288  %sel = select i1 %volchk, i32 0, i32 %addrld
289  %sum.next = add i32 %sel, %sum
290  %indvar.next = add i32 %indvar, 1
291  %cond = icmp slt i32 %indvar.next, %n
292  br i1 %cond, label %loop, label %loopexit
293
294loopexit:
295  ret i32 %sum
296}
297
298; FIXME: invariant.start dominates the load, and in this scope, the
299; load is invariant. So, we can hoist the `addrld` load out of the loop.
300; Consider the loadoperand addr.i bitcasted before being passed to
301; invariant.start
302define i32 @test_fence3(i32* %addr, i32 %n, i8* %volatile) {
303; CHECK-LABEL: @test_fence3
304; CHECK-LABEL: entry
305; CHECK: invariant.start
306; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
307; CHECK: br label %loop
308entry:
309  %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
310  %gep = bitcast i32* %addr.i to i8 *
311  store atomic i32 5, i32 * %addr.i unordered, align 8
312  fence release
313  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
314  br label %loop
315
316loop:
317  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
318  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
319  %volload = load atomic i8, i8* %volatile unordered, align 8
320  fence acquire
321  %volchk = icmp eq i8 %volload, 0
322  %addrld = load atomic i32, i32* %addr.i unordered, align 8
323  %sel = select i1 %volchk, i32 0, i32 %addrld
324  %sum.next = add i32 %sel, %sum
325  %indvar.next = add i32 %indvar, 1
326  %cond = icmp slt i32 %indvar.next, %n
327  br i1 %cond, label %loop, label %loopexit
328
329loopexit:
330  ret i32 %sum
331}
332
333; We should not hoist the addrld out of the loop.
334define i32 @test_fence4(i32* %addr, i32 %n, i8* %volatile) {
335; CHECK-LABEL: @test_fence4
336; CHECK-LABEL: entry
337; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
338; CHECK: br label %loop
339entry:
340  %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
341  %gep = bitcast i32* %addr.i to i8 *
342  br label %loop
343
344loop:
345  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
346  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
347  store atomic i32 5, i32 * %addr.i unordered, align 8
348  fence release
349  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
350  %volload = load atomic i8, i8* %volatile unordered, align 8
351  fence acquire
352  %volchk = icmp eq i8 %volload, 0
353  %addrld = load atomic i32, i32* %addr.i unordered, align 8
354  %sel = select i1 %volchk, i32 0, i32 %addrld
355  %sum.next = add i32 %sel, %sum
356  %indvar.next = add i32 %indvar, 1
357  %cond = icmp slt i32 %indvar.next, %n
358  br i1 %cond, label %loop, label %loopexit
359
360loopexit:
361  ret i32 %sum
362}
363
364; We can't hoist the invariant load out of the loop because
365; the marker is given a variable size (-1).
366define i32 @test_fence5(i8* %addr, i32 %n, i8* %volatile) {
367; CHECK-LABEL: @test_fence5
368; CHECK-LABEL: entry
369; CHECK: invariant.start
370; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
371; CHECK: br label %loop
372entry:
373  %gep = getelementptr inbounds i8, i8* %addr, i64 8
374  %addr.i = bitcast i8* %gep to i32 *
375  store atomic i32 5, i32 * %addr.i unordered, align 8
376  fence release
377  %invst = call {}* @llvm.invariant.start.p0i8(i64 -1, i8* %gep)
378  br label %loop
379
380loop:
381  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
382  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
383  %volload = load atomic i8, i8* %volatile unordered, align 8
384  fence acquire
385  %volchk = icmp eq i8 %volload, 0
386  %addrld = load atomic i32, i32* %addr.i unordered, align 8
387  %sel = select i1 %volchk, i32 0, i32 %addrld
388  %sum.next = add i32 %sel, %sum
389  %indvar.next = add i32 %indvar, 1
390  %cond = icmp slt i32 %indvar.next, %n
391  br i1 %cond, label %loop, label %loopexit
392
393loopexit:
394  ret i32 %sum
395}
396