1; RUN: opt < %s -jump-threading -S | FileCheck %s
2; RUN: opt < %s -aa-pipeline=basic-aa -passes=jump-threading -S | FileCheck %s
3
4target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
5target triple = "i386-apple-darwin7"
6
7; Test that we can thread through the block with the partially redundant load (%2).
8; rdar://6402033
9define i32 @test1(i32* %P) nounwind {
10; CHECK-LABEL: @test1(
11entry:
12	%0 = tail call i32 (...) @f1() nounwind		; <i32> [#uses=1]
13	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
14	br i1 %1, label %bb1, label %bb
15
16bb:		; preds = %entry
17; CHECK: bb1.thread:
18; CHECK: store
19; CHECK: br label %bb3
20	store i32 42, i32* %P, align 4
21	br label %bb1
22
23bb1:		; preds = %entry, %bb
24	%res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]		; <i32> [#uses=2]
25	%2 = load i32, i32* %P, align 4		; <i32> [#uses=1]
26	%3 = icmp sgt i32 %2, 36		; <i1> [#uses=1]
27	br i1 %3, label %bb3, label %bb2
28
29bb2:		; preds = %bb1
30	%4 = tail call i32 (...) @f2() nounwind		; <i32> [#uses=0]
31	ret i32 %res.0
32
33bb3:		; preds = %bb1
34; CHECK: bb3:
35; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
36; CHECK: ret i32 %res.01
37	ret i32 %res.0
38}
39
40declare i32 @f1(...)
41
42declare i32 @f2(...)
43
44
45;; Check that we preserve TBAA information.
46; rdar://11039258
47
48define i32 @test2(i32* %P) nounwind {
49; CHECK-LABEL: @test2(
50entry:
51	%0 = tail call i32 (...) @f1() nounwind		; <i32> [#uses=1]
52	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
53	br i1 %1, label %bb1, label %bb
54
55bb:		; preds = %entry
56; CHECK: bb1.thread:
57; CHECK: store{{.*}}, !tbaa !0
58; CHECK: br label %bb3
59	store i32 42, i32* %P, align 4, !tbaa !0
60	br label %bb1
61
62bb1:		; preds = %entry, %bb
63	%res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
64	%2 = load i32, i32* %P, align 4, !tbaa !0
65	%3 = icmp sgt i32 %2, 36
66	br i1 %3, label %bb3, label %bb2
67
68bb2:		; preds = %bb1
69	%4 = tail call i32 (...) @f2() nounwind
70	ret i32 %res.0
71
72bb3:		; preds = %bb1
73; CHECK: bb3:
74; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
75; CHECK: ret i32 %res.01
76	ret i32 %res.0
77}
78
79define i32 @test3(i8** %x, i1 %f) {
80; Correctly thread loads of different (but compatible) types, placing bitcasts
81; as necessary in the predecessors. This is especially tricky because the same
82; predecessor ends up with two entries in the PHI node and they must share
83; a single cast.
84; CHECK-LABEL: @test3(
85entry:
86  %0 = bitcast i8** %x to i32**
87  %1 = load i32*, i32** %0, align 8
88  br i1 %f, label %if.end57, label %if.then56
89; CHECK: %[[LOAD:.*]] = load i32*, i32**
90; CHECK: %[[CAST:.*]] = bitcast i32* %[[LOAD]] to i8*
91
92if.then56:
93  br label %if.end57
94
95if.end57:
96  %2 = load i8*, i8** %x, align 8
97  %tobool59 = icmp eq i8* %2, null
98  br i1 %tobool59, label %return, label %if.then60
99; CHECK: %[[PHI:.*]] = phi i8* [ %[[CAST]], %[[PRED:[^ ]+]] ], [ %[[CAST]], %[[PRED]] ]
100; CHECK-NEXT: %[[CMP:.*]] = icmp eq i8* %[[PHI]], null
101; CHECK-NEXT: br i1 %[[CMP]]
102
103if.then60:
104  ret i32 42
105
106return:
107  ret i32 13
108}
109
110define i32 @test4(i32* %P) {
111; CHECK-LABEL: @test4(
112entry:
113  %v0 = tail call i32 (...) @f1()
114  %v1 = icmp eq i32 %v0, 0
115  br i1 %v1, label %bb1, label %bb
116
117bb:
118; CHECK: bb1.thread:
119; CHECK: store atomic
120; CHECK: br label %bb3
121  store atomic i32 42, i32* %P unordered, align 4
122  br label %bb1
123
124bb1:
125; CHECK: bb1:
126; CHECK-NOT: phi
127; CHECK: load atomic
128  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
129  %v2 = load atomic i32, i32* %P unordered, align 4
130  %v3 = icmp sgt i32 %v2, 36
131  br i1 %v3, label %bb3, label %bb2
132
133bb2:
134  %v4 = tail call i32 (...) @f2()
135  ret i32 %res.0
136
137bb3:
138  ret i32 %res.0
139}
140
141define i32 @test5(i32* %P) {
142; Negative test
143
144; CHECK-LABEL: @test5(
145entry:
146  %v0 = tail call i32 (...) @f1()
147  %v1 = icmp eq i32 %v0, 0
148  br i1 %v1, label %bb1, label %bb
149
150bb:
151; CHECK: bb:
152; CHECK-NEXT:   store atomic i32 42, i32* %P release, align 4
153; CHECK-NEXT:   br label %bb1
154  store atomic i32 42, i32* %P release, align 4
155  br label %bb1
156
157bb1:
158; CHECK: bb1:
159; CHECK-NEXT:  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
160; CHECK-NEXT:  %v2 = load atomic i32, i32* %P acquire, align 4
161; CHECK-NEXT:  %v3 = icmp sgt i32 %v2, 36
162; CHECK-NEXT:  br i1 %v3, label %bb3, label %bb2
163
164  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
165  %v2 = load atomic i32, i32* %P acquire, align 4
166  %v3 = icmp sgt i32 %v2, 36
167  br i1 %v3, label %bb3, label %bb2
168
169bb2:
170  %v4 = tail call i32 (...) @f2()
171  ret i32 %res.0
172
173bb3:
174  ret i32 %res.0
175}
176
177define i32 @test6(i32* %P) {
178; Negative test
179
180; CHECK-LABEL: @test6(
181entry:
182  %v0 = tail call i32 (...) @f1()
183  %v1 = icmp eq i32 %v0, 0
184  br i1 %v1, label %bb1, label %bb
185
186bb:
187; CHECK: bb:
188; CHECK-NEXT:   store i32 42, i32* %P
189; CHECK-NEXT:   br label %bb1
190  store i32 42, i32* %P
191  br label %bb1
192
193bb1:
194; CHECK: bb1:
195; CHECK-NEXT:  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
196; CHECK-NEXT:  %v2 = load atomic i32, i32* %P acquire, align 4
197; CHECK-NEXT:  %v3 = icmp sgt i32 %v2, 36
198; CHECK-NEXT:  br i1 %v3, label %bb3, label %bb2
199
200  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
201  %v2 = load atomic i32, i32* %P acquire, align 4
202  %v3 = icmp sgt i32 %v2, 36
203  br i1 %v3, label %bb3, label %bb2
204
205bb2:
206  %v4 = tail call i32 (...) @f2()
207  ret i32 %res.0
208
209bb3:
210  ret i32 %res.0
211}
212
213define i32 @test7(i32* %P) {
214; Negative test
215
216; CHECK-LABEL: @test7(
217entry:
218  %v0 = tail call i32 (...) @f1()
219  %v1 = icmp eq i32 %v0, 0
220  br i1 %v1, label %bb1, label %bb
221
222bb:
223; CHECK: bb:
224; CHECK-NEXT:   %val = load i32, i32* %P
225; CHECK-NEXT:   br label %bb1
226  %val = load i32, i32* %P
227  br label %bb1
228
229bb1:
230; CHECK: bb1:
231; CHECK-NEXT:  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
232; CHECK-NEXT:  %v2 = load atomic i32, i32* %P acquire, align 4
233; CHECK-NEXT:  %v3 = icmp sgt i32 %v2, 36
234; CHECK-NEXT:  br i1 %v3, label %bb3, label %bb2
235
236  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
237  %v2 = load atomic i32, i32* %P acquire, align 4
238  %v3 = icmp sgt i32 %v2, 36
239  br i1 %v3, label %bb3, label %bb2
240
241bb2:
242  %v4 = tail call i32 (...) @f2()
243  ret i32 %res.0
244
245bb3:
246  ret i32 %res.0
247}
248
249; Make sure we merge the aliasing metadata. (If we don't, we have a load
250; with the wrong metadata, so the branch gets incorrectly eliminated.)
251define void @test8(i32*, i32*, i32*) {
252; CHECK-LABEL: @test8(
253; CHECK: %a = load i32, i32* %0, !range !4
254; CHECK-NEXT: store i32 %a
255; CHECK: br i1 %c
256  %a = load i32, i32* %0, !tbaa !0, !range !4, !alias.scope !9, !noalias !10
257  %b = load i32, i32* %0, !range !5
258  store i32 %a, i32* %1
259  %c = icmp eq i32 %b, 8
260  br i1 %c, label %ret1, label %ret2
261
262ret1:
263  ret void
264
265ret2:
266  %xxx = tail call i32 (...) @f1() nounwind
267  ret void
268}
269
270; Make sure we merge/PRE aliasing metadata correctly.  That means that
271; we need to remove metadata from the existing load, and add appropriate
272; metadata to the newly inserted load.
273define void @test9(i32*, i32*, i32*, i1 %c) {
274; CHECK-LABEL: @test9(
275  br i1 %c, label %d1, label %d2
276
277; CHECK: d1:
278; CHECK-NEXT: %a = load i32, i32* %0{{$}}
279d1:
280  %a = load i32, i32* %0, !range !4, !alias.scope !9, !noalias !10
281  br label %d3
282
283; CHECK: d2:
284; CHECK-NEXT: %xxxx = tail call i32 (...) @f1()
285; CHECK-NEXT: %b.pr = load i32, i32* %0, !tbaa !0{{$}}
286d2:
287  %xxxx = tail call i32 (...) @f1() nounwind
288  br label %d3
289
290d3:
291  %p = phi i32 [ 1, %d2 ], [ %a, %d1 ]
292  %b = load i32, i32* %0, !tbaa !0
293  store i32 %p, i32* %1
294  %c2 = icmp eq i32 %b, 8
295  br i1 %c2, label %ret1, label %ret2
296
297ret1:
298  ret void
299
300ret2:
301  %xxx = tail call i32 (...) @f1() nounwind
302  ret void
303}
304
305define i32 @fn_noalias(i1 %c2,i64* noalias %P, i64* noalias %P2) {
306; CHECK-LABEL: @fn_noalias
307; CHECK-LABEL: cond1:
308; CHECK: %[[LD1:.*]] = load i64, i64* %P
309; CHECK: br i1 %c, label %[[THREAD:.*]], label %end
310; CHECK-LABEL: cond2:
311; CHECK: %[[LD2:.*]] = load i64, i64* %P
312; CHECK-LABEL: cond3:
313; CHECK: %[[PHI:.*]] = phi i64 [ %[[LD1]], %[[THREAD]] ], [ %[[LD2]], %cond2 ]
314; CHECK: call void @fn3(i64 %[[PHI]])
315entry:
316  br i1 %c2, label %cond2, label %cond1
317
318cond1:
319  %l1 = load i64, i64* %P
320  store i64 42, i64* %P2
321  %c = icmp eq i64 %l1, 0
322  br i1 %c, label %cond2, label %end
323
324cond2:
325  %l2 = load i64, i64* %P
326  call void @fn2(i64 %l2)
327  %c3 = icmp eq i64 %l2,  0
328  br i1 %c3, label %cond3, label %end
329
330cond3:
331  call void @fn3(i64 %l2)
332  br label %end
333
334end:
335  ret i32 0
336}
337
338; This tests if we can thread from %sw.bb.i to %do.body.preheader.i67 through
339; %sw.bb21.i. To make this happen, %l2 should be detected as a partically
340; redundant load with %l3 across the store to %phase in %sw.bb21.i.
341
342%struct.NEXT_MOVE = type { i32, i32, i32* }
343@hash_move = unnamed_addr global [65 x i32] zeroinitializer, align 4
344@current_move = internal global [65 x i32] zeroinitializer, align 4
345@last = internal unnamed_addr global [65 x i32*] zeroinitializer, align 8
346@next_status = internal unnamed_addr global [65 x %struct.NEXT_MOVE] zeroinitializer, align 8
347define fastcc i32 @Search(i64 %idxprom.i, i64 %idxprom.i89, i32 %c) {
348; CHECK-LABEL: @Search
349; CHECK-LABEL: sw.bb.i:
350; CHECK: %[[LD1:.*]] = load i32, i32* %arrayidx185, align 4
351; CHECK: %[[C1:.*]] = icmp eq i32 %[[LD1]], 0
352; CHECK: br i1 %[[C1]], label %sw.bb21.i.thread, label %if.then.i64
353; CHECK-LABEL: sw.bb21.i.thread:
354; CHECK: br label %[[THREAD_TO:.*]]
355; CHECK-LABEL: sw.bb21.i:
356; CHECK: %[[LD2:.*]] = load i32, i32* %arrayidx185, align 4
357; CHECK: %[[C2:.*]] = icmp eq i32 %[[LD2]], 0
358; CHECK:br i1 %[[C2]], label %[[THREAD_TO]], label %cleanup
359entry:
360  %arrayidx185 = getelementptr inbounds [65 x i32], [65 x i32]* @hash_move, i64 0, i64 %idxprom.i
361  %arrayidx307 = getelementptr inbounds [65 x i32], [65 x i32]* @current_move, i64 0, i64 %idxprom.i
362  %arrayidx89 = getelementptr inbounds [65 x i32*], [65 x i32*]* @last, i64 0, i64 %idxprom.i
363  %phase = getelementptr inbounds [65 x %struct.NEXT_MOVE], [65 x %struct.NEXT_MOVE]* @next_status, i64 0, i64 %idxprom.i, i32 0
364  br label %cond.true282
365
366cond.true282:
367  switch i32 %c, label %sw.default.i [
368    i32 1, label %sw.bb.i
369    i32 0, label %sw.bb21.i
370  ]
371
372sw.default.i:
373  br label %cleanup
374
375sw.bb.i:
376  %call.i62 = call fastcc i32* @GenerateCheckEvasions()
377  store i32* %call.i62, i32** %arrayidx89, align 8
378  %l2 = load i32, i32* %arrayidx185, align 4
379  %tobool.i63 = icmp eq i32 %l2, 0
380  br i1 %tobool.i63, label %sw.bb21.i, label %if.then.i64
381
382if.then.i64:                                      ; preds = %sw.bb.i
383  store i32 7, i32* %phase, align 8
384  store i32 %l2, i32* %arrayidx307, align 4
385  %call16.i = call fastcc i32 @ValidMove(i32 %l2)
386  %tobool17.i = icmp eq i32 %call16.i, 0
387  br i1 %tobool17.i, label %if.else.i65, label %cleanup
388
389if.else.i65:
390  call void @f65()
391  br label %sw.bb21.i
392
393sw.bb21.i:
394  store i32 10, i32* %phase, align 8
395  %l3= load i32, i32* %arrayidx185, align 4
396  %tobool27.i = icmp eq i32 %l3, 0
397  br i1 %tobool27.i, label %do.body.preheader.i67, label %cleanup
398
399do.body.preheader.i67:
400  call void @f67()
401  ret  i32 67
402
403cleanup:
404  call void @Cleanup()
405  ret  i32 0
406}
407
408declare fastcc i32* @GenerateCheckEvasions()
409declare fastcc i32 @ValidMove(i32 %move)
410declare void @f67()
411declare void @Cleanup()
412declare void @f65()
413
414define i32 @fn_SinglePred(i1 %c2,i64* %P) {
415; CHECK-LABEL: @fn_SinglePred
416; CHECK-LABEL: entry:
417; CHECK: %[[L1:.*]] = load i64, i64* %P
418; CHECK: br i1 %c, label %cond3, label %cond1
419; CHECK-LABEL: cond2:
420; CHECK-NOT: load
421; CHECK: %[[PHI:.*]] = phi i64 [ %[[L1]], %cond1 ]
422; CHECK: call void @fn2(i64 %[[PHI]])
423; CHECK: br label %end
424; CHECK-LABEL: cond3:
425; CHECK: call void @fn2(i64 %l1)
426; CHECK: call void @fn3(i64 %l1)
427
428entry:
429  %l1 = load i64, i64* %P
430  %c = icmp eq i64 %l1, 0
431  br i1 %c, label %cond2, label %cond1
432
433cond1:
434  br i1 %c2, label %cond2, label %end
435
436cond2:
437  %l2 = load i64, i64* %P
438  call void @fn2(i64 %l2)
439  %c3 = icmp eq i64 %l2,  0
440  br i1 %c3, label %cond3, label %end
441
442cond3:
443  call void @fn3(i64 %l2)
444  br label %end
445
446end:
447  ret i32 0
448}
449
450define i32 @fn_SinglePredMultihop(i1 %c1, i1 %c2,i64* %P) {
451; CHECK-LABEL: @fn_SinglePredMultihop
452; CHECK-LABEL: entry:
453; CHECK: %[[L1:.*]] = load i64, i64* %P
454; CHECK: br i1 %c0, label %cond3, label %cond0
455; CHECK-LABEL: cond2:
456; CHECK-NOT: load
457; CHECK: %[[PHI:.*]] = phi i64 [ %[[L1]], %cond1 ]
458; CHECK: call void @fn2(i64 %[[PHI]])
459; CHECK: br label %end
460; CHECK-LABEL: cond3:
461; CHECK: call void @fn2(i64 %l1)
462; CHECK: call void @fn3(i64 %l1)
463
464entry:
465  %l1 = load i64, i64* %P
466  %c0 = icmp eq i64 %l1, 0
467  br i1 %c0, label %cond2, label %cond0
468
469cond0:
470  br i1 %c1, label %cond1, label %end
471
472cond1:
473  br i1 %c2, label %cond2, label %end
474
475cond2:
476  %l2 = load i64, i64* %P
477  call void @fn2(i64 %l2)
478  %c3 = icmp eq i64 %l2,  0
479  br i1 %c3, label %cond3, label %end
480
481cond3:
482  call void @fn3(i64 %l2)
483  br label %end
484
485end:
486  ret i32 0
487}
488
489declare void @fn2(i64)
490declare void @fn3(i64)
491
492
493; Make sure we phi-translate and make the partially redundant load in
494; merge fully redudant and then we can jump-thread the block with the
495; store.
496;
497; CHECK-LABEL: define i32 @phi_translate_partial_redundant_loads(i32, i32*, i32*
498; CHECK: merge.thread:
499; CHECK: store
500; CHECK: br label %left_x
501;
502; CHECK: left_x:
503; CHECK-NEXT: ret i32 20
504define i32 @phi_translate_partial_redundant_loads(i32, i32*, i32*)  {
505  %cmp0 = icmp ne i32 %0, 0
506  br i1 %cmp0, label %left, label %right
507
508left:
509  store i32 1, i32* %1, align 4
510  br label %merge
511
512right:
513  br label %merge
514
515merge:
516  %phiptr = phi i32* [ %1, %left ], [ %2, %right ]
517  %newload = load i32, i32* %phiptr, align 4
518  %cmp1 = icmp slt i32 %newload, 5
519  br i1 %cmp1, label %left_x, label %right_x
520
521left_x:
522  ret i32 20
523
524right_x:
525  ret i32 10
526}
527
528!0 = !{!3, !3, i64 0}
529!1 = !{!"omnipotent char", !2}
530!2 = !{!"Simple C/C++ TBAA"}
531!3 = !{!"int", !1}
532!4 = !{ i32 0, i32 1 }
533!5 = !{ i32 8, i32 10 }
534!6 = !{!6}
535!7 = !{!7, !6}
536!8 = !{!8, !6}
537!9 = !{!7}
538!10 = !{!8}
539