1; RUN: opt < %s -basic-aa -gvn -enable-load-pre -S | FileCheck %s
2target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
3
4declare void @llvm.experimental.guard(i1, ...)
5
; This is a motivating example of why we prohibit hoisting through guards.
; In the bottom block, we check that the index is within bounds; we access the
; element only in that case and deoptimize otherwise. If we hoisted the load to
; a place above the guard, it could lead to an out-of-bounds array access.
define i32 @test_motivation(i32* %p, i32* %q, i1 %C, i32 %index, i32 %len) {
; CHECK-LABEL: @test_motivation(
;
; Bounds-checked array access. block1 forms two element addresses at %index
; (%el1 off %q, %el2 off %p) and branches on %C. block4 merges the two paths,
; verifies 0 <= %index < %len with a guard, and only then loads the element.
block1:
  %el1 = getelementptr inbounds i32, i32* %q, i32 %index
  %el2 = getelementptr inbounds i32, i32* %p, i32 %index
  br i1 %C, label %block2, label %block3

; Expected: this block keeps only its branch. Neither the load nor any part of
; the bounds computation may show up between here and block4.
block2:
; CHECK:        block2:
; CHECK-NEXT:     br
; CHECK-NOT:      load
; CHECK-NOT:      sge
; CHECK-NOT:      slt
; CHECK-NOT:      and
  br label %block4

; The store goes through %el2, which is the pointer %P2 selects on the edge
; from this block. The loaded value is therefore known on this edge, and only
; the block2 edge would need a new load -- the same store/phi pairing the
; test_guard_* functions in this file use with %p. Storing through %el1
; instead leaves the value unknown on both edges.
block3:
  store i32 0, i32* %el2
  br label %block4

; Expected: the bounds computation, the guard, and the load of %P2 all remain
; in this block, with the load directly after the guard.
block4:
; CHECK:        block4:
; CHECK:          %cond1 = icmp sge i32 %index, 0
; CHECK-NEXT:     %cond2 = icmp slt i32 %index, %len
; CHECK-NEXT:     %in.bounds = and i1 %cond1, %cond2
; CHECK:          call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds)
; CHECK-NEXT:     %PRE = load i32, i32* %P2
; CHECK:          ret i32 %PRE
  %P2 = phi i32* [ %el2, %block3 ], [ %el1, %block2 ]
  %cond1 = icmp sge i32 %index, 0
  %cond2 = icmp slt i32 %index, %len
  %in.bounds = and i1 %cond1, %cond2
  call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) [ "deopt"() ]
  %PRE = load i32, i32* %P2
  ret i32 %PRE
}
49
50; Guard in load's block that is above the load should prohibit the PRE.
define i32 @test_guard_01(i32* %p, i32* %q, i1 %C, i1 %G) {
; CHECK-LABEL: @test_guard_01(
;
; Diamond-shaped CFG: block1 branches on %C to block2 or block3, and both
; paths rejoin in block4. Only block3 writes memory (a store of 0 through %p);
; %P2 selects %p on that edge and %q on the edge from block2. In block4 the
; guard on %G comes before the load of %P2.
block1:
  br i1 %C, label %block2, label %block3

; Expected: this block still starts with its branch and no load appears
; between here and block4.
block2:
; CHECK:        block2:
; CHECK-NEXT:     br
; CHECK-NOT:      load
  br label %block4

block3:
  store i32 0, i32* %p
  br label %block4

; Expected: a load is still present immediately after the guard call.
block4:
; CHECK:        block4:
; CHECK:          call void (i1, ...) @llvm.experimental.guard(i1 %G)
; CHECK-NEXT:     load
; CHECK:          ret i32
  %P2 = phi i32* [ %p, %block3 ], [ %q, %block2 ]
  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
  %PRE = load i32, i32* %P2
  ret i32 %PRE
}
80
81; Guard in load's block that is below the load should not prohibit the PRE.
define i32 @test_guard_02(i32* %p, i32* %q, i1 %C, i1 %G) {
; CHECK-LABEL: @test_guard_02(
;
; Diamond-shaped CFG: block1 branches on %C to block2 or block3, and both
; paths rejoin in block4. Only block3 writes memory (a store of 0 through %p);
; %P2 selects %p on that edge and %q on the edge from block2. In block4 the
; load of %P2 comes first and the guard on %G follows it.
block1:
  br i1 %C, label %block2, label %block3

; Expected: a load from %q is the first instruction of this block.
block2:
; CHECK:        block2:
; CHECK-NEXT:     load i32, i32* %q
  br label %block4

block3:
  store i32 0, i32* %p
  br label %block4

; Expected: the block opens with an i32 phi, then the pointer phi, then the
; guard call, and no load remains before the return.
block4:
; CHECK:        block4:
; CHECK-NEXT:     phi i32 [
; CHECK-NEXT:     phi i32* [
; CHECK-NEXT:     call void (i1, ...) @llvm.experimental.guard(i1 %G)
; CHECK-NOT:      load
; CHECK:          ret i32
  %P2 = phi i32* [ %p, %block3 ], [ %q, %block2 ]
  %PRE = load i32, i32* %P2
  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
  ret i32 %PRE
}
112
113; Guard above the load's block should prevent PRE from hoisting through it.
define i32 @test_guard_03(i32* %p, i32* %q, i1 %C, i1 %G) {
; CHECK-LABEL: @test_guard_03(
;
; Diamond-shaped CFG: block1 branches on %C to block2 or block3, and both
; paths rejoin in block4. Only block3 writes memory (a store of 0 through %p);
; %P2 selects %p on that edge and %q on the edge from block2. Here the guard
; on %G sits in block4, while the load of %P2 is one block further down, in
; block5.
block1:
  br i1 %C, label %block2, label %block3

; Expected: this block still starts with its branch and no load appears
; between here and block4.
block2:
; CHECK:        block2:
; CHECK-NEXT:     br
; CHECK-NOT:      load
  br label %block4

block3:
  store i32 0, i32* %p
  br label %block4

; Expected output for this block is exactly: pointer phi, guard call, load,
; return. The expectations list the load and the return directly after the
; guard, with no branch or block5 label in between.
block4:
; CHECK:        block4:
; CHECK-NEXT:     phi i32*
; CHECK-NEXT:     call void (i1, ...) @llvm.experimental.guard(i1 %G)
; CHECK-NEXT:     load
; CHECK-NEXT:     ret i32
  %P2 = phi i32* [ %p, %block3 ], [ %q, %block2 ]
  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
  br label %block5

block5:
  %PRE = load i32, i32* %P2
  ret i32 %PRE
}
147