1; RUN: opt -S -codegenprepare < %s | FileCheck %s
2
3target datalayout =
4"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
5target triple = "x86_64-unknown-linux-gnu"
6
7@x = external global [1 x [2 x <4 x float>]]
8
; Simplest case: one address computation in the entry block with a single
; memory use in another block. The output is expected to contain a byte-based
; GEP with offset 40 (element index 5 of i64).
define void @test1(i1 %cond, i64* %base) {
; CHECK-LABEL: @test1
; CHECK: getelementptr inbounds i8, {{.+}} 40
entry:
  %gep = getelementptr inbounds i64, i64* %base, i64 5
  %ptr = bitcast i64* %gep to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
  ; The only memory access through the computed address.
  %val = load i32, i32* %ptr, align 4
  br label %fallthrough

fallthrough:
  ret void
}
25
26declare void @foo(i32)
27
; The same address is loaded from in two different blocks; each block is
; expected to receive its own copy of the addressing computation.
define void @test2(i1 %cond, i64* %base) {
; CHECK-LABEL: @test2
entry:
  %gep = getelementptr inbounds i64, i64* %base, i64 5
  %ptr = bitcast i64* %gep to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %first = load i32, i32* %ptr, align 4
  call void @foo(i32 %first)
  %is.zero = icmp eq i32 %first, 0
  br i1 %is.zero, label %next, label %fallthrough

next:
; CHECK-LABEL: next:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %second = load i32, i32* %ptr, align 4
  call void @foo(i32 %second)
  br label %fallthrough

fallthrough:
  ret void
}
54
; Two loads from the same address inside one block: a single sunk copy of the
; addressing computation is enough (instruction selection will duplicate it
; if needed), so no second byte-based GEP may follow the first one.
define void @test3(i1 %cond, i64* %base) {
; CHECK-LABEL: @test3
entry:
  %gep = getelementptr inbounds i64, i64* %base, i64 5
  %ptr = bitcast i64* %gep to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %first = load i32, i32* %ptr, align 4
  call void @foo(i32 %first)
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %second = load i32, i32* %ptr, align 4
  call void @foo(i32 %second)
  br label %fallthrough

fallthrough:
  ret void
}
77
; The address is also passed directly to a call marked cold. Sinking is still
; expected: one copy next to the load and one next to the cold call.
define void @test4(i1 %cond, i64* %base) {
; CHECK-LABEL: @test4
entry:
  %gep = getelementptr inbounds i64, i64* %base, i64 5
  %ptr = bitcast i64* %gep to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %loaded = load i32, i32* %ptr, align 4
  call void @foo(i32 %loaded)
  %is.zero = icmp eq i32 %loaded, 0
  br i1 %is.zero, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
; CHECK-LABEL: rare.1:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  ; Cold call site that takes the address itself as an argument.
  call void @slowpath(i32 %loaded, i32* %ptr) cold
  br label %fallthrough
}
104
; Negative test: the call that receives the address is NOT marked cold, so
; the address computation must stay in the entry block and must not be
; duplicated into the hot path.
define void @test5(i1 %cond, i64* %base) {
; CHECK-LABEL: @test5
entry:
; CHECK: %addr = getelementptr inbounds
  ; The name %addr is matched by the pattern above; keep it.
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %ptr = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %loaded = load i32, i32* %ptr, align 4
  call void @foo(i32 %loaded)
  %is.zero = icmp eq i32 %loaded, 0
  br i1 %is.zero, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
  call void @slowpath(i32 %loaded, i32* %ptr) ;; NOT COLD
  br label %fallthrough
}
129
; Negative test: the function is marked minsize, so even though the call is
; cold the address computation is expected to stay in the entry block.
define void @test6(i1 %cond, i64* %base) minsize {
; CHECK-LABEL: @test6
entry:
; CHECK: %addr = getelementptr
  ; The name %addr is matched by the pattern above; keep it.
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %ptr = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %loaded = load i32, i32* %ptr, align 4
  call void @foo(i32 %loaded)
  %is.zero = icmp eq i32 %loaded, 0
  br i1 %is.zero, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
  call void @slowpath(i32 %loaded, i32* %ptr) cold
  br label %fallthrough
}
154
; Negative test - opt for size, driven by profile data rather than by a
; function attribute: the function carries !prof !14 (function_entry_count 0).
; As in @test6, the address computation is expected to stay in the entry block.
define void @test6_pgso(i1 %cond, i64* %base) !prof !14 {
; Use the full function name so this label cannot be confused with @test6.
; CHECK-LABEL: @test6_pgso
entry:
; CHECK: %addr = getelementptr
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
  call void @slowpath(i32 %v1, i32* %casted) cold
  br label %fallthrough
}
179
; Make sure sinking two copies of addressing mode into different blocks works
; when there are cold paths for each.
;
; Control flow: if.then -> {rare.1, next}, rare.1 -> next,
;               next -> {rare.2, fallthrough}, rare.2 -> fallthrough.
; Each of the four blocks that touches the address is expected to get its own
; byte-based GEP.
define void @test7(i1 %cond, i64* %base) {
; CHECK-LABEL: @test7
entry:
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %next

next:
; CHECK-LABEL: next:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v2 = load i32, i32* %casted, align 4
  call void @foo(i32 %v2)
  %cmp2 = icmp eq i32 %v2, 0
  ; The second load has its own cold path (rare.2). Branching to rare.1 here
  ; would leave rare.2 without any predecessor.
  br i1 %cmp2, label %rare.2, label %fallthrough

fallthrough:
  ret void

rare.1:
; CHECK-LABEL: rare.1:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  call void @slowpath(i32 %v1, i32* %casted) cold
  br label %next

rare.2:
; CHECK-LABEL: rare.2:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  call void @slowpath(i32 %v2, i32* %casted) cold
  br label %fallthrough
}
220
221declare void @slowpath(i32, i32*)
222
; Make sure we don't end up in an infinite loop after we fail to sink.
; The %ptr computation is expected to appear unchanged in the output.
; CHECK-LABEL: define void @test8
; CHECK: %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
define void @test8() {
allocas:
  %aFOO_load = load float*, float** undef
  %aFOO_load_ptr2int = ptrtoint float* %aFOO_load to i64
  ; The insertelement result below is not used anywhere in this function.
  %aFOO_load_ptr2int_broadcast_init = insertelement <4 x i64> undef, i64 %aFOO_load_ptr2int, i32 0
  %aFOO_load_ptr2int_2void = inttoptr i64 %aFOO_load_ptr2int to i8*
  ; Address computed here; its only use is the bitcast feeding the load in
  ; the next block.
  %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
  br label %load.i145

load.i145:
  %ptr.i143 = bitcast i8* %ptr to <4 x float>*
  %valall.i144 = load <4 x float>, <4 x float>* %ptr.i143, align 4
  ; %x_offset is not used anywhere in this function.
  %x_offset = getelementptr [1 x [2 x <4 x float>]], [1 x [2 x <4 x float>]]* @x, i32 0, i64 0
  br label %pl_loop.i.i122

pl_loop.i.i122:
  ; Self-loop: the function never reaches a return.
  br label %pl_loop.i.i122
}
244
; The loaded pointer flows through two phi nodes that reference each other
; (header <-> backedge). Every incoming value is an equivalent computation of
; base + 40 bytes, so the address is still expected to be sunk next to the
; load in the backedge block.
define void @test9(i1 %cond, i64* %base) {
; CHECK-LABEL: @test9
entry:
  %gep.pre = getelementptr inbounds i64, i64* %base, i64 5
  %ptr.pre = bitcast i64* %gep.pre to i32*
  br label %header

header:
  %i = phi i32 [0, %entry], [%i.next, %backedge]
  %ptr.hdr = phi i32* [%ptr.pre, %entry], [%ptr.be, %backedge]
  br i1 %cond, label %if.then, label %backedge

if.then:
  call void @foo(i32 %i)
  ; Same address as in the entry block, recomputed inside the loop.
  %gep.in = getelementptr inbounds i64, i64* %base, i64 5
  %ptr.in = bitcast i64* %gep.in to i32*
  br label %backedge

backedge:
; CHECK-LABEL: backedge:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %ptr.be = phi i32* [%ptr.hdr, %header], [%ptr.in, %if.then]
  %val = load i32, i32* %ptr.be, align 4
  call void @foo(i32 %val)
  %i.next = add i32 %i, 1
  %more = icmp slt i32 %i.next, 1000
  br i1 %more, label %header, label %exit

exit:
  ret void
}
278
; Both operands of the pointer-choosing instruction compute the same address
; (i64 index 5 and i32 index 10 are both 40 bytes past %base), so that
; instruction is expected to disappear from the output.
define void @test10(i1 %cond, i64* %base) {
; CHECK-LABEL: @test10
; CHECK: getelementptr inbounds i8, {{.+}} 40
; CHECK-NOT: select
entry:
  ; First form: index 5 in units of i64.
  %via.i64 = getelementptr inbounds i64, i64* %base, i64 5
  %via.i64.cast = bitcast i64* %via.i64 to i32*
  ; Second form: index 10 in units of i32.
  %base.i32 = bitcast i64* %base to i32*
  %via.i32 = getelementptr inbounds i32, i32* %base.i32, i64 10
  %merged = select i1 %cond, i32* %via.i64.cast, i32* %via.i32
  %val = load i32, i32* %merged, align 4
  call void @foo(i32 %val)
  ret void
}
295
; Found by fuzzer, getSExtValue of > 64 bit constant.
; This is a crash test: the pass only has to finish and still emit the
; function. The label below anchors that and also ends the scope of any
; negative pattern left open by the previous test.
; CHECK-LABEL: @i96_mul
define void @i96_mul(i1* %base, i96 %offset) {
BB:
  ;; RHS = 0x7FFFFFFFFFFFFFFFFFFFFFFF (2^95 - 1, does not fit in 64 bits)
  %B84 = mul i96 %offset, 39614081257132168796771975167
  %G23 = getelementptr i1, i1* %base, i96 %B84
  store i1 false, i1* %G23
  ret void
}
305
; Module-level profile summary. It is registered as the "ProfileSummary"
; module flag (!0) and built from the fields in !1.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
; Detailed summary entries referenced from !9.
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
; Entry count of zero; attached to @test6_pgso through !prof.
!14 = !{!"function_entry_count", i64 0}
322