1; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
2; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
3; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
4; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
5; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
6; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
7
8declare i32 @llvm.r600.read.tidig.x() #0
9
; Test: a global (addrspace(1)) i32 load at constant element offset 7 whose
; address is computed in %entry but whose only user is the load in %if.
; The directives below expect, for the bonaire run, no GEP of %in before the
; branch and a ptrtoint after it; for the tonga run a GEP of %in is still
; expected before the branch.
10; OPT-LABEL: @test_sink_global_small_offset_i32(
11; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
12; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
13; OPT: br i1
14; OPT-CI: ptrtoint
15
16; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
17; GCN: {{^}}BB0_2:
18define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
19entry:
  ; %out.gep is only used by the store in %endif.
20  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; %in.gep is only used by the load in %if.
21  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
22  %tmp0 = icmp eq i32 %cond, 0
  ; Skip the load entirely when %cond == 0.
23  br i1 %tmp0, label %endif, label %if
24
25if:
26  %tmp1 = load i32, i32 addrspace(1)* %in.gep
27  br label %endif
28
29endif:
  ; Stored value is the loaded i32, or 0 when %if was skipped.
30  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
31  store i32 %x, i32 addrspace(1)* %out.gep
32  br label %done
33
34done:
35  ret void
36}
37
; Test: a conditional global i8 load at constant byte offset 65535.
; The directives expect the %in.gep GEP to remain ahead of the branch in the
; opt output, and the llc output to use buffer_load_sbyte with an SGPR as the
; final operand (the line must end right after the register).
38; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
39; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
40; OPT: br i1
41
42; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
43; GCN: s_and_saveexec_b64
44; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
45; GCN: {{^}}BB1_2:
46; GCN: s_or_b64 exec
47define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
48entry:
49  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  ; Byte offset under test; its only user is the load in %if.
50  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
51  %tmp0 = icmp eq i32 %cond, 0
52  br i1 %tmp0, label %endif, label %if
53
54if:
55  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  ; The sign extension is what the buffer_load_sbyte check above matches.
56  %tmp2 = sext i8 %tmp1 to i32
57  br label %endif
58
59endif:
  ; Stored value is the sign-extended byte, or 0 when %if was skipped.
60  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
61  store i32 %x, i32 addrspace(1)* %out.gep
62  br label %done
63
64done:
65  ret void
66}
67
; Test: a conditional global i8 load at constant byte offset 4095.
; Only llc output is checked here: the directives expect the offset to appear
; as the immediate `offset:4095` on buffer_load_sbyte, with 0 as the preceding
; operand.
68; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
69; GCN: s_and_saveexec_b64
70; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
71; GCN: {{^}}BB2_2:
72; GCN: s_or_b64 exec
73define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
74entry:
  ; Note: this GEP uses an i32 index, unlike the i64 indices used elsewhere.
75  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
  ; Byte offset under test; its only user is the load in %if.
76  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
77  %tmp0 = icmp eq i32 %cond, 0
78  br i1 %tmp0, label %endif, label %if
79
80if:
81  %tmp1 = load i8, i8 addrspace(1)* %in.gep
82  %tmp2 = sext i8 %tmp1 to i32
83  br label %endif
84
85endif:
  ; Stored value is the sign-extended byte, or 0 when %if was skipped.
86  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
87  store i32 %x, i32 addrspace(1)* %out.gep
88  br label %done
89
90done:
91  ret void
92}
93
; Test: same shape as the 4095-byte case, but with constant byte offset 4096.
; Only llc output is checked: the directives expect buffer_load_sbyte to end
; with an SGPR operand rather than an `offset:` immediate.
94; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
95; GCN: s_and_saveexec_b64
96; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
97; GCN: {{^}}BB3_2:
98; GCN: s_or_b64 exec
99define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
100entry:
101  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  ; Byte offset under test; its only user is the load in %if.
102  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
103  %tmp0 = icmp eq i32 %cond, 0
104  br i1 %tmp0, label %endif, label %if
105
106if:
107  %tmp1 = load i8, i8 addrspace(1)* %in.gep
108  %tmp2 = sext i8 %tmp1 to i32
109  br label %endif
110
111endif:
  ; Stored value is the sign-extended byte, or 0 when %if was skipped.
112  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
113  store i32 %x, i32 addrspace(1)* %out.gep
114  br label %done
115
116done:
117  ret void
118}
119
; Test: volatile store/load through a GEP into a stack alloca at element
; index 1023. The directives expect no `getelementptr [512 x i32]` before the
; branch and a ptrtoint after it in the opt output, and `offen offset:4092`
; (1023 * 4 bytes) on the buffer store and load in the llc output.
120; OPT-LABEL: @test_sink_scratch_small_offset_i32(
121; OPT-NOT:  getelementptr [512 x i32]
122; OPT: br i1
123; OPT: ptrtoint
124
125; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
126; GCN: s_and_saveexec_b64
127; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
128; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
129; GCN: {{^}}BB4_2:
130define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
131entry:
132  %alloca = alloca [512 x i32], align 4
133  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
134  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; %add.arg has no users in this function (and %in is never referenced).
135  %add.arg = add i32 %arg, 8
  ; Index 1023 lies past the 512 elements of the alloca.
  ; NOTE(review): presumably intentional, to reach byte offset 4092 - confirm.
136  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
137  %tmp0 = icmp eq i32 %cond, 0
138  br i1 %tmp0, label %endif, label %if
139
140if:
  ; Volatile accesses; these are what the buffer_store/buffer_load checks match.
141  store volatile i32 123, i32* %alloca.gep
142  %tmp1 = load volatile i32, i32* %alloca.gep
143  br label %endif
144
145endif:
146  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
147  store i32 %x, i32 addrspace(1)* %out.gep.0
  ; Second use of %alloca.gep, outside the conditional block.
148  %load = load volatile i32, i32* %alloca.gep
149  store i32 %load, i32 addrspace(1)* %out.gep.1
150  br label %done
151
152done:
153  ret void
154}
155
; Test: same shape as the scratch small-offset case, but with element index
; 1024. The directives expect the %alloca.gep GEP to stay ahead of the branch
; with no ptrtoint after it in the opt output, and `offen` with no `offset:`
; immediate on the buffer store and load in the llc output.
156; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
157; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
158; OPT: br i1
159; OPT-NOT: ptrtoint
160
161; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
162; GCN: s_and_saveexec_b64
163; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
164; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
165; GCN: {{^}}BB5_2:
166define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
167entry:
168  %alloca = alloca [512 x i32], align 4
169  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
170  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; %add.arg has no users in this function (and %in is never referenced).
171  %add.arg = add i32 %arg, 8
  ; Index 1024 lies past the 512 elements of the alloca.
  ; NOTE(review): presumably intentional, to exceed the offset used in the
  ; small-offset variant - confirm.
172  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
173  %tmp0 = icmp eq i32 %cond, 0
174  br i1 %tmp0, label %endif, label %if
175
176if:
  ; Volatile accesses; these are what the buffer_store/buffer_load checks match.
177  store volatile i32 123, i32* %alloca.gep
178  %tmp1 = load volatile i32, i32* %alloca.gep
179  br label %endif
180
181endif:
182  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
183  store i32 %x, i32 addrspace(1)* %out.gep.0
  ; Second use of %alloca.gep, outside the conditional block.
184  %load = load volatile i32, i32* %alloca.gep
185  store i32 %load, i32 addrspace(1)* %out.gep.1
186  br label %done
187
188done:
189  ret void
190}
191
; Test: a conditional global i32 load whose element index is a runtime value
; (the zero-extended %offset argument) rather than a constant.
; Only llc output is checked: the bonaire run expects buffer_load_dword in
; addr64 form with a 0 offset operand, and the tonga run expects flat_scratch
; setup followed by flat_load_dword. There is no tahiti-specific load check.
192; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
193; VI-DAG: s_movk_i32 flat_scratch_lo, 0x0
194; VI-DAG: s_movk_i32 flat_scratch_hi, 0x0
195; GCN: s_and_saveexec_b64
196; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
197; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
198; GCN: {{^}}BB6_2:
199define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) {
200entry:
  ; Widen the 32-bit index so it can be used as an i64 GEP index.
201  %offset.ext = zext i32 %offset to i64
202  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; Variable-index address; its only user is the load in %if.
203  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
204  %tmp0 = icmp eq i32 %cond, 0
205  br i1 %tmp0, label %endif, label %if
206
207if:
208  %tmp1 = load i32, i32 addrspace(1)* %in.gep
209  br label %endif
210
211endif:
  ; Stored value is the loaded i32, or 0 when %if was skipped.
212  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
213  store i32 %x, i32 addrspace(1)* %out.gep
214  br label %done
215
216done:
217  ret void
218}
219
220attributes #0 = { nounwind readnone }
221attributes #1 = { nounwind }
222
223
224
; Test: a conditional addrspace(2) i32 load at constant element offset 7.
; The directives expect no addrspace(2) GEP before the branch in any opt run,
; and (tahiti run only) an s_load_dword with immediate offset 0x7.
225; OPT-LABEL: @test_sink_constant_small_offset_i32
226; OPT-NOT:  getelementptr i32, i32 addrspace(2)*
227; OPT: br i1
228
229; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
230; GCN: s_and_saveexec_b64
231; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
232; GCN: s_or_b64 exec, exec
233define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
234entry:
235  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; Element offset under test; its only user is the load in %if.
236  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
237  %tmp0 = icmp eq i32 %cond, 0
238  br i1 %tmp0, label %endif, label %if
239
240if:
241  %tmp1 = load i32, i32 addrspace(2)* %in.gep
242  br label %endif
243
244endif:
  ; Stored value is the loaded i32, or 0 when %if was skipped.
245  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
246  store i32 %x, i32 addrspace(1)* %out.gep
247  br label %done
248
249done:
250  ret void
251}
252
; Test: a conditional addrspace(2) i32 load at constant element offset 255.
; The directives expect no addrspace(2) GEP before the branch in any opt run,
; and (tahiti run only) an s_load_dword with immediate offset 0xff.
253; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
254; OPT-NOT:  getelementptr i32, i32 addrspace(2)*
255; OPT: br i1
256
257; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
258; GCN: s_and_saveexec_b64
259; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
260; GCN: s_or_b64 exec, exec
261define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
262entry:
263  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; Element offset under test; its only user is the load in %if.
264  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
265  %tmp0 = icmp eq i32 %cond, 0
266  br i1 %tmp0, label %endif, label %if
267
268if:
269  %tmp1 = load i32, i32 addrspace(2)* %in.gep
270  br label %endif
271
272endif:
  ; Stored value is the loaded i32, or 0 when %if was skipped.
273  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
274  store i32 %x, i32 addrspace(1)* %out.gep
275  br label %done
276
277done:
278  ret void
279}
280
; Test: a conditional addrspace(2) i32 load at constant element offset 256.
; The directives expect the addrspace(2) GEP to remain before the branch for
; the tahiti opt run but not for the bonaire or tonga runs. For tahiti llc
; output they expect 0x400 (256 * 4 bytes) to be placed in an SGPR that is then
; used as the s_load_dword offset operand.
281; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
282; OPT-SI:  getelementptr i32, i32 addrspace(2)*
283; OPT-CI-NOT:  getelementptr i32, i32 addrspace(2)*
284; OPT-VI-NOT:  getelementptr i32, i32 addrspace(2)*
285; OPT: br i1
286
287; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
288; GCN: s_and_saveexec_b64
289; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400
290
291; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
292; GCN: s_or_b64 exec, exec
293define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
294entry:
295  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; Element offset under test; its only user is the load in %if.
296  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
297  %tmp0 = icmp eq i32 %cond, 0
298  br i1 %tmp0, label %endif, label %if
299
300if:
301  %tmp1 = load i32, i32 addrspace(2)* %in.gep
302  br label %endif
303
304endif:
  ; Stored value is the loaded i32, or 0 when %if was skipped.
305  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
306  store i32 %x, i32 addrspace(1)* %out.gep
307  br label %done
308
309done:
310  ret void
311}
312
; Test: a conditional addrspace(2) i32 load at constant element offset
; 4294967295. The directives expect the addrspace(2) GEP to remain before the
; branch for the tahiti opt run but not for the bonaire run. In llc output they
; expect a 64-bit add split into s_add_u32 with -4 and s_addc_u32 with 3, and
; (tahiti run only) an s_load_dword with immediate offset 0x0.
313; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
314; OPT-SI: getelementptr i32, i32 addrspace(2)*
315; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
316; OPT: br i1
317
318; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
319; GCN: s_and_saveexec_b64
320; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
321; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
322; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
323; GCN: s_or_b64 exec, exec
324define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
325entry:
326  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; Element offset under test; its only user is the load in %if.
327  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
328  %tmp0 = icmp eq i32 %cond, 0
329  br i1 %tmp0, label %endif, label %if
330
331if:
332  %tmp1 = load i32, i32 addrspace(2)* %in.gep
333  br label %endif
334
335endif:
  ; Stored value is the loaded i32, or 0 when %if was skipped.
336  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
337  store i32 %x, i32 addrspace(1)* %out.gep
338  br label %done
339
340done:
341  ret void
342}
343
; Test: a conditional addrspace(2) i32 load at constant element offset
; 17179869181. The directives expect the addrspace(2) GEP to remain before the
; branch in every opt run. In llc output they expect an s_add_u32/s_addc_u32
; pair (operands not checked) and, for the tahiti run only, an s_load_dword
; with immediate offset 0x0.
344; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
345; OPT: getelementptr i32, i32 addrspace(2)*
346; OPT: br i1
347
348; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
349; GCN: s_and_saveexec_b64
350; GCN: s_add_u32
351; GCN: s_addc_u32
352; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
353; GCN: s_or_b64 exec, exec
354define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
355entry:
356  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; Element offset under test; its only user is the load in %if.
357  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
358  %tmp0 = icmp eq i32 %cond, 0
359  br i1 %tmp0, label %endif, label %if
360
361if:
362  %tmp1 = load i32, i32 addrspace(2)* %in.gep
363  br label %endif
364
365endif:
  ; Stored value is the loaded i32, or 0 when %if was skipped.
366  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
367  store i32 %x, i32 addrspace(1)* %out.gep
368  br label %done
369
370done:
371  ret void
372}
373
; Test: a conditional addrspace(2) i32 load at constant element offset 262143
; (byte offset 0xffffc). Only llc output is checked. The tahiti run expects
; 0xffffc placed in an SGPR that feeds s_load_dword; the bonaire run expects
; the immediate 0x3ffff; the tonga run expects the immediate 0xffffc.
374; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
375; GCN: s_and_saveexec_b64
376; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
377; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
378
379; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
380; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}
381
382; GCN: s_or_b64 exec, exec
383define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
384entry:
385  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; Element offset under test; its only user is the load in %if.
386  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
387  %tmp0 = icmp eq i32 %cond, 0
388  br i1 %tmp0, label %endif, label %if
389
390if:
391  %tmp1 = load i32, i32 addrspace(2)* %in.gep
392  br label %endif
393
394endif:
  ; Stored value is the loaded i32, or 0 when %if was skipped.
395  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
396  store i32 %x, i32 addrspace(1)* %out.gep
397  br label %done
398
399done:
400  ret void
401}
402
; Test: a conditional addrspace(2) i32 load at constant element offset 262144
; (byte offset 0x100000). The directives expect the addrspace(2) GEP to remain
; before the branch for the tahiti and tonga opt runs but not for the bonaire
; run. In llc output the tahiti and tonga runs expect 0x100000 placed in an
; SGPR that feeds s_load_dword, while the bonaire run expects the immediate
; 0x40000.
403; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
404; OPT-SI: getelementptr i32, i32 addrspace(2)*
405; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
406; OPT-VI: getelementptr i32, i32 addrspace(2)*
407; OPT: br i1
408
409; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
410; GCN: s_and_saveexec_b64
411; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
412; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
413
414; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}
415
416; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
417; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
418
419; GCN: s_or_b64 exec, exec
420define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
421entry:
422  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; Element offset under test; its only user is the load in %if.
423  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
424  %tmp0 = icmp eq i32 %cond, 0
425  br i1 %tmp0, label %endif, label %if
426
427if:
428  %tmp1 = load i32, i32 addrspace(2)* %in.gep
429  br label %endif
430
431endif:
  ; Stored value is the loaded i32, or 0 when %if was skipped.
432  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
433  store i32 %x, i32 addrspace(1)* %out.gep
434  br label %done
435
436done:
437  ret void
438}
439