; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
3
; A callable function that reads the dispatch pointer. The expected output
; loads a dword directly through s[4:5].
; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
define hidden void @use_dispatch_ptr() #1 {
  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  ; The result is unused; the load is volatile so it still appears in the output.
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}
12
; A callable function that reads the queue pointer. The expected output loads
; a dword directly through s[6:7].
; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
define hidden void @use_queue_ptr() #1 {
  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  ; The result is unused; the load is volatile so it still appears in the output.
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}
21
; A callable function that asks for the kernarg segment pointer. The expected
; output does not read an input register pair: it materializes a 64-bit zero
; and loads a dword through it at offset 0.
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0
; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0
define hidden void @use_kernarg_segment_ptr() #1 {
  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  ; The result is unused; the load is volatile so it still appears in the output.
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}
31
; A callable function that reads the implicit argument pointer. The expected
; output loads a dword directly through s[8:9].
; GCN-LABEL: {{^}}use_implicitarg_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[8:9]
define hidden void @use_implicitarg_ptr() #1 {
  %implicit.arg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %implicit.arg.ptr to i32 addrspace(4)*
  ; The result is unused; the load is volatile so it still appears in the output.
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}
40
; A callable function that reads the 64-bit dispatch id. The inline asm prints
; the scalar register assigned to its operand; s[10:11] is expected.
; GCN-LABEL: {{^}}use_dispatch_id:
; GCN: ; use s[10:11]
define hidden void @use_dispatch_id() #1 {
  %id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %id)
  ret void
}
; A callable function that reads workgroup id X. The inline asm prints the
; scalar register assigned to its operand; s12 is expected.
; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
; GCN: ; use s12
define hidden void @use_workgroup_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
56
; Same as use_workgroup_id_x, but with a stack object. The checks require that
; s32 is not mentioned before the stack store, that the store addresses the
; slot through s32 with nothing following it on the line, and that workgroup
; id X is still read from s12.
; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
; GCN: s_waitcnt
; GCN-NOT: s32
; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
; GCN: ; use s12
; GCN: s_setpc_b64
define hidden void @use_stack_workgroup_id_x() #1 {
  %alloca = alloca i32, addrspace(5)
  ; Volatile so the store to the otherwise unused slot is kept.
  store volatile i32 0, i32 addrspace(5)* %alloca
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
70
; A callable function that reads workgroup id Y. The inline asm prints the
; scalar register assigned to its operand; s13 is expected.
; GCN-LABEL: {{^}}use_workgroup_id_y:
; GCN: s_waitcnt
; GCN: ; use s13
define hidden void @use_workgroup_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
79
; A callable function that reads workgroup id Z. The inline asm prints the
; scalar register assigned to its operand; s14 is expected.
; GCN-LABEL: {{^}}use_workgroup_id_z:
; GCN: s_waitcnt
; GCN: ; use s14
define hidden void @use_workgroup_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
88
; Reads workgroup ids X and Y in one function. Each inline asm prints the
; register assigned to its operand; s12 then s13 is expected.
; GCN-LABEL: {{^}}use_workgroup_id_xy:
; GCN: ; use s12
; GCN: ; use s13
define hidden void @use_workgroup_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}
99
; Reads workgroup ids X, Y and Z in one function. Each inline asm prints the
; register assigned to its operand; s12, s13, s14 is expected in that order.
; GCN-LABEL: {{^}}use_workgroup_id_xyz:
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14
define hidden void @use_workgroup_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  call void asm sideeffect "; use $0", "s"(i32 %val2)
  ret void
}
113
; Reads workgroup ids X and Z while leaving Y unused. The expected registers
; are still s12 and s14, so Z keeps its register even though Y is not read.
; GCN-LABEL: {{^}}use_workgroup_id_xz:
; GCN: ; use s12
; GCN: ; use s14
define hidden void @use_workgroup_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}
124
; Reads workgroup ids Y and Z while leaving X unused. The expected registers
; are still s13 and s14.
; GCN-LABEL: {{^}}use_workgroup_id_yz:
; GCN: ; use s13
; GCN: ; use s14
define hidden void @use_workgroup_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}
135
; Argument is in right place already.
; A function that only forwards to use_workgroup_id_x. The checks require that
; s12, s13 and s14 are not mentioned anywhere before the v_readlane_b32 line,
; i.e. the caller does not touch the workgroup id registers for the call.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
; GCN: v_readlane_b32 s4, v40, 0
define hidden void @func_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}
146
; A function that only forwards to use_workgroup_id_y. The checks require that
; s4 is not mentioned before the v_readlane_b32 that writes it.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
; GCN-NOT: s4
; GCN: v_readlane_b32 s4, v40, 0
define hidden void @func_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}
154
; A function that only forwards to use_workgroup_id_z. The checks require that
; s4 is not mentioned before the v_readlane_b32 that writes it.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
; GCN-NOT: s4
; GCN: v_readlane_b32 s4, v40, 0
define hidden void @func_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}
162
; Reads workgroup id X in a function that also takes an ordinary i32 argument.
; The explicit argument is expected to be stored from v0 (flat store on the
; CIVI run, global store on the GFX9 run) and the workgroup id is still
; expected in s12.
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s12
define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  ; Volatile store to an undef address keeps %arg0 live in the output.
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
173
; Reads workgroup id Y in a function that also takes an ordinary i32 argument.
; The explicit argument is expected to be stored from v0 (flat store on the
; CIVI run, global store on the GFX9 run) and the workgroup id is still
; expected in s13.
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s13
define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  ; Volatile store to an undef address keeps %arg0 live in the output.
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
184
; Reads workgroup id Z in a function that also takes an ordinary i32 argument.
; The explicit argument is expected to be stored from v0 (flat store on the
; CIVI run, global store on the GFX9 run) and the workgroup id is still
; expected in s14.
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s14
define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  ; Volatile store to an undef address keeps %arg0 live in the output.
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
195
; One callable function that touches a stack slot and reads the dispatch
; pointer, queue pointer, implicit argument pointer, dispatch id and all three
; workgroup ids. The expected registers are s[4:5], s[6:7], s[8:9], s[10:11],
; s12, s13 and s14 respectively, with the stack store addressed through s32.
; GCN-LABEL: {{^}}use_every_sgpr_input:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
; GCN: s_load_dword s{{[0-9]+}}, s[8:9]
; GCN: ; use s[10:11]
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14
define hidden void @use_every_sgpr_input() #1 {
  ; Stack object; the volatile store keeps it in the output.
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  ; Pointer inputs: each is read through a volatile dword load.
  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc

  ; Value inputs: each is printed via an inline asm with an "s" operand.
  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}
235
; A kernel that calls use_every_sgpr_input. Before the call the kernel is
; expected to copy s14, s15 and s16 into s12, s13 and s14 and to set s32 to 0.
; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
; GCN: s_mov_b32 s12, s14
; GCN: s_mov_b32 s13, s15
; GCN: s_mov_b32 s14, s16
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; Expected kernel descriptor settings: the listed user and system SGPR inputs
; are enabled, while private_segment_size and workgroup_info stay 0.
; GCN: .amdhsa_user_sgpr_private_segment_buffer 1
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
; GCN: .amdhsa_user_sgpr_queue_ptr 1
; GCN: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GCN: .amdhsa_user_sgpr_dispatch_id 1
; GCN: .amdhsa_user_sgpr_flat_scratch_init 1
; GCN: .amdhsa_user_sgpr_private_segment_size 0
; GCN: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
; GCN: .amdhsa_system_sgpr_workgroup_info 0
; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}
260
; A function that only forwards to use_every_sgpr_input. The checks require
; that none of s6 through s14 (singly or as the listed pairs) are mentioned
; before the s_or_saveexec_b64 line, i.e. the caller does not shuffle the
; special inputs for the call.
; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s10
; GCN-NOT: s11
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s[6:7]
; GCN-NOT: s[8:9]
; GCN-NOT: s[10:11]
; GCN-NOT: s[12:13]
; GCN-NOT: s14
; GCN: s_or_saveexec_b64 s[16:17], -1
define hidden void @func_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}
280
; A function that reads every special SGPR input itself and then calls
; use_workgroup_id_xyz. The checks require that s12, s13 and s14 are not
; mentioned before the inline asm uses, that the uses print s[10:11], s12, s13
; and s14, and that the call (s_swappc_b64) follows.
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
; GCN: ; use s[10:11]
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14

; GCN: s_swappc_b64
define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
  ; Stack object; the volatile store keeps it in the output.
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  ; Pointer inputs: each is read through a volatile dword load.
  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  ; Named after the intrinsic actually called (implicitarg.ptr), matching
  ; use_every_sgpr_input.
  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc

  ; Value inputs: each is printed via an inline asm with an "s" operand.
  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ; The callee reads all three workgroup ids again after the local uses.
  call void @use_workgroup_id_xyz()
  ret void
}
322
; Intrinsics used by the functions in this file.
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0

; #0 is applied to the intrinsic declarations and their pointer call sites.
; #1 is applied to every defined function; noinline keeps each callee a real
; call rather than letting it be folded into its caller.
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }
334