1; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2
; Non-kernel function that reads workitem ID X and stores it.
; The checks expect the stored value to come directly from v0. The address
; register pair is matched with a multi-digit pattern so the test does not
; depend on the pair being allocated below v10.
; GCN-LABEL: {{^}}use_workitem_id_x:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  ; volatile keeps the store (and therefore the ID read) alive
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
13
; Non-kernel function that reads workitem ID Y and stores it.
; The checks expect the stored value to come directly from v0. The address
; register pair is matched with a multi-digit pattern so the test does not
; depend on the pair being allocated below v10.
; GCN-LABEL: {{^}}use_workitem_id_y:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  ; volatile keeps the store (and therefore the ID read) alive
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
24
; Non-kernel function that reads workitem ID Z and stores it.
; The checks expect the stored value to come directly from v0. The address
; register pair is matched with a multi-digit pattern so the test does not
; depend on the pair being allocated below v10.
; GCN-LABEL: {{^}}use_workitem_id_z:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  ; volatile keeps the store (and therefore the ID read) alive
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
35
; Non-kernel function that reads workitem IDs X and Y and stores both.
; The checks expect X to be stored from v0 and Y from v1, in that order.
; The address register pair is matched with a multi-digit pattern so the
; test does not depend on the pair being allocated below v10.
; GCN-LABEL: {{^}}use_workitem_id_xy:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  ; volatile stores fix the order in which the two IDs are written
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}
49
; Non-kernel function that reads workitem IDs X, Y and Z and stores all three.
; The checks expect X, Y and Z to be stored from v0, v1 and v2 respectively.
; The address register pair is matched with a multi-digit pattern so the
; test does not depend on the pair being allocated below v10.
; GCN-LABEL: {{^}}use_workitem_id_xyz:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v2
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  ; volatile stores fix the order in which the three IDs are written
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  store volatile i32 %val2, i32 addrspace(1)* undef
  ret void
}
66
; Non-kernel function that reads workitem IDs X and Z and stores both.
; The checks expect X to be stored from v0 and Z from v1, in that order.
; The address register pair is matched with a multi-digit pattern so the
; test does not depend on the pair being allocated below v10.
; GCN-LABEL: {{^}}use_workitem_id_xz:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.z()
  ; volatile stores fix the order in which the two IDs are written
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}
80
; Non-kernel function that reads workitem IDs Y and Z and stores both.
; The checks expect Y to be stored from v0 and Z from v1, in that order.
; The address register pair is matched with a multi-digit pattern so the
; test does not depend on the pair being allocated below v10.
; GCN-LABEL: {{^}}use_workitem_id_yz:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workitem.id.z()
  ; volatile stores fix the order in which the two IDs are written
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}
94
; Kernel whose only use of workitem ID X is through a call to
; use_workitem_id_x. Expected: enable_vgpr_workitem_id is 0 and v0 is not
; mentioned anywhere between the kernel descriptor field and the end of the
; function, i.e. no copy is emitted around the call.
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0

; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
  call void @use_workitem_id_x()
  ret void
}
105
; Kernel whose only use of workitem ID Y is through a call to
; use_workitem_id_y. Expected: enable_vgpr_workitem_id is 1 and exactly one
; copy "v0 <- v1" precedes the call, with no other mention of v0 or v1
; before the call.
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
; GCN: enable_vgpr_workitem_id = 1

; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_mov_b32_e32 v0, v1
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
  call void @use_workitem_id_y()
  ret void
}
119
; Kernel whose only use of workitem ID Z is through a call to
; use_workitem_id_z. Expected: enable_vgpr_workitem_id is 2 and exactly one
; copy "v0 <- v2" precedes the call, with no other mention of v0 or v2
; before the call.
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
; GCN: enable_vgpr_workitem_id = 2

; GCN-NOT: v0
; GCN-NOT: v2
; GCN: v_mov_b32_e32 v0, v2
; GCN-NOT: v0
; GCN-NOT: v2
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
  call void @use_workitem_id_z()
  ret void
}
133
; Non-kernel caller of use_workitem_id_x. Expected: v0 is not mentioned
; before or after the call instruction.
; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_x() #1 {
  call void @use_workitem_id_x()
  ret void
}
142
; Non-kernel caller of use_workitem_id_y. Expected: v0 is not mentioned
; before or after the call instruction.
; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_y() #1 {
  call void @use_workitem_id_y()
  ret void
}
151
; Non-kernel caller of use_workitem_id_z. Expected: v0 is not mentioned
; before or after the call instruction.
; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_z() #1 {
  call void @use_workitem_id_z()
  ret void
}
160
; Function with one ordinary i32 argument that also reads workitem ID X.
; Expected: the argument is stored from v0 first, then the ID from v1.
; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()

  ; explicit argument first, implicit ID second
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %id, i32 addrspace(1)* undef
  ret void
}
171
; Function with one ordinary i32 argument that also reads workitem ID Y.
; Expected: the argument is stored from v0 first, then the ID from v1.
; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
  %id = call i32 @llvm.amdgcn.workitem.id.y()

  ; explicit argument first, implicit ID second
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %id, i32 addrspace(1)* undef
  ret void
}
182
; Function with one ordinary i32 argument that also reads workitem ID Z.
; Expected: the argument is stored from v0 first, then the ID from v1.
; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
  %id = call i32 @llvm.amdgcn.workitem.id.z()

  ; explicit argument first, implicit ID second
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %id, i32 addrspace(1)* undef
  ret void
}
193
194
; Kernel calling other_arg_use_workitem_id_x with the constant 555 (0x22b).
; Expected: enable_vgpr_workitem_id is 0, the incoming ID in v0 is moved to
; v1, and then the constant is materialised in v0 before the call.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0

; GCN: v_mov_b32_e32 v1, v0
; GCN: v_mov_b32_e32 v0, 0x22b
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
  call void @other_arg_use_workitem_id_x(i32 555)
  ret void
}
205
206
; Kernel calling other_arg_use_workitem_id_y with the constant 555 (0x22b).
; Expected: enable_vgpr_workitem_id is 1, only the constant is written
; (into v0), v1 is never mentioned before the call, and v0 is not mentioned
; after it.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
; GCN: enable_vgpr_workitem_id = 1

; GCN-NOT: v1
; GCN: v_mov_b32_e32 v0, 0x22b
; GCN-NOT: v1
; GCN: s_swappc_b64
; GCN-NOT: v0
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
  call void @other_arg_use_workitem_id_y(i32 555)
  ret void
}
219
; Kernel calling other_arg_use_workitem_id_z with the constant 555 (0x22b).
; Expected: enable_vgpr_workitem_id is 2; before the call, in either order,
; the constant is placed in v0 and v2 is copied into v1. v0 is not
; mentioned after the call.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
; GCN: enable_vgpr_workitem_id = 2

; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN: s_swappc_b64
; GCN-NOT: v0
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
  call void @other_arg_use_workitem_id_z(i32 555)
  ret void
}
231
; Function taking 32 i32 arguments that also reads workitem ID X.
; Expected: v32 is spilled at s5 offset:8, the ID is loaded into v32 from
; s5 offset:4 and stored, and v32 is reloaded from offset:8 right before
; returning.
; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32

; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @too_many_args_use_workitem_id_x(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  ; The implicit workitem ID is consumed first.
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, i32 addrspace(1)* undef

  ; Every explicit argument is stored so that all of them stay live.
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef

  ret void
}
287
; Kernel calling too_many_args_use_workitem_id_x with 32 constants.
; Expected: enable_vgpr_workitem_id is 0 and the incoming v0 is written to
; the outgoing stack area at s32 offset:8 before the call.
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0

; GCN: s_mov_b32 s33, s7
; GCN: s_mov_b32 s32, s33
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:8
; GCN: s_mov_b32 s4, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
  call void @too_many_args_use_workitem_id_x(
    i32 10, i32 20, i32 30, i32 40, i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120, i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200, i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280, i32 290, i32 300, i32 310, i32 320)
  ret void
}
308
; Non-kernel caller that has one explicit i32 argument of its own and then
; calls too_many_args_use_workitem_id_x with 32 constants.
; Expected: v1 is written to the outgoing stack area at s32 offset:8 before
; the call.
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v1, off, s[0:3], s32 offset:8
; GCN: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
  ; keep the explicit argument live
  store volatile i32 %arg0, i32 addrspace(1)* undef

  call void @too_many_args_use_workitem_id_x(
    i32 10, i32 20, i32 30, i32 40, i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120, i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200, i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280, i32 290, i32 300, i32 310, i32 320)
  ret void
}
326
; A 32-argument function forwarding all of its arguments to
; too_many_args_use_workitem_id_x. The stack-passed value has to be loaded
; from the incoming stack slot (s5 offset:4) and stored again to the outgoing
; one (s32 offset:8); v32 is spilled and reloaded around that.
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
; GCN: s_add_u32 s32, s32, 0x400{{$}}
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4

; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:8{{$}}

; GCN: s_swappc_b64

; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Reload
; GCN: s_sub_u32 s32, s32, 0x400{{$}}
; GCN: s_setpc_b64
define void @too_many_args_call_too_many_args_use_workitem_id_x(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3,
  i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11,
  i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19,
  i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27,
  i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  call void @too_many_args_use_workitem_id_x(
    i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3,
    i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
    i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11,
    i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
    i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19,
    i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
    i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27,
    i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31)
  ret void
}
352
; 32 i32 arguments plus a trailing byval pointer, and a read of workitem
; ID X. Stack layout of this function (dword slots):
;   frame[0] = emergency stack slot
;   frame[1] = byval arg32
;   frame[2] = stack passed workitem ID x
;   frame[3] = VGPR spill slot

; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8
; GCN-NEXT: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
; GCN: buffer_load_dword v0, off, s[0:3], s5 offset:4
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Reload
; GCN: s_setpc_b64
define void @too_many_args_use_workitem_id_x_byval(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32 addrspace(5)* byval %arg32) #1 {
  ; implicit input: workitem ID X
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %id.x, i32 addrspace(1)* undef

  ; explicit i32 arguments, in declaration order
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef
  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef
  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef
  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef

  ; finally read through the byval pointer; the result is unused
  %byval.load = load volatile i32, i32 addrspace(5)* %arg32
  ret void
}
413
; Kernel calling too_many_args_use_workitem_id_x_byval with 32 constants and
; a pointer to a private slot holding 999 (0x3e7).
;
; Kernel frame (relative to s33):
;   frame[0] = emergency stack slot
;   frame[1] = the private slot holding 999 (stored at offset:4 below)
;
; Outgoing area (relative to s32):
;   sp[0] = callee emergency stack slot reservation
;   sp[1] = byval
;   sp[2] = ??
;   sp[3] = stack passed workitem ID x

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
; GCN: enable_vgpr_workitem_id = 0

; GCN: s_mov_b32 s33, s7
; GCN: s_add_u32 s32, s33, 0x400{{$}}

; GCN-NOT: s32
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:12

; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33 offset:4
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
  %slot = alloca i32, align 4, addrspace(5)
  store volatile i32 999, i32 addrspace(5)* %slot
  call void @too_many_args_use_workitem_id_x_byval(
    i32 10, i32 20, i32 30, i32 40, i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120, i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200, i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280, i32 290, i32 300, i32 310, i32 320,
    i32 addrspace(5)* %slot)
  ret void
}
452
; Non-kernel variant of the byval caller: a private slot holding 999 (0x3e7)
; is passed byval after 32 constants.
; Expected: the constant is stored at s5 offset:4, v0 is stored to the
; outgoing area at s32 offset:12, and the byval value is reloaded from
; s5 offset:4 and copied to s32 offset:4 before the call.
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
; GCN: buffer_store_dword [[K]], off, s[0:3], s5 offset:4
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:12

; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s5 offset:4
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
  %slot = alloca i32, align 4, addrspace(5)
  store volatile i32 999, i32 addrspace(5)* %slot
  call void @too_many_args_use_workitem_id_x_byval(
    i32 10, i32 20, i32 30, i32 40, i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120, i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200, i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280, i32 290, i32 300, i32 310, i32 320,
    i32 addrspace(5)* %slot)
  ret void
}
477
; Function taking 32 i32 arguments that reads workitem IDs X, Y and Z.
; Expected: v32 is spilled at s5 offset:16; X, Y and Z are loaded into v32
; from s5 offset:4, offset:8 and offset:12 respectively, each followed by a
; store; v32 is reloaded from offset:16 right before returning.
; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:12{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32

; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Reload
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @too_many_args_use_workitem_id_xyz(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  ; The three implicit workitem IDs are consumed first, one at a time.
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val0, i32 addrspace(1)* undef
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val1, i32 addrspace(1)* undef
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val2, i32 addrspace(1)* undef

  ; Every explicit argument is stored so that all of them stay live.
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef

  ret void
}
541
; Kernel calling too_many_args_use_workitem_id_xyz with 32 constants.
; Stack slots (dwords):
;   frame[0] = kernel emergency stack slot
;   frame[1] = callee emergency stack slot
;   frame[2] = ID X
;   frame[3] = ID Y
;   frame[4] = ID Z
; Expected: enable_vgpr_workitem_id is 2 and v0, v1, v2 are written to
; s32 offset:8, offset:12 and offset:16 (in any order) before the call.

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
; GCN: enable_vgpr_workitem_id = 2

; GCN: s_mov_b32 s33, s7
; GCN: s_mov_b32 s32, s33

; GCN-DAG: buffer_store_dword v0, off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:12
; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:16
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
  call void @too_many_args_use_workitem_id_xyz(
    i32 10, i32 20, i32 30, i32 40, i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120, i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200, i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280, i32 290, i32 300, i32 310, i32 320)
  ret void
}
570
; Function taking 31 i32 arguments that reads workitem IDs X, Y and Z.
; workitem ID X in register, yz on stack
; v31 = workitem ID X
; frame[0] = emergency slot
; frame[1] = workitem Y
; frame[2] = workitem Z

; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
; GCN: s_mov_b32 s5, s32
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
; GCN: buffer_load_dword v31, off, s[0:3], s5 offset:4{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
; GCN: buffer_load_dword v31, off, s[0:3], s5 offset:8{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31

; GCN: s_waitcnt
; GCN-NEXT: s_setpc_b64
; GCN: ScratchSize: 12
define void @too_many_args_use_workitem_id_x_stack_yz(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
  ; The three implicit workitem IDs are consumed first, one at a time.
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val0, i32 addrspace(1)* undef
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val1, i32 addrspace(1)* undef
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val2, i32 addrspace(1)* undef

  ; Every explicit argument is stored so that all of them stay live.
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef

  ret void
}
637
; Kernel calling too_many_args_use_workitem_id_x_stack_yz with 31 constants.
; Stack slots (dwords):
;   frame[0] = kernel emergency stack slot
;   frame[1] = callee emergency stack slot
;   frame[2] = ID Y
;   frame[3] = ID Z
; Expected: enable_vgpr_workitem_id is 2; before the call, in any order,
; v0 is copied to v31 and v1/v2 are written to s32 offset:8 and offset:12.

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
; GCN: enable_vgpr_workitem_id = 2

; GCN: s_mov_b32 s33, s7
; GCN: s_mov_b32 s32, s33

; GCN-DAG: v_mov_b32_e32 v31, v0
; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:12
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
  call void @too_many_args_use_workitem_id_x_stack_yz(
    i32 10, i32 20, i32 30, i32 40, i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120, i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200, i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280, i32 290, i32 300, i32 310)
  ret void
}
665
; Workitem ID intrinsics read by the functions in this file.
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

; #0 is attached to the intrinsic declarations; #1 to every function
; defined in this file.
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { noinline nounwind }
672