; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FLATSCR %s

declare hidden void @external_void_func_void() #0

; GCN-LABEL: {{^}}test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GCN: s_getpc_b64 s[34:35]
; GCN-NEXT: s_add_u32 s34, s34,
; GCN-NEXT: s_addc_u32 s35, s35,
; GCN-NEXT: s_mov_b32 s32, 0
; GCN: s_swappc_b64 s[30:31], s[34:35]

; GCN-NEXT: #ASMSTART
; GCN-NEXT: #ASMEND
; GCN-NEXT: s_swappc_b64 s[30:31], s[34:35]
define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
  call void @external_void_func_void()
  call void asm sideeffect "", ""() #0
  call void @external_void_func_void()
  ret void
}

; GCN-LABEL: {{^}}test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; MUBUF:   buffer_store_dword
; FLATSCR: scratch_store_dword
; GCN: v_writelane_b32 v40, s33, 4
; GCN: v_writelane_b32 v40, s34, 0
; GCN: v_writelane_b32 v40, s35, 1
; GCN: v_writelane_b32 v40, s30, 2
; GCN: v_writelane_b32 v40, s31, 3

; GCN: s_swappc_b64
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_swappc_b64
; MUBUF-DAG:   v_readlane_b32 s4, v40, 2
; MUBUF-DAG:   v_readlane_b32 s5, v40, 3
; FLATSCR-DAG: v_readlane_b32 s0, v40, 2
; FLATSCR-DAG: v_readlane_b32 s1, v40, 3
; GCN: v_readlane_b32 s35, v40, 1
; GCN: v_readlane_b32 s34, v40, 0

; GCN: v_readlane_b32 s33, v40, 4
; MUBUF:   buffer_load_dword
; FLATSCR: scratch_load_dword
; GCN: s_setpc_b64
define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
  call void @external_void_func_void()
  call void asm sideeffect "", ""() #0
  call void @external_void_func_void()
  ret void
}

; GCN-LABEL: {{^}}test_func_call_external_void_funcx2:
; MUBUF:   buffer_store_dword v40
; FLATSCR: scratch_store_dword off, v40
; GCN: v_writelane_b32 v40, s33, 4

; GCN: s_mov_b32 s33, s32
; MUBUF:   s_add_u32 s32, s32, 0x400
; FLATSCR: s_add_u32 s32, s32, 16
; GCN: s_swappc_b64
; GCN-NEXT: s_swappc_b64

; GCN: v_readlane_b32 s33, v40, 4
; MUBUF:   buffer_load_dword v40
; FLATSCR: scratch_load_dword v40
define void @test_func_call_external_void_funcx2() #0 {
  call void @external_void_func_void()
  call void @external_void_func_void()
  ret void
}

; GCN-LABEL: {{^}}void_func_void_clobber_s30_s31:
; GCN: s_waitcnt
; GCN-NEXT: s_mov_b64 [[SAVEPC:s\[[0-9]+:[0-9]+\]]], s[30:31]
; GCN-NEXT: #ASMSTART
; GCN: ; clobber
; GCN-NEXT: #ASMEND
; GCN-NEXT: s_setpc_b64 [[SAVEPC]]
define void @void_func_void_clobber_s30_s31() #2 {
  call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
  ret void
}

; GCN-LABEL: {{^}}void_func_void_clobber_vcc:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_setpc_b64 s[30:31]
define hidden void @void_func_void_clobber_vcc() #2 {
  call void asm sideeffect "", "~{vcc}"() #0
  ret void
}

; GCN-LABEL: {{^}}test_call_void_func_void_clobber_vcc:
; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN: s_mov_b64 s[34:35], vcc
; GCN-NEXT: s_swappc_b64
; GCN: s_mov_b64 vcc, s[34:35]
define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(i32 addrspace(1)* %out) #0 {
  %vcc = call i64 asm sideeffect "; def $0", "={vcc}"()
  call void @void_func_void_clobber_vcc()
  %val0 = load volatile i32, i32 addrspace(1)* undef
  %val1 = load volatile i32, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "{vcc}"(i64 %vcc)
  ret void
}

; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_s31:
; GCN: s_mov_b32 s33, s31
; GCN-NEXT: s_swappc_b64
; GCN-NEXT: s_mov_b32 s31, s33
define amdgpu_kernel void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)* %out) #0 {
  %s31 = call i32 asm sideeffect "; def $0", "={s31}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
  ret void
}

; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_v31:
; GCN: v_mov_b32_e32 v40, v31
; GCN-NEXT: s_swappc_b64
; GCN-NEXT: v_mov_b32_e32 v31, v40
define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1)* %out) #0 {
  %v31 = call i32 asm sideeffect "; def $0", "={v31}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
  ret void
}

; FIXME: What is the expected behavior for reserved registers here?

; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
; MUBUF:        s_getpc_b64 s[4:5]
; MUBUF-NEXT:   s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT:   s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; FLATSCR:      s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GCN: s_mov_b32 s32, 0
; GCN: #ASMSTART
; GCN-NEXT: ; def s33
; GCN-NEXT: #ASMEND
; MUBUF:   s_swappc_b64 s[30:31], s[4:5]
; FLATSCR: s_swappc_b64 s[30:31], s[0:1]
; GCN: ;;#ASMSTART
; GCN-NEXT: ; use s33
; GCN-NEXT: ;;#ASMEND
; GCN-NOT: s33
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* %out) #0 {
  %s33 = call i32 asm sideeffect "; def $0", "={s33}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
  ret void
}

; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34: {{.*}}
; GCN-NOT: s34

; MUBUF:        s_getpc_b64 s[4:5]
; MUBUF-NEXT:   s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT:   s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; FLATSCR:      s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GCN: s_mov_b32 s32, 0

; GCN-NOT: s34
; GCN: ;;#ASMSTART
; GCN-NEXT: ; def s34
; GCN-NEXT: ;;#ASMEND

; GCN-NOT: s34
; MUBUF:   s_swappc_b64 s[30:31], s[4:5]
; FLATSCR: s_swappc_b64 s[30:31], s[0:1]

; GCN-NOT: s34

; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s34
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* %out) #0 {
  %s34 = call i32 asm sideeffect "; def $0", "={s34}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s34}"(i32 %s34)
  ret void
}

; GCN-LABEL: {{^}}test_call_void_func_void_preserves_v40: {{.*}}

; GCN-NOT: v40
; MUBUF:        s_getpc_b64 s[4:5]
; MUBUF-NEXT:   s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT:   s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; FLATSCR:      s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GCN: s_mov_b32 s32, 0
; GCN-NOT: v40

; GCN: ;;#ASMSTART
; GCN-NEXT: ; def v40
; GCN-NEXT: ;;#ASMEND

; MUBUF:   s_swappc_b64 s[30:31], s[4:5]
; FLATSCR: s_swappc_b64 s[30:31], s[0:1]

; GCN-NOT: v40

; GCN: ;;#ASMSTART
; GCN-NEXT: ; use v40
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* %out) #0 {
  %v40 = call i32 asm sideeffect "; def $0", "={v40}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{v40}"(i32 %v40)
  ret void
}

; GCN-LABEL: {{^}}void_func_void_clobber_s33:
; GCN: v_writelane_b32 v0, s33, 0
; GCN-NEXT: #ASMSTART
; GCN-NEXT: ; clobber
; GCN-NEXT: #ASMEND
; GCN-NEXT: v_readlane_b32 s33, v0, 0
; GCN: s_setpc_b64
define hidden void @void_func_void_clobber_s33() #2 {
  call void asm sideeffect "; clobber", "~{s33}"() #0
  ret void
}

; GCN-LABEL: {{^}}void_func_void_clobber_s34:
; GCN: v_writelane_b32 v0, s34, 0
; GCN-NEXT: #ASMSTART
; GCN-NEXT: ; clobber
; GCN-NEXT: #ASMEND
; GCN-NEXT: v_readlane_b32 s34, v0, 0
; GCN: s_setpc_b64
define hidden void @void_func_void_clobber_s34() #2 {
  call void asm sideeffect "; clobber", "~{s34}"() #0
  ret void
}

; GCN-LABEL: {{^}}test_call_void_func_void_clobber_s33:
; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_mov_b32 s32, 0
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_clobber_s33() #0 {
  call void @void_func_void_clobber_s33()
  ret void
}

; GCN-LABEL: {{^}}test_call_void_func_void_clobber_s34:
; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_mov_b32 s32, 0
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_clobber_s34() #0 {
  call void @void_func_void_clobber_s34()
  ret void
}

; GCN-LABEL: {{^}}callee_saved_sgpr_func:
; GCN-NOT: s40
; GCN: v_writelane_b32 v40, s40
; GCN: s_swappc_b64
; GCN-NOT: s40
; GCN: ; use s40
; GCN-NOT: s40
; GCN: v_readlane_b32 s40, v40
; GCN-NOT: s40
define void @callee_saved_sgpr_func() #2 {
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  ret void
}

; GCN-LABEL: {{^}}callee_saved_sgpr_kernel:
; GCN-NOT: s40
; GCN: ; def s40
; GCN-NOT: s40
; GCN: s_swappc_b64
; GCN-NOT: s40
; GCN: ; use s40
; GCN-NOT: s40
define amdgpu_kernel void @callee_saved_sgpr_kernel() #2 {
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  ret void
}

; The first call-preserved VGPR holds a live value, so it can't be used for SGPR spills.
; GCN-LABEL: {{^}}callee_saved_sgpr_vgpr_func:
; GCN-NOT: s40
; GCN: v_writelane_b32 v41, s40
; GCN: s_swappc_b64
; GCN-NOT: s40
; GCN: ; use s40
; GCN-NOT: s40
; GCN: v_readlane_b32 s40, v41
; GCN-NOT: s40
define void @callee_saved_sgpr_vgpr_func() #2 {
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  %v40 = call i32 asm sideeffect "; def v40", "={v40}"() #0
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  call void asm sideeffect "; use $0", "v"(i32 %v40) #0
  ret void
}

; GCN-LABEL: {{^}}callee_saved_sgpr_vgpr_kernel:
; GCN-NOT: s40
; GCN: ; def s40
; GCN-NOT: s40
; GCN: s_swappc_b64
; GCN-NOT: s40
; GCN: ; use s40
; GCN-NOT: s40
define amdgpu_kernel void @callee_saved_sgpr_vgpr_kernel() #2 {
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  %v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  call void asm sideeffect "; use $0", "v"(i32 %v32) #0
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind noinline }