; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC

; Checks how kernel arguments of scalar and vector types are fetched on the
; amdgcn (SI, tonga) and r600 (redwood, cayman) targets.

; i8 argument without an extension attribute, zero-extended to i32 in the
; body. The checks expect an explicit mask of the loaded value (AND_INT on
; the EG targets, s_and_b32 with 0xff on the GCN targets).
; FUNC-LABEL: {{^}}i8_arg:
; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff

define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
  %0 = zext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 argument marked zeroext, zero-extended to i32 in the body. The checks
; only look for the plain argument fetch (MOV from KC0[2].Z on the EG targets,
; a scalar dword load on the GCN targets).
; FUNC-LABEL: {{^}}i8_zext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
  %0 = zext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 argument marked signext, sign-extended to i32 in the body. The checks
; only look for the plain argument fetch (MOV from KC0[2].Z on the EG targets,
; a scalar dword load on the GCN targets).
; FUNC-LABEL: {{^}}i8_sext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
  %0 = sext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 argument without an extension attribute, zero-extended to i32 in the
; body. The checks expect an explicit mask of the loaded value. The GCN mask
; is checked as the full 16-bit constant 0xffff; a pattern of 0xff would also
; accept an 8-bit mask because FileCheck matches substrings.
; FUNC-LABEL: {{^}}i16_arg:
; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff

define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
  %0 = zext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 argument marked zeroext, zero-extended to i32 in the body. The checks
; only look for the plain argument fetch (MOV from KC0[2].Z on the EG targets,
; a scalar dword load on the GCN targets).
; FUNC-LABEL: {{^}}i16_zext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
  %0 = zext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 argument marked signext, sign-extended to i32 in the body. The checks
; only look for the plain argument fetch (MOV from KC0[2].Z on the EG targets,
; a scalar dword load on the GCN targets).
; FUNC-LABEL: {{^}}i16_sext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
  %0 = sext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 argument stored straight to the output pointer. The GCN checks expect a
; single scalar dword load at offset 0xb (SI) / 0x2c (VI); the EG check
; expects a read of KC0[2].Z.
; FUNC-LABEL: {{^}}i32_arg:
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
  store i32 %in, i32 addrspace(1)* %out, align 4
  ret void
}

; float argument stored straight to the output pointer. Same expectations as
; the i32 case: one scalar dword load on the GCN targets, a read of KC0[2].Z
; on the EG targets.
; FUNC-LABEL: {{^}}f32_arg:
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
  store float %in, float addrspace(1)* %out, align 4
  ret void
}

; <2 x i8> argument. The checks expect one byte-sized load per element
; (VTX_READ_8 on the EG targets, buffer_load_ubyte on the GCN targets).
; FUNC-LABEL: {{^}}v2i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
  ret void
}

; <2 x i16> argument. The checks expect one 16-bit load per element
; (VTX_READ_16 on the EG targets, buffer_load_ushort on the GCN targets; the
; GCN loads may appear in either order).
; FUNC-LABEL: {{^}}v2i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; GCN-DAG: buffer_load_ushort
; GCN-DAG: buffer_load_ushort
define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
  ret void
}

; <2 x i32> argument. The GCN checks expect a single s_load_dwordx2 at offset
; 0xb (SI) / 0x2c (VI); the EG checks expect reads of KC0[2].W and KC0[3].X in
; any order.
; FUNC-LABEL: {{^}}v2i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; <2 x float> argument. Same expectations as the <2 x i32> case: one
; s_load_dwordx2 on the GCN targets, reads of KC0[2].W and KC0[3].X on the EG
; targets.
; FUNC-LABEL: {{^}}v2f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
  ret void
}

; <3 x i8> argument. Only the function label is verified here: the
; VTX_READ_8 lines below carry no check prefix, so FileCheck ignores them.
; NOTE(review): presumably disabled on purpose; confirm the offsets before
; attaching a prefix to them.
; FUNC-LABEL: {{^}}v3i8_arg:
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
entry:
  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
  ret void
}

; <3 x i16> argument. Only the function label is verified here: the
; VTX_READ_16 lines below carry no check prefix, so FileCheck ignores them.
; NOTE(review): presumably disabled on purpose; confirm the offsets before
; attaching a prefix to them.
; FUNC-LABEL: {{^}}v3i16_arg:
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
entry:
  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
  ret void
}

; <3 x i32> argument. The GCN checks expect it to be fetched with a single
; s_load_dwordx4 at offset 0xd (SI) / 0x34 (VI); the EG checks expect reads of
; KC0[3].Y, KC0[3].Z and KC0[3].W in any order.
; FUNC-LABEL: {{^}}v3i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
  ret void
}

; <3 x float> argument. Same expectations as the <3 x i32> case: one
; s_load_dwordx4 on the GCN targets, reads of KC0[3].Y/.Z/.W on the EG
; targets.
; FUNC-LABEL: {{^}}v3f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
  ret void
}

; <4 x i8> argument. The checks expect one byte-sized load per element: four
; VTX_READ_8 on the EG targets and four buffer_load_ubyte on the GCN targets.
; FUNC-LABEL: {{^}}v4i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
  ret void
}

; <4 x i16> argument. The checks expect one 16-bit load per element: four
; VTX_READ_16 on the EG targets and four buffer_load_ushort on the GCN
; targets.
; FUNC-LABEL: {{^}}v4i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
  ret void
}

; <4 x i32> argument. The GCN checks expect a single s_load_dwordx4 at offset
; 0xd (SI) / 0x34 (VI); the EG checks expect reads of KC0[3].Y through
; KC0[4].X in any order.
; FUNC-LABEL: {{^}}v4i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; <4 x float> argument. Same expectations as the <4 x i32> case: one
; s_load_dwordx4 on the GCN targets, reads of KC0[3].Y through KC0[4].X on the
; EG targets.
; FUNC-LABEL: {{^}}v4f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
  ret void
}

; <8 x i8> argument. The checks expect one byte-sized load per element: eight
; VTX_READ_8 on the EG targets and eight buffer_load_ubyte on the GCN targets
; (matching the per-element pattern of the 4- and 16-element i8 tests).
; FUNC-LABEL: {{^}}v8i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
  ret void
}

; <8 x i16> argument. The checks expect one 16-bit load per element: eight
; VTX_READ_16 on the EG targets and eight buffer_load_ushort on the GCN
; targets.
; FUNC-LABEL: {{^}}v8i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
  ret void
}

; <8 x i32> argument. The GCN checks expect a single s_load_dwordx8 at offset
; 0x11 (SI) / 0x44 (VI); the EG checks expect reads of KC0[4].Y through
; KC0[6].X in any order.
; FUNC-LABEL: {{^}}v8i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
; VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
  ret void
}

; <8 x float> argument. Same expectations as the <8 x i32> case: one
; s_load_dwordx8 at offset 0x11 (SI) / 0x44 (VI) on the GCN targets, reads of
; KC0[4].Y through KC0[6].X on the EG targets. The VI line mirrors the
; <8 x i32> test so the tonga run also verifies the load.
; FUNC-LABEL: {{^}}v8f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
; VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
  ret void
}

; <16 x i8> argument. The checks expect one byte-sized load per element:
; sixteen VTX_READ_8 on the EG targets and sixteen buffer_load_ubyte on the
; GCN targets.
; FUNC-LABEL: {{^}}v16i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
entry:
  store <16 x i8> %in, <16 x i8> addrspace(1)* %out
  ret void
}

; <16 x i16> argument. The checks expect one 16-bit load per element: sixteen
; VTX_READ_16 on the EG targets and sixteen buffer_load_ushort on the GCN
; targets.
; FUNC-LABEL: {{^}}v16i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
  ret void
}

; <16 x i32> argument. The GCN checks expect a single s_load_dwordx16 at
; offset 0x19 (SI) / 0x64 (VI); the EG checks expect reads of KC0[6].Y through
; KC0[10].X in any order.
; FUNC-LABEL: {{^}}v16i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
; VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
  ret void
}

; <16 x float> argument. Same expectations as the <16 x i32> case: one
; s_load_dwordx16 on the GCN targets, reads of KC0[6].Y through KC0[10].X on
; the EG targets.
; FUNC-LABEL: {{^}}v16f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
; VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
  ret void
}

; i64 argument stored to the output pointer. Only the GCN targets are checked:
; two s_load_dwordx2 followed by a buffer_store_dwordx2.
; FUNC-LABEL: {{^}}kernel_arg_i64:
; GCN: s_load_dwordx2
; GCN: s_load_dwordx2
; GCN: buffer_store_dwordx2
define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
  store i64 %a, i64 addrspace(1)* %out, align 8
  ret void
}

; double argument stored to the output pointer. Only the GCN targets are
; checked: two s_load_dwordx2 in either order, at offsets 0x9 and 0xb (SI) /
; 0x24 and 0x2c (VI), followed by a buffer_store_dwordx2.
; FUNC-LABEL: {{^}}f64_kernel_arg:
; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x24
; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c
; GCN: buffer_store_dwordx2
define void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
entry:
  store double %in, double addrspace(1)* %out
  ret void
}

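; Disabled test for a <1 x i64> kernel argument. The directives below use the
; X-prefixed names XFUNC-LABEL and XGCN, which none of the FileCheck runs in
; this file enable, and the IR itself is commented out, so nothing here is
; compiled or checked.
; NOTE(review): presumably parked until <1 x i64> arguments are handled; confirm.
; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
; XGCN: s_load_dwordx2
; XGCN: s_load_dwordx2
; XGCN: buffer_store_dwordx2
; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }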