; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC

; i8 argument with no extension attribute, zero-extended in the body and stored
; as i32. The GCN runs expect buffer_load_ubyte; the r600 runs expect a MOV
; that reads KC0[2].Z.
; FUNC-LABEL: {{^}}i8_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; GCN: buffer_load_ubyte

define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
  %0 = zext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 argument marked zeroext, zero-extended in the body and stored as i32.
; The SI and VI runs expect an s_load_dword from s[0:1] (immediate 0xb and
; 0x2c respectively); the r600 runs expect a MOV that reads KC0[2].Z.
; FUNC-LABEL: {{^}}i8_zext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
  %0 = zext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 argument marked signext, sign-extended in the body and stored as i32.
; The SI and VI runs expect an s_load_dword from s[0:1] (immediate 0xb and
; 0x2c respectively); the r600 runs expect a MOV that reads KC0[2].Z.
; FUNC-LABEL: {{^}}i8_sext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
  %0 = sext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 argument with no extension attribute, zero-extended in the body and
; stored as i32. The GCN runs expect buffer_load_ushort; the r600 runs expect a
; MOV that reads KC0[2].Z.
; FUNC-LABEL: {{^}}i16_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; GCN: buffer_load_ushort

define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
  %0 = zext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 argument marked zeroext, zero-extended in the body and stored as i32.
; The SI and VI runs expect an s_load_dword from s[0:1] (immediate 0xb and
; 0x2c respectively); the r600 runs expect a MOV that reads KC0[2].Z.
; FUNC-LABEL: {{^}}i16_zext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
  %0 = zext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 argument marked signext, sign-extended in the body and stored as i32.
; The SI and VI runs expect an s_load_dword from s[0:1] (immediate 0xb and
; 0x2c respectively); the r600 runs expect a MOV that reads KC0[2].Z.
; FUNC-LABEL: {{^}}i16_sext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
  %0 = sext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 argument stored unchanged to %out. The SI and VI runs expect an
; s_load_dword from s[0:1]; the r600 runs expect an operand reading KC0[2].Z.
; FUNC-LABEL: {{^}}i32_arg:
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
  store i32 %in, i32 addrspace(1)* %out, align 4
  ret void
}

; float argument stored unchanged to %out. The checks are the same as for the
; i32 case: an s_load_dword from s[0:1] on SI and VI, KC0[2].Z on r600.
; FUNC-LABEL: {{^}}f32_arg:
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
  store float %in, float addrspace(1)* %out, align 4
  ret void
}

; <2 x i8> argument stored to %out. The checks expect one VTX_READ_8 (r600) or
; one buffer_load_ubyte (GCN) per element, in order.
; FUNC-LABEL: {{^}}v2i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
  ret void
}

; <2 x i16> argument stored to %out. The checks expect one VTX_READ_16 (r600)
; or one buffer_load_ushort (GCN, order-independent) per element.
; FUNC-LABEL: {{^}}v2i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; GCN-DAG: buffer_load_ushort
; GCN-DAG: buffer_load_ushort
define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
  ret void
}

; <2 x i32> argument stored to %out. The SI and VI runs expect a single
; s_load_dwordx2 from s[0:1]; the r600 runs expect reads of KC0[2].W and
; KC0[3].X in any order.
; FUNC-LABEL: {{^}}v2i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; <2 x float> argument stored to %out. The checks are the same as for the
; <2 x i32> case: s_load_dwordx2 on SI and VI, KC0[2].W and KC0[3].X on r600.
; FUNC-LABEL: {{^}}v2f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
  ret void
}

; <3 x i8> argument stored to %out.
; NOTE(review): the VTX_READ_8 lines below carry no check prefix, so FileCheck
; does not evaluate them; only the function label is verified for this test.
; FUNC-LABEL: {{^}}v3i8_arg:
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
entry:
  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
  ret void
}

; <3 x i16> argument stored to %out.
; NOTE(review): the VTX_READ_16 lines below carry no check prefix, so FileCheck
; does not evaluate them; only the function label is verified for this test.
; FUNC-LABEL: {{^}}v3i16_arg:
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
entry:
  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
  ret void
}

; <3 x i32> argument stored to %out. The SI and VI runs expect an
; s_load_dwordx4 from s[0:1]; the r600 runs expect reads of KC0[3].Y, .Z and
; .W in any order.
; FUNC-LABEL: {{^}}v3i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
  ret void
}

; <3 x float> argument stored to %out. The checks are the same as for the
; <3 x i32> case: s_load_dwordx4 on SI and VI, KC0[3].Y/.Z/.W on r600.
; FUNC-LABEL: {{^}}v3f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
  ret void
}

; <4 x i8> argument stored to %out. The checks expect one VTX_READ_8 (r600) or
; one buffer_load_ubyte (GCN) per element.
; FUNC-LABEL: {{^}}v4i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
  ret void
}

; <4 x i16> argument stored to %out. The checks expect one VTX_READ_16 (r600)
; or one buffer_load_ushort (GCN) per element.
; FUNC-LABEL: {{^}}v4i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
  ret void
}

; <4 x i32> argument stored to %out. The SI and VI runs expect an
; s_load_dwordx4 from s[0:1]; the r600 runs expect reads of KC0[3].Y through
; KC0[4].X in any order.
; FUNC-LABEL: {{^}}v4i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; <4 x float> argument stored to %out. The checks are the same as for the
; <4 x i32> case: s_load_dwordx4 on SI and VI, KC0[3].Y through KC0[4].X on
; r600.
; FUNC-LABEL: {{^}}v4f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
  ret void
}

; <8 x i8> argument stored to %out. The checks expect one VTX_READ_8 (r600) or
; one buffer_load_ubyte (GCN) per element, i.e. eight of each, matching the
; per-element pattern used by the <4 x i8> and <16 x i8> tests.
; FUNC-LABEL: {{^}}v8i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
  ret void
}

; <8 x i16> argument stored to %out. The checks expect one VTX_READ_16 (r600)
; or one buffer_load_ushort (GCN) per element.
; FUNC-LABEL: {{^}}v8i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
  ret void
}

; <8 x i32> argument stored to %out. The SI and VI runs expect an
; s_load_dwordx8 from s[0:1]; the r600 runs expect reads of KC0[4].Y through
; KC0[6].X in any order.
; FUNC-LABEL: {{^}}v8i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
  ret void
}

; <8 x float> argument stored to %out. The checks mirror the <8 x i32> case:
; s_load_dwordx8 from s[0:1] on SI and VI, KC0[4].Y through KC0[6].X on r600.
; The VI line uses the same 0x44 immediate as the <8 x i32> test, whose
; argument list has the same sizes and order.
; FUNC-LABEL: {{^}}v8f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
  ret void
}

; <16 x i8> argument stored to %out. The checks expect one VTX_READ_8 (r600) or
; one buffer_load_ubyte (GCN) per element, sixteen of each.
; FUNC-LABEL: {{^}}v16i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
entry:
  store <16 x i8> %in, <16 x i8> addrspace(1)* %out
  ret void
}

; <16 x i16> argument stored to %out. The checks expect one VTX_READ_16 (r600)
; or one buffer_load_ushort (GCN) per element, sixteen of each.
; FUNC-LABEL: {{^}}v16i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
  ret void
}

; <16 x i32> argument stored to %out. The SI and VI runs expect an
; s_load_dwordx16 from s[0:1]; the r600 runs expect reads of KC0[6].Y through
; KC0[10].X in any order.
; FUNC-LABEL: {{^}}v16i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
  ret void
}

; <16 x float> argument stored to %out. The checks are the same as for the
; <16 x i32> case: s_load_dwordx16 on SI and VI, KC0[6].Y through KC0[10].X on
; r600.
; FUNC-LABEL: {{^}}v16f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
  ret void
}

; i64 argument stored to %out. The GCN runs expect two s_load_dwordx2
; instructions followed by a buffer_store_dwordx2; the r600 runs only verify
; the function label.
; FUNC-LABEL: {{^}}kernel_arg_i64:
; GCN: s_load_dwordx2
; GCN: s_load_dwordx2
; GCN: buffer_store_dwordx2
define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
  store i64 %a, i64 addrspace(1)* %out, align 8
  ret void
}

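; NOTE(review): the <1 x i64> variant below is disabled. Its check lines use
; the XFUNC/XGCN prefixes, which none of the visible RUN lines enable, and the
; function itself is commented out.
; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
; XGCN: s_load_dwordx2
; XGCN: s_load_dwordx2
; XGCN: buffer_store_dwordx2
; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }
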