; Every instruction check in this file is written with the GCN prefix, so that
; prefix has to be enabled on each FileCheck invocation below; without it only
; the function-label checks are verified.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s

; Extracts the single element of a <1 x i8> argument and stores it to %out.
; The checks expect one byte load followed by one byte store.
; FUNC-LABEL: {{^}}extract_vector_elt_v1i8:
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
define void @extract_vector_elt_v1i8(i8 addrspace(1)* %out, <1 x i8> %foo) #0 {
  %p0 = extractelement <1 x i8> %foo, i32 0
  store i8 %p0, i8 addrspace(1)* %out
  ret void
}

; Extracts both elements of a <2 x i8> argument and stores them swapped:
; element 1 goes to %out and element 0 goes to the byte after %out.
; The checks expect two byte loads followed by two byte stores.
; FUNC-LABEL: {{^}}extract_vector_elt_v2i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define void @extract_vector_elt_v2i8(i8 addrspace(1)* %out, <2 x i8> %foo) #0 {
  %p0 = extractelement <2 x i8> %foo, i32 0
  %p1 = extractelement <2 x i8> %foo, i32 1
  %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
  store i8 %p1, i8 addrspace(1)* %out
  store i8 %p0, i8 addrspace(1)* %out1
  ret void
}

; Extracts elements 0 and 2 of a <3 x i8> argument; element 2 is stored to
; %out and element 0 to the byte after %out.
; The checks expect two byte loads followed by two byte stores.
; FUNC-LABEL: {{^}}extract_vector_elt_v3i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define void @extract_vector_elt_v3i8(i8 addrspace(1)* %out, <3 x i8> %foo) #0 {
  %p0 = extractelement <3 x i8> %foo, i32 0
  %p1 = extractelement <3 x i8> %foo, i32 2
  %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
  store i8 %p1, i8 addrspace(1)* %out
  store i8 %p0, i8 addrspace(1)* %out1
  ret void
}

; Extracts elements 0 and 2 of a <4 x i8> argument; element 2 is stored to
; %out and element 0 to the byte after %out.
; The checks expect two byte loads followed by two byte stores.
; FUNC-LABEL: {{^}}extract_vector_elt_v4i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define void @extract_vector_elt_v4i8(i8 addrspace(1)* %out, <4 x i8> %foo) #0 {
  %p0 = extractelement <4 x i8> %foo, i32 0
  %p1 = extractelement <4 x i8> %foo, i32 2
  %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
  store i8 %p1, i8 addrspace(1)* %out
  store i8 %p0, i8 addrspace(1)* %out1
  ret void
}

; Extracts elements 0 and 2 of an <8 x i8> argument; element 2 is stored to
; %out and element 0 to the byte after %out.
; The checks expect two byte loads followed by two byte stores.
; FUNC-LABEL: {{^}}extract_vector_elt_v8i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define void @extract_vector_elt_v8i8(i8 addrspace(1)* %out, <8 x i8> %foo) #0 {
  %p0 = extractelement <8 x i8> %foo, i32 0
  %p1 = extractelement <8 x i8> %foo, i32 2
  %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
  store i8 %p1, i8 addrspace(1)* %out
  store i8 %p0, i8 addrspace(1)* %out1
  ret void
}

; Extracts elements 0 and 2 of a <16 x i8> argument; element 2 is stored to
; %out and element 0 to the byte after %out.
; The checks expect two byte loads followed by two byte stores.
; FUNC-LABEL: {{^}}extract_vector_elt_v16i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define void @extract_vector_elt_v16i8(i8 addrspace(1)* %out, <16 x i8> %foo) #0 {
  %p0 = extractelement <16 x i8> %foo, i32 0
  %p1 = extractelement <16 x i8> %foo, i32 2
  %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
  store i8 %p1, i8 addrspace(1)* %out
  store i8 %p0, i8 addrspace(1)* %out1
  ret void
}

; Extracts elements 0 and 2 of a <32 x i8> argument; element 2 is stored to
; %out and element 0 to the byte after %out.
; The checks expect two byte loads followed by two byte stores.
; FUNC-LABEL: {{^}}extract_vector_elt_v32i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define void @extract_vector_elt_v32i8(i8 addrspace(1)* %out, <32 x i8> %foo) #0 {
  %p0 = extractelement <32 x i8> %foo, i32 0
  %p1 = extractelement <32 x i8> %foo, i32 2
  %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
  store i8 %p1, i8 addrspace(1)* %out
  store i8 %p0, i8 addrspace(1)* %out1
  ret void
}

; Extracts elements 0 and 2 of a <64 x i8> argument; element 2 is stored to
; %out and element 0 to the byte after %out.
; The checks expect two byte loads followed by two byte stores.
; FUNC-LABEL: {{^}}extract_vector_elt_v64i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define void @extract_vector_elt_v64i8(i8 addrspace(1)* %out, <64 x i8> %foo) #0 {
  %p0 = extractelement <64 x i8> %foo, i32 0
  %p1 = extractelement <64 x i8> %foo, i32 2
  %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
  store i8 %p1, i8 addrspace(1)* %out
  store i8 %p0, i8 addrspace(1)* %out1
  ret void
}

; Extracts the element of a <3 x i8> argument selected by the runtime index
; %idx and stores it to %out.
; The checks expect three byte loads, three byte stores, and then a
; store / load / store sequence.
; NOTE(review): presumably the vector is written to a temporary slot and the
; selected byte is reloaded from it - confirm against the llc output.
; FUNC-LABEL: {{^}}dynamic_extract_vector_elt_v3i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
define void @dynamic_extract_vector_elt_v3i8(i8 addrspace(1)* %out, <3 x i8> %foo, i32 %idx) #0 {
  %p0 = extractelement <3 x i8> %foo, i32 %idx
  store i8 %p0, i8 addrspace(1)* %out
  ret void
}

; Extracts the element of a <4 x i8> argument selected by the runtime index
; %idx and stores it to %out.
; The checks expect four byte loads, four byte stores, and then a
; store / load / store sequence.
; NOTE(review): presumably the vector is written to a temporary slot and the
; selected byte is reloaded from it - confirm against the llc output.
; FUNC-LABEL: {{^}}dynamic_extract_vector_elt_v4i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
define void @dynamic_extract_vector_elt_v4i8(i8 addrspace(1)* %out, <4 x i8> %foo, i32 %idx) #0 {
  %p0 = extractelement <4 x i8> %foo, i32 %idx
  store i8 %p0, i8 addrspace(1)* %out
  ret void
}

; Attribute group referenced as #0 by the function definitions in this file.
attributes #0 = { nounwind }