; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Scalar i1 round trip: load an i1 from %in and store it to %out, both in
; addrspace(1). The GCN runs expect an unsigned byte load, a mask with 1, and
; a byte store; the EG run expects an 8-bit vertex read followed by AND_INT.
; FUNC-LABEL: {{^}}global_load_i1:
; GCN: buffer_load_ubyte
; GCN: v_and_b32_e32 v{{[0-9]+}}, 1
; GCN: buffer_store_byte

; EG: VTX_READ_8
; EG: AND_INT
define amdgpu_kernel void @global_load_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
  %load = load i1, i1 addrspace(1)* %in
  store i1 %load, i1 addrspace(1)* %out
  ret void
}

; <2 x i1> round trip through addrspace(1): load from %in, store to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_load_v2i1:
define amdgpu_kernel void @global_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
  store <2 x i1> %load, <2 x i1> addrspace(1)* %out
  ret void
}

; <3 x i1> round trip through addrspace(1): load from %in, store to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_load_v3i1:
define amdgpu_kernel void @global_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
  store <3 x i1> %load, <3 x i1> addrspace(1)* %out
  ret void
}

; <4 x i1> round trip through addrspace(1): load from %in, store to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_load_v4i1:
define amdgpu_kernel void @global_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
  store <4 x i1> %load, <4 x i1> addrspace(1)* %out
  ret void
}

; <8 x i1> round trip through addrspace(1): load from %in, store to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_load_v8i1:
define amdgpu_kernel void @global_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
  store <8 x i1> %load, <8 x i1> addrspace(1)* %out
  ret void
}

; <16 x i1> round trip through addrspace(1): load from %in, store to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_load_v16i1:
define amdgpu_kernel void @global_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
  store <16 x i1> %load, <16 x i1> addrspace(1)* %out
  ret void
}

; <32 x i1> round trip through addrspace(1): load from %in, store to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_load_v32i1:
define amdgpu_kernel void @global_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
  store <32 x i1> %load, <32 x i1> addrspace(1)* %out
  ret void
}

; <64 x i1> round trip through addrspace(1): load from %in, store to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_load_v64i1:
define amdgpu_kernel void @global_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
  store <64 x i1> %load, <64 x i1> addrspace(1)* %out
  ret void
}

; Loads an i1 from %in, zero-extends it to i32, and stores the result to %out.
; The GCN runs expect an unsigned byte load followed by a dword store; no
; pattern is checked for the EG run beyond the label.
; FUNC-LABEL: {{^}}global_zextload_i1_to_i32:
; GCN: buffer_load_ubyte
; GCN: buffer_store_dword
define amdgpu_kernel void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
  %a = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Loads an i1 from %in, sign-extends it to i32, and stores the result to %out.
; The GCN runs expect an unsigned byte load, a v_bfe_i32 whose last two
; operands are 0 and 1, and a dword store. The EG run expects an 8-bit vertex
; read followed by BFE_INT.
; FUNC-LABEL: {{^}}global_sextload_i1_to_i32:
; GCN: buffer_load_ubyte
; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
; GCN: buffer_store_dword

; EG: VTX_READ_8
; EG: BFE_INT
define amdgpu_kernel void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
  %a = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Loads <1 x i1> from %in, zero-extends it to <1 x i32>, and stores it to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i32:
define amdgpu_kernel void @global_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
  %ext = zext <1 x i1> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; Loads <1 x i1> from %in, sign-extends it to <1 x i32>, and stores it to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i32:
define amdgpu_kernel void @global_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
  %ext = sext <1 x i1> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; Loads <2 x i1> from %in, zero-extends each lane to i32, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i32:
define amdgpu_kernel void @global_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
  %ext = zext <2 x i1> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Loads <2 x i1> from %in, sign-extends each lane to i32, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i32:
define amdgpu_kernel void @global_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
  %ext = sext <2 x i1> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Loads <3 x i1> from %in, zero-extends each lane to i32, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i32:
define amdgpu_kernel void @global_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
  %ext = zext <3 x i1> %load to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; Loads <3 x i1> from %in, sign-extends each lane to i32, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i32:
define amdgpu_kernel void @global_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
  %ext = sext <3 x i1> %load to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; Loads <4 x i1> from %in, zero-extends each lane to i32, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i32:
define amdgpu_kernel void @global_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
  %ext = zext <4 x i1> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; Loads <4 x i1> from %in, sign-extends each lane to i32, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i32:
define amdgpu_kernel void @global_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
  %ext = sext <4 x i1> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; Loads <8 x i1> from %in, zero-extends each lane to i32, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i32:
define amdgpu_kernel void @global_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
  %ext = zext <8 x i1> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; Loads <8 x i1> from %in, sign-extends each lane to i32, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i32:
define amdgpu_kernel void @global_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
  %ext = sext <8 x i1> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; Loads <16 x i1> from %in, zero-extends each lane to i32, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i32:
define amdgpu_kernel void @global_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
  %ext = zext <16 x i1> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; Loads <16 x i1> from %in, sign-extends each lane to i32, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i32:
define amdgpu_kernel void @global_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
  %ext = sext <16 x i1> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; Loads <32 x i1> from %in, zero-extends each lane to i32, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i32:
define amdgpu_kernel void @global_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
  %ext = zext <32 x i1> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; Loads <32 x i1> from %in, sign-extends each lane to i32, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i32:
define amdgpu_kernel void @global_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
  %ext = sext <32 x i1> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; Loads <64 x i1> from %in, zero-extends each lane to i32, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i32:
define amdgpu_kernel void @global_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
  %ext = zext <64 x i1> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; Loads <64 x i1> from %in, sign-extends each lane to i32, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i32:
define amdgpu_kernel void @global_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
  %ext = sext <64 x i1> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; Loads an i1 from %in, zero-extends it to i64, and stores the result to %out.
; The first three GCN checks use the DAG form, so they may match in any
; order: the byte load (captured as LOAD), a move of constant 0, and an AND of
; LOAD with 1. A dwordx2 store must follow them.
; FUNC-LABEL: {{^}}global_zextload_i1_to_i64:
; GCN-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]],
; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; GCN-DAG: v_and_b32_e32 {{v[0-9]+}}, 1, [[LOAD]]{{$}}
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
  %a = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Loads an i1 from %in, sign-extends it to i64, and stores the result to %out.
; The GCN checks are ordered: a byte load, a v_bfe_i32 ending in operands 0, 1
; (result captured as BFE), an arithmetic shift right by 31 of BFE, and then a
; dwordx2 store. NOTE(review): the shift presumably produces the high dword of
; the 64-bit result; confirm against the generated code.
; FUNC-LABEL: {{^}}global_sextload_i1_to_i64:
; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]],
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
  %a = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Loads <1 x i1> from %in, zero-extends it to <1 x i64>, and stores it to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i64:
define amdgpu_kernel void @global_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
  %ext = zext <1 x i1> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; Loads <1 x i1> from %in, sign-extends it to <1 x i64>, and stores it to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i64:
define amdgpu_kernel void @global_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
  %ext = sext <1 x i1> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; Loads <2 x i1> from %in, zero-extends each lane to i64, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i64:
define amdgpu_kernel void @global_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
  %ext = zext <2 x i1> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; Loads <2 x i1> from %in, sign-extends each lane to i64, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i64:
define amdgpu_kernel void @global_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
  %ext = sext <2 x i1> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; Loads <3 x i1> from %in, zero-extends each lane to i64, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i64:
define amdgpu_kernel void @global_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
  %ext = zext <3 x i1> %load to <3 x i64>
  store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
  ret void
}

; Loads <3 x i1> from %in, sign-extends each lane to i64, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i64:
define amdgpu_kernel void @global_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
  %ext = sext <3 x i1> %load to <3 x i64>
  store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
  ret void
}

; Loads <4 x i1> from %in, zero-extends each lane to i64, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i64:
define amdgpu_kernel void @global_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
  %ext = zext <4 x i1> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; Loads <4 x i1> from %in, sign-extends each lane to i64, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i64:
define amdgpu_kernel void @global_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
  %ext = sext <4 x i1> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; Loads <8 x i1> from %in, zero-extends each lane to i64, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i64:
define amdgpu_kernel void @global_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
  %ext = zext <8 x i1> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; Loads <8 x i1> from %in, sign-extends each lane to i64, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i64:
define amdgpu_kernel void @global_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
  %ext = sext <8 x i1> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; Loads <16 x i1> from %in, zero-extends each lane to i64, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i64:
define amdgpu_kernel void @global_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
  %ext = zext <16 x i1> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; Loads <16 x i1> from %in, sign-extends each lane to i64, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i64:
define amdgpu_kernel void @global_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
  %ext = sext <16 x i1> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; Loads <32 x i1> from %in, zero-extends each lane to i64, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i64:
define amdgpu_kernel void @global_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
  %ext = zext <32 x i1> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; Loads <32 x i1> from %in, sign-extends each lane to i64, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i64:
define amdgpu_kernel void @global_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
  %ext = sext <32 x i1> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; Loads <64 x i1> from %in, zero-extends each lane to i64, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i64:
define amdgpu_kernel void @global_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
  %ext = zext <64 x i1> %load to <64 x i64>
  store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
  ret void
}

; Loads <64 x i1> from %in, sign-extends each lane to i64, and stores to %out.
; Only the function label is matched; no instruction sequence is pinned.
; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i64:
define amdgpu_kernel void @global_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
  %ext = sext <64 x i1> %load to <64 x i64>
  store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
  ret void
}

; Attribute group referenced as #0 by every kernel in this file.
attributes #0 = { nounwind }