; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Scalar zext of a global i32 load to i64. The loaded dword is captured as the
; low register, the high register must be written with a literal 0, and the
; two are stored together as one register pair by a single 64-bit store.
; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
; SI: buffer_load_dword v[[LO:[0-9]+]],
; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %a = load i32, i32 addrspace(1)* %in
  %ext = zext i32 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Scalar sext of a global i32 load to i64. The sign half must be produced by an
; arithmetic right shift by 31 of the very register that was loaded, followed
; by a 64-bit store.
; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
; SI: buffer_load_dword [[LOAD:v[0-9]+]],
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %a = load i32, i32 addrspace(1)* %in
  %ext = sext i32 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Single-element vector zext (<1 x i32> to <1 x i64>): expects a dword load
; followed by a 64-bit store before the program ends.
; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64:
; SI: buffer_load_dword
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
  %ext = zext <1 x i32> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; Single-element vector sext (<1 x i32> to <1 x i64>): expects a dword load,
; an arithmetic right shift for the sign half, then a 64-bit store.
; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64:
; SI: buffer_load_dword
; SI: v_ashrrev_i32
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
  %ext = sext <1 x i32> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; <2 x i32> zext to <2 x i64>: expects one two-dword load and then two 64-bit
; stores, matched in this order.
; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64:
; SI: buffer_load_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %ext = zext <2 x i32> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; <2 x i32> sext to <2 x i64>: after the two-dword load, two sign shifts and
; two 64-bit stores must appear; the -DAG directives let them interleave in
; any order.
; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64:
; SI: buffer_load_dwordx2
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %ext = sext <2 x i32> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; <4 x i32> zext to <4 x i64>: expects one four-dword load and then four
; 64-bit stores, matched in this order.
; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64:
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %ext = zext <4 x i32> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; <4 x i32> sext to <4 x i64>: after the four-dword load, four sign shifts and
; four 64-bit stores must appear; the -DAG directives let them interleave in
; any order.
; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64:
; SI: buffer_load_dwordx4
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %ext = sext <4 x i32> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; <8 x i32> zext to <8 x i64>: expects eight load instructions followed by
; eight 64-bit stores (stores may appear in any order). The load pattern is
; only a mnemonic prefix, so it also accepts the wider dwordxN load forms.
; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64:
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %ext = zext <8 x i32> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; <8 x i32> sext to <8 x i64>: expects eight load instructions, then eight sign
; shifts and eight 64-bit stores in any relative order. The load pattern is
; only a mnemonic prefix, so it also accepts the wider dwordxN load forms.
; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64:
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword

; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2

; SI: s_endpgm
define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %ext = sext <8 x i32> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; <16 x i32> sext to <16 x i64>: expects sixteen load instructions, then
; sixteen sign shifts and at least eight 64-bit stores. The shift and store
; directives are all -DAG, so they may interleave in any order; the blank
; lines between the groups below are only visual.
; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64:
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword

; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2

; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2

; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2

; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %ext = sext <16 x i32> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; <16 x i32> zext to <16 x i64>: expects sixteen load instructions followed by
; sixteen 64-bit stores, matched in this order. The label pattern ends with
; ':' so it is anchored to the whole symbol name rather than a prefix of it.
; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64:
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword

; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2

; SI: s_endpgm
define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %ext = zext <16 x i32> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; <32 x i32> sext to <32 x i64>: expects thirty-two load instructions, then
; thirty-two sign shifts and thirty-two 64-bit stores. The shift and store
; directives are all -DAG, so they may interleave in any order; the blank
; lines between the groups below are only visual.
; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64:
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword

; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword

; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword

; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword

; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32

; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2

; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2

; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2

; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2

; SI: s_endpgm
define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
  %ext = sext <32 x i32> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; <32 x i32> zext to <32 x i64>: expects thirty-two load instructions followed
; by thirty-two 64-bit stores. The store directives are all -DAG, so the
; stores may appear in any order; the blank lines between the groups below
; are only visual.
; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64:
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword

; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword

; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword

; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword

; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2

; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2

; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2

; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2

; SI: s_endpgm
define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
  %ext = zext <32 x i32> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}
