; Instruction-selection checks for i32 and vector-of-i32 loads from addrspace(1),
; with and without zero/sign extension to i64, across the amdgcn and r600
; configurations listed in the command lines below.
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI-NOHSA -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=GCNX3-HSA -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=GCNX3-NOHSA -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s

; Scalar case - loads one i32 through %in and stores it through %out (both addrspace(1)).
; FUNC-LABEL: {{^}}global_load_i32:
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}
; GCN-HSA: {{flat|global}}_load_dword

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
define amdgpu_kernel void @global_load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
entry:
  %ld = load i32, i32 addrspace(1)* %in
  store i32 %ld, i32 addrspace(1)* %out
  ret void
}

; Copies a <2 x i32> from %in to %out; the checks expect a single 64-bit wide load.
; FUNC-LABEL: {{^}}global_load_v2i32:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: {{flat|global}}_load_dwordx2

; EG: VTX_READ_64
define amdgpu_kernel void @global_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
  ret void
}

; Copies a <3 x i32> from %in to %out. The SI configuration is expected to use a
; dwordx4 load, while the GCNX3 configurations are expected to use dwordx3.
; FUNC-LABEL: {{^}}global_load_v3i32:
; SI-NOHSA: buffer_load_dwordx4
; GCNX3-NOHSA: buffer_load_dwordx3
; GCNX3-HSA: {{flat|global}}_load_dwordx3

; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <3 x i32>, <3 x i32> addrspace(1)* %in
  store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
  ret void
}

; Copies a <4 x i32> from %in to %out; the checks expect one 128-bit wide load.
; FUNC-LABEL: {{^}}global_load_v4i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
  ret void
}

; Copies an <8 x i32> from %in to %out; the checks expect two dwordx4 loads.
; FUNC-LABEL: {{^}}global_load_v8i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
  ret void
}

; Copies a <16 x i32> from %in to %out; the checks expect four dwordx4 loads.
; FUNC-LABEL: {{^}}global_load_v16i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
  ret void
}

; Loads an i32, zero-extends it to i64 and stores the result. The checks bind the
; loaded register as LO and a register set to 0 as HI, then require the 64-bit
; store to use the LO,HI pair.
; FUNC-LABEL: {{^}}global_zextload_i32_to_i64:
; GCN-NOHSA-DAG: buffer_load_dword v[[LO:[0-9]+]],
; GCN-HSA-DAG: {{flat|global}}_load_dword v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
define amdgpu_kernel void @global_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %ld = load i32, i32 addrspace(1)* %in
  %ext = zext i32 %ld to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Loads an i32, sign-extends it to i64 and stores the result. The checks require
; the high half to come from an arithmetic shift right by 31 of the loaded value.
; FUNC-LABEL: {{^}}global_sextload_i32_to_i64:
; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
; GCN-HSA: {{flat|global}}_load_dword v[[LO:[0-9]+]]
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}


; EG: MEM_RAT
; EG: VTX_READ_32
; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.
; EG: 31
define amdgpu_kernel void @global_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %ld = load i32, i32 addrspace(1)* %in
  %ext = sext i32 %ld to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Single-element vector variant of the zero-extending load - <1 x i32> to <1 x i64>.
; FUNC-LABEL: {{^}}global_zextload_v1i32_to_v1i64:
; GCN-NOHSA: buffer_load_dword
; GCN-NOHSA: buffer_store_dwordx2

; GCN-HSA: {{flat|global}}_load_dword
; GCN-HSA: {{flat|global}}_store_dwordx2
define amdgpu_kernel void @global_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
  %ext = zext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; Single-element vector variant of the sign-extending load - <1 x i32> to <1 x i64>.
; The high half is expected to come from an arithmetic shift right by 31.
; FUNC-LABEL: {{^}}global_sextload_v1i32_to_v1i64:
; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
; GCN-HSA: {{flat|global}}_load_dword v[[LO:[0-9]+]]
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
  %ext = sext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; Zero-extends a loaded <2 x i32> to <2 x i64>; one dwordx2 load and one dwordx4
; store are expected.
; FUNC-LABEL: {{^}}global_zextload_v2i32_to_v2i64:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx2
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %ext = zext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; Sign-extends a loaded <2 x i32> to <2 x i64>; one arithmetic shift per element
; is expected, in any order relative to the store.
; FUNC-LABEL: {{^}}global_sextload_v2i32_to_v2i64:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: {{flat|global}}_load_dwordx2

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %ext = sext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; Zero-extends a loaded <4 x i32> to <4 x i64>; one dwordx4 load and two dwordx4
; stores are expected.
; FUNC-LABEL: {{^}}global_zextload_v4i32_to_v4i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %ext = zext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; Sign-extends a loaded <4 x i32> to <4 x i64>; four arithmetic shifts and two
; dwordx4 stores are expected.
; FUNC-LABEL: {{^}}global_sextload_v4i32_to_v4i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %ext = sext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; Zero-extends a loaded <8 x i32> to <8 x i64>; two dwordx4 loads and four
; dwordx4 stores are expected.
; FUNC-LABEL: {{^}}global_zextload_v8i32_to_v8i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %ext = zext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; Sign-extends a loaded <8 x i32> to <8 x i64>; eight arithmetic shifts and four
; dwordx4 stores are expected.
; FUNC-LABEL: {{^}}global_sextload_v8i32_to_v8i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %ext = sext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; Sign-extends a loaded <16 x i32> to <16 x i64>; four dwordx4 loads are expected,
; followed by groups of four arithmetic shifts, each paired with a dwordx4 store.
; FUNC-LABEL: {{^}}global_sextload_v16i32_to_v16i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4


; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %ext = sext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; Zero-extends a loaded <16 x i32> to <16 x i64>; four dwordx4 loads and eight
; dwordx4 stores are expected.
; FUNC-LABEL: {{^}}global_zextload_v16i32_to_v16i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; Sign-extends a loaded <32 x i32> to <32 x i64>; eight dwordx4 loads, thirty-two
; arithmetic shifts and sixteen dwordx4 stores are expected.
; FUNC-LABEL: {{^}}global_sextload_v32i32_to_v32i64:

; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA-DAG: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
  %ext = sext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; Zero-extends a loaded <32 x i32> to <32 x i64>; eight dwordx4 loads and sixteen
; dwordx4 stores are expected.
; FUNC-LABEL: {{^}}global_zextload_v32i32_to_v32i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4


; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4


; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
  %ext = zext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; Copies a <32 x i32> from %in to %out; eight dwordx4 loads and sixteen dwordx4
; stores are expected, and no output line may mention accvgpr.
; FUNC-LABEL: {{^}}global_load_v32i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4


; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOT: accvgpr

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_load_v32i32(<32 x i32> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
  store <32 x i32> %ld, <32 x i32> addrspace(1)* %out
  ret void
}

; Attribute group referenced as #0 by the kernel definitions in this file.
attributes #0 = { nounwind }