1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
2; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
3; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
4; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
5
6; FUNC-LABEL: {{^}}constant_load_i32:
7; GCN: s_load_dword s{{[0-9]+}}
8
9; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
; Loads a single i32 through the addrspace(2) pointer %in and stores it
; unchanged through the addrspace(1) pointer %out.
10define void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
11entry:
12  %ld = load i32, i32 addrspace(2)* %in
13  store i32 %ld, i32 addrspace(1)* %out
14  ret void
15}
16
17; FUNC-LABEL: {{^}}constant_load_v2i32:
18; GCN: s_load_dwordx2
19
20; EG: VTX_READ_64
; Loads a <2 x i32> vector through the addrspace(2) pointer %in and stores it
; unchanged through the addrspace(1) pointer %out.
21define void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
22entry:
23  %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
24  store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
25  ret void
26}
27
28; FUNC-LABEL: {{^}}constant_load_v3i32:
29; GCN: s_load_dwordx4
30
31; EG: VTX_READ_128
; Loads a <3 x i32> vector through the addrspace(2) pointer %in and stores it
; unchanged through %out. Note that the check lines above match a dwordx4 /
; 128-bit read even though only three elements are loaded.
32define void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(2)* %in) #0 {
33entry:
34  %ld = load <3 x i32>, <3 x i32> addrspace(2)* %in
35  store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
36  ret void
37}
38
39; FUNC-LABEL: {{^}}constant_load_v4i32:
40; GCN: s_load_dwordx4
41
42; EG: VTX_READ_128
; Loads a <4 x i32> vector through the addrspace(2) pointer %in and stores it
; unchanged through the addrspace(1) pointer %out.
43define void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
44entry:
45  %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
46  store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
47  ret void
48}
49
50; FUNC-LABEL: {{^}}constant_load_v8i32:
51; GCN: s_load_dwordx8
52
53; EG: VTX_READ_128
54; EG: VTX_READ_128
; Loads a <8 x i32> vector through the addrspace(2) pointer %in and stores it
; unchanged through the addrspace(1) pointer %out.
55define void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
56entry:
57  %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
58  store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
59  ret void
60}
61
62; FUNC-LABEL: {{^}}constant_load_v16i32:
63; GCN: s_load_dwordx16
64
65; EG: VTX_READ_128
66; EG: VTX_READ_128
67; EG: VTX_READ_128
68; EG: VTX_READ_128
; Loads a <16 x i32> vector through the addrspace(2) pointer %in and stores it
; unchanged through the addrspace(1) pointer %out.
69define void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
70entry:
71  %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
72  store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
73  ret void
74}
75
76; FUNC-LABEL: {{^}}constant_zextload_i32_to_i64:
77; GCN-DAG: s_load_dword s[[SLO:[0-9]+]],
78; GCN-DAG: v_mov_b32_e32 v[[SHI:[0-9]+]], 0{{$}}
79; GCN: store_dwordx2
80
81; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
82; EG: CF_END
83; EG: VTX_READ_32
; Loads an i32 through the addrspace(2) pointer %in, zero-extends it to i64,
; and stores the i64 through the addrspace(1) pointer %out.
84define void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
85  %ld = load i32, i32 addrspace(2)* %in
86  %ext = zext i32 %ld to i64
87  store i64 %ext, i64 addrspace(1)* %out
88  ret void
89}
90
91; FUNC-LABEL: {{^}}constant_sextload_i32_to_i64:
92; GCN: s_load_dword s[[SLO:[0-9]+]]
93; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[SLO]], 31
94; GCN: store_dwordx2
95
96; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
97; EG: CF_END
98; EG: VTX_READ_32
99; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.
100; EG: 31
; Loads an i32 through the addrspace(2) pointer %in, sign-extends it to i64,
; and stores the i64 through the addrspace(1) pointer %out.
101define void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
102  %ld = load i32, i32 addrspace(2)* %in
103  %ext = sext i32 %ld to i64
104  store i64 %ext, i64 addrspace(1)* %out
105  ret void
106}
107
108; FUNC-LABEL: {{^}}constant_zextload_v1i32_to_v1i64:
109; GCN: s_load_dword
110; GCN: store_dwordx2
; Single-element vector variant: loads <1 x i32> through %in, zero-extends it
; to <1 x i64>, and stores the result through %out.
111define void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
112  %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
113  %ext = zext <1 x i32> %ld to <1 x i64>
114  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
115  ret void
116}
117
118; FUNC-LABEL: {{^}}constant_sextload_v1i32_to_v1i64:
119; GCN: s_load_dword s[[LO:[0-9]+]]
120; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[LO]], 31
121; GCN: store_dwordx2
; Single-element vector variant: loads <1 x i32> through %in, sign-extends it
; to <1 x i64>, and stores the result through %out.
122define void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
123  %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
124  %ext = sext <1 x i32> %ld to <1 x i64>
125  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
126  ret void
127}
128
129; FUNC-LABEL: {{^}}constant_zextload_v2i32_to_v2i64:
130; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
131; GCN: store_dwordx4
; Loads <2 x i32> through the addrspace(2) pointer %in, zero-extends each
; element to i64, and stores the <2 x i64> result through %out.
132define void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
133  %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
134  %ext = zext <2 x i32> %ld to <2 x i64>
135  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
136  ret void
137}
138
139; FUNC-LABEL: {{^}}constant_sextload_v2i32_to_v2i64:
140; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
141
142; GCN-DAG: s_ashr_i32
143; GCN-DAG: s_ashr_i32
144
145; GCN: store_dwordx4
; Loads <2 x i32> through the addrspace(2) pointer %in, sign-extends each
; element to i64, and stores the <2 x i64> result through %out.
146define void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
147  %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
148  %ext = sext <2 x i32> %ld to <2 x i64>
149  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
150  ret void
151}
152
153; FUNC-LABEL: {{^}}constant_zextload_v4i32_to_v4i64:
154; GCN: s_load_dwordx4
155
156; GCN: store_dwordx4
157; GCN: store_dwordx4
; Loads <4 x i32> through the addrspace(2) pointer %in, zero-extends each
; element to i64, and stores the <4 x i64> result through %out.
158define void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
159  %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
160  %ext = zext <4 x i32> %ld to <4 x i64>
161  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
162  ret void
163}
164
165; FUNC-LABEL: {{^}}constant_sextload_v4i32_to_v4i64:
166; GCN: s_load_dwordx4
167
168; GCN: s_ashr_i32
169; GCN: s_ashr_i32
170; GCN: s_ashr_i32
171; GCN: s_ashr_i32
172
173; GCN: store_dwordx4
174; GCN: store_dwordx4
; Loads <4 x i32> through the addrspace(2) pointer %in, sign-extends each
; element to i64, and stores the <4 x i64> result through %out.
175define void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
176  %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
177  %ext = sext <4 x i32> %ld to <4 x i64>
178  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
179  ret void
180}
181
; Loads <8 x i32> through the addrspace(2) pointer %in, zero-extends each
; element to i64, and stores the <8 x i64> result through %out.
; The 64-byte result is expected as four dwordx4 stores: buffer stores for the
; non-HSA runs and flat stores for the HSA run. All four flat-store checks must
; use the HSA check prefix; a misspelled prefix is silently ignored by
; FileCheck and leaves one store unverified.
182; FUNC-LABEL: {{^}}constant_zextload_v8i32_to_v8i64:
183; GCN: s_load_dwordx8
184
185; GCN-NOHSA-DAG: buffer_store_dwordx4
186; GCN-NOHSA-DAG: buffer_store_dwordx4
187; GCN-NOHSA-DAG: buffer_store_dwordx4
188; GCN-NOHSA-DAG: buffer_store_dwordx4
189
190; GCN-HSA-DAG: flat_store_dwordx4
191; GCN-HSA-DAG: flat_store_dwordx4
192; GCN-HSA-DAG: flat_store_dwordx4
193; GCN-HSA-DAG: flat_store_dwordx4
194define void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
195  %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
196  %ext = zext <8 x i32> %ld to <8 x i64>
197  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
198  ret void
199}
200
201; FUNC-LABEL: {{^}}constant_sextload_v8i32_to_v8i64:
202; GCN: s_load_dwordx8
203
204; GCN: s_ashr_i32
205; GCN: s_ashr_i32
206; GCN: s_ashr_i32
207; GCN: s_ashr_i32
208; GCN: s_ashr_i32
209; GCN: s_ashr_i32
210; GCN: s_ashr_i32
211; GCN: s_ashr_i32
212
213; GCN-NOHSA-DAG: buffer_store_dwordx4
214; GCN-NOHSA-DAG: buffer_store_dwordx4
215; GCN-NOHSA-DAG: buffer_store_dwordx4
216; GCN-NOHSA-DAG: buffer_store_dwordx4
217
218; GCN-HSA-DAG: flat_store_dwordx4
219; GCN-HSA-DAG: flat_store_dwordx4
220; GCN-HSA-DAG: flat_store_dwordx4
221; GCN-HSA-DAG: flat_store_dwordx4
; Loads <8 x i32> through the addrspace(2) pointer %in, sign-extends each
; element to i64, and stores the <8 x i64> result through %out.
222define void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
223  %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
224  %ext = sext <8 x i32> %ld to <8 x i64>
225  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
226  ret void
227}
228
229; FUNC-LABEL: {{^}}constant_sextload_v16i32_to_v16i64:
230; GCN: s_load_dwordx16
231
232
233; GCN-DAG: s_ashr_i32
234
235; GCN: store_dwordx4
236; GCN: store_dwordx4
237; GCN: store_dwordx4
238; GCN: store_dwordx4
239; GCN: store_dwordx4
240; GCN: store_dwordx4
241; GCN: store_dwordx4
242; GCN: store_dwordx4
; Loads <16 x i32> through the addrspace(2) pointer %in, sign-extends each
; element to i64, and stores the <16 x i64> result through %out.
243define void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
244  %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
245  %ext = sext <16 x i32> %ld to <16 x i64>
246  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
247  ret void
248}
249
; Loads <16 x i32> through the addrspace(2) pointer %in, zero-extends each
; element to i64, and stores the <16 x i64> result through %out.
; The 128-byte result is expected as eight dwordx4 stores (buffer stores for
; the non-HSA runs, flat stores for the HSA run). The label pattern ends with
; ':' so it anchors on the symbol definition itself, like the other labels in
; this file.
250; FUNC-LABEL: {{^}}constant_zextload_v16i32_to_v16i64:
251; GCN: s_load_dwordx16
252
253; GCN-NOHSA: buffer_store_dwordx4
254; GCN-NOHSA: buffer_store_dwordx4
255; GCN-NOHSA: buffer_store_dwordx4
256; GCN-NOHSA: buffer_store_dwordx4
257; GCN-NOHSA: buffer_store_dwordx4
258; GCN-NOHSA: buffer_store_dwordx4
259; GCN-NOHSA: buffer_store_dwordx4
260; GCN-NOHSA: buffer_store_dwordx4
261
262; GCN-HSA: flat_store_dwordx4
263; GCN-HSA: flat_store_dwordx4
264; GCN-HSA: flat_store_dwordx4
265; GCN-HSA: flat_store_dwordx4
266; GCN-HSA: flat_store_dwordx4
267; GCN-HSA: flat_store_dwordx4
268; GCN-HSA: flat_store_dwordx4
269; GCN-HSA: flat_store_dwordx4
270define void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
271  %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
272  %ext = zext <16 x i32> %ld to <16 x i64>
273  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
274  ret void
275}
276
277; FUNC-LABEL: {{^}}constant_sextload_v32i32_to_v32i64:
278
279; GCN: s_load_dwordx16
280; GCN: s_load_dwordx16
281
282; GCN-NOHSA: buffer_store_dwordx4
283; GCN-NOHSA: buffer_store_dwordx4
284; GCN-NOHSA: buffer_store_dwordx4
285; GCN-NOHSA: buffer_store_dwordx4
286
287; GCN-NOHSA: buffer_store_dwordx4
288; GCN-NOHSA: buffer_store_dwordx4
289; GCN-NOHSA: buffer_store_dwordx4
290; GCN-NOHSA: buffer_store_dwordx4
291
292; GCN-NOHSA: buffer_store_dwordx4
293; GCN-NOHSA: buffer_store_dwordx4
294; GCN-NOHSA: buffer_store_dwordx4
295; GCN-NOHSA: buffer_store_dwordx4
296
297; GCN-NOHSA: buffer_store_dwordx4
298; GCN-NOHSA: buffer_store_dwordx4
299; GCN-NOHSA: buffer_store_dwordx4
300; GCN-NOHSA: buffer_store_dwordx4
301
302; GCN-HSA: flat_store_dwordx4
303; GCN-HSA: flat_store_dwordx4
304; GCN-HSA: flat_store_dwordx4
305; GCN-HSA: flat_store_dwordx4
306
307; GCN-HSA: flat_store_dwordx4
308; GCN-HSA: flat_store_dwordx4
309; GCN-HSA: flat_store_dwordx4
310; GCN-HSA: flat_store_dwordx4
311
312; GCN-HSA: flat_store_dwordx4
313; GCN-HSA: flat_store_dwordx4
314; GCN-HSA: flat_store_dwordx4
315; GCN-HSA: flat_store_dwordx4
316
317; GCN-HSA: flat_store_dwordx4
318; GCN-HSA: flat_store_dwordx4
319; GCN-HSA: flat_store_dwordx4
320; GCN-HSA: flat_store_dwordx4
321
; Loads <32 x i32> through the addrspace(2) pointer %in, sign-extends each
; element to i64, and stores the <32 x i64> result through %out.
322define void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
323  %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
324  %ext = sext <32 x i32> %ld to <32 x i64>
325  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
326  ret void
327}
328
329; FUNC-LABEL: {{^}}constant_zextload_v32i32_to_v32i64:
330; GCN: s_load_dwordx16
331; GCN: s_load_dwordx16
332
333; GCN-NOHSA-DAG: buffer_store_dwordx4
334; GCN-NOHSA-DAG: buffer_store_dwordx4
335; GCN-NOHSA-DAG: buffer_store_dwordx4
336; GCN-NOHSA-DAG: buffer_store_dwordx4
337
338; GCN-NOHSA-DAG: buffer_store_dwordx4
339; GCN-NOHSA-DAG: buffer_store_dwordx4
340; GCN-NOHSA-DAG: buffer_store_dwordx4
341; GCN-NOHSA-DAG: buffer_store_dwordx4
342
343; GCN-NOHSA-DAG: buffer_store_dwordx4
344; GCN-NOHSA-DAG: buffer_store_dwordx4
345; GCN-NOHSA-DAG: buffer_store_dwordx4
346; GCN-NOHSA-DAG: buffer_store_dwordx4
347
348; GCN-NOHSA-DAG: buffer_store_dwordx4
349; GCN-NOHSA-DAG: buffer_store_dwordx4
350; GCN-NOHSA-DAG: buffer_store_dwordx4
351; GCN-NOHSA-DAG: buffer_store_dwordx4
352
353
354; GCN-HSA-DAG: flat_store_dwordx4
355; GCN-HSA-DAG: flat_store_dwordx4
356; GCN-HSA-DAG: flat_store_dwordx4
357; GCN-HSA-DAG: flat_store_dwordx4
358
359; GCN-HSA-DAG: flat_store_dwordx4
360; GCN-HSA-DAG: flat_store_dwordx4
361; GCN-HSA-DAG: flat_store_dwordx4
362; GCN-HSA-DAG: flat_store_dwordx4
363
364; GCN-HSA-DAG: flat_store_dwordx4
365; GCN-HSA-DAG: flat_store_dwordx4
366; GCN-HSA-DAG: flat_store_dwordx4
367; GCN-HSA-DAG: flat_store_dwordx4
368
369; GCN-HSA-DAG: flat_store_dwordx4
370; GCN-HSA-DAG: flat_store_dwordx4
371; GCN-HSA-DAG: flat_store_dwordx4
372; GCN-HSA-DAG: flat_store_dwordx4
; Loads <32 x i32> through the addrspace(2) pointer %in, zero-extends each
; element to i64, and stores the <32 x i64> result through %out.
373define void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
374  %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
375  %ext = zext <32 x i32> %ld to <32 x i64>
376  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
377  ret void
378}
379
380attributes #0 = { nounwind }
381