1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SICIVI,GFX89,FUNC %s
3; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX89,FUNC %s
4; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
5
; The following runs enable +enable-ds128 to test selection of ds_read_b128/ds_write_b128.
7; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s
8; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s
9
10; FUNC-LABEL: {{^}}local_load_i16:
11; GFX9-NOT: m0
12; SICIVI: s_mov_b32 m0
13
14; GCN: ds_read_u16 v{{[0-9]+}}
15
16; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
17; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
18; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
19; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
20; EG: LDS_SHORT_WRITE {{\*?}} [[TO]], [[DATA]]
; Loads a single i16 from the addrspace(3) pointer %in and stores it
; unchanged to the addrspace(3) pointer %out.
define amdgpu_kernel void @local_load_i16(i16 addrspace(3)* %out, i16 addrspace(3)* %in) {
entry:
  %ld = load i16, i16 addrspace(3)* %in
  store i16 %ld, i16 addrspace(3)* %out
  ret void
}
27
28; FUNC-LABEL: {{^}}local_load_v2i16:
29; GFX9-NOT: m0
30; SICIVI: s_mov_b32 m0
31
32; GCN: ds_read_b32
33
34; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
35; EG: LDS_READ_RET {{.*}} [[FROM]]
36; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
37; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
38; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
; Copies a <2 x i16> vector from the addrspace(3) pointer %in to the
; addrspace(3) pointer %out without modifying it.
define amdgpu_kernel void @local_load_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %ld = load <2 x i16>, <2 x i16> addrspace(3)* %in
  store <2 x i16> %ld, <2 x i16> addrspace(3)* %out
  ret void
}
45
46; FUNC-LABEL: {{^}}local_load_v3i16:
47; GFX9-NOT: m0
48; SICIVI: s_mov_b32 m0
49
50; GCN: ds_read_b64
51; GCN-DAG: ds_write_b32
52; GCN-DAG: ds_write_b16
53
54; EG-DAG: LDS_USHORT_READ_RET
55; EG-DAG: LDS_READ_RET
; Copies a <3 x i16> vector (an odd element count) from the addrspace(3)
; pointer %in to the addrspace(3) pointer %out.
define amdgpu_kernel void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
  store <3 x i16> %ld, <3 x i16> addrspace(3)* %out
  ret void
}
62
63; FUNC-LABEL: {{^}}local_load_v4i16:
64; GFX9-NOT: m0
65; SICIVI: s_mov_b32 m0
66
67; GCN: ds_read_b64
68
69; EG: LDS_READ_RET
70; EG: LDS_READ_RET
; Copies a <4 x i16> vector from the addrspace(3) pointer %in to the
; addrspace(3) pointer %out.
define amdgpu_kernel void @local_load_v4i16(<4 x i16> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %ld = load <4 x i16>, <4 x i16> addrspace(3)* %in
  store <4 x i16> %ld, <4 x i16> addrspace(3)* %out
  ret void
}
77
78; FUNC-LABEL: {{^}}local_load_v8i16:
79; GFX9-NOT: m0
80; SICIVI: s_mov_b32 m0
81
82; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
83
84; EG: LDS_READ_RET
85; EG: LDS_READ_RET
86; EG: LDS_READ_RET
87; EG: LDS_READ_RET
; Copies an <8 x i16> vector from the addrspace(3) pointer %in to the
; addrspace(3) pointer %out. No explicit alignment is given on either access.
define amdgpu_kernel void @local_load_v8i16(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) {
entry:
  %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in
  store <8 x i16> %ld, <8 x i16> addrspace(3)* %out
  ret void
}
94
95; FUNC-LABEL: {{^}}local_load_v16i16:
96; GFX9-NOT: m0
97; SICIVI: s_mov_b32 m0
98
99; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:3{{$}}
100; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}}
101
102
103; EG: LDS_READ_RET
104; EG: LDS_READ_RET
105; EG: LDS_READ_RET
106; EG: LDS_READ_RET
107
108; EG: LDS_READ_RET
109; EG: LDS_READ_RET
110; EG: LDS_READ_RET
111; EG: LDS_READ_RET
; Copies a <16 x i16> vector from the addrspace(3) pointer %in to the
; addrspace(3) pointer %out.
define amdgpu_kernel void @local_load_v16i16(<16 x i16> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) {
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(3)* %in
  store <16 x i16> %ld, <16 x i16> addrspace(3)* %out
  ret void
}
118
119; FUNC-LABEL: {{^}}local_zextload_i16_to_i32:
120; GFX9-NOT: m0
121; SICIVI: s_mov_b32 m0
122
123; GCN: ds_read_u16
124; GCN: ds_write_b32
125
126; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
127; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
128; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
129; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
130; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
; Loads an i16 from the addrspace(3) pointer %in, zero-extends it to i32,
; and stores the result to the addrspace(3) pointer %out.
define amdgpu_kernel void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
  %a = load i16, i16 addrspace(3)* %in
  %ext = zext i16 %a to i32
  store i32 %ext, i32 addrspace(3)* %out
  ret void
}
137
138; FUNC-LABEL: {{^}}local_sextload_i16_to_i32:
139; GCN-NOT: s_wqm_b64
140
141; GFX9-NOT: m0
142; SICIVI: s_mov_b32 m0
143
144; GCN: ds_read_i16
145
146; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
147; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
148; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
149; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
150; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
151; EG: 16
152; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
; Loads an i16 from the addrspace(3) pointer %in, sign-extends it to i32,
; and stores the result to the addrspace(3) pointer %out.
define amdgpu_kernel void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
  %a = load i16, i16 addrspace(3)* %in
  %ext = sext i16 %a to i32
  store i32 %ext, i32 addrspace(3)* %out
  ret void
}
159
160; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i32:
161; GFX9-NOT: m0
162; SICIVI: s_mov_b32 m0
163
164; GCN: ds_read_u16
165
166; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
167; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
168; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
169; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
170; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
; Single-element vector variant. Loads <1 x i16> from %in, zero-extends it
; to <1 x i32>, and stores it to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
  %ext = zext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}
177
178; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i32:
179; GFX9-NOT: m0
180; SICIVI: s_mov_b32 m0
181
182; GCN: ds_read_i16
183
184; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
185; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
186; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
187; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
188; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
189; EG: 16
190; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
; Single-element vector variant. Loads <1 x i16> from %in, sign-extends it
; to <1 x i32>, and stores it to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
  %ext = sext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}
197
198; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i32:
199; GCN-NOT: s_wqm_b64
200; GFX9-NOT: m0
201; SICIVI: s_mov_b32 m0
202
203; GCN: ds_read_b32
204
205; EG: LDS_READ_RET
; Loads <2 x i16> from %in, zero-extends each element to i32, and stores
; the <2 x i32> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %ext = zext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
  ret void
}
212
213; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i32:
214; GCN-NOT: s_wqm_b64
215; GFX9-NOT: m0
216; SICIVI: s_mov_b32 m0
217
218; GCN: ds_read_b32
219
220; EG: LDS_READ_RET
221; EG: BFE_INT
222; EG: BFE_INT
; Loads <2 x i16> from %in, sign-extends each element to i32, and stores
; the <2 x i32> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %ext = sext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
  ret void
}
229
230; FUNC-LABEL: {{^}}local_local_zextload_v3i16_to_v3i32:
231; GFX9-NOT: m0
232; SICIVI: s_mov_b32 m0
233
234; GCN: ds_read_b64
235; GCN-DAG: ds_write_b32
236; GCN-DAG: ds_write_b64
237
238; EG: LDS_READ_RET
; Loads <3 x i16> from %in, zero-extends each element to i32, and stores
; the <3 x i32> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
  %ext = zext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
  ret void
}
246
247; FUNC-LABEL: {{^}}local_local_sextload_v3i16_to_v3i32:
248; GFX9-NOT: m0
249; SICIVI: s_mov_b32 m0
250
251; GCN: ds_read_b64
252; GCN-DAG: ds_write_b32
253; GCN-DAG: ds_write_b64
254
255; EG: LDS_READ_RET
256; EG-DAG: BFE_INT
257; EG-DAG: BFE_INT
258; EG-DAG: BFE_INT
; Loads <3 x i16> from %in, sign-extends each element to i32, and stores
; the <3 x i32> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_local_sextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
  %ext = sext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
  ret void
}
266
267; FUNC-LABEL: {{^}}local_local_zextload_v4i16_to_v4i32:
268; GCN-NOT: s_wqm_b64
269; GFX9-NOT: m0
270; SICIVI: s_mov_b32 m0
271
272; GCN: ds_read_b64
273
274; EG: LDS_READ_RET
275; EG: LDS_READ_RET
; Loads <4 x i16> from %in, zero-extends each element to i32, and stores
; the <4 x i32> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_local_zextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %ext = zext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
  ret void
}
282
283; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i32:
284; GCN-NOT: s_wqm_b64
285; GFX9-NOT: m0
286; SICIVI: s_mov_b32 m0
287
288; GCN: ds_read_b64
289
290; EG: LDS_READ_RET
291; EG: LDS_READ_RET
292; EG-DAG: BFE_INT
293; EG-DAG: BFE_INT
294; EG-DAG: BFE_INT
295; EG-DAG: BFE_INT
; Loads <4 x i16> from %in, sign-extends each element to i32, and stores
; the <4 x i32> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %ext = sext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
  ret void
}
302
303; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i32:
304; GFX9-NOT: m0
305; SICIVI: s_mov_b32 m0
306
307; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
308
309; EG: LDS_READ_RET
310; EG: LDS_READ_RET
311; EG: LDS_READ_RET
312; EG: LDS_READ_RET
; Loads <8 x i16> from %in, zero-extends each element to i32, and stores
; the <8 x i32> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
  %ext = zext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
  ret void
}
319
320; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i32:
321; GFX9-NOT: m0
322; SICIVI: s_mov_b32 m0
323
324; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
325
326; EG: LDS_READ_RET
327; EG: LDS_READ_RET
328; EG: LDS_READ_RET
329; EG: LDS_READ_RET
330; EG-DAG: BFE_INT
331; EG-DAG: BFE_INT
332; EG-DAG: BFE_INT
333; EG-DAG: BFE_INT
334; EG-DAG: BFE_INT
335; EG-DAG: BFE_INT
336; EG-DAG: BFE_INT
337; EG-DAG: BFE_INT
; Loads <8 x i16> from %in, sign-extends each element to i32, and stores
; the <8 x i32> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
  %ext = sext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
  ret void
}
344
345; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32:
346; GFX9-NOT: m0
347; SICIVI: s_mov_b32 m0
348
349; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
350; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
351
352; GCN: ds_write2_b64
353; GCN: ds_write2_b64
354; GCN: ds_write2_b64
355; GCN: ds_write2_b64
356
357; EG: LDS_READ_RET
358; EG: LDS_READ_RET
359; EG: LDS_READ_RET
360; EG: LDS_READ_RET
361; EG: LDS_READ_RET
362; EG: LDS_READ_RET
363; EG: LDS_READ_RET
364; EG: LDS_READ_RET
; Loads <16 x i16> from %in, zero-extends each element to i32, and stores
; the <16 x i32> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
  %ext = zext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
  ret void
}
371
372; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32:
373; GFX9-NOT: m0
374; SICIVI: s_mov_b32 m0
375
376
377; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
378; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
379
380; EG: LDS_READ_RET
381; EG: LDS_READ_RET
382; EG: LDS_READ_RET
383; EG: LDS_READ_RET
384; EG: LDS_READ_RET
385; EG: LDS_READ_RET
386; EG: LDS_READ_RET
387; EG: LDS_READ_RET
388; EG-DAG: BFE_INT
389; EG-DAG: BFE_INT
390; EG-DAG: BFE_INT
391; EG-DAG: BFE_INT
392; EG-DAG: BFE_INT
393; EG-DAG: BFE_INT
394; EG-DAG: BFE_INT
395; EG-DAG: BFE_INT
396; EG-DAG: BFE_INT
397; EG-DAG: BFE_INT
398; EG-DAG: BFE_INT
399; EG-DAG: BFE_INT
400; EG-DAG: BFE_INT
401; EG-DAG: BFE_INT
402; EG-DAG: BFE_INT
403; EG-DAG: BFE_INT
; Loads <16 x i16> from %in, sign-extends each element to i32, and stores
; the <16 x i32> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
  %ext = sext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
  ret void
}
410
411; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32:
412; GFX9-NOT: m0
413; SICIVI: s_mov_b32 m0
414
415; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
416; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3
417; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
418; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
419
420; EG: LDS_READ_RET
421; EG: LDS_READ_RET
422; EG: LDS_READ_RET
423; EG: LDS_READ_RET
424; EG: LDS_READ_RET
425; EG: LDS_READ_RET
426; EG: LDS_READ_RET
427; EG: LDS_READ_RET
428; EG: LDS_READ_RET
429; EG: LDS_READ_RET
430; EG: LDS_READ_RET
431; EG: LDS_READ_RET
432; EG: LDS_READ_RET
433; EG: LDS_READ_RET
434; EG: LDS_READ_RET
435; EG: LDS_READ_RET
; Loads <32 x i16> from %in, zero-extends each element to i32, and stores
; the <32 x i32> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
  %ext = zext <32 x i16> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
  ret void
}
442
443; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32:
444; GFX9-NOT: m0
445; SICIVI: s_mov_b32 m0
446
447; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
448; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
449; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
450; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
451; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
452; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13
453; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11
454; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9
455; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
456; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
457; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
458; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
459
460; EG: LDS_READ_RET
461; EG: LDS_READ_RET
462; EG: LDS_READ_RET
463; EG: LDS_READ_RET
464; EG: LDS_READ_RET
465; EG: LDS_READ_RET
466; EG: LDS_READ_RET
467; EG: LDS_READ_RET
468; EG: LDS_READ_RET
469; EG: LDS_READ_RET
470; EG: LDS_READ_RET
471; EG: LDS_READ_RET
472; EG: LDS_READ_RET
473; EG: LDS_READ_RET
474; EG: LDS_READ_RET
475; EG: LDS_READ_RET
; Loads <32 x i16> from %in, sign-extends each element to i32, and stores
; the <32 x i32> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
  %ext = sext <32 x i16> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
  ret void
}
482
483; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32:
484; GFX9-NOT: m0
485; SICIVI: s_mov_b32 m0
486
487; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:14 offset1:15
488; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
489; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3
490; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
491; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
492; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:8 offset1:9
493; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:12 offset1:13
494; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:10 offset1:11
495; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31
496; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29
497; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:26 offset1:27
498; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:24 offset1:25
499; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:22 offset1:23
500; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:20 offset1:21
501; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:18 offset1:19
502; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:16 offset1:17
503; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
504; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13
505; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11
506; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9
507; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
508; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
509; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
510; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
511
512; EG: LDS_READ_RET
513; EG: LDS_READ_RET
514; EG: LDS_READ_RET
515; EG: LDS_READ_RET
516; EG: LDS_READ_RET
517; EG: LDS_READ_RET
518; EG: LDS_READ_RET
519; EG: LDS_READ_RET
520; EG: LDS_READ_RET
521; EG: LDS_READ_RET
522; EG: LDS_READ_RET
523; EG: LDS_READ_RET
524; EG: LDS_READ_RET
525; EG: LDS_READ_RET
526; EG: LDS_READ_RET
527; EG: LDS_READ_RET
528; EG: LDS_READ_RET
529; EG: LDS_READ_RET
530; EG: LDS_READ_RET
531; EG: LDS_READ_RET
532; EG: LDS_READ_RET
533; EG: LDS_READ_RET
534; EG: LDS_READ_RET
535; EG: LDS_READ_RET
536; EG: LDS_READ_RET
537; EG: LDS_READ_RET
538; EG: LDS_READ_RET
539; EG: LDS_READ_RET
540; EG: LDS_READ_RET
541; EG: LDS_READ_RET
542; EG: LDS_READ_RET
543; EG: LDS_READ_RET
; Loads <64 x i16> from %in, zero-extends each element to i32, and stores
; the <64 x i32> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
  %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
  %ext = zext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
  ret void
}
550
551; FUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i32:
552; GFX9-NOT: m0
553; SICIVI: s_mov_b32 m0
554
555; EG: LDS_READ_RET
556; EG: LDS_READ_RET
557; EG: LDS_READ_RET
558; EG: LDS_READ_RET
559; EG: LDS_READ_RET
560; EG: LDS_READ_RET
561; EG: LDS_READ_RET
562; EG: LDS_READ_RET
563; EG: LDS_READ_RET
564; EG: LDS_READ_RET
565; EG: LDS_READ_RET
566; EG: LDS_READ_RET
567; EG: LDS_READ_RET
568; EG: LDS_READ_RET
569; EG: LDS_READ_RET
570; EG: LDS_READ_RET
571; EG: LDS_READ_RET
572; EG: LDS_READ_RET
573; EG: LDS_READ_RET
574; EG: LDS_READ_RET
575; EG: LDS_READ_RET
576; EG: LDS_READ_RET
577; EG: LDS_READ_RET
578; EG: LDS_READ_RET
579; EG: LDS_READ_RET
580; EG: LDS_READ_RET
581; EG: LDS_READ_RET
582; EG: LDS_READ_RET
583; EG: LDS_READ_RET
584; EG: LDS_READ_RET
585; EG: LDS_READ_RET
586; EG: LDS_READ_RET
; Loads <64 x i16> from %in, sign-extends each element to i32, and stores
; the <64 x i32> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
  %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
  %ext = sext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
  ret void
}
593
594; FUNC-LABEL: {{^}}local_zextload_i16_to_i64:
595; GFX9-NOT: m0
596; SICIVI: s_mov_b32 m0
597
598; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]],
599; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
600
601; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
602
603; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
604; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
605; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
606; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
607; EG-DAG: LDS_WRITE
; Loads an i16 from the addrspace(3) pointer %in, zero-extends it to i64,
; and stores the result to the addrspace(3) pointer %out.
define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
  %a = load i16, i16 addrspace(3)* %in
  %ext = zext i16 %a to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}
614
615; FUNC-LABEL: {{^}}local_sextload_i16_to_i64:
616; GFX9-NOT: m0
617; SICIVI: s_mov_b32 m0
618
619; FIXME: Need to optimize this sequence to avoid an extra shift.
620;  t25: i32,ch = load<LD2[%in(addrspace=3)], anyext from i16> t12, t10, undef:i32
621;          t28: i64 = any_extend t25
622;        t30: i64 = sign_extend_inreg t28, ValueType:ch:i16
623; SI: ds_read_i16 v[[LO:[0-9]+]],
624; GFX89: ds_read_u16 v[[ULO:[0-9]+]]
625; GFX89: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16
626; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
627
628; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
629
630; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
631; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
632; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
633; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
634; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
635; EG-DAG: LDS_WRITE
636; EG-DAG: 16
637; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
; Loads an i16 from the addrspace(3) pointer %in, sign-extends it to i64,
; and stores the result to the addrspace(3) pointer %out.
define amdgpu_kernel void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
  %a = load i16, i16 addrspace(3)* %in
  %ext = sext i16 %a to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}
644
645; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i64:
646; GFX9-NOT: m0
647; SICIVI: s_mov_b32 m0
648
649
650; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
651; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
652; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
653; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
654; EG-DAG: LDS_WRITE
; Single-element vector variant. Loads <1 x i16> from %in, zero-extends it
; to <1 x i64>, and stores it to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
  %ext = zext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}
661
662; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i64:
663; GFX9-NOT: m0
664; SICIVI: s_mov_b32 m0
665
666
667; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
668; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
669; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
670; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
671; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
672; EG-DAG: LDS_WRITE
673; EG-DAG: 16
674; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
; Single-element vector variant. Loads <1 x i16> from %in, sign-extends it
; to <1 x i64>, and stores it to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
  %ext = sext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}
681
682; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i64:
683; GFX9-NOT: m0
684; SICIVI: s_mov_b32 m0
685
686
687; EG: LDS_READ_RET
; Loads <2 x i16> from %in, zero-extends each element to i64, and stores
; the <2 x i64> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %ext = zext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}
694
695; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i64:
696; GFX9-NOT: m0
697; SICIVI: s_mov_b32 m0
698
699
700; EG: LDS_READ_RET
701; EG-DAG: BFE_INT
702; EG-DAG: ASHR
; Loads <2 x i16> from %in, sign-extends each element to i64, and stores
; the <2 x i64> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %ext = sext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}
709
710; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i64:
711; GFX9-NOT: m0
712; SICIVI: s_mov_b32 m0
713
714
715; EG: LDS_READ_RET
716; EG: LDS_READ_RET
; Loads <4 x i16> from %in, zero-extends each element to i64, and stores
; the <4 x i64> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %ext = zext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}
723
724; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i64:
725; GFX9-NOT: m0
726; SICIVI: s_mov_b32 m0
727
728
729; EG: LDS_READ_RET
730; EG: LDS_READ_RET
731; EG-DAG: BFE_INT
732; EG-DAG: BFE_INT
733; EG-DAG: ASHR
734; EG-DAG: ASHR
; Loads <4 x i16> from %in, sign-extends each element to i64, and stores
; the <4 x i64> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %ext = sext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}
741
742; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i64:
743; GFX9-NOT: m0
744; SICIVI: s_mov_b32 m0
745
746
747; EG: LDS_READ_RET
748; EG: LDS_READ_RET
749; EG: LDS_READ_RET
750; EG: LDS_READ_RET
; Loads <8 x i16> from %in, zero-extends each element to i64, and stores
; the <8 x i64> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
  %ext = zext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}
757
758; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i64:
759; GFX9-NOT: m0
760; SICIVI: s_mov_b32 m0
761
762
763; EG: LDS_READ_RET
764; EG: LDS_READ_RET
765; EG: LDS_READ_RET
766; EG: LDS_READ_RET
767; EG-DAG: BFE_INT
768; EG-DAG: BFE_INT
769; EG-DAG: ASHR
770; EG-DAG: ASHR
771; EG-DAG: BFE_INT
772; EG-DAG: BFE_INT
773; EG-DAG: ASHR
774; EG-DAG: ASHR
; Loads <8 x i16> from %in, sign-extends each element to i64, and stores
; the <8 x i64> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
  %ext = sext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}
781
782; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i64:
783; GFX9-NOT: m0
784; SICIVI: s_mov_b32 m0
785
786
787; EG: LDS_READ_RET
788; EG: LDS_READ_RET
789; EG: LDS_READ_RET
790; EG: LDS_READ_RET
791; EG: LDS_READ_RET
792; EG: LDS_READ_RET
793; EG: LDS_READ_RET
794; EG: LDS_READ_RET
; Loads <16 x i16> from %in, zero-extends each element to i64, and stores
; the <16 x i64> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
  %ext = zext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}
801
802; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i64:
803; GFX9-NOT: m0
804; SICIVI: s_mov_b32 m0
805
806
807; EG: LDS_READ_RET
808; EG: LDS_READ_RET
809; EG: LDS_READ_RET
810; EG: LDS_READ_RET
811; EG: LDS_READ_RET
812; EG: LDS_READ_RET
813; EG: LDS_READ_RET
814; EG: LDS_READ_RET
815; EG-DAG: BFE_INT
816; EG-DAG: BFE_INT
817; EG-DAG: ASHR
818; EG-DAG: ASHR
819; EG-DAG: BFE_INT
820; EG-DAG: BFE_INT
821; EG-DAG: ASHR
822; EG-DAG: ASHR
823; EG-DAG: BFE_INT
824; EG-DAG: BFE_INT
825; EG-DAG: ASHR
826; EG-DAG: ASHR
827; EG-DAG: BFE_INT
828; EG-DAG: BFE_INT
829; EG-DAG: ASHR
830; EG-DAG: ASHR
; Loads <16 x i16> from %in, sign-extends each element to i64, and stores
; the <16 x i64> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
  %ext = sext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}
837
838; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i64:
839; GFX9-NOT: m0
840; SICIVI: s_mov_b32 m0
841
842
843; EG: LDS_READ_RET
844; EG: LDS_READ_RET
845; EG: LDS_READ_RET
846; EG: LDS_READ_RET
847; EG: LDS_READ_RET
848; EG: LDS_READ_RET
849; EG: LDS_READ_RET
850; EG: LDS_READ_RET
851; EG: LDS_READ_RET
852; EG: LDS_READ_RET
853; EG: LDS_READ_RET
854; EG: LDS_READ_RET
855; EG: LDS_READ_RET
856; EG: LDS_READ_RET
857; EG: LDS_READ_RET
858; EG: LDS_READ_RET
; Loads <32 x i16> from %in, zero-extends each element to i64, and stores
; the <32 x i64> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
  %ext = zext <32 x i16> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
  ret void
}
865
866; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i64:
867; GFX9-NOT: m0
868; SICIVI: s_mov_b32 m0
869
870
871; EG: LDS_READ_RET
872; EG: LDS_READ_RET
873; EG: LDS_READ_RET
874; EG: LDS_READ_RET
875; EG: LDS_READ_RET
876; EG: LDS_READ_RET
877; EG: LDS_READ_RET
878; EG: LDS_READ_RET
879; EG: LDS_READ_RET
880; EG: LDS_READ_RET
881; EG: LDS_READ_RET
882; EG: LDS_READ_RET
883; EG: LDS_READ_RET
884; EG: LDS_READ_RET
885; EG: LDS_READ_RET
886; EG: LDS_READ_RET
887; EG-DAG: BFE_INT
888; EG-DAG: BFE_INT
889; EG-DAG: ASHR
890; EG-DAG: ASHR
891; EG-DAG: BFE_INT
892; EG-DAG: BFE_INT
893; EG-DAG: ASHR
894; EG-DAG: ASHR
895; EG-DAG: BFE_INT
896; EG-DAG: BFE_INT
897; EG-DAG: ASHR
898; EG-DAG: ASHR
899; EG-DAG: BFE_INT
900; EG-DAG: BFE_INT
901; EG-DAG: ASHR
902; EG-DAG: ASHR
903; EG-DAG: BFE_INT
904; EG-DAG: BFE_INT
905; EG-DAG: ASHR
906; EG-DAG: ASHR
907; EG-DAG: BFE_INT
908; EG-DAG: BFE_INT
909; EG-DAG: ASHR
910; EG-DAG: ASHR
911; EG-DAG: BFE_INT
912; EG-DAG: BFE_INT
913; EG-DAG: ASHR
914; EG-DAG: ASHR
915; EG-DAG: BFE_INT
916; EG-DAG: BFE_INT
917; EG-DAG: ASHR
918; EG-DAG: ASHR
; Loads <32 x i16> from %in, sign-extends each element to i64, and stores
; the <32 x i64> result to %out (both pointers are addrspace(3)).
define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
  %ext = sext <32 x i16> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
  ret void
}
925
926; ; XFUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i64:
927; define amdgpu_kernel void @local_zextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
928;   %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
929;   %ext = zext <64 x i16> %load to <64 x i64>
930;   store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
931;   ret void
932; }
933
934; ; XFUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i64:
935; define amdgpu_kernel void @local_sextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
936;   %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
937;   %ext = sext <64 x i16> %load to <64 x i64>
938;   store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
939;   ret void
940; }
941
; Tests that ds_read_b128/ds_write_b128 are generated for a 16-byte-aligned load and store.
943; FUNC-LABEL: {{^}}local_v8i16_to_128:
944
945; SI-NOT: ds_read_b128
946; SI-NOT: ds_write_b128
947
948; CIVI: ds_read_b128
949; CIVI: ds_write_b128
950
951; EG: LDS_READ_RET
952; EG: LDS_READ_RET
953; EG: LDS_READ_RET
954; EG: LDS_READ_RET
; Copies an <8 x i16> vector (16 bytes) from the addrspace(3) pointer %in to
; the addrspace(3) pointer %out. Unlike local_load_v8i16, both the load and
; the store carry an explicit 16-byte alignment.
define amdgpu_kernel void @local_v8i16_to_128(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) {
  %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in, align 16
  store <8 x i16> %ld, <8 x i16> addrspace(3)* %out, align 16
  ret void
}
960
961attributes #0 = { nounwind }
962