; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-SI,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN-HSA %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN-NOHSA,GCN-NOHSA-VI %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,EG %s
6
; Scalar case: load one i16 through the addrspace(4) pointer %in and store it
; through the addrspace(1) pointer %out.
; Expected lowering per the assertions below: the two non-HSA amdgcn runs use
; buffer_load_ushort / buffer_store_short, the HSA run uses flat_load_ushort /
; flat_store_short, and the r600 run fetches with VTX_READ_16 and writes the
; 16-bit value with a masked MEM_RAT MSKOR store.
; The assertion lines are autogenerated; regenerate them instead of editing by hand.
define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
; GCN-NOHSA-SI-LABEL: constant_load_i16:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: constant_load_i16:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
; GCN-HSA-NEXT:    flat_load_ushort v2, v[2:3]
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
; GCN-HSA-NEXT:    flat_store_short v[0:1], v2
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_i16:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_i16:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT:     AND_INT * T1.W, T0.X, literal.y,
; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:     MOV T0.Y, 0.0,
; EG-NEXT:     MOV * T0.Z, 0.0,
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load i16, i16 addrspace(4)* %in
  store i16 %ld, i16 addrspace(1)* %out
  ret void
}
85
; <2 x i16> case: the 32-bit vector is loaded through the addrspace(4) pointer
; %in and stored through the addrspace(1) pointer %out.
; Expected lowering per the assertions below: all three amdgcn runs fetch the
; value with a single scalar s_load_dword and write it with one dword store;
; the r600 run uses VTX_READ_32 and a single-channel STORE_RAW.
; The assertion lines are autogenerated; regenerate them instead of editing by hand.
define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) {
; GCN-NOHSA-SI-LABEL: constant_load_v2i16:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_load_dword s4, s[2:3], 0x0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: constant_load_v2i16:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    s_load_dword s0, s[2:3], 0x0
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_v2i16:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT:    s_load_dword s0, s[2:3], 0x0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v2i16:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <2 x i16>, <2 x i16> addrspace(4)* %in
  store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
  ret void
}
145
; <3 x i16> case (odd element count): load through the addrspace(4) pointer %in
; and store through the addrspace(1) pointer %out.
; Expected lowering per the assertions below: the amdgcn runs load with one
; s_load_dwordx2 and split the store into a short at byte offset 4 plus a dword
; at offset 0; the r600 run issues three VTX_READ_16 fetches, packs the first
; two elements with shift/or, and writes the third with a masked MEM_RAT MSKOR.
; The assertion lines are autogenerated; regenerate them instead of editing by hand.
define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
; GCN-NOHSA-SI-LABEL: constant_load_v3i16:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
; GCN-NOHSA-SI-NEXT:    buffer_store_short v0, off, s[0:3], 0 offset:4
; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: constant_load_v3i16:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
; GCN-HSA-NEXT:    s_add_u32 s4, s0, 4
; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s3
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s2
; GCN-HSA-NEXT:    flat_store_short v[2:3], v4
; GCN-HSA-NEXT:    flat_store_dword v[0:1], v5
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_v3i16:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s2
; GCN-NOHSA-VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 offset:4
; GCN-NOHSA-VI-NEXT:    buffer_store_dword v1, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v3i16:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 2 @6
; EG-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
; EG-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
; EG-NEXT:    CF_END
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_16 T6.X, T5.X, 0, #1
; EG-NEXT:     VTX_READ_16 T7.X, T5.X, 2, #1
; EG-NEXT:     VTX_READ_16 T5.X, T5.X, 4, #1
; EG-NEXT:    ALU clause starting at 12:
; EG-NEXT:     MOV * T5.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 13:
; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.W, PV.W, literal.x,
; EG-NEXT:     AND_INT * T2.W, T5.X, literal.y,
; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:     LSHL T5.X, T2.W, PV.W,
; EG-NEXT:     LSHL * T5.W, literal.x, PV.W,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:     MOV T5.Y, 0.0,
; EG-NEXT:     MOV * T5.Z, 0.0,
; EG-NEXT:     LSHR T8.X, T0.W, literal.x,
; EG-NEXT:     LSHL T0.W, T7.X, literal.y,
; EG-NEXT:     AND_INT * T1.W, T6.X, literal.z,
; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:     OR_INT T6.X, PV.W, PS,
; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
  ret void
}
236
; <4 x i16> case: the 64-bit vector is loaded through the addrspace(4) pointer
; %in and stored through the addrspace(1) pointer %out.
; Expected lowering per the assertions below: the amdgcn runs use one
; s_load_dwordx2 followed by a single dwordx2 store (buffer or flat); the r600
; run uses VTX_READ_64 and a two-channel STORE_RAW.
; The assertion lines are autogenerated; regenerate them instead of editing by hand.
define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) {
; GCN-NOHSA-SI-LABEL: constant_load_v4i16:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: constant_load_v4i16:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_v4i16:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v4i16:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <4 x i16>, <4 x i16> addrspace(4)* %in
  store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
  ret void
}
299
; <8 x i16> case: the 128-bit vector is loaded through the addrspace(4) pointer
; %in and stored through the addrspace(1) pointer %out.
; Expected lowering per the assertions below: the amdgcn runs use one
; s_load_dwordx4 followed by a single dwordx4 store (buffer or flat); the r600
; run uses VTX_READ_128 and a four-channel STORE_RAW.
; The assertion lines are autogenerated; regenerate them instead of editing by hand.
define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) {
; GCN-NOHSA-SI-LABEL: constant_load_v8i16:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: constant_load_v8i16:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_v8i16:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v8i16:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <8 x i16>, <8 x i16> addrspace(4)* %in
  store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
  ret void
}
368
; <16 x i16> case: the 256-bit vector is loaded through the addrspace(4) pointer
; %in and stored through the addrspace(1) pointer %out.
; Expected lowering per the assertions below: the amdgcn runs use one
; s_load_dwordx8 and split the store into two dwordx4 stores (upper half at
; byte offset 16, lower half at offset 0); the r600 run uses two VTX_READ_128
; fetches and two four-channel STORE_RAW writes.
; The assertion lines are autogenerated; regenerate them instead of editing by hand.
define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) {
; GCN-NOHSA-SI-LABEL: constant_load_v16i16:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, -1
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: constant_load_v16i16:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
; GCN-HSA-NEXT:    s_add_u32 s10, s8, 16
; GCN-HSA-NEXT:    s_addc_u32 s11, s9, 0
; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s10
; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s11
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
; GCN-HSA-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s3
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_v16i16:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, -1
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s1
; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[0:7], s[2:3], 0x0
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v6, s2
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v7, s3
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[8:11], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v16i16:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @8
; EG-NEXT:    ALU 3, @13, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    ALU 1, @17, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @10
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    Fetch clause starting at 8:
; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
; EG-NEXT:    Fetch clause starting at 10:
; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 12:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 13:
; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:     LSHR * T2.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 17:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(4)* %in
  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
  ret void
}
466
; Under-aligned <16 x i16> case: the load through the addrspace(4) pointer %ptr0
; is marked `align 2`, and the result is stored (align 32) to an undef
; addrspace(1) pointer, so only the load side is parameterised by the kernel.
; Expected lowering per the assertions below: both non-HSA amdgcn runs split the
; load into sixteen buffer_load_ushort accesses and rebuild each dword with
; v_lshlrev_b32 / v_or_b32, whereas the HSA run keeps two flat_load_dwordx4
; accesses; the r600 run uses two VTX_READ_128 fetches.
; The assertion lines are autogenerated; regenerate them instead of editing by hand.
define amdgpu_kernel void @constant_load_v16i16_align2(<16 x i16> addrspace(4)* %ptr0) #0 {
; GCN-NOHSA-SI-LABEL: constant_load_v16i16_align2:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v1, off, s[0:3], 0 offset:2
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v4, off, s[0:3], 0 offset:4
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v2, off, s[0:3], 0 offset:6
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v5, off, s[0:3], 0 offset:8
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v3, off, s[0:3], 0 offset:10
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v6, off, s[0:3], 0 offset:12
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v7, off, s[0:3], 0 offset:14
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v8, off, s[0:3], 0 offset:16
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v9, off, s[0:3], 0 offset:18
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v10, off, s[0:3], 0 offset:20
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v11, off, s[0:3], 0 offset:22
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v12, off, s[0:3], 0 offset:24
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v13, off, s[0:3], 0 offset:26
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v14, off, s[0:3], 0 offset:28
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v15, off, s[0:3], 0 offset:30
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(8)
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v16, 16, v3
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v17, 16, v2
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v18, 16, v1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v3, v7, v6
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v2, v16, v5
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v1, v17, v4
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v0, v18, v0
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v7, v15, v14
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v6, v13, v12
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v5, v11, v10
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v4, v9, v8
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: constant_load_v16i16_align2:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[0:3]
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_v16i16_align2:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v1, off, s[0:3], 0 offset:2
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v4, off, s[0:3], 0 offset:4
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v2, off, s[0:3], 0 offset:6
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v5, off, s[0:3], 0 offset:8
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v3, off, s[0:3], 0 offset:10
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v6, off, s[0:3], 0 offset:12
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v7, off, s[0:3], 0 offset:14
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v8, off, s[0:3], 0 offset:16
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v9, off, s[0:3], 0 offset:18
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v10, off, s[0:3], 0 offset:20
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v11, off, s[0:3], 0 offset:22
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v12, off, s[0:3], 0 offset:24
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v13, off, s[0:3], 0 offset:26
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v14, off, s[0:3], 0 offset:28
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v15, off, s[0:3], 0 offset:30
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(14)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v18, 16, v1
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v0, v18, v0
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(12)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v17, 16, v2
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v1, v17, v4
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(10)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v16, 16, v3
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v2, v16, v5
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(8)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v3, v7, v6
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(6)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v4, v9, v8
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(4)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v5, v11, v10
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(2)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v6, v13, v12
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v7, v15, v14
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v16i16_align2:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @8
; EG-NEXT:    ALU 1, @13, KC0[], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    TEX 0 @10
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 8:
; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
; EG-NEXT:    Fetch clause starting at 10:
; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 12:
; EG-NEXT:     MOV * T0.X, KC0[2].Y,
; EG-NEXT:    ALU clause starting at 13:
; EG-NEXT:     MOV * T2.X, literal.x,
; EG-NEXT:    0(0.000000e+00), 0(0.000000e+00)
entry:
  %ld =  load <16 x i16>, <16 x i16> addrspace(4)* %ptr0, align 2
  store <16 x i16> %ld, <16 x i16> addrspace(1)* undef, align 32
  ret void
}
603
; Zero-extending load: an i16 is loaded through the addrspace(4) pointer %in,
; widened to i32 with `zext`, and stored through the addrspace(1) pointer %out.
; Expected lowering per the assertions below: the extension is absorbed into
; the load itself (buffer_load_ushort / flat_load_ushort on amdgcn, VTX_READ_16
; on r600) with no separate masking instruction, followed by a dword store.
; The assertion lines are autogenerated; regenerate them instead of editing by hand.
define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i32:
; GCN-NOHSA-SI:       ; %bb.0:
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: constant_zextload_i16_to_i32:
; GCN-HSA:       ; %bb.0:
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
; GCN-HSA-NEXT:    flat_load_ushort v2, v[2:3]
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i32:
; GCN-NOHSA-VI:       ; %bb.0:
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: constant_zextload_i16_to_i32:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %a = load i16, i16 addrspace(4)* %in
  %ext = zext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}
672
; Sign-extending load: an i16 is loaded through the addrspace(4) pointer %in,
; widened to i32 with `sext`, and stored through the addrspace(1) pointer %out.
; Expected lowering per the assertions below: the amdgcn runs select the signed
; load forms (buffer_load_sshort / flat_load_sshort); the r600 run fetches with
; VTX_READ_16 and then sign-extends with a 16-bit BFE_INT before the dword store.
; The assertion lines are autogenerated; regenerate them instead of editing by hand.
define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i32:
; GCN-NOHSA-SI:       ; %bb.0:
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: constant_sextload_i16_to_i32:
; GCN-HSA:       ; %bb.0:
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
; GCN-HSA-NEXT:    flat_load_sshort v2, v[2:3]
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i32:
; GCN-NOHSA-VI:       ; %bb.0:
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
; GCN-NOHSA-VI-NEXT:    buffer_load_sshort v0, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: constant_sextload_i16_to_i32:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
  %a = load i16, i16 addrspace(4)* %in
  %ext = sext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}
742
; Single-element vector zero-extending load: reads a <1 x i16> from %in
; (addrspace(4)), zero-extends it to <1 x i32> and stores the result to %out
; (addrspace(1)).
; All check blocks expect one unsigned 16-bit load (buffer_load_ushort,
; flat_load_ushort or VTX_READ_16) feeding the 32-bit store directly, with no
; separate masking or extension instruction.
743define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
744; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i32:
745; GCN-NOHSA-SI:       ; %bb.0:
746; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
747; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
748; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
749; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
750; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
751; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
752; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
753; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
754; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
755; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
756; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
757; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
758; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
759; GCN-NOHSA-SI-NEXT:    s_endpgm
760;
761; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i32:
762; GCN-HSA:       ; %bb.0:
763; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
764; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
765; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
766; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
767; GCN-HSA-NEXT:    flat_load_ushort v2, v[2:3]
768; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
769; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
770; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
771; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
772; GCN-HSA-NEXT:    s_endpgm
773;
774; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i32:
775; GCN-NOHSA-VI:       ; %bb.0:
776; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
777; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
778; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
779; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
780; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
781; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
782; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
783; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
784; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
785; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
786; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
787; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
788; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
789; GCN-NOHSA-VI-NEXT:    s_endpgm
790;
791; EG-LABEL: constant_zextload_v1i16_to_v1i32:
792; EG:       ; %bb.0:
793; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
794; EG-NEXT:    TEX 0 @6
795; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
796; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
797; EG-NEXT:    CF_END
798; EG-NEXT:    PAD
799; EG-NEXT:    Fetch clause starting at 6:
800; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
801; EG-NEXT:    ALU clause starting at 8:
802; EG-NEXT:     MOV * T0.X, KC0[2].Z,
803; EG-NEXT:    ALU clause starting at 9:
804; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
805; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
806  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
807  %ext = zext <1 x i16> %load to <1 x i32>
808  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
809  ret void
810}
811
; Single-element vector sign-extending load: reads a <1 x i16> from %in
; (addrspace(4)), sign-extends it to <1 x i32> and stores the result to %out
; (addrspace(1)).
; The GCN checks expect a signed 16-bit load (buffer_load_sshort or
; flat_load_sshort); the EG checks expect VTX_READ_16 followed by a 16-bit
; BFE_INT.
812define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
813; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i32:
814; GCN-NOHSA-SI:       ; %bb.0:
815; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
816; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
817; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
818; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
819; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
820; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
821; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
822; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
823; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
824; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
825; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
826; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
827; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
828; GCN-NOHSA-SI-NEXT:    s_endpgm
829;
830; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i32:
831; GCN-HSA:       ; %bb.0:
832; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
833; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
834; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
835; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
836; GCN-HSA-NEXT:    flat_load_sshort v2, v[2:3]
837; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
838; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
839; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
840; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
841; GCN-HSA-NEXT:    s_endpgm
842;
843; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i32:
844; GCN-NOHSA-VI:       ; %bb.0:
845; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
846; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
847; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
848; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
849; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
850; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
851; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
852; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
853; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
854; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
855; GCN-NOHSA-VI-NEXT:    buffer_load_sshort v0, off, s[4:7], 0
856; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
857; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
858; GCN-NOHSA-VI-NEXT:    s_endpgm
859;
860; EG-LABEL: constant_sextload_v1i16_to_v1i32:
861; EG:       ; %bb.0:
862; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
863; EG-NEXT:    TEX 0 @6
864; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
865; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
866; EG-NEXT:    CF_END
867; EG-NEXT:    PAD
868; EG-NEXT:    Fetch clause starting at 6:
869; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
870; EG-NEXT:    ALU clause starting at 8:
871; EG-NEXT:     MOV * T0.X, KC0[2].Z,
872; EG-NEXT:    ALU clause starting at 9:
873; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
874; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
875; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
876  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
877  %ext = sext <1 x i16> %load to <1 x i32>
878  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
879  ret void
880}
881
; Two-element zero-extending load: reads a <2 x i16> from %in (addrspace(4)),
; zero-extends each element to i32 and stores the <2 x i32> result to %out
; (addrspace(1)).
; The GCN checks expect both elements to arrive in one scalar dword load
; (s_load_dword) and to be split with s_lshr_b32 by 16 (high half) and
; s_and_b32 with 0xffff (low half). The EG checks expect one VTX_READ_32 split
; the same way with LSHR and AND_INT.
882define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
883; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i32:
884; GCN-NOHSA-SI:       ; %bb.0:
885; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
886; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
887; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
888; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
889; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
890; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s2, 16
891; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s2, 0xffff
892; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
893; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
894; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s4
895; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
896; GCN-NOHSA-SI-NEXT:    s_endpgm
897;
898; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i32:
899; GCN-HSA:       ; %bb.0:
900; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
901; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
902; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
903; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
904; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
905; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
906; GCN-HSA-NEXT:    s_lshr_b32 s0, s2, 16
907; GCN-HSA-NEXT:    s_and_b32 s1, s2, 0xffff
908; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
909; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
910; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
911; GCN-HSA-NEXT:    s_endpgm
912;
913; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i32:
914; GCN-NOHSA-VI:       ; %bb.0:
915; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
916; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
917; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
918; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
919; GCN-NOHSA-VI-NEXT:    s_load_dword s2, s[2:3], 0x0
920; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
921; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
922; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
923; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s2, 16
924; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s2, 0xffff
925; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s1
926; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s0
927; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
928; GCN-NOHSA-VI-NEXT:    s_endpgm
929;
930; EG-LABEL: constant_zextload_v2i16_to_v2i32:
931; EG:       ; %bb.0:
932; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
933; EG-NEXT:    TEX 0 @6
934; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
935; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
936; EG-NEXT:    CF_END
937; EG-NEXT:    PAD
938; EG-NEXT:    Fetch clause starting at 6:
939; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
940; EG-NEXT:    ALU clause starting at 8:
941; EG-NEXT:     MOV * T4.X, KC0[2].Z,
942; EG-NEXT:    ALU clause starting at 9:
943; EG-NEXT:     LSHR * T4.Y, T4.X, literal.x,
944; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
945; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
946; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
947; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
948  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
949  %ext = zext <2 x i16> %load to <2 x i32>
950  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
951  ret void
952}
953
954; TODO: On EG we should use ASHR instead of LSHR + BFE_INT
; Two-element sign-extending load: reads a <2 x i16> from %in (addrspace(4)),
; sign-extends each element to i32 and stores the <2 x i32> result to %out
; (addrspace(1)).
; The GCN checks expect one scalar dword load (s_load_dword), with the high
; element produced by s_ashr_i32 by 16 and the low element by s_sext_i32_i16.
; The EG checks expect one VTX_READ_32, a BFE_INT for the low element, and an
; LSHR by 16 followed by a second BFE_INT for the high element.
955define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
956; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i32:
957; GCN-NOHSA-SI:       ; %bb.0:
958; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
959; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
960; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
961; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
962; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
963; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s4, s2, 16
964; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s2
965; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
966; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
967; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s4
968; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
969; GCN-NOHSA-SI-NEXT:    s_endpgm
970;
971; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i32:
972; GCN-HSA:       ; %bb.0:
973; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
974; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
975; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
976; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
977; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
978; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
979; GCN-HSA-NEXT:    s_ashr_i32 s0, s2, 16
980; GCN-HSA-NEXT:    s_sext_i32_i16 s1, s2
981; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
982; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
983; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
984; GCN-HSA-NEXT:    s_endpgm
985;
986; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i32:
987; GCN-NOHSA-VI:       ; %bb.0:
988; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
989; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
990; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
991; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
992; GCN-NOHSA-VI-NEXT:    s_load_dword s2, s[2:3], 0x0
993; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
994; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
995; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
996; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s0, s2, 16
997; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s1, s2
998; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s1
999; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s0
1000; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1001; GCN-NOHSA-VI-NEXT:    s_endpgm
1002;
1003; EG-LABEL: constant_sextload_v2i16_to_v2i32:
1004; EG:       ; %bb.0:
1005; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1006; EG-NEXT:    TEX 0 @6
1007; EG-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
1008; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
1009; EG-NEXT:    CF_END
1010; EG-NEXT:    PAD
1011; EG-NEXT:    Fetch clause starting at 6:
1012; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1013; EG-NEXT:    ALU clause starting at 8:
1014; EG-NEXT:     MOV * T4.X, KC0[2].Z,
1015; EG-NEXT:    ALU clause starting at 9:
1016; EG-NEXT:     BFE_INT T5.X, T4.X, 0.0, literal.x,
1017; EG-NEXT:     LSHR T0.W, T4.X, literal.x,
1018; EG-NEXT:     LSHR * T4.X, KC0[2].Y, literal.y,
1019; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1020; EG-NEXT:     BFE_INT * T5.Y, PV.W, 0.0, literal.x,
1021; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1022  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
1023  %ext = sext <2 x i16> %load to <2 x i32>
1024  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
1025  ret void
1026}
1027
; Three-element zero-extending load: reads a <3 x i16> from %in
; (addrspace(4)), zero-extends each element to i32 and stores the <3 x i32>
; result to %out (addrspace(1)).
; The GCN checks expect one s_load_dwordx2 whose halves are split with
; s_lshr_b32 and s_and_b32 against a 0xffff mask held in an SGPR. The SI checks
; store the result as a dword at offset 8 plus a dwordx2, while the HSA and VI
; checks use a single dwordx3 store. The EG checks expect three VTX_READ_16
; fetches and two stores.
; The body starts with an explicit "entry" label, which is why the expected
; basic-block comments read "%bb.0: ; %entry".
1028define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
1029; GCN-NOHSA-SI-LABEL: constant_zextload_v3i16_to_v3i32:
1030; GCN-NOHSA-SI:       ; %bb.0: ; %entry
1031; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1032; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1033; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1034; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1035; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1036; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, 0xffff
1037; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1038; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s7, s4, 16
1039; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s6
1040; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, s6
1041; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
1042; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
1043; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1044; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1045; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s7
1046; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1047; GCN-NOHSA-SI-NEXT:    s_endpgm
1048;
1049; GCN-HSA-LABEL: constant_zextload_v3i16_to_v3i32:
1050; GCN-HSA:       ; %bb.0: ; %entry
1051; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1052; GCN-HSA-NEXT:    s_mov_b32 s6, 0xffff
1053; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1054; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1055; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s1
1056; GCN-HSA-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1057; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1058; GCN-HSA-NEXT:    s_lshr_b32 s2, s0, 16
1059; GCN-HSA-NEXT:    s_and_b32 s1, s1, s6
1060; GCN-HSA-NEXT:    s_and_b32 s0, s0, s6
1061; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
1062; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s2
1063; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1064; GCN-HSA-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1065; GCN-HSA-NEXT:    s_endpgm
1066;
1067; GCN-NOHSA-VI-LABEL: constant_zextload_v3i16_to_v3i32:
1068; GCN-NOHSA-VI:       ; %bb.0: ; %entry
1069; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1070; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, 0xffff
1071; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1072; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1073; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1074; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1075; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1076; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
1077; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1078; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s4, 16
1079; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, s8
1080; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, s8
1081; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1082; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s6
1083; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1084; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
1085; GCN-NOHSA-VI-NEXT:    s_endpgm
1086;
1087; EG-LABEL: constant_zextload_v3i16_to_v3i32:
1088; EG:       ; %bb.0: ; %entry
1089; EG-NEXT:    ALU 4, @12, KC0[CB0:0-32], KC1[]
1090; EG-NEXT:    TEX 2 @6
1091; EG-NEXT:    ALU 2, @17, KC0[], KC1[]
1092; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0
1093; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1
1094; EG-NEXT:    CF_END
1095; EG-NEXT:    Fetch clause starting at 6:
1096; EG-NEXT:     VTX_READ_16 T2.X, T1.X, 4, #1
1097; EG-NEXT:     VTX_READ_16 T3.X, T1.X, 0, #1
1098; EG-NEXT:     VTX_READ_16 T1.X, T1.X, 2, #1
1099; EG-NEXT:    ALU clause starting at 12:
1100; EG-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
1101; EG-NEXT:     MOV * T1.X, KC0[2].Z,
1102; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1103; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
1104; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1105; EG-NEXT:    ALU clause starting at 17:
1106; EG-NEXT:     LSHR T4.X, T0.W, literal.x,
1107; EG-NEXT:     MOV * T3.Y, T1.X,
1108; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1109entry:
1110  %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
1111  %ext = zext <3 x i16> %ld to <3 x i32>
1112  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
1113  ret void
1114}
1115
; Three-element sign-extending load: reads a <3 x i16> from %in
; (addrspace(4)), sign-extends each element to i32 and stores the <3 x i32>
; result to %out (addrspace(1)).
; The GCN checks expect one s_load_dwordx2, with s_ashr_i32 by 16 producing
; element 1 and s_sext_i32_i16 producing elements 0 and 2. The SI checks store
; the result as a dword at offset 8 plus a dwordx2, while the HSA and VI checks
; use a single dwordx3 store. The EG checks expect three VTX_READ_16 fetches,
; each followed by a 16-bit BFE_INT.
; The body starts with an explicit "entry" label, which is why the expected
; basic-block comments read "%bb.0: ; %entry".
1116define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
1117; GCN-NOHSA-SI-LABEL: constant_sextload_v3i16_to_v3i32:
1118; GCN-NOHSA-SI:       ; %bb.0: ; %entry
1119; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1120; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1121; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1122; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1123; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1124; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1125; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s6, s4, 16
1126; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1127; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1128; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
1129; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
1130; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1131; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1132; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s6
1133; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1134; GCN-NOHSA-SI-NEXT:    s_endpgm
1135;
1136; GCN-HSA-LABEL: constant_sextload_v3i16_to_v3i32:
1137; GCN-HSA:       ; %bb.0: ; %entry
1138; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1139; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1140; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1141; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s1
1142; GCN-HSA-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1143; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1144; GCN-HSA-NEXT:    s_ashr_i32 s2, s0, 16
1145; GCN-HSA-NEXT:    s_sext_i32_i16 s1, s1
1146; GCN-HSA-NEXT:    s_sext_i32_i16 s0, s0
1147; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
1148; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s2
1149; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1150; GCN-HSA-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1151; GCN-HSA-NEXT:    s_endpgm
1152;
1153; GCN-NOHSA-VI-LABEL: constant_sextload_v3i16_to_v3i32:
1154; GCN-NOHSA-VI:       ; %bb.0: ; %entry
1155; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1156; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1157; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1158; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1159; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1160; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1161; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
1162; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1163; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s6, s4, 16
1164; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
1165; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
1166; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1167; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s6
1168; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1169; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
1170; GCN-NOHSA-VI-NEXT:    s_endpgm
1171;
1172; EG-LABEL: constant_sextload_v3i16_to_v3i32:
1173; EG:       ; %bb.0: ; %entry
1174; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
1175; EG-NEXT:    TEX 2 @6
1176; EG-NEXT:    ALU 9, @13, KC0[CB0:0-32], KC1[]
1177; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
1178; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
1179; EG-NEXT:    CF_END
1180; EG-NEXT:    Fetch clause starting at 6:
1181; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 2, #1
1182; EG-NEXT:     VTX_READ_16 T2.X, T0.X, 4, #1
1183; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1184; EG-NEXT:    ALU clause starting at 12:
1185; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1186; EG-NEXT:    ALU clause starting at 13:
1187; EG-NEXT:     BFE_INT * T0.Y, T1.X, 0.0, literal.x,
1188; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1189; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
1190; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
1191; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1192; EG-NEXT:     BFE_INT T2.X, T2.X, 0.0, literal.x,
1193; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1194; EG-NEXT:    16(2.242078e-44), 8(1.121039e-44)
1195; EG-NEXT:     LSHR * T3.X, PV.W, literal.x,
1196; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1197entry:
1198  %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
1199  %ext = sext <3 x i16> %ld to <3 x i32>
1200  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
1201  ret void
1202}
1203
1204; v4i16 is naturally 8-byte aligned
1205; TODO: This should use LD, but for some reason there are redundant MOVs
; Four-element zero-extending load: reads a <4 x i16> from %in (addrspace(4)),
; zero-extends each element to i32 and stores the <4 x i32> result to %out
; (addrspace(1)).
; The GCN checks expect one s_load_dwordx2; each dword is split with
; s_lshr_b32 by 16 and s_and_b32 against a 0xffff mask held in an SGPR, and the
; four results are written with one dwordx4 store. The EG checks expect one
; VTX_READ_64 followed by four MOVs before the LSHR / AND_INT split.
1206define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
1207; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i32:
1208; GCN-NOHSA-SI:       ; %bb.0:
1209; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1210; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1211; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1212; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1213; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, 0xffff
1214; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1215; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s6, s5, 16
1216; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s7, s4, 16
1217; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s2
1218; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, s2
1219; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1220; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1221; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s7
1222; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1223; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s6
1224; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1225; GCN-NOHSA-SI-NEXT:    s_endpgm
1226;
1227; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i32:
1228; GCN-HSA:       ; %bb.0:
1229; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1230; GCN-HSA-NEXT:    s_mov_b32 s6, 0xffff
1231; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1232; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1233; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1234; GCN-HSA-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1235; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1236; GCN-HSA-NEXT:    s_lshr_b32 s2, s1, 16
1237; GCN-HSA-NEXT:    s_lshr_b32 s3, s0, 16
1238; GCN-HSA-NEXT:    s_and_b32 s1, s1, s6
1239; GCN-HSA-NEXT:    s_and_b32 s0, s0, s6
1240; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
1241; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1242; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1243; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
1244; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1245; GCN-HSA-NEXT:    s_endpgm
1246;
1247; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i32:
1248; GCN-NOHSA-VI:       ; %bb.0:
1249; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1250; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, 0xffff
1251; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1252; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1253; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1254; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1255; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1256; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
1257; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1258; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s5, 16
1259; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s7, s4, 16
1260; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, s8
1261; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, s8
1262; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1263; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s7
1264; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1265; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s6
1266; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1267; GCN-NOHSA-VI-NEXT:    s_endpgm
1268;
1269; EG-LABEL: constant_zextload_v4i16_to_v4i32:
1270; EG:       ; %bb.0:
1271; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1272; EG-NEXT:    TEX 0 @6
1273; EG-NEXT:    ALU 12, @9, KC0[CB0:0-32], KC1[]
1274; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
1275; EG-NEXT:    CF_END
1276; EG-NEXT:    PAD
1277; EG-NEXT:    Fetch clause starting at 6:
1278; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1279; EG-NEXT:    ALU clause starting at 8:
1280; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1281; EG-NEXT:    ALU clause starting at 9:
1282; EG-NEXT:     MOV T2.X, T5.X,
1283; EG-NEXT:     MOV * T3.X, T5.Y,
1284; EG-NEXT:     MOV T0.Y, PV.X,
1285; EG-NEXT:     MOV * T0.Z, PS,
1286; EG-NEXT:     LSHR * T5.W, PV.Z, literal.x,
1287; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1288; EG-NEXT:     AND_INT * T5.Z, T0.Z, literal.x,
1289; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1290; EG-NEXT:     LSHR * T5.Y, T0.Y, literal.x,
1291; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1292; EG-NEXT:     AND_INT T5.X, T0.Y, literal.x,
1293; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.y,
1294; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1295  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
1296  %ext = zext <4 x i16> %load to <4 x i32>
1297  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
1298  ret void
1299}
1300
1301; v4i16 is naturally 8-byte aligned
1302; TODO: This should use LD, but for some reason there are redundant MOVs
1303; TODO: On EG we should use ASHR instead of LSHR + BFE_INT
; Four-element sign-extending load: reads a <4 x i16> from %in (addrspace(4)),
; sign-extends each element to i32 and stores the <4 x i32> result to %out
; (addrspace(1)).
; The GCN checks expect one s_load_dwordx2, with s_sext_i32_i16 producing the
; low element of each dword and s_ashr_i32 by 16 producing element 1. For
; element 3 the SI and HSA checks expect s_ashr_i64 by 48 on the loaded pair,
; whereas the VI checks expect s_ashr_i32 by 16 on the second dword.
; The EG checks expect one VTX_READ_64 followed by four MOVs, with BFE_INT for
; the low elements and LSHR by 16 followed by BFE_INT for the high elements.
1304define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
1305; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i32:
1306; GCN-NOHSA-SI:       ; %bb.0:
1307; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1308; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1309; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1310; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1311; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1312; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s8, s4, 16
1313; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[4:5], 48
1314; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1315; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1316; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1317; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1318; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s8
1319; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1320; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s6
1321; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1322; GCN-NOHSA-SI-NEXT:    s_endpgm
1323;
1324; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i32:
1325; GCN-HSA:       ; %bb.0:
1326; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1327; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1328; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1329; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1330; GCN-HSA-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1331; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1332; GCN-HSA-NEXT:    s_ashr_i64 s[2:3], s[0:1], 48
1333; GCN-HSA-NEXT:    s_ashr_i32 s4, s0, 16
1334; GCN-HSA-NEXT:    s_sext_i32_i16 s1, s1
1335; GCN-HSA-NEXT:    s_sext_i32_i16 s0, s0
1336; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
1337; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s4
1338; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1339; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
1340; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1341; GCN-HSA-NEXT:    s_endpgm
1342;
1343; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i32:
1344; GCN-NOHSA-VI:       ; %bb.0:
1345; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1346; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1347; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1348; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1349; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1350; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1351; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
1352; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1353; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s6, s5, 16
1354; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s7, s4, 16
1355; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
1356; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
1357; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1358; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s7
1359; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1360; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s6
1361; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1362; GCN-NOHSA-VI-NEXT:    s_endpgm
1363;
1364; EG-LABEL: constant_sextload_v4i16_to_v4i32:
1365; EG:       ; %bb.0:
1366; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1367; EG-NEXT:    TEX 0 @6
1368; EG-NEXT:    ALU 14, @9, KC0[CB0:0-32], KC1[]
1369; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
1370; EG-NEXT:    CF_END
1371; EG-NEXT:    PAD
1372; EG-NEXT:    Fetch clause starting at 6:
1373; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1374; EG-NEXT:    ALU clause starting at 8:
1375; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1376; EG-NEXT:    ALU clause starting at 9:
1377; EG-NEXT:     MOV T2.X, T5.X,
1378; EG-NEXT:     MOV * T3.X, T5.Y,
1379; EG-NEXT:     MOV T0.Y, PV.X,
1380; EG-NEXT:     MOV * T0.Z, PS,
1381; EG-NEXT:     BFE_INT * T5.Z, PV.Z, 0.0, literal.x,
1382; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1383; EG-NEXT:     BFE_INT T5.X, T0.Y, 0.0, literal.x,
1384; EG-NEXT:     LSHR * T0.W, T0.Z, literal.x,
1385; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1386; EG-NEXT:     BFE_INT T5.W, PV.W, 0.0, literal.x,
1387; EG-NEXT:     LSHR * T0.W, T0.Y, literal.x,
1388; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1389; EG-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
1390; EG-NEXT:     BFE_INT * T5.Y, PS, 0.0, literal.y,
1391; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1392  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
1393  %ext = sext <4 x i16> %load to <4 x i32>
1394  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
1395  ret void
1396}
1397
1398; v8i16 is naturally 16 byte aligned
1399; TODO: These should use LSHR instead of BFE_UINT
1400; TODO: This should use DST, but for some reason there are redundant MOVs
; Test: load <8 x i16> from %in (addrspace(4)), zext every lane to i32, and
; store the resulting <8 x i32> to %out (addrspace(1)).
; As currently generated, the GCN targets fetch the 16 bytes of data with a
; single s_load_dwordx4 and split each dword with s_lshr_b32 / s_and_b32, while
; the EG target uses one VTX_READ_128 followed by LSHR / AND_INT.
; The assertion lines inside the body are autogenerated (see the NOTE on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1401define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
1402; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i32:
1403; GCN-NOHSA-SI:       ; %bb.0:
1404; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1405; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1406; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1407; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1408; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1409; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, 0xffff
1410; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1411; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s9, s5, 16
1412; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s10, s4, 16
1413; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s11, s7, 16
1414; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s6, 16
1415; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s8
1416; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, s8
1417; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, s8
1418; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, s8
1419; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
1420; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s12
1421; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
1422; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s11
1423; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1424; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1425; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1426; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s10
1427; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1428; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
1429; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1430; GCN-NOHSA-SI-NEXT:    s_endpgm
1431;
1432; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i32:
1433; GCN-HSA:       ; %bb.0:
1434; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1435; GCN-HSA-NEXT:    s_mov_b32 s8, 0xffff
1436; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1437; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1438; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1439; GCN-HSA-NEXT:    s_lshr_b32 s2, s7, 16
1440; GCN-HSA-NEXT:    s_lshr_b32 s3, s6, 16
1441; GCN-HSA-NEXT:    s_lshr_b32 s9, s5, 16
1442; GCN-HSA-NEXT:    s_lshr_b32 s10, s4, 16
1443; GCN-HSA-NEXT:    s_and_b32 s7, s7, s8
1444; GCN-HSA-NEXT:    s_and_b32 s6, s6, s8
1445; GCN-HSA-NEXT:    s_and_b32 s5, s5, s8
1446; GCN-HSA-NEXT:    s_and_b32 s4, s4, s8
1447; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
1448; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1449; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1450; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1451; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1452; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
1453; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
1454; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1455; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1456; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1457; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
1458; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s10
1459; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
1460; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s9
1461; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1462; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1463; GCN-HSA-NEXT:    s_endpgm
1464;
1465; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i32:
1466; GCN-NOHSA-VI:       ; %bb.0:
1467; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1468; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, 0xffff
1469; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1470; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1471; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1472; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1473; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1474; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[6:7], 0x0
1475; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1476; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s11, s7, 16
1477; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s12, s6, 16
1478; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, s8
1479; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, s8
1480; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s9, s5, 16
1481; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s4, 16
1482; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, s8
1483; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, s8
1484; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
1485; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s12
1486; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
1487; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s11
1488; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1489; GCN-NOHSA-VI-NEXT:    s_nop 0
1490; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1491; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s10
1492; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1493; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s9
1494; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1495; GCN-NOHSA-VI-NEXT:    s_endpgm
1496;
1497; EG-LABEL: constant_zextload_v8i16_to_v8i32:
1498; EG:       ; %bb.0:
1499; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1500; EG-NEXT:    TEX 0 @6
1501; EG-NEXT:    ALU 17, @9, KC0[CB0:0-32], KC1[]
1502; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
1503; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
1504; EG-NEXT:    CF_END
1505; EG-NEXT:    Fetch clause starting at 6:
1506; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
1507; EG-NEXT:    ALU clause starting at 8:
1508; EG-NEXT:     MOV * T7.X, KC0[2].Z,
1509; EG-NEXT:    ALU clause starting at 9:
1510; EG-NEXT:     LSHR * T8.W, T7.Y, literal.x,
1511; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1512; EG-NEXT:     AND_INT * T8.Z, T7.Y, literal.x,
1513; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1514; EG-NEXT:     LSHR T8.Y, T7.X, literal.x,
1515; EG-NEXT:     LSHR * T9.W, T7.W, literal.x,
1516; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1517; EG-NEXT:     AND_INT T8.X, T7.X, literal.x,
1518; EG-NEXT:     AND_INT T9.Z, T7.W, literal.x,
1519; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.y,
1520; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1521; EG-NEXT:     LSHR * T9.Y, T7.Z, literal.x,
1522; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1523; EG-NEXT:     AND_INT T9.X, T7.Z, literal.x,
1524; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1525; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
1526; EG-NEXT:     LSHR * T10.X, PV.W, literal.x,
1527; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1528  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
1529  %ext = zext <8 x i16> %load to <8 x i32>
1530  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
1531  ret void
1532}
1533
1534; v8i16 is naturally 16 byte aligned
1535; TODO: 4 of these should use ASHR instead of LSHR + BFE_INT
1536; TODO: This should use DST, but for some reason there are redundant MOVs
; Test: load <8 x i16> from %in (addrspace(4)), sext every lane to i32, and
; store the resulting <8 x i32> to %out (addrspace(1)).
; As currently generated, the GCN targets fetch the 16 bytes of data with a
; single s_load_dwordx4 and extend with s_ashr_i32 (shift by 16) and
; s_sext_i32_i16, while the EG target uses one VTX_READ_128 followed by
; LSHR / BFE_INT.
; The assertion lines inside the body are autogenerated (see the NOTE on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1537define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
1538; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i32:
1539; GCN-NOHSA-SI:       ; %bb.0:
1540; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1541; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1542; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1543; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1544; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1545; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1546; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s8, s5, 16
1547; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s9, s4, 16
1548; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1549; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s10, s7, 16
1550; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s11, s6, 16
1551; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
1552; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
1553; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1554; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
1555; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
1556; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
1557; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s10
1558; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1559; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1560; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1561; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s9
1562; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1563; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s8
1564; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1565; GCN-NOHSA-SI-NEXT:    s_endpgm
1566;
1567; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i32:
1568; GCN-HSA:       ; %bb.0:
1569; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1570; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1571; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1572; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1573; GCN-HSA-NEXT:    s_ashr_i32 s2, s7, 16
1574; GCN-HSA-NEXT:    s_ashr_i32 s3, s6, 16
1575; GCN-HSA-NEXT:    s_ashr_i32 s8, s5, 16
1576; GCN-HSA-NEXT:    s_ashr_i32 s9, s4, 16
1577; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
1578; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1579; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1580; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1581; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s7
1582; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s6
1583; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1584; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
1585; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
1586; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1587; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1588; GCN-HSA-NEXT:    s_sext_i32_i16 s5, s5
1589; GCN-HSA-NEXT:    s_sext_i32_i16 s4, s4
1590; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1591; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
1592; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
1593; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
1594; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s8
1595; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1596; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1597; GCN-HSA-NEXT:    s_endpgm
1598;
1599; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i32:
1600; GCN-NOHSA-VI:       ; %bb.0:
1601; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1602; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1603; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1604; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1605; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1606; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1607; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[6:7], 0x0
1608; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1609; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s10, s7, 16
1610; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s11, s6, 16
1611; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
1612; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
1613; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s8, s5, 16
1614; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s9, s4, 16
1615; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
1616; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
1617; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
1618; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s11
1619; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
1620; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s10
1621; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1622; GCN-NOHSA-VI-NEXT:    s_nop 0
1623; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1624; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
1625; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1626; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s8
1627; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1628; GCN-NOHSA-VI-NEXT:    s_endpgm
1629;
1630; EG-LABEL: constant_sextload_v8i16_to_v8i32:
1631; EG:       ; %bb.0:
1632; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1633; EG-NEXT:    TEX 0 @6
1634; EG-NEXT:    ALU 19, @9, KC0[CB0:0-32], KC1[]
1635; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
1636; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
1637; EG-NEXT:    CF_END
1638; EG-NEXT:    Fetch clause starting at 6:
1639; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
1640; EG-NEXT:    ALU clause starting at 8:
1641; EG-NEXT:     MOV * T7.X, KC0[2].Z,
1642; EG-NEXT:    ALU clause starting at 9:
1643; EG-NEXT:     BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
1644; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1645; EG-NEXT:     BFE_INT T8.X, T7.X, 0.0, literal.x,
1646; EG-NEXT:     BFE_INT T9.Z, T7.W, 0.0, literal.x,
1647; EG-NEXT:     LSHR * T0.W, T7.Y, literal.x,
1648; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1649; EG-NEXT:     BFE_INT T9.X, T7.Z, 0.0, literal.x,
1650; EG-NEXT:     LSHR T0.Z, T7.W, literal.x,
1651; EG-NEXT:     BFE_INT T8.W, PV.W, 0.0, literal.x,
1652; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
1653; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1654; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
1655; EG-NEXT:     BFE_INT T8.Y, PS, 0.0, literal.y,
1656; EG-NEXT:     LSHR T1.Z, T7.Z, literal.y,
1657; EG-NEXT:     BFE_INT T9.W, PV.Z, 0.0, literal.y,
1658; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1659; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1660; EG-NEXT:     LSHR T10.X, PS, literal.x,
1661; EG-NEXT:     BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
1662; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1663  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
1664  %ext = sext <8 x i16> %load to <8 x i32>
1665  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
1666  ret void
1667}
1668
; Test: load <16 x i16> from %in (addrspace(4)), zext every lane to i32, and
; store the resulting <16 x i32> to %out (addrspace(1)).
; As currently generated, the GCN targets fetch the 32 bytes of data with a
; single s_load_dwordx8 and write the result with four dwordx4 stores (byte
; offsets 48, 32, 16 and 0), while the EG target uses two VTX_READ_128 fetches
; and four MEM_RAT_CACHELESS stores.
; The assertion lines inside the body are autogenerated (see the NOTE on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1669define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
1670; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i32:
1671; GCN-NOHSA-SI:       ; %bb.0:
1672; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x9
1673; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1674; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
1675; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, 0xf000
1676; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, -1
1677; GCN-NOHSA-SI-NEXT:    s_mov_b32 s12, 0xffff
1678; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1679; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s13, s1, 16
1680; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s0, 16
1681; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s15, s3, 16
1682; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s16, s2, 16
1683; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s17, s5, 16
1684; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s4, 16
1685; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s7, 16
1686; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s6, 16
1687; GCN-NOHSA-SI-NEXT:    s_and_b32 s1, s1, s12
1688; GCN-NOHSA-SI-NEXT:    s_and_b32 s0, s0, s12
1689; GCN-NOHSA-SI-NEXT:    s_and_b32 s3, s3, s12
1690; GCN-NOHSA-SI-NEXT:    s_and_b32 s2, s2, s12
1691; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s12
1692; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, s12
1693; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, s12
1694; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, s12
1695; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
1696; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s20
1697; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
1698; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s19
1699; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48
1700; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1701; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1702; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s18
1703; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1704; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s17
1705; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32
1706; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1707; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s2
1708; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s16
1709; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s3
1710; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s15
1711; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
1712; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1713; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s0
1714; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s14
1715; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s1
1716; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s13
1717; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
1718; GCN-NOHSA-SI-NEXT:    s_endpgm
1719;
1720; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i32:
1721; GCN-HSA:       ; %bb.0:
1722; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1723; GCN-HSA-NEXT:    s_mov_b32 s12, 0xffff
1724; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1725; GCN-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
1726; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1727; GCN-HSA-NEXT:    s_lshr_b32 s2, s11, 16
1728; GCN-HSA-NEXT:    s_lshr_b32 s3, s10, 16
1729; GCN-HSA-NEXT:    s_lshr_b32 s13, s5, 16
1730; GCN-HSA-NEXT:    s_lshr_b32 s14, s4, 16
1731; GCN-HSA-NEXT:    s_lshr_b32 s15, s7, 16
1732; GCN-HSA-NEXT:    s_lshr_b32 s16, s6, 16
1733; GCN-HSA-NEXT:    s_lshr_b32 s17, s9, 16
1734; GCN-HSA-NEXT:    s_lshr_b32 s18, s8, 16
1735; GCN-HSA-NEXT:    s_and_b32 s11, s11, s12
1736; GCN-HSA-NEXT:    s_and_b32 s10, s10, s12
1737; GCN-HSA-NEXT:    s_and_b32 s5, s5, s12
1738; GCN-HSA-NEXT:    s_and_b32 s4, s4, s12
1739; GCN-HSA-NEXT:    s_and_b32 s7, s7, s12
1740; GCN-HSA-NEXT:    s_and_b32 s6, s6, s12
1741; GCN-HSA-NEXT:    s_and_b32 s9, s9, s12
1742; GCN-HSA-NEXT:    s_and_b32 s8, s8, s12
1743; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
1744; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
1745; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1746; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1747; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1748; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1749; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
1750; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
1751; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
1752; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1753; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1754; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1755; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1756; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1757; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
1758; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s18
1759; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
1760; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s17
1761; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1762; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1763; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1764; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
1765; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s16
1766; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
1767; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s15
1768; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1769; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1770; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1771; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
1772; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s14
1773; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
1774; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s13
1775; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1776; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1777; GCN-HSA-NEXT:    s_endpgm
1778;
1779; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i32:
1780; GCN-NOHSA-VI:       ; %bb.0:
1781; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1782; GCN-NOHSA-VI-NEXT:    s_mov_b32 s12, 0xffff
1783; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1784; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1785; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1786; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1787; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1788; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[4:11], s[6:7], 0x0
1789; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1790; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s19, s11, 16
1791; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s20, s10, 16
1792; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s11, s12
1793; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s10, s12
1794; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s17, s9, 16
1795; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s18, s8, 16
1796; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s9, s12
1797; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, s12
1798; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
1799; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s20
1800; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
1801; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s19
1802; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
1803; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s15, s7, 16
1804; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s16, s6, 16
1805; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, s12
1806; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, s12
1807; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
1808; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s18
1809; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
1810; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s17
1811; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
1812; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s13, s5, 16
1813; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s14, s4, 16
1814; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, s12
1815; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, s12
1816; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
1817; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s16
1818; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
1819; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s15
1820; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1821; GCN-NOHSA-VI-NEXT:    s_nop 0
1822; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1823; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s14
1824; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1825; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s13
1826; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1827; GCN-NOHSA-VI-NEXT:    s_endpgm
1828;
1829; EG-LABEL: constant_zextload_v16i16_to_v16i32:
1830; EG:       ; %bb.0:
1831; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
1832; EG-NEXT:    TEX 1 @8
1833; EG-NEXT:    ALU 35, @13, KC0[CB0:0-32], KC1[]
1834; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
1835; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0
1836; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
1837; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1
1838; EG-NEXT:    CF_END
1839; EG-NEXT:    Fetch clause starting at 8:
1840; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
1841; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
1842; EG-NEXT:    ALU clause starting at 12:
1843; EG-NEXT:     MOV * T11.X, KC0[2].Z,
1844; EG-NEXT:    ALU clause starting at 13:
1845; EG-NEXT:     LSHR * T13.W, T12.Y, literal.x,
1846; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1847; EG-NEXT:     AND_INT * T13.Z, T12.Y, literal.x,
1848; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1849; EG-NEXT:     LSHR T13.Y, T12.X, literal.x,
1850; EG-NEXT:     LSHR * T14.W, T12.W, literal.x,
1851; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1852; EG-NEXT:     AND_INT T13.X, T12.X, literal.x,
1853; EG-NEXT:     AND_INT T14.Z, T12.W, literal.x,
1854; EG-NEXT:     LSHR * T12.X, KC0[2].Y, literal.y,
1855; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1856; EG-NEXT:     LSHR T14.Y, T12.Z, literal.x,
1857; EG-NEXT:     LSHR * T15.W, T11.Y, literal.x,
1858; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1859; EG-NEXT:     AND_INT T14.X, T12.Z, literal.x,
1860; EG-NEXT:     AND_INT T15.Z, T11.Y, literal.x,
1861; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1862; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
1863; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
1864; EG-NEXT:     LSHR T15.Y, T11.X, literal.y,
1865; EG-NEXT:     LSHR T17.W, T11.W, literal.y,
1866; EG-NEXT:     AND_INT * T15.X, T11.X, literal.z,
1867; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1868; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1869; EG-NEXT:     AND_INT T17.Z, T11.W, literal.x,
1870; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1871; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
1872; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
1873; EG-NEXT:     LSHR T17.Y, T11.Z, literal.y,
1874; EG-NEXT:     AND_INT * T17.X, T11.Z, literal.z,
1875; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1876; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1877; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
1878; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
1879; EG-NEXT:     LSHR * T18.X, PV.W, literal.x,
1880; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1881  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
1882  %ext = zext <16 x i16> %load to <16 x i32>
1883  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
1884  ret void
1885}
1886
1887define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
1888; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i32:
1889; GCN-NOHSA-SI:       ; %bb.0:
1890; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x9
1891; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1892; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
1893; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, 0xf000
1894; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, -1
1895; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1896; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s12, s1, 16
1897; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s13, s0, 16
1898; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s1, s1
1899; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s0, s0
1900; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s14, s3, 16
1901; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s15, s2, 16
1902; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s3, s3
1903; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s2, s2
1904; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s16, s5, 16
1905; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s17, s4, 16
1906; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1907; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s18, s7, 16
1908; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s19, s6, 16
1909; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
1910; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
1911; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1912; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
1913; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
1914; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
1915; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
1916; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48
1917; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1918; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1919; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s17
1920; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1921; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s16
1922; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32
1923; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1924; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s2
1925; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s15
1926; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s3
1927; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s14
1928; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
1929; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1930; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s0
1931; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s13
1932; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s1
1933; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s12
1934; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
1935; GCN-NOHSA-SI-NEXT:    s_endpgm
1936;
1937; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i32:
1938; GCN-HSA:       ; %bb.0:
1939; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1940; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1941; GCN-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
1942; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1943; GCN-HSA-NEXT:    s_ashr_i32 s2, s11, 16
1944; GCN-HSA-NEXT:    s_ashr_i32 s3, s10, 16
1945; GCN-HSA-NEXT:    s_ashr_i32 s12, s5, 16
1946; GCN-HSA-NEXT:    s_ashr_i32 s13, s4, 16
1947; GCN-HSA-NEXT:    s_ashr_i32 s14, s7, 16
1948; GCN-HSA-NEXT:    s_ashr_i32 s15, s6, 16
1949; GCN-HSA-NEXT:    s_ashr_i32 s16, s9, 16
1950; GCN-HSA-NEXT:    s_ashr_i32 s17, s8, 16
1951; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
1952; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
1953; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1954; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1955; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1956; GCN-HSA-NEXT:    s_sext_i32_i16 s11, s11
1957; GCN-HSA-NEXT:    s_sext_i32_i16 s10, s10
1958; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1959; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
1960; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
1961; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
1962; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1963; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1964; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1965; GCN-HSA-NEXT:    s_sext_i32_i16 s9, s9
1966; GCN-HSA-NEXT:    s_sext_i32_i16 s8, s8
1967; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1968; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1969; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
1970; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s17
1971; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
1972; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s16
1973; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1974; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1975; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s7
1976; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s6
1977; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1978; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
1979; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s15
1980; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
1981; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s14
1982; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1983; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1984; GCN-HSA-NEXT:    s_sext_i32_i16 s5, s5
1985; GCN-HSA-NEXT:    s_sext_i32_i16 s4, s4
1986; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1987; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
1988; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s13
1989; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
1990; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s12
1991; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1992; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1993; GCN-HSA-NEXT:    s_endpgm
1994;
1995; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i32:
1996; GCN-NOHSA-VI:       ; %bb.0:
1997; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1998; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1999; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
2000; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2001; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
2002; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
2003; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[4:11], s[6:7], 0x0
2004; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2005; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s18, s11, 16
2006; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s19, s10, 16
2007; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s11, s11
2008; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s10, s10
2009; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s16, s9, 16
2010; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s17, s8, 16
2011; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s9, s9
2012; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s8, s8
2013; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
2014; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s19
2015; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
2016; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s18
2017; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2018; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s14, s7, 16
2019; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s15, s6, 16
2020; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
2021; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
2022; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
2023; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s17
2024; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
2025; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s16
2026; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2027; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s12, s5, 16
2028; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s13, s4, 16
2029; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
2030; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
2031; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
2032; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
2033; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
2034; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s14
2035; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2036; GCN-NOHSA-VI-NEXT:    s_nop 0
2037; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
2038; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s13
2039; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
2040; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s12
2041; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2042; GCN-NOHSA-VI-NEXT:    s_endpgm
2043;
2044; EG-LABEL: constant_sextload_v16i16_to_v16i32:
2045; EG:       ; %bb.0:
2046; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
2047; EG-NEXT:    TEX 1 @8
2048; EG-NEXT:    ALU 39, @13, KC0[CB0:0-32], KC1[]
2049; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
2050; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
2051; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
2052; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
2053; EG-NEXT:    CF_END
2054; EG-NEXT:    Fetch clause starting at 8:
2055; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
2056; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
2057; EG-NEXT:    ALU clause starting at 12:
2058; EG-NEXT:     MOV * T11.X, KC0[2].Z,
2059; EG-NEXT:    ALU clause starting at 13:
2060; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
2061; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2062; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2063; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
2064; EG-NEXT:     BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
2065; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2066; EG-NEXT:     BFE_INT T15.X, T11.X, 0.0, literal.x,
2067; EG-NEXT:     LSHR T0.Y, T12.W, literal.x,
2068; EG-NEXT:     BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
2069; EG-NEXT:     LSHR T0.W, T12.Y, literal.x,
2070; EG-NEXT:     LSHR * T1.W, T11.Y, literal.x,
2071; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2072; EG-NEXT:     BFE_INT T16.X, T11.Z, 0.0, literal.x,
2073; EG-NEXT:     LSHR T1.Y, T11.W, literal.x,
2074; EG-NEXT:     BFE_INT T17.Z, T12.Y, 0.0, literal.x,
2075; EG-NEXT:     BFE_INT T15.W, PS, 0.0, literal.x,
2076; EG-NEXT:     LSHR * T1.W, T11.X, literal.x,
2077; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2078; EG-NEXT:     BFE_INT T17.X, T12.X, 0.0, literal.x,
2079; EG-NEXT:     BFE_INT T15.Y, PS, 0.0, literal.x,
2080; EG-NEXT:     BFE_INT T18.Z, T12.W, 0.0, literal.x,
2081; EG-NEXT:     BFE_INT T16.W, PV.Y, 0.0, literal.x,
2082; EG-NEXT:     LSHR * T1.W, T11.Z, literal.x,
2083; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2084; EG-NEXT:     BFE_INT T18.X, T12.Z, 0.0, literal.x,
2085; EG-NEXT:     BFE_INT T16.Y, PS, 0.0, literal.x,
2086; EG-NEXT:     LSHR T0.Z, T12.X, literal.x,
2087; EG-NEXT:     BFE_INT T17.W, T0.W, 0.0, literal.x,
2088; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2089; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
2090; EG-NEXT:     LSHR T11.X, PS, literal.x,
2091; EG-NEXT:     BFE_INT T17.Y, PV.Z, 0.0, literal.y,
2092; EG-NEXT:     LSHR T0.Z, T12.Z, literal.y,
2093; EG-NEXT:     BFE_INT T18.W, T0.Y, 0.0, literal.y,
2094; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2095; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2096; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2097; EG-NEXT:     LSHR T12.X, PS, literal.x,
2098; EG-NEXT:     BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
2099; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2100  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
2101  %ext = sext <16 x i16> %load to <16 x i32>
2102  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
2103  ret void
2104}
2105
; Kernel under test: loads a <32 x i16> vector through the addrspace(4)
; pointer %in, zero-extends every element to i32, and stores the resulting
; <32 x i32> vector through the addrspace(1) pointer %out.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the update script instead of hand-editing.
2106define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
; Expected output for the plain "-march=amdgcn" RUN line. Each loaded dword
; holds two i16 elements: s_and_b32 with 0xffff yields the low element and
; s_lshr_b32 by 16 yields the high element. Results are written with eight
; buffer_store_dwordx4 instructions at offsets 112 down to 0.
2107; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i32:
2108; GCN-NOHSA-SI:       ; %bb.0:
2109; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x9
2110; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2111; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2112; GCN-NOHSA-SI-NEXT:    s_mov_b32 s18, 0xffff
2113; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2114; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s1, 16
2115; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s0, 16
2116; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s21, s3, 16
2117; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s2, 16
2118; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s23, s5, 16
2119; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s4, 16
2120; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s25, s7, 16
2121; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s6, 16
2122; GCN-NOHSA-SI-NEXT:    s_and_b32 s27, s1, s18
2123; GCN-NOHSA-SI-NEXT:    s_and_b32 s28, s0, s18
2124; GCN-NOHSA-SI-NEXT:    s_and_b32 s29, s3, s18
2125; GCN-NOHSA-SI-NEXT:    s_and_b32 s30, s2, s18
2126; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s18
2127; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, s18
2128; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, s18
2129; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, s18
2130; GCN-NOHSA-SI-NEXT:    s_and_b32 s31, s9, s18
2131; GCN-NOHSA-SI-NEXT:    s_and_b32 s33, s8, s18
2132; GCN-NOHSA-SI-NEXT:    s_and_b32 s34, s11, s18
2133; GCN-NOHSA-SI-NEXT:    s_and_b32 s35, s10, s18
2134; GCN-NOHSA-SI-NEXT:    s_and_b32 s36, s13, s18
2135; GCN-NOHSA-SI-NEXT:    s_and_b32 s37, s12, s18
2136; GCN-NOHSA-SI-NEXT:    s_and_b32 s38, s15, s18
2137; GCN-NOHSA-SI-NEXT:    s_and_b32 s18, s14, s18
2138; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s9, s9, 16
2139; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s8, s8, 16
2140; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s11, s11, 16
2141; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s10, s10, 16
2142; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s13, s13, 16
2143; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s12, 16
2144; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s15, s15, 16
2145; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s14, 16
2146; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2147; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2148; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
2149; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
2150; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
2151; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s14
2152; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s38
2153; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s15
2154; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2155; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2156; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s37
2157; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s12
2158; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s36
2159; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s13
2160; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2161; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2162; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s35
2163; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s10
2164; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s34
2165; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s11
2166; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2167; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2168; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s33
2169; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s8
2170; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s31
2171; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
2172; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2173; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2174; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
2175; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s26
2176; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
2177; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s25
2178; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2179; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2180; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
2181; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s24
2182; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
2183; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s23
2184; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2185; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2186; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s30
2187; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s22
2188; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s29
2189; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s21
2190; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2191; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2192; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s28
2193; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s20
2194; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s27
2195; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s19
2196; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2197; GCN-NOHSA-SI-NEXT:    s_endpgm
2198;
; Expected output for the "-mtriple=amdgcn--amdhsa -mcpu=kaveri" RUN line.
; Same mask/shift split as above, but the results are written with
; flat_store_dwordx4, so each destination address is formed explicitly with an
; s_add_u32 / s_addc_u32 pair and copied into a VGPR pair before the store.
2199; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i32:
2200; GCN-HSA:       ; %bb.0:
2201; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
2202; GCN-HSA-NEXT:    s_mov_b32 s20, 0xffff
2203; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2204; GCN-HSA-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x0
2205; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2206; GCN-HSA-NEXT:    s_and_b32 s34, s17, s20
2207; GCN-HSA-NEXT:    s_and_b32 s35, s16, s20
2208; GCN-HSA-NEXT:    s_and_b32 s36, s19, s20
2209; GCN-HSA-NEXT:    s_and_b32 s21, s5, s20
2210; GCN-HSA-NEXT:    s_and_b32 s22, s4, s20
2211; GCN-HSA-NEXT:    s_and_b32 s23, s7, s20
2212; GCN-HSA-NEXT:    s_and_b32 s24, s6, s20
2213; GCN-HSA-NEXT:    s_and_b32 s25, s9, s20
2214; GCN-HSA-NEXT:    s_and_b32 s26, s8, s20
2215; GCN-HSA-NEXT:    s_and_b32 s27, s11, s20
2216; GCN-HSA-NEXT:    s_and_b32 s28, s10, s20
2217; GCN-HSA-NEXT:    s_and_b32 s29, s13, s20
2218; GCN-HSA-NEXT:    s_and_b32 s30, s12, s20
2219; GCN-HSA-NEXT:    s_and_b32 s31, s15, s20
2220; GCN-HSA-NEXT:    s_and_b32 s33, s14, s20
2221; GCN-HSA-NEXT:    s_and_b32 s20, s18, s20
2222; GCN-HSA-NEXT:    s_lshr_b32 s17, s17, 16
2223; GCN-HSA-NEXT:    s_lshr_b32 s16, s16, 16
2224; GCN-HSA-NEXT:    s_lshr_b32 s19, s19, 16
2225; GCN-HSA-NEXT:    s_lshr_b32 s18, s18, 16
2226; GCN-HSA-NEXT:    s_lshr_b32 s5, s5, 16
2227; GCN-HSA-NEXT:    s_lshr_b32 s4, s4, 16
2228; GCN-HSA-NEXT:    s_lshr_b32 s7, s7, 16
2229; GCN-HSA-NEXT:    s_lshr_b32 s6, s6, 16
2230; GCN-HSA-NEXT:    s_lshr_b32 s9, s9, 16
2231; GCN-HSA-NEXT:    s_lshr_b32 s8, s8, 16
2232; GCN-HSA-NEXT:    s_lshr_b32 s11, s11, 16
2233; GCN-HSA-NEXT:    s_lshr_b32 s10, s10, 16
2234; GCN-HSA-NEXT:    s_lshr_b32 s13, s13, 16
2235; GCN-HSA-NEXT:    s_lshr_b32 s12, s12, 16
2236; GCN-HSA-NEXT:    s_lshr_b32 s15, s15, 16
2237; GCN-HSA-NEXT:    s_lshr_b32 s14, s14, 16
2238; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
2239; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2240; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
2241; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
2242; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
2243; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2244; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s3
2245; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s2
2246; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
2247; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s20
2248; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s18
2249; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s36
2250; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s19
2251; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s35
2252; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s16
2253; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2254; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s34
2255; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s17
2256; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
2257; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
2258; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s33
2259; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2260; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2261; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
2262; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s14
2263; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s31
2264; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s15
2265; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2266; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2267; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2268; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2269; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
2270; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s30
2271; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s12
2272; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s29
2273; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s13
2274; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2275; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2276; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2277; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2278; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
2279; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s28
2280; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s10
2281; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s27
2282; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s11
2283; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2284; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2285; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2286; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2287; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2288; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s26
2289; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s8
2290; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s25
2291; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s9
2292; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2293; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2294; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2295; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s24
2296; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s6
2297; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s23
2298; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
2299; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2300; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2301; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2302; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s22
2303; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s4
2304; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s21
2305; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
2306; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2307; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2308; GCN-HSA-NEXT:    s_endpgm
2309;
; Expected output for the "-mcpu=tonga -mattr=-flat-for-global" RUN line.
; Here the mask/shift pairs are interleaved with the buffer_store_dwordx4
; instructions instead of being grouped ahead of them.
2310; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i32:
2311; GCN-NOHSA-VI:       ; %bb.0:
2312; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
2313; GCN-NOHSA-VI-NEXT:    s_mov_b32 s20, 0xffff
2314; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
2315; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
2316; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2317; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
2318; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
2319; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[4:19], s[6:7], 0x0
2320; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2321; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s36, s19, 16
2322; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s37, s18, 16
2323; GCN-NOHSA-VI-NEXT:    s_and_b32 s19, s19, s20
2324; GCN-NOHSA-VI-NEXT:    s_and_b32 s18, s18, s20
2325; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s34, s17, 16
2326; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s35, s16, 16
2327; GCN-NOHSA-VI-NEXT:    s_and_b32 s17, s17, s20
2328; GCN-NOHSA-VI-NEXT:    s_and_b32 s16, s16, s20
2329; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
2330; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s37
2331; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s19
2332; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s36
2333; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2334; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s31, s15, 16
2335; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s33, s14, 16
2336; GCN-NOHSA-VI-NEXT:    s_and_b32 s15, s15, s20
2337; GCN-NOHSA-VI-NEXT:    s_and_b32 s14, s14, s20
2338; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
2339; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s35
2340; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s17
2341; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s34
2342; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2343; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s29, s13, 16
2344; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s30, s12, 16
2345; GCN-NOHSA-VI-NEXT:    s_and_b32 s13, s13, s20
2346; GCN-NOHSA-VI-NEXT:    s_and_b32 s12, s12, s20
2347; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
2348; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s33
2349; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
2350; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s31
2351; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2352; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s27, s11, 16
2353; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s28, s10, 16
2354; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s11, s20
2355; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s10, s20
2356; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
2357; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s30
2358; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
2359; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s29
2360; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2361; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s25, s9, 16
2362; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s26, s8, 16
2363; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s9, s20
2364; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, s20
2365; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
2366; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s28
2367; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
2368; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s27
2369; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2370; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s23, s7, 16
2371; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s24, s6, 16
2372; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, s20
2373; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, s20
2374; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
2375; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s26
2376; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
2377; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s25
2378; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2379; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s21, s5, 16
2380; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s22, s4, 16
2381; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, s20
2382; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, s20
2383; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
2384; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s24
2385; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
2386; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s23
2387; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2388; GCN-NOHSA-VI-NEXT:    s_nop 0
2389; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
2390; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s22
2391; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
2392; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s21
2393; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2394; GCN-NOHSA-VI-NEXT:    s_endpgm
2395;
; Expected output for the "-march=r600 -mcpu=redwood" RUN line. The input is
; fetched with four VTX_READ_128 instructions; each 32-bit lane is split with
; AND_INT 65535 (low element) and LSHR 16 (high element), and the eight result
; vectors are written with MEM_RAT_CACHELESS STORE_RAW.
2396; EG-LABEL: constant_zextload_v32i16_to_v32i32:
2397; EG:       ; %bb.0:
2398; EG-NEXT:    ALU 0, @20, KC0[CB0:0-32], KC1[]
2399; EG-NEXT:    TEX 3 @12
2400; EG-NEXT:    ALU 71, @21, KC0[CB0:0-32], KC1[]
2401; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0
2402; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0
2403; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T32.X, 0
2404; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T22.X, 0
2405; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T29.X, 0
2406; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T19.X, 0
2407; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0
2408; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T20.X, 1
2409; EG-NEXT:    CF_END
2410; EG-NEXT:    Fetch clause starting at 12:
2411; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
2412; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 48, #1
2413; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
2414; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 16, #1
2415; EG-NEXT:    ALU clause starting at 20:
2416; EG-NEXT:     MOV * T19.X, KC0[2].Z,
2417; EG-NEXT:    ALU clause starting at 21:
2418; EG-NEXT:     LSHR * T23.W, T20.Y, literal.x,
2419; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2420; EG-NEXT:     AND_INT * T23.Z, T20.Y, literal.x,
2421; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2422; EG-NEXT:     LSHR T23.Y, T20.X, literal.x,
2423; EG-NEXT:     LSHR * T24.W, T20.W, literal.x,
2424; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2425; EG-NEXT:     AND_INT T23.X, T20.X, literal.x,
2426; EG-NEXT:     AND_INT T24.Z, T20.W, literal.x,
2427; EG-NEXT:     LSHR * T20.X, KC0[2].Y, literal.y,
2428; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
2429; EG-NEXT:     LSHR T24.Y, T20.Z, literal.x,
2430; EG-NEXT:     LSHR * T25.W, T19.Y, literal.x,
2431; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2432; EG-NEXT:     AND_INT T24.X, T20.Z, literal.x,
2433; EG-NEXT:     AND_INT T25.Z, T19.Y, literal.x,
2434; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2435; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
2436; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
2437; EG-NEXT:     LSHR T25.Y, T19.X, literal.y,
2438; EG-NEXT:     LSHR T27.W, T19.W, literal.y,
2439; EG-NEXT:     AND_INT * T25.X, T19.X, literal.z,
2440; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2441; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2442; EG-NEXT:     AND_INT T27.Z, T19.W, literal.x,
2443; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2444; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
2445; EG-NEXT:     LSHR T19.X, PV.W, literal.x,
2446; EG-NEXT:     LSHR T27.Y, T19.Z, literal.y,
2447; EG-NEXT:     LSHR T28.W, T22.Y, literal.y,
2448; EG-NEXT:     AND_INT * T27.X, T19.Z, literal.z,
2449; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2450; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2451; EG-NEXT:     AND_INT T28.Z, T22.Y, literal.x,
2452; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2453; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
2454; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
2455; EG-NEXT:     LSHR T28.Y, T22.X, literal.y,
2456; EG-NEXT:     LSHR T30.W, T22.W, literal.y,
2457; EG-NEXT:     AND_INT * T28.X, T22.X, literal.z,
2458; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2459; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2460; EG-NEXT:     AND_INT T30.Z, T22.W, literal.x,
2461; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2462; EG-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
2463; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
2464; EG-NEXT:     LSHR T30.Y, T22.Z, literal.y,
2465; EG-NEXT:     LSHR T31.W, T21.Y, literal.y,
2466; EG-NEXT:     AND_INT * T30.X, T22.Z, literal.z,
2467; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2468; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2469; EG-NEXT:     AND_INT T31.Z, T21.Y, literal.x,
2470; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2471; EG-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
2472; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
2473; EG-NEXT:     LSHR T31.Y, T21.X, literal.y,
2474; EG-NEXT:     LSHR T33.W, T21.W, literal.y,
2475; EG-NEXT:     AND_INT * T31.X, T21.X, literal.z,
2476; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2477; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2478; EG-NEXT:     AND_INT T33.Z, T21.W, literal.x,
2479; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2480; EG-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
2481; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
2482; EG-NEXT:     LSHR T33.Y, T21.Z, literal.y,
2483; EG-NEXT:     AND_INT * T33.X, T21.Z, literal.z,
2484; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2485; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2486; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2487; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
2488; EG-NEXT:     LSHR * T34.X, PV.W, literal.x,
2489; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: one vector load, one zext, one store; nothing else.
2490  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
2491  %ext = zext <32 x i16> %load to <32 x i32>
2492  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
2493  ret void
2494}
2495
2496define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
2497; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i32:
2498; GCN-NOHSA-SI:       ; %bb.0:
2499; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x9
2500; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2501; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2502; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2503; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s18, s1, 16
2504; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s19, s0, 16
2505; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s20, s1
2506; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s21, s0
2507; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s22, s3, 16
2508; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s23, s2, 16
2509; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s24, s3
2510; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s25, s2
2511; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s26, s5, 16
2512; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s27, s4, 16
2513; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
2514; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
2515; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s28, s7, 16
2516; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s29, s6, 16
2517; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
2518; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
2519; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s30, s9, 16
2520; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s31, s8, 16
2521; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s9, s9
2522; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s8, s8
2523; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s33, s11, 16
2524; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s34, s10, 16
2525; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s11, s11
2526; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s10, s10
2527; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s35, s13, 16
2528; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s36, s12, 16
2529; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s13, s13
2530; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s12, s12
2531; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s37, s15, 16
2532; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s38, s14, 16
2533; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s15, s15
2534; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s14, s14
2535; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2536; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2537; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
2538; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
2539; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
2540; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s38
2541; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
2542; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s37
2543; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2544; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2545; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
2546; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s36
2547; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
2548; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s35
2549; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2550; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2551; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
2552; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
2553; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
2554; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
2555; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2556; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2557; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
2558; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s31
2559; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
2560; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s30
2561; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2562; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2563; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
2564; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s29
2565; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
2566; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s28
2567; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2568; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2569; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
2570; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s27
2571; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
2572; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s26
2573; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2574; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2575; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s25
2576; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s23
2577; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s24
2578; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s22
2579; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2580; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2581; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s21
2582; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
2583; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
2584; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
2585; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2586; GCN-NOHSA-SI-NEXT:    s_endpgm
2587;
2588; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i32:
2589; GCN-HSA:       ; %bb.0:
2590; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
2591; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2592; GCN-HSA-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x0
2593; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2594; GCN-HSA-NEXT:    s_ashr_i32 s33, s17, 16
2595; GCN-HSA-NEXT:    s_ashr_i32 s34, s16, 16
2596; GCN-HSA-NEXT:    s_ashr_i32 s35, s19, 16
2597; GCN-HSA-NEXT:    s_ashr_i32 s36, s18, 16
2598; GCN-HSA-NEXT:    s_ashr_i32 s20, s5, 16
2599; GCN-HSA-NEXT:    s_ashr_i32 s21, s4, 16
2600; GCN-HSA-NEXT:    s_ashr_i32 s22, s7, 16
2601; GCN-HSA-NEXT:    s_ashr_i32 s23, s6, 16
2602; GCN-HSA-NEXT:    s_ashr_i32 s24, s9, 16
2603; GCN-HSA-NEXT:    s_ashr_i32 s25, s8, 16
2604; GCN-HSA-NEXT:    s_ashr_i32 s26, s11, 16
2605; GCN-HSA-NEXT:    s_ashr_i32 s27, s10, 16
2606; GCN-HSA-NEXT:    s_ashr_i32 s28, s13, 16
2607; GCN-HSA-NEXT:    s_ashr_i32 s29, s12, 16
2608; GCN-HSA-NEXT:    s_ashr_i32 s30, s15, 16
2609; GCN-HSA-NEXT:    s_ashr_i32 s31, s14, 16
2610; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
2611; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2612; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
2613; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
2614; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
2615; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2616; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s3
2617; GCN-HSA-NEXT:    s_sext_i32_i16 s16, s16
2618; GCN-HSA-NEXT:    s_sext_i32_i16 s19, s19
2619; GCN-HSA-NEXT:    s_sext_i32_i16 s18, s18
2620; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s2
2621; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
2622; GCN-HSA-NEXT:    s_sext_i32_i16 s17, s17
2623; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s18
2624; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s36
2625; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s19
2626; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s35
2627; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
2628; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s34
2629; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2630; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s17
2631; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s33
2632; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
2633; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
2634; GCN-HSA-NEXT:    s_sext_i32_i16 s15, s15
2635; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2636; GCN-HSA-NEXT:    s_sext_i32_i16 s14, s14
2637; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2638; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
2639; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
2640; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s31
2641; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
2642; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s30
2643; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2644; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2645; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2646; GCN-HSA-NEXT:    s_sext_i32_i16 s13, s13
2647; GCN-HSA-NEXT:    s_sext_i32_i16 s12, s12
2648; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2649; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
2650; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s12
2651; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
2652; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s13
2653; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s28
2654; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2655; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2656; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2657; GCN-HSA-NEXT:    s_sext_i32_i16 s11, s11
2658; GCN-HSA-NEXT:    s_sext_i32_i16 s10, s10
2659; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2660; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
2661; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
2662; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s27
2663; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
2664; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s26
2665; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2666; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2667; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2668; GCN-HSA-NEXT:    s_sext_i32_i16 s9, s9
2669; GCN-HSA-NEXT:    s_sext_i32_i16 s8, s8
2670; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2671; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2672; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
2673; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
2674; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
2675; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s24
2676; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2677; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2678; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s7
2679; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s6
2680; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2681; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
2682; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s23
2683; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
2684; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s22
2685; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2686; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2687; GCN-HSA-NEXT:    s_sext_i32_i16 s5, s5
2688; GCN-HSA-NEXT:    s_sext_i32_i16 s4, s4
2689; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2690; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
2691; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
2692; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
2693; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s20
2694; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2695; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2696; GCN-HSA-NEXT:    s_endpgm
2697;
2698; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i32:
2699; GCN-NOHSA-VI:       ; %bb.0:
2700; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
2701; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
2702; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
2703; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2704; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
2705; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
2706; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[4:19], s[6:7], 0x0
2707; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2708; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s35, s19, 16
2709; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s36, s18, 16
2710; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s19, s19
2711; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s18, s18
2712; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s33, s17, 16
2713; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s34, s16, 16
2714; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s17, s17
2715; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s16, s16
2716; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
2717; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s36
2718; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s19
2719; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
2720; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2721; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s30, s15, 16
2722; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s31, s14, 16
2723; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s15, s15
2724; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s14, s14
2725; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
2726; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s34
2727; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s17
2728; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s33
2729; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2730; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s28, s13, 16
2731; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s29, s12, 16
2732; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s13, s13
2733; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s12, s12
2734; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
2735; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s31
2736; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
2737; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s30
2738; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2739; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s26, s11, 16
2740; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s27, s10, 16
2741; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s11, s11
2742; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s10, s10
2743; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
2744; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s29
2745; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
2746; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s28
2747; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2748; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s24, s9, 16
2749; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s25, s8, 16
2750; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s9, s9
2751; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s8, s8
2752; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
2753; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s27
2754; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
2755; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s26
2756; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2757; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s22, s7, 16
2758; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s23, s6, 16
2759; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
2760; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
2761; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
2762; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s25
2763; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
2764; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s24
2765; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2766; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s20, s5, 16
2767; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s21, s4, 16
2768; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
2769; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
2770; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
2771; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s23
2772; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
2773; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s22
2774; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2775; GCN-NOHSA-VI-NEXT:    s_nop 0
2776; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
2777; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s21
2778; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
2779; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s20
2780; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2781; GCN-NOHSA-VI-NEXT:    s_endpgm
2782;
2783; EG-LABEL: constant_sextload_v32i16_to_v32i32:
2784; EG:       ; %bb.0:
2785; EG-NEXT:    ALU 8, @20, KC0[CB0:0-32], KC1[]
2786; EG-NEXT:    TEX 3 @12
2787; EG-NEXT:    ALU 73, @29, KC0[CB0:0-32], KC1[]
2788; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T24.X, 0
2789; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T22.X, 0
2790; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T28.X, 0
2791; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0
2792; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T26.X, 0
2793; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0
2794; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0
2795; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1
2796; EG-NEXT:    CF_END
2797; EG-NEXT:    Fetch clause starting at 12:
2798; EG-NEXT:     VTX_READ_128 T23.XYZW, T22.X, 16, #1
2799; EG-NEXT:     VTX_READ_128 T24.XYZW, T22.X, 32, #1
2800; EG-NEXT:     VTX_READ_128 T25.XYZW, T22.X, 0, #1
2801; EG-NEXT:     VTX_READ_128 T22.XYZW, T22.X, 48, #1
2802; EG-NEXT:    ALU clause starting at 20:
2803; EG-NEXT:     LSHR T19.X, KC0[2].Y, literal.x,
2804; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2805; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2806; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
2807; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2808; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
2809; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
2810; EG-NEXT:     MOV * T22.X, KC0[2].Z,
2811; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2812; EG-NEXT:    ALU clause starting at 29:
2813; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2814; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2815; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
2816; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2817; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
2818; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
2819; EG-NEXT:     LSHR T0.W, T22.W, literal.y,
2820; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
2821; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2822; EG-NEXT:    80(1.121039e-43), 0(0.000000e+00)
2823; EG-NEXT:     LSHR T28.X, PS, literal.x,
2824; EG-NEXT:     LSHR T0.Y, T22.Y, literal.y,
2825; EG-NEXT:     BFE_INT T29.Z, T25.Y, 0.0, literal.y, BS:VEC_120/SCL_212
2826; EG-NEXT:     LSHR T1.W, T24.W, literal.y,
2827; EG-NEXT:     LSHR * T2.W, T24.Y, literal.y,
2828; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2829; EG-NEXT:     BFE_INT T29.X, T25.X, 0.0, literal.x,
2830; EG-NEXT:     LSHR T1.Y, T23.W, literal.x,
2831; EG-NEXT:     BFE_INT T30.Z, T25.W, 0.0, literal.x, BS:VEC_120/SCL_212
2832; EG-NEXT:     LSHR T3.W, T23.Y, literal.x,
2833; EG-NEXT:     LSHR * T4.W, T25.Y, literal.x,
2834; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2835; EG-NEXT:     BFE_INT T30.X, T25.Z, 0.0, literal.x,
2836; EG-NEXT:     LSHR T2.Y, T25.W, literal.x,
2837; EG-NEXT:     BFE_INT T31.Z, T23.Y, 0.0, literal.x,
2838; EG-NEXT:     BFE_INT T29.W, PS, 0.0, literal.x,
2839; EG-NEXT:     LSHR * T4.W, T25.X, literal.x,
2840; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2841; EG-NEXT:     BFE_INT T31.X, T23.X, 0.0, literal.x,
2842; EG-NEXT:     BFE_INT T29.Y, PS, 0.0, literal.x,
2843; EG-NEXT:     BFE_INT T32.Z, T23.W, 0.0, literal.x,
2844; EG-NEXT:     BFE_INT T30.W, PV.Y, 0.0, literal.x,
2845; EG-NEXT:     LSHR * T4.W, T25.Z, literal.x,
2846; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2847; EG-NEXT:     BFE_INT T32.X, T23.Z, 0.0, literal.x,
2848; EG-NEXT:     BFE_INT T30.Y, PS, 0.0, literal.x,
2849; EG-NEXT:     BFE_INT T25.Z, T24.Y, 0.0, literal.x,
2850; EG-NEXT:     BFE_INT T31.W, T3.W, 0.0, literal.x,
2851; EG-NEXT:     LSHR * T3.W, T23.X, literal.x,
2852; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2853; EG-NEXT:     BFE_INT T25.X, T24.X, 0.0, literal.x,
2854; EG-NEXT:     BFE_INT T31.Y, PS, 0.0, literal.x,
2855; EG-NEXT:     BFE_INT T33.Z, T24.W, 0.0, literal.x,
2856; EG-NEXT:     BFE_INT T32.W, T1.Y, 0.0, literal.x,
2857; EG-NEXT:     LSHR * T3.W, T23.Z, literal.x,
2858; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2859; EG-NEXT:     BFE_INT T33.X, T24.Z, 0.0, literal.x,
2860; EG-NEXT:     BFE_INT T32.Y, PS, 0.0, literal.x,
2861; EG-NEXT:     BFE_INT T23.Z, T22.Y, 0.0, literal.x,
2862; EG-NEXT:     BFE_INT T25.W, T2.W, 0.0, literal.x,
2863; EG-NEXT:     LSHR * T2.W, T24.X, literal.x,
2864; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2865; EG-NEXT:     BFE_INT T23.X, T22.X, 0.0, literal.x,
2866; EG-NEXT:     BFE_INT T25.Y, PS, 0.0, literal.x,
2867; EG-NEXT:     BFE_INT T34.Z, T22.W, 0.0, literal.x,
2868; EG-NEXT:     BFE_INT T33.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212
2869; EG-NEXT:     LSHR * T1.W, T24.Z, literal.x,
2870; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2871; EG-NEXT:     BFE_INT T34.X, T22.Z, 0.0, literal.x,
2872; EG-NEXT:     BFE_INT T33.Y, PS, 0.0, literal.x,
2873; EG-NEXT:     LSHR T0.Z, T22.X, literal.x,
2874; EG-NEXT:     BFE_INT T23.W, T0.Y, 0.0, literal.x,
2875; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
2876; EG-NEXT:    16(2.242078e-44), 96(1.345247e-43)
2877; EG-NEXT:     LSHR T22.X, PS, literal.x,
2878; EG-NEXT:     BFE_INT T23.Y, PV.Z, 0.0, literal.y,
2879; EG-NEXT:     LSHR T0.Z, T22.Z, literal.y,
2880; EG-NEXT:     BFE_INT T34.W, T0.W, 0.0, literal.y,
2881; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2882; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2883; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
2884; EG-NEXT:     LSHR T24.X, PS, literal.x,
2885; EG-NEXT:     BFE_INT * T34.Y, PV.Z, 0.0, literal.y,
2886; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2887  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
2888  %ext = sext <32 x i16> %load to <32 x i32>
2889  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
2890  ret void
2891}
2892
2893define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
2894; GCN-NOHSA-SI-LABEL: constant_zextload_v64i16_to_v64i32:
2895; GCN-NOHSA-SI:       ; %bb.0:
2896; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
2897; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2898; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x0
2899; GCN-NOHSA-SI-NEXT:    s_mov_b32 s20, 0xffff
2900; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[36:51], s[2:3], 0x10
2901; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2902; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s21, s5, 16
2903; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s4, 16
2904; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s23, s7, 16
2905; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s6, 16
2906; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s25, s9, 16
2907; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s8, 16
2908; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s27, s11, 16
2909; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s28, s10, 16
2910; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s29, s13, 16
2911; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s30, s12, 16
2912; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s31, s15, 16
2913; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s33, s14, 16
2914; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s34, s17, 16
2915; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s35, s16, 16
2916; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s52, s19, 16
2917; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s53, s18, 16
2918; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s20
2919; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, s20
2920; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, s20
2921; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, s20
2922; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, s20
2923; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, s20
2924; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, s20
2925; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, s20
2926; GCN-NOHSA-SI-NEXT:    s_and_b32 s13, s13, s20
2927; GCN-NOHSA-SI-NEXT:    s_and_b32 s12, s12, s20
2928; GCN-NOHSA-SI-NEXT:    s_and_b32 s15, s15, s20
2929; GCN-NOHSA-SI-NEXT:    s_and_b32 s14, s14, s20
2930; GCN-NOHSA-SI-NEXT:    s_and_b32 s17, s17, s20
2931; GCN-NOHSA-SI-NEXT:    s_and_b32 s16, s16, s20
2932; GCN-NOHSA-SI-NEXT:    s_and_b32 s19, s19, s20
2933; GCN-NOHSA-SI-NEXT:    s_and_b32 s18, s18, s20
2934; GCN-NOHSA-SI-NEXT:    s_and_b32 s54, s37, s20
2935; GCN-NOHSA-SI-NEXT:    s_and_b32 s55, s36, s20
2936; GCN-NOHSA-SI-NEXT:    s_and_b32 s56, s39, s20
2937; GCN-NOHSA-SI-NEXT:    s_and_b32 s57, s38, s20
2938; GCN-NOHSA-SI-NEXT:    s_and_b32 s58, s41, s20
2939; GCN-NOHSA-SI-NEXT:    s_and_b32 s59, s40, s20
2940; GCN-NOHSA-SI-NEXT:    s_and_b32 s60, s43, s20
2941; GCN-NOHSA-SI-NEXT:    s_and_b32 s61, s42, s20
2942; GCN-NOHSA-SI-NEXT:    s_and_b32 s62, s45, s20
2943; GCN-NOHSA-SI-NEXT:    s_and_b32 s63, s44, s20
2944; GCN-NOHSA-SI-NEXT:    s_and_b32 s64, s47, s20
2945; GCN-NOHSA-SI-NEXT:    s_and_b32 s65, s46, s20
2946; GCN-NOHSA-SI-NEXT:    s_and_b32 s66, s49, s20
2947; GCN-NOHSA-SI-NEXT:    s_and_b32 s67, s48, s20
2948; GCN-NOHSA-SI-NEXT:    s_and_b32 s68, s51, s20
2949; GCN-NOHSA-SI-NEXT:    s_and_b32 s20, s50, s20
2950; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s37, s37, 16
2951; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s36, s36, 16
2952; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s39, s39, 16
2953; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s38, s38, 16
2954; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s41, s41, 16
2955; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s40, s40, 16
2956; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s42, s42, 16
2957; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s45, s45, 16
2958; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s44, s44, 16
2959; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s47, s47, 16
2960; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s46, s46, 16
2961; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s49, s49, 16
2962; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s48, s48, 16
2963; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s51, s51, 16
2964; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s50, s50, 16
2965; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s43, s43, 16
2966; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2967; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2968; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s20
2969; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s50
2970; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s68
2971; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s51
2972; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s67
2973; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s48
2974; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s66
2975; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s49
2976; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s65
2977; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s46
2978; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s64
2979; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s47
2980; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s63
2981; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s44
2982; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s62
2983; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s45
2984; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s61
2985; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s42
2986; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s60
2987; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s59
2988; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s43
2989; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s40
2990; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s58
2991; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s41
2992; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
2993; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
2994; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
2995; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
2996; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
2997; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160
2998; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(5)
2999; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s57
3000; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s38
3001; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s56
3002; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s39
3003; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
3004; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3005; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s55
3006; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s36
3007; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s54
3008; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s37
3009; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
3010; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3011; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
3012; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s53
3013; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s19
3014; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s52
3015; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
3016; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3017; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s16
3018; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s35
3019; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s17
3020; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s34
3021; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
3022; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3023; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
3024; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s33
3025; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
3026; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s31
3027; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
3028; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3029; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
3030; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s30
3031; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
3032; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s29
3033; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
3034; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3035; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
3036; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s28
3037; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
3038; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s27
3039; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3040; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3041; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
3042; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s26
3043; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
3044; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s25
3045; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
3046; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3047; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
3048; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s24
3049; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
3050; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s23
3051; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3052; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3053; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
3054; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s22
3055; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
3056; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s21
3057; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3058; GCN-NOHSA-SI-NEXT:    s_endpgm
3059;
3060; GCN-HSA-LABEL: constant_zextload_v64i16_to_v64i32:
3061; GCN-HSA:       ; %bb.0:
3062; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
3063; GCN-HSA-NEXT:    s_mov_b32 s37, 0xffff
3064; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3065; GCN-HSA-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x0
3066; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3067; GCN-HSA-NEXT:    s_lshr_b32 s20, s5, 16
3068; GCN-HSA-NEXT:    s_lshr_b32 s21, s4, 16
3069; GCN-HSA-NEXT:    s_lshr_b32 s22, s7, 16
3070; GCN-HSA-NEXT:    s_lshr_b32 s23, s6, 16
3071; GCN-HSA-NEXT:    s_lshr_b32 s24, s9, 16
3072; GCN-HSA-NEXT:    s_lshr_b32 s25, s8, 16
3073; GCN-HSA-NEXT:    s_lshr_b32 s26, s11, 16
3074; GCN-HSA-NEXT:    s_lshr_b32 s27, s10, 16
3075; GCN-HSA-NEXT:    s_lshr_b32 s28, s13, 16
3076; GCN-HSA-NEXT:    s_lshr_b32 s29, s12, 16
3077; GCN-HSA-NEXT:    s_lshr_b32 s30, s15, 16
3078; GCN-HSA-NEXT:    s_lshr_b32 s31, s14, 16
3079; GCN-HSA-NEXT:    s_lshr_b32 s33, s17, 16
3080; GCN-HSA-NEXT:    s_lshr_b32 s34, s16, 16
3081; GCN-HSA-NEXT:    s_lshr_b32 s35, s19, 16
3082; GCN-HSA-NEXT:    s_lshr_b32 s36, s18, 16
3083; GCN-HSA-NEXT:    s_and_b32 s38, s5, s37
3084; GCN-HSA-NEXT:    s_and_b32 s39, s4, s37
3085; GCN-HSA-NEXT:    s_and_b32 s40, s7, s37
3086; GCN-HSA-NEXT:    s_and_b32 s41, s6, s37
3087; GCN-HSA-NEXT:    s_and_b32 s42, s9, s37
3088; GCN-HSA-NEXT:    s_and_b32 s43, s8, s37
3089; GCN-HSA-NEXT:    s_and_b32 s44, s11, s37
3090; GCN-HSA-NEXT:    s_and_b32 s45, s10, s37
3091; GCN-HSA-NEXT:    s_and_b32 s46, s13, s37
3092; GCN-HSA-NEXT:    s_and_b32 s47, s12, s37
3093; GCN-HSA-NEXT:    s_and_b32 s48, s15, s37
3094; GCN-HSA-NEXT:    s_and_b32 s49, s14, s37
3095; GCN-HSA-NEXT:    s_and_b32 s50, s17, s37
3096; GCN-HSA-NEXT:    s_and_b32 s51, s16, s37
3097; GCN-HSA-NEXT:    s_and_b32 s52, s19, s37
3098; GCN-HSA-NEXT:    s_and_b32 s53, s18, s37
3099; GCN-HSA-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x10
3100; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3101; GCN-HSA-NEXT:    s_and_b32 s59, s8, s37
3102; GCN-HSA-NEXT:    s_and_b32 s60, s11, s37
3103; GCN-HSA-NEXT:    s_and_b32 s61, s10, s37
3104; GCN-HSA-NEXT:    s_and_b32 s62, s13, s37
3105; GCN-HSA-NEXT:    s_and_b32 s63, s12, s37
3106; GCN-HSA-NEXT:    s_and_b32 s64, s15, s37
3107; GCN-HSA-NEXT:    s_and_b32 s65, s14, s37
3108; GCN-HSA-NEXT:    s_and_b32 s66, s17, s37
3109; GCN-HSA-NEXT:    s_and_b32 s67, s16, s37
3110; GCN-HSA-NEXT:    s_and_b32 s68, s19, s37
3111; GCN-HSA-NEXT:    s_and_b32 s54, s5, s37
3112; GCN-HSA-NEXT:    s_and_b32 s55, s4, s37
3113; GCN-HSA-NEXT:    s_and_b32 s56, s7, s37
3114; GCN-HSA-NEXT:    s_and_b32 s57, s6, s37
3115; GCN-HSA-NEXT:    s_and_b32 s58, s9, s37
3116; GCN-HSA-NEXT:    s_and_b32 s37, s18, s37
3117; GCN-HSA-NEXT:    s_lshr_b32 s9, s9, 16
3118; GCN-HSA-NEXT:    s_lshr_b32 s11, s11, 16
3119; GCN-HSA-NEXT:    s_lshr_b32 s10, s10, 16
3120; GCN-HSA-NEXT:    s_lshr_b32 s13, s13, 16
3121; GCN-HSA-NEXT:    s_lshr_b32 s12, s12, 16
3122; GCN-HSA-NEXT:    s_lshr_b32 s15, s15, 16
3123; GCN-HSA-NEXT:    s_lshr_b32 s14, s14, 16
3124; GCN-HSA-NEXT:    s_lshr_b32 s17, s17, 16
3125; GCN-HSA-NEXT:    s_lshr_b32 s16, s16, 16
3126; GCN-HSA-NEXT:    s_lshr_b32 s19, s19, 16
3127; GCN-HSA-NEXT:    s_lshr_b32 s18, s18, 16
3128; GCN-HSA-NEXT:    s_lshr_b32 s5, s5, 16
3129; GCN-HSA-NEXT:    s_lshr_b32 s4, s4, 16
3130; GCN-HSA-NEXT:    s_lshr_b32 s7, s7, 16
3131; GCN-HSA-NEXT:    s_lshr_b32 s6, s6, 16
3132; GCN-HSA-NEXT:    s_lshr_b32 s8, s8, 16
3133; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xf0
3134; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3135; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s3
3136; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s2
3137; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xe0
3138; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3139; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s3
3140; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s2
3141; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xd0
3142; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3143; GCN-HSA-NEXT:    v_mov_b32_e32 v27, s3
3144; GCN-HSA-NEXT:    v_mov_b32_e32 v26, s2
3145; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xc0
3146; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3147; GCN-HSA-NEXT:    v_mov_b32_e32 v29, s3
3148; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s2
3149; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xb0
3150; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3151; GCN-HSA-NEXT:    v_mov_b32_e32 v31, s3
3152; GCN-HSA-NEXT:    v_mov_b32_e32 v30, s2
3153; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xa0
3154; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3155; GCN-HSA-NEXT:    v_mov_b32_e32 v33, s3
3156; GCN-HSA-NEXT:    v_mov_b32_e32 v32, s2
3157; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x90
3158; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s67
3159; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s16
3160; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s66
3161; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s17
3162; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3163; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[4:7]
3164; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s3
3165; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s2
3166; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x80
3167; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3168; GCN-HSA-NEXT:    v_mov_b32_e32 v35, s3
3169; GCN-HSA-NEXT:    v_mov_b32_e32 v34, s2
3170; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
3171; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s61
3172; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s10
3173; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s60
3174; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s11
3175; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3176; GCN-HSA-NEXT:    flat_store_dwordx4 v[30:31], v[16:19]
3177; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s37
3178; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
3179; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
3180; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
3181; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3182; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
3183; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
3184; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
3185; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s18
3186; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s68
3187; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s19
3188; GCN-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[0:3]
3189; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s65
3190; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s14
3191; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s64
3192; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s15
3193; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s63
3194; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s12
3195; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s62
3196; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s13
3197; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s59
3198; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s57
3199; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s8
3200; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s58
3201; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s9
3202; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s6
3203; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s56
3204; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s55
3205; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
3206; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s4
3207; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3208; GCN-HSA-NEXT:    flat_store_dwordx4 v[26:27], v[8:11]
3209; GCN-HSA-NEXT:    flat_store_dwordx4 v[28:29], v[12:15]
3210; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s54
3211; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s53
3212; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s5
3213; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s51
3214; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s36
3215; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s52
3216; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s35
3217; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s34
3218; GCN-HSA-NEXT:    flat_store_dwordx4 v[32:33], v[20:23]
3219; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s50
3220; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s33
3221; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[0:3]
3222; GCN-HSA-NEXT:    flat_store_dwordx4 v[34:35], v[4:7]
3223; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
3224; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
3225; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3226; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3227; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
3228; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s49
3229; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s31
3230; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s48
3231; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s30
3232; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3233; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3234; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3235; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3236; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
3237; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s47
3238; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
3239; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s46
3240; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s28
3241; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3242; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3243; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3244; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3245; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
3246; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s45
3247; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s27
3248; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s44
3249; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s26
3250; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3251; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3252; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3253; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3254; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
3255; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s43
3256; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
3257; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s42
3258; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s24
3259; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3260; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3261; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3262; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s41
3263; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s23
3264; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s40
3265; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s22
3266; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3267; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3268; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
3269; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s39
3270; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
3271; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s38
3272; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s20
3273; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
3274; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3275; GCN-HSA-NEXT:    s_endpgm
3276;
3277; GCN-NOHSA-VI-LABEL: constant_zextload_v64i16_to_v64i32:
3278; GCN-NOHSA-VI:       ; %bb.0:
3279; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
3280; GCN-NOHSA-VI-NEXT:    s_mov_b32 s20, 0xffff
3281; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3282; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x40
3283; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[36:51], s[2:3], 0x0
3284; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
3285; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
3286; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3287; GCN-NOHSA-VI-NEXT:    s_and_b32 s68, s19, s20
3288; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s52, s51, 16
3289; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s53, s50, 16
3290; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s54, s5, 16
3291; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s55, s4, 16
3292; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s56, s7, 16
3293; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s57, s6, 16
3294; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s58, s9, 16
3295; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s59, s8, 16
3296; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s60, s11, 16
3297; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s61, s10, 16
3298; GCN-NOHSA-VI-NEXT:    s_and_b32 s62, s13, s20
3299; GCN-NOHSA-VI-NEXT:    s_and_b32 s63, s12, s20
3300; GCN-NOHSA-VI-NEXT:    s_and_b32 s64, s15, s20
3301; GCN-NOHSA-VI-NEXT:    s_and_b32 s65, s14, s20
3302; GCN-NOHSA-VI-NEXT:    s_and_b32 s66, s17, s20
3303; GCN-NOHSA-VI-NEXT:    s_and_b32 s67, s16, s20
3304; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s19, s19, 16
3305; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s21, s37, 16
3306; GCN-NOHSA-VI-NEXT:    s_and_b32 s22, s37, s20
3307; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s23, s36, 16
3308; GCN-NOHSA-VI-NEXT:    s_and_b32 s24, s36, s20
3309; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s25, s39, 16
3310; GCN-NOHSA-VI-NEXT:    s_and_b32 s26, s39, s20
3311; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s27, s38, 16
3312; GCN-NOHSA-VI-NEXT:    s_and_b32 s28, s38, s20
3313; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s29, s41, 16
3314; GCN-NOHSA-VI-NEXT:    s_and_b32 s30, s41, s20
3315; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s31, s40, 16
3316; GCN-NOHSA-VI-NEXT:    s_and_b32 s33, s40, s20
3317; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s34, s43, 16
3318; GCN-NOHSA-VI-NEXT:    s_and_b32 s35, s43, s20
3319; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s36, s42, 16
3320; GCN-NOHSA-VI-NEXT:    s_and_b32 s37, s42, s20
3321; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s38, s45, 16
3322; GCN-NOHSA-VI-NEXT:    s_and_b32 s39, s45, s20
3323; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s40, s44, 16
3324; GCN-NOHSA-VI-NEXT:    s_and_b32 s41, s44, s20
3325; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s42, s47, 16
3326; GCN-NOHSA-VI-NEXT:    s_and_b32 s43, s47, s20
3327; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s44, s46, 16
3328; GCN-NOHSA-VI-NEXT:    s_and_b32 s45, s46, s20
3329; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s46, s49, 16
3330; GCN-NOHSA-VI-NEXT:    s_and_b32 s47, s49, s20
3331; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s49, s48, 16
3332; GCN-NOHSA-VI-NEXT:    s_and_b32 s48, s48, s20
3333; GCN-NOHSA-VI-NEXT:    s_and_b32 s51, s51, s20
3334; GCN-NOHSA-VI-NEXT:    s_and_b32 s50, s50, s20
3335; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, s20
3336; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, s20
3337; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, s20
3338; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, s20
3339; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s9, s20
3340; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, s20
3341; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s11, s20
3342; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s10, s20
3343; GCN-NOHSA-VI-NEXT:    s_and_b32 s20, s18, s20
3344; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s18, s18, 16
3345; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s17, s17, 16
3346; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s16, s16, 16
3347; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s20
3348; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s18
3349; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s68
3350; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s19
3351; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
3352; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s15, s15, 16
3353; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s14, s14, 16
3354; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s67
3355; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s16
3356; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s66
3357; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s17
3358; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
3359; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s13, s13, 16
3360; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s12, s12, 16
3361; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s65
3362; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s14
3363; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s64
3364; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s15
3365; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
3366; GCN-NOHSA-VI-NEXT:    s_nop 0
3367; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s63
3368; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s12
3369; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s62
3370; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s13
3371; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
3372; GCN-NOHSA-VI-NEXT:    s_nop 0
3373; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
3374; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s61
3375; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
3376; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s60
3377; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
3378; GCN-NOHSA-VI-NEXT:    s_nop 0
3379; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
3380; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s59
3381; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
3382; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s58
3383; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
3384; GCN-NOHSA-VI-NEXT:    s_nop 0
3385; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
3386; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s57
3387; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
3388; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s56
3389; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
3390; GCN-NOHSA-VI-NEXT:    s_nop 0
3391; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
3392; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s55
3393; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
3394; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s54
3395; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
3396; GCN-NOHSA-VI-NEXT:    s_nop 0
3397; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s50
3398; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s53
3399; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s51
3400; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s52
3401; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
3402; GCN-NOHSA-VI-NEXT:    s_nop 0
3403; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s48
3404; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s49
3405; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s47
3406; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s46
3407; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
3408; GCN-NOHSA-VI-NEXT:    s_nop 0
3409; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s45
3410; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s44
3411; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s43
3412; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s42
3413; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
3414; GCN-NOHSA-VI-NEXT:    s_nop 0
3415; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s41
3416; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s40
3417; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s39
3418; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s38
3419; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
3420; GCN-NOHSA-VI-NEXT:    s_nop 0
3421; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s37
3422; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s36
3423; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s35
3424; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s34
3425; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3426; GCN-NOHSA-VI-NEXT:    s_nop 0
3427; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s33
3428; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s31
3429; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s30
3430; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s29
3431; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
3432; GCN-NOHSA-VI-NEXT:    s_nop 0
3433; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
3434; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s27
3435; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s26
3436; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s25
3437; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3438; GCN-NOHSA-VI-NEXT:    s_nop 0
3439; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
3440; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s23
3441; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s22
3442; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s21
3443; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3444; GCN-NOHSA-VI-NEXT:    s_endpgm
3445;
3446; EG-LABEL: constant_zextload_v64i16_to_v64i32:
3447; EG:       ; %bb.0:
3448; EG-NEXT:    ALU 0, @38, KC0[CB0:0-32], KC1[]
3449; EG-NEXT:    TEX 3 @22
3450; EG-NEXT:    ALU 55, @39, KC0[CB0:0-32], KC1[]
3451; EG-NEXT:    TEX 3 @30
3452; EG-NEXT:    ALU 87, @95, KC0[CB0:0-32], KC1[]
3453; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0
3454; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T49.X, 0
3455; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T64.X, 0
3456; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T50.X, 0
3457; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T61.X, 0
3458; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T51.X, 0
3459; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T58.X, 0
3460; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T52.X, 0
3461; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T55.X, 0
3462; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T37.X, 0
3463; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T48.X, 0
3464; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T38.X, 0
3465; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T46.X, 0
3466; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T39.X, 0
3467; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T43.X, 0
3468; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T36.X, 1
3469; EG-NEXT:    CF_END
3470; EG-NEXT:    Fetch clause starting at 22:
3471; EG-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 0, #1
3472; EG-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 48, #1
3473; EG-NEXT:     VTX_READ_128 T38.XYZW, T35.X, 32, #1
3474; EG-NEXT:     VTX_READ_128 T39.XYZW, T35.X, 16, #1
3475; EG-NEXT:    Fetch clause starting at 30:
3476; EG-NEXT:     VTX_READ_128 T49.XYZW, T35.X, 112, #1
3477; EG-NEXT:     VTX_READ_128 T50.XYZW, T35.X, 96, #1
3478; EG-NEXT:     VTX_READ_128 T51.XYZW, T35.X, 80, #1
3479; EG-NEXT:     VTX_READ_128 T52.XYZW, T35.X, 64, #1
3480; EG-NEXT:    ALU clause starting at 38:
3481; EG-NEXT:     MOV * T35.X, KC0[2].Z,
3482; EG-NEXT:    ALU clause starting at 39:
3483; EG-NEXT:     LSHR * T40.W, T36.Y, literal.x,
3484; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3485; EG-NEXT:     AND_INT * T40.Z, T36.Y, literal.x,
3486; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3487; EG-NEXT:     LSHR T40.Y, T36.X, literal.x,
3488; EG-NEXT:     LSHR * T41.W, T36.W, literal.x,
3489; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3490; EG-NEXT:     AND_INT T40.X, T36.X, literal.x,
3491; EG-NEXT:     AND_INT T41.Z, T36.W, literal.x,
3492; EG-NEXT:     LSHR * T36.X, KC0[2].Y, literal.y,
3493; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
3494; EG-NEXT:     LSHR T41.Y, T36.Z, literal.x,
3495; EG-NEXT:     LSHR * T42.W, T39.Y, literal.x,
3496; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3497; EG-NEXT:     AND_INT T41.X, T36.Z, literal.x,
3498; EG-NEXT:     AND_INT T42.Z, T39.Y, literal.x,
3499; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3500; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
3501; EG-NEXT:     LSHR T43.X, PV.W, literal.x,
3502; EG-NEXT:     LSHR T42.Y, T39.X, literal.y,
3503; EG-NEXT:     LSHR T44.W, T39.W, literal.y,
3504; EG-NEXT:     AND_INT * T42.X, T39.X, literal.z,
3505; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3506; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3507; EG-NEXT:     AND_INT T44.Z, T39.W, literal.x,
3508; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3509; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
3510; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
3511; EG-NEXT:     LSHR T44.Y, T39.Z, literal.y,
3512; EG-NEXT:     LSHR T45.W, T38.Y, literal.y,
3513; EG-NEXT:     AND_INT * T44.X, T39.Z, literal.z,
3514; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3515; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3516; EG-NEXT:     AND_INT T45.Z, T38.Y, literal.x,
3517; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3518; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
3519; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
3520; EG-NEXT:     LSHR T45.Y, T38.X, literal.y,
3521; EG-NEXT:     LSHR T47.W, T38.W, literal.y,
3522; EG-NEXT:     AND_INT * T45.X, T38.X, literal.z,
3523; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3524; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3525; EG-NEXT:     AND_INT T47.Z, T38.W, literal.x,
3526; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3527; EG-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
3528; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
3529; EG-NEXT:     LSHR T47.Y, T38.Z, literal.y,
3530; EG-NEXT:     AND_INT * T47.X, T38.Z, literal.z,
3531; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3532; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3533; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.x,
3534; EG-NEXT:     LSHR * T35.W, T37.Y, literal.y,
3535; EG-NEXT:    80(1.121039e-43), 16(2.242078e-44)
3536; EG-NEXT:     LSHR T48.X, PV.W, literal.x,
3537; EG-NEXT:     AND_INT * T35.Z, T37.Y, literal.y,
3538; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
3539; EG-NEXT:    ALU clause starting at 95:
3540; EG-NEXT:     LSHR T35.Y, T37.X, literal.x,
3541; EG-NEXT:     LSHR * T53.W, T37.W, literal.x,
3542; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3543; EG-NEXT:     AND_INT T35.X, T37.X, literal.x,
3544; EG-NEXT:     AND_INT T53.Z, T37.W, literal.x,
3545; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3546; EG-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
3547; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
3548; EG-NEXT:     LSHR T53.Y, T37.Z, literal.y,
3549; EG-NEXT:     LSHR T54.W, T52.Y, literal.y,
3550; EG-NEXT:     AND_INT * T53.X, T37.Z, literal.z,
3551; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3552; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3553; EG-NEXT:     AND_INT T54.Z, T52.Y, literal.x,
3554; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3555; EG-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
3556; EG-NEXT:     LSHR T55.X, PV.W, literal.x,
3557; EG-NEXT:     LSHR T54.Y, T52.X, literal.y,
3558; EG-NEXT:     LSHR T56.W, T52.W, literal.y,
3559; EG-NEXT:     AND_INT * T54.X, T52.X, literal.z,
3560; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3561; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3562; EG-NEXT:     AND_INT T56.Z, T52.W, literal.x,
3563; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3564; EG-NEXT:    65535(9.183409e-41), 128(1.793662e-43)
3565; EG-NEXT:     LSHR T52.X, PV.W, literal.x,
3566; EG-NEXT:     LSHR T56.Y, T52.Z, literal.y,
3567; EG-NEXT:     LSHR T57.W, T51.Y, literal.y,
3568; EG-NEXT:     AND_INT * T56.X, T52.Z, literal.z,
3569; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3570; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3571; EG-NEXT:     AND_INT T57.Z, T51.Y, literal.x,
3572; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3573; EG-NEXT:    65535(9.183409e-41), 144(2.017870e-43)
3574; EG-NEXT:     LSHR T58.X, PV.W, literal.x,
3575; EG-NEXT:     LSHR T57.Y, T51.X, literal.y,
3576; EG-NEXT:     LSHR T59.W, T51.W, literal.y,
3577; EG-NEXT:     AND_INT * T57.X, T51.X, literal.z,
3578; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3579; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3580; EG-NEXT:     AND_INT T59.Z, T51.W, literal.x,
3581; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3582; EG-NEXT:    65535(9.183409e-41), 160(2.242078e-43)
3583; EG-NEXT:     LSHR T51.X, PV.W, literal.x,
3584; EG-NEXT:     LSHR T59.Y, T51.Z, literal.y,
3585; EG-NEXT:     LSHR T60.W, T50.Y, literal.y,
3586; EG-NEXT:     AND_INT * T59.X, T51.Z, literal.z,
3587; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3588; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3589; EG-NEXT:     AND_INT T60.Z, T50.Y, literal.x,
3590; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3591; EG-NEXT:    65535(9.183409e-41), 176(2.466285e-43)
3592; EG-NEXT:     LSHR T61.X, PV.W, literal.x,
3593; EG-NEXT:     LSHR T60.Y, T50.X, literal.y,
3594; EG-NEXT:     LSHR T62.W, T50.W, literal.y,
3595; EG-NEXT:     AND_INT * T60.X, T50.X, literal.z,
3596; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3597; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3598; EG-NEXT:     AND_INT T62.Z, T50.W, literal.x,
3599; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3600; EG-NEXT:    65535(9.183409e-41), 192(2.690493e-43)
3601; EG-NEXT:     LSHR T50.X, PV.W, literal.x,
3602; EG-NEXT:     LSHR T62.Y, T50.Z, literal.y,
3603; EG-NEXT:     LSHR T63.W, T49.Y, literal.y,
3604; EG-NEXT:     AND_INT * T62.X, T50.Z, literal.z,
3605; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3606; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3607; EG-NEXT:     AND_INT T63.Z, T49.Y, literal.x,
3608; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3609; EG-NEXT:    65535(9.183409e-41), 208(2.914701e-43)
3610; EG-NEXT:     LSHR T64.X, PV.W, literal.x,
3611; EG-NEXT:     LSHR T63.Y, T49.X, literal.y,
3612; EG-NEXT:     LSHR T65.W, T49.W, literal.y,
3613; EG-NEXT:     AND_INT * T63.X, T49.X, literal.z,
3614; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3615; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3616; EG-NEXT:     AND_INT T65.Z, T49.W, literal.x,
3617; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3618; EG-NEXT:    65535(9.183409e-41), 224(3.138909e-43)
3619; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
3620; EG-NEXT:     LSHR T65.Y, T49.Z, literal.y,
3621; EG-NEXT:     AND_INT * T65.X, T49.Z, literal.z,
3622; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3623; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3624; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
3625; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
3626; EG-NEXT:     LSHR * T66.X, PV.W, literal.x,
3627; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
3628  %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
3629  %ext = zext <64 x i16> %load to <64 x i32>
3630  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
3631  ret void
3632}
3633
3634define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
3635; GCN-NOHSA-SI-LABEL: constant_sextload_v64i16_to_v64i32:
3636; GCN-NOHSA-SI:       ; %bb.0:
3637; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
3638; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
3639; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[36:51], s[2:3], 0x0
3640; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x10
3641; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
3642; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s20, s37, 16
3643; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s21, s36, 16
3644; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s22, s37
3645; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s23, s36
3646; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s24, s39, 16
3647; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s25, s38, 16
3648; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s26, s39
3649; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s27, s38
3650; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s28, s41, 16
3651; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s29, s40, 16
3652; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s30, s41
3653; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s31, s40
3654; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s33, s43, 16
3655; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s34, s42, 16
3656; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s35, s43
3657; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s36, s42
3658; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s37, s45, 16
3659; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s38, s44, 16
3660; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s39, s45
3661; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s40, s44
3662; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s41, s47, 16
3663; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s42, s46, 16
3664; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s43, s47
3665; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s44, s46
3666; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s45, s49, 16
3667; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s46, s48, 16
3668; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s47, s49
3669; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s48, s48
3670; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s49, s51, 16
3671; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s52, s50, 16
3672; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s51, s51
3673; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s50, s50
3674; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s53, s5, 16
3675; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s54, s4, 16
3676; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
3677; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
3678; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s55, s7, 16
3679; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s56, s6, 16
3680; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
3681; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
3682; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s57, s9, 16
3683; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s58, s8, 16
3684; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s9, s9
3685; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s8, s8
3686; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s59, s10, 16
3687; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s60, s11
3688; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s10, s10
3689; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s61, s13, 16
3690; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s62, s12, 16
3691; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s13, s13
3692; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s12, s12
3693; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s63, s15, 16
3694; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s64, s14, 16
3695; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s15, s15
3696; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s14, s14
3697; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s65, s17, 16
3698; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s66, s16, 16
3699; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s17, s17
3700; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s16, s16
3701; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s67, s19, 16
3702; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s68, s18, 16
3703; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s19, s19
3704; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s18, s18
3705; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s11, s11, 16
3706; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
3707; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
3708; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
3709; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s68
3710; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s19
3711; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s67
3712; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s16
3713; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s66
3714; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s17
3715; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s65
3716; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s14
3717; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s64
3718; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s15
3719; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s63
3720; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s12
3721; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s62
3722; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s13
3723; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s61
3724; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s10
3725; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s59
3726; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s60
3727; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s8
3728; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s11
3729; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s58
3730; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s9
3731; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s57
3732; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
3733; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
3734; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
3735; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
3736; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
3737; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160
3738; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(5)
3739; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
3740; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s56
3741; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
3742; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s55
3743; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
3744; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3745; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
3746; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s54
3747; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
3748; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s53
3749; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
3750; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3751; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s50
3752; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s52
3753; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s51
3754; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s49
3755; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
3756; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3757; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s48
3758; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s46
3759; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s47
3760; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s45
3761; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
3762; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3763; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s44
3764; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s42
3765; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s43
3766; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s41
3767; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
3768; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3769; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s40
3770; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s38
3771; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s39
3772; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s37
3773; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
3774; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3775; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s36
3776; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
3777; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s35
3778; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
3779; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3780; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3781; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s31
3782; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s29
3783; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s30
3784; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s28
3785; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
3786; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3787; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s27
3788; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s25
3789; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s26
3790; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s24
3791; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3792; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3793; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s23
3794; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s21
3795; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s22
3796; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s20
3797; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3798; GCN-NOHSA-SI-NEXT:    s_endpgm
3799;
3800; GCN-HSA-LABEL: constant_sextload_v64i16_to_v64i32:
3801; GCN-HSA:       ; %bb.0:
3802; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
3803; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3804; GCN-HSA-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x0
3805; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3806; GCN-HSA-NEXT:    s_ashr_i32 s20, s5, 16
3807; GCN-HSA-NEXT:    s_ashr_i32 s21, s4, 16
3808; GCN-HSA-NEXT:    s_sext_i32_i16 s22, s5
3809; GCN-HSA-NEXT:    s_sext_i32_i16 s23, s4
3810; GCN-HSA-NEXT:    s_ashr_i32 s24, s7, 16
3811; GCN-HSA-NEXT:    s_ashr_i32 s25, s6, 16
3812; GCN-HSA-NEXT:    s_sext_i32_i16 s26, s7
3813; GCN-HSA-NEXT:    s_sext_i32_i16 s27, s6
3814; GCN-HSA-NEXT:    s_ashr_i32 s28, s9, 16
3815; GCN-HSA-NEXT:    s_ashr_i32 s29, s8, 16
3816; GCN-HSA-NEXT:    s_sext_i32_i16 s30, s9
3817; GCN-HSA-NEXT:    s_sext_i32_i16 s31, s8
3818; GCN-HSA-NEXT:    s_ashr_i32 s33, s11, 16
3819; GCN-HSA-NEXT:    s_ashr_i32 s34, s10, 16
3820; GCN-HSA-NEXT:    s_sext_i32_i16 s35, s11
3821; GCN-HSA-NEXT:    s_sext_i32_i16 s36, s10
3822; GCN-HSA-NEXT:    s_ashr_i32 s37, s13, 16
3823; GCN-HSA-NEXT:    s_ashr_i32 s38, s12, 16
3824; GCN-HSA-NEXT:    s_sext_i32_i16 s39, s13
3825; GCN-HSA-NEXT:    s_sext_i32_i16 s40, s12
3826; GCN-HSA-NEXT:    s_ashr_i32 s41, s15, 16
3827; GCN-HSA-NEXT:    s_ashr_i32 s42, s14, 16
3828; GCN-HSA-NEXT:    s_sext_i32_i16 s43, s15
3829; GCN-HSA-NEXT:    s_sext_i32_i16 s44, s14
3830; GCN-HSA-NEXT:    s_ashr_i32 s45, s17, 16
3831; GCN-HSA-NEXT:    s_ashr_i32 s46, s16, 16
3832; GCN-HSA-NEXT:    s_sext_i32_i16 s47, s17
3833; GCN-HSA-NEXT:    s_sext_i32_i16 s48, s16
3834; GCN-HSA-NEXT:    s_ashr_i32 s49, s19, 16
3835; GCN-HSA-NEXT:    s_ashr_i32 s50, s18, 16
3836; GCN-HSA-NEXT:    s_sext_i32_i16 s51, s19
3837; GCN-HSA-NEXT:    s_sext_i32_i16 s52, s18
3838; GCN-HSA-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x10
3839; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3840; GCN-HSA-NEXT:    s_ashr_i32 s57, s9, 16
3841; GCN-HSA-NEXT:    s_ashr_i32 s59, s11, 16
3842; GCN-HSA-NEXT:    s_ashr_i32 s60, s10, 16
3843; GCN-HSA-NEXT:    s_ashr_i32 s61, s13, 16
3844; GCN-HSA-NEXT:    s_ashr_i32 s62, s12, 16
3845; GCN-HSA-NEXT:    s_ashr_i32 s63, s15, 16
3846; GCN-HSA-NEXT:    s_ashr_i32 s64, s14, 16
3847; GCN-HSA-NEXT:    s_ashr_i32 s65, s17, 16
3848; GCN-HSA-NEXT:    s_ashr_i32 s66, s16, 16
3849; GCN-HSA-NEXT:    s_ashr_i32 s67, s19, 16
3850; GCN-HSA-NEXT:    s_ashr_i32 s68, s18, 16
3851; GCN-HSA-NEXT:    s_ashr_i32 s53, s5, 16
3852; GCN-HSA-NEXT:    s_ashr_i32 s54, s4, 16
3853; GCN-HSA-NEXT:    s_ashr_i32 s55, s7, 16
3854; GCN-HSA-NEXT:    s_ashr_i32 s56, s6, 16
3855; GCN-HSA-NEXT:    s_ashr_i32 s58, s8, 16
3856; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xf0
3857; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3858; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s3
3859; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s2
3860; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xe0
3861; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3862; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s3
3863; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s2
3864; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xd0
3865; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3866; GCN-HSA-NEXT:    v_mov_b32_e32 v27, s3
3867; GCN-HSA-NEXT:    v_mov_b32_e32 v26, s2
3868; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xc0
3869; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3870; GCN-HSA-NEXT:    v_mov_b32_e32 v29, s3
3871; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s2
3872; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xb0
3873; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3874; GCN-HSA-NEXT:    v_mov_b32_e32 v31, s3
3875; GCN-HSA-NEXT:    v_mov_b32_e32 v30, s2
3876; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xa0
3877; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3878; GCN-HSA-NEXT:    v_mov_b32_e32 v33, s3
3879; GCN-HSA-NEXT:    s_sext_i32_i16 s17, s17
3880; GCN-HSA-NEXT:    s_sext_i32_i16 s16, s16
3881; GCN-HSA-NEXT:    v_mov_b32_e32 v32, s2
3882; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x90
3883; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
3884; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s66
3885; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s17
3886; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s65
3887; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3888; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[4:7]
3889; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s3
3890; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s2
3891; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x80
3892; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3893; GCN-HSA-NEXT:    v_mov_b32_e32 v35, s3
3894; GCN-HSA-NEXT:    s_sext_i32_i16 s11, s11
3895; GCN-HSA-NEXT:    s_sext_i32_i16 s10, s10
3896; GCN-HSA-NEXT:    v_mov_b32_e32 v34, s2
3897; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
3898; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s10
3899; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s60
3900; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s11
3901; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s59
3902; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3903; GCN-HSA-NEXT:    flat_store_dwordx4 v[30:31], v[16:19]
3904; GCN-HSA-NEXT:    s_sext_i32_i16 s19, s19
3905; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
3906; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
3907; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
3908; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3909; GCN-HSA-NEXT:    s_sext_i32_i16 s18, s18
3910; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
3911; GCN-HSA-NEXT:    s_sext_i32_i16 s4, s4
3912; GCN-HSA-NEXT:    s_sext_i32_i16 s8, s8
3913; GCN-HSA-NEXT:    s_sext_i32_i16 s13, s13
3914; GCN-HSA-NEXT:    s_sext_i32_i16 s12, s12
3915; GCN-HSA-NEXT:    s_sext_i32_i16 s15, s15
3916; GCN-HSA-NEXT:    s_sext_i32_i16 s14, s14
3917; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
3918; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
3919; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s7
3920; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s6
3921; GCN-HSA-NEXT:    s_sext_i32_i16 s9, s9
3922; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s18
3923; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s68
3924; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s19
3925; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s67
3926; GCN-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[0:3]
3927; GCN-HSA-NEXT:    s_sext_i32_i16 s5, s5
3928; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s14
3929; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s64
3930; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s15
3931; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s63
3932; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s12
3933; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s62
3934; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s13
3935; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s61
3936; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s8
3937; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
3938; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s58
3939; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s9
3940; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s57
3941; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s56
3942; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
3943; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
3944; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s55
3945; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s54
3946; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3947; GCN-HSA-NEXT:    flat_store_dwordx4 v[26:27], v[8:11]
3948; GCN-HSA-NEXT:    flat_store_dwordx4 v[28:29], v[12:15]
3949; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s5
3950; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s52
3951; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s53
3952; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s48
3953; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s50
3954; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s51
3955; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s49
3956; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s46
3957; GCN-HSA-NEXT:    flat_store_dwordx4 v[32:33], v[20:23]
3958; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s47
3959; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s45
3960; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[0:3]
3961; GCN-HSA-NEXT:    flat_store_dwordx4 v[34:35], v[4:7]
3962; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
3963; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
3964; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3965; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3966; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
3967; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s44
3968; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s42
3969; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s43
3970; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s41
3971; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3972; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3973; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3974; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3975; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
3976; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s40
3977; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s38
3978; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s39
3979; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s37
3980; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3981; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3982; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3983; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3984; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
3985; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s36
3986; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s34
3987; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s35
3988; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s33
3989; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3990; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3991; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3992; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3993; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
3994; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s31
3995; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
3996; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s30
3997; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s28
3998; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3999; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4000; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
4001; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s27
4002; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
4003; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s26
4004; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s24
4005; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
4006; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4007; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4008; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s23
4009; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
4010; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s22
4011; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s20
4012; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4013; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4014; GCN-HSA-NEXT:    s_endpgm
4015;
4016; GCN-NOHSA-VI-LABEL: constant_sextload_v64i16_to_v64i32:
4017; GCN-NOHSA-VI:       ; %bb.0:
4018; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4019; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4020; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x40
4021; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[36:51], s[2:3], 0x0
4022; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
4023; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
4024; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4025; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s67, s19, 16
4026; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s68, s18, 16
4027; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s19, s19
4028; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s18, s18
4029; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s65, s17, 16
4030; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s66, s16, 16
4031; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s17, s17
4032; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s16, s16
4033; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
4034; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s68
4035; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s19
4036; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s67
4037; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
4038; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s63, s15, 16
4039; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s64, s14, 16
4040; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s15, s15
4041; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s14, s14
4042; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
4043; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s66
4044; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s17
4045; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s65
4046; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
4047; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s61, s13, 16
4048; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s62, s12, 16
4049; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s13, s13
4050; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s12, s12
4051; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
4052; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s64
4053; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
4054; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s63
4055; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
4056; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s59, s11, 16
4057; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s60, s10, 16
4058; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s11, s11
4059; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s10, s10
4060; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
4061; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s62
4062; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
4063; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s61
4064; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
4065; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s57, s9, 16
4066; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s58, s8, 16
4067; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s9, s9
4068; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s8, s8
4069; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
4070; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s60
4071; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
4072; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s59
4073; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
4074; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s55, s7, 16
4075; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s56, s6, 16
4076; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
4077; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
4078; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
4079; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s58
4080; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
4081; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s57
4082; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
4083; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s53, s5, 16
4084; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s54, s4, 16
4085; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
4086; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
4087; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
4088; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s56
4089; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
4090; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s55
4091; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
4092; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s52, s50, 16
4093; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s20, s37, 16
4094; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s22, s37
4095; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s24, s39, 16
4096; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s26, s39
4097; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s28, s41, 16
4098; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s30, s41
4099; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s33, s43, 16
4100; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s35, s43
4101; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s37, s45, 16
4102; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s39, s45
4103; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s41, s47, 16
4104; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s43, s47
4105; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s45, s49, 16
4106; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s47, s49
4107; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s49, s51, 16
4108; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s51, s51
4109; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s50, s50
4110; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
4111; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s54
4112; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
4113; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s53
4114; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
4115; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s21, s36, 16
4116; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s23, s36
4117; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s25, s38, 16
4118; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s27, s38
4119; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s29, s40, 16
4120; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s31, s40
4121; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s34, s42, 16
4122; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s36, s42
4123; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s38, s44, 16
4124; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s40, s44
4125; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s42, s46, 16
4126; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s44, s46
4127; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s46, s48, 16
4128; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s48, s48
4129; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s50
4130; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s52
4131; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s51
4132; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s49
4133; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
4134; GCN-NOHSA-VI-NEXT:    s_nop 0
4135; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s48
4136; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s46
4137; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s47
4138; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s45
4139; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
4140; GCN-NOHSA-VI-NEXT:    s_nop 0
4141; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s44
4142; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s42
4143; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s43
4144; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s41
4145; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
4146; GCN-NOHSA-VI-NEXT:    s_nop 0
4147; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s40
4148; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s38
4149; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s39
4150; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s37
4151; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
4152; GCN-NOHSA-VI-NEXT:    s_nop 0
4153; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s36
4154; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s34
4155; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s35
4156; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s33
4157; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
4158; GCN-NOHSA-VI-NEXT:    s_nop 0
4159; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s31
4160; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s29
4161; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s30
4162; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s28
4163; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
4164; GCN-NOHSA-VI-NEXT:    s_nop 0
4165; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s27
4166; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s25
4167; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s26
4168; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s24
4169; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4170; GCN-NOHSA-VI-NEXT:    s_nop 0
4171; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s23
4172; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s21
4173; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s22
4174; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s20
4175; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4176; GCN-NOHSA-VI-NEXT:    s_endpgm
4177;
4178; EG-LABEL: constant_sextload_v64i16_to_v64i32:
4179; EG:       ; %bb.0:
4180; EG-NEXT:    ALU 17, @38, KC0[CB0:0-32], KC1[]
4181; EG-NEXT:    TEX 7 @22
4182; EG-NEXT:    ALU 75, @56, KC0[CB0:0-32], KC1[]
4183; EG-NEXT:    ALU 71, @132, KC0[CB0:0-32], KC1[]
4184; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T66.XYZW, T48.X, 0
4185; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T41.X, 0
4186; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T56.X, 0
4187; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T55.X, 0
4188; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T54.X, 0
4189; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T53.X, 0
4190; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T52.X, 0
4191; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T51.X, 0
4192; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T50.X, 0
4193; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T49.X, 0
4194; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T40.X, 0
4195; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T39.X, 0
4196; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T38.X, 0
4197; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T37.X, 0
4198; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0
4199; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1
4200; EG-NEXT:    CF_END
4201; EG-NEXT:    PAD
4202; EG-NEXT:    Fetch clause starting at 22:
4203; EG-NEXT:     VTX_READ_128 T42.XYZW, T41.X, 16, #1
4204; EG-NEXT:     VTX_READ_128 T43.XYZW, T41.X, 32, #1
4205; EG-NEXT:     VTX_READ_128 T44.XYZW, T41.X, 0, #1
4206; EG-NEXT:     VTX_READ_128 T45.XYZW, T41.X, 48, #1
4207; EG-NEXT:     VTX_READ_128 T46.XYZW, T41.X, 64, #1
4208; EG-NEXT:     VTX_READ_128 T47.XYZW, T41.X, 80, #1
4209; EG-NEXT:     VTX_READ_128 T48.XYZW, T41.X, 96, #1
4210; EG-NEXT:     VTX_READ_128 T41.XYZW, T41.X, 112, #1
4211; EG-NEXT:    ALU clause starting at 38:
4212; EG-NEXT:     LSHR T35.X, KC0[2].Y, literal.x,
4213; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4214; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4215; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
4216; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4217; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
4218; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
4219; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4220; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
4221; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
4222; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4223; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
4224; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
4225; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4226; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
4227; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
4228; EG-NEXT:     MOV * T41.X, KC0[2].Z,
4229; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4230; EG-NEXT:    ALU clause starting at 56:
4231; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4232; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
4233; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
4234; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4235; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
4236; EG-NEXT:     LSHR T50.X, PV.W, literal.x,
4237; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4238; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
4239; EG-NEXT:     LSHR T51.X, PV.W, literal.x,
4240; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4241; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
4242; EG-NEXT:     LSHR T52.X, PV.W, literal.x,
4243; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4244; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
4245; EG-NEXT:     LSHR T53.X, PV.W, literal.x,
4246; EG-NEXT:     LSHR T0.Y, T41.W, literal.y,
4247; EG-NEXT:     LSHR T0.Z, T41.Y, literal.y,
4248; EG-NEXT:     LSHR T0.W, T48.W, literal.y, BS:VEC_120/SCL_212
4249; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
4250; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4251; EG-NEXT:    176(2.466285e-43), 0(0.000000e+00)
4252; EG-NEXT:     LSHR T54.X, PS, literal.x,
4253; EG-NEXT:     LSHR T1.Y, T48.Y, literal.y,
4254; EG-NEXT:     LSHR T1.Z, T47.W, literal.y,
4255; EG-NEXT:     LSHR T1.W, T47.Y, literal.y, BS:VEC_120/SCL_212
4256; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.z,
4257; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4258; EG-NEXT:    192(2.690493e-43), 0(0.000000e+00)
4259; EG-NEXT:     LSHR T55.X, PS, literal.x,
4260; EG-NEXT:     LSHR T2.Y, T46.W, literal.y,
4261; EG-NEXT:     LSHR T2.Z, T46.Y, literal.y,
4262; EG-NEXT:     LSHR T2.W, T45.W, literal.y, BS:VEC_120/SCL_212
4263; EG-NEXT:     ADD_INT * T3.W, KC0[2].Y, literal.z,
4264; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4265; EG-NEXT:    208(2.914701e-43), 0(0.000000e+00)
4266; EG-NEXT:     LSHR T56.X, PS, literal.x,
4267; EG-NEXT:     LSHR T3.Y, T45.Y, literal.y,
4268; EG-NEXT:     BFE_INT T57.Z, T44.Y, 0.0, literal.y, BS:VEC_120/SCL_212
4269; EG-NEXT:     LSHR T3.W, T43.W, literal.y,
4270; EG-NEXT:     LSHR * T4.W, T43.Y, literal.y,
4271; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4272; EG-NEXT:     BFE_INT T57.X, T44.X, 0.0, literal.x,
4273; EG-NEXT:     LSHR T4.Y, T42.W, literal.x,
4274; EG-NEXT:     BFE_INT T58.Z, T44.W, 0.0, literal.x, BS:VEC_120/SCL_212
4275; EG-NEXT:     LSHR T5.W, T42.Y, literal.x,
4276; EG-NEXT:     LSHR * T6.W, T44.Y, literal.x,
4277; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4278; EG-NEXT:     BFE_INT T58.X, T44.Z, 0.0, literal.x,
4279; EG-NEXT:     LSHR T5.Y, T44.W, literal.x,
4280; EG-NEXT:     BFE_INT T59.Z, T42.Y, 0.0, literal.x,
4281; EG-NEXT:     BFE_INT T57.W, PS, 0.0, literal.x,
4282; EG-NEXT:     LSHR * T6.W, T44.X, literal.x,
4283; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4284; EG-NEXT:     BFE_INT T59.X, T42.X, 0.0, literal.x,
4285; EG-NEXT:     BFE_INT T57.Y, PS, 0.0, literal.x,
4286; EG-NEXT:     BFE_INT T60.Z, T42.W, 0.0, literal.x,
4287; EG-NEXT:     BFE_INT T58.W, PV.Y, 0.0, literal.x,
4288; EG-NEXT:     LSHR * T6.W, T44.Z, literal.x,
4289; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4290; EG-NEXT:     BFE_INT T60.X, T42.Z, 0.0, literal.x,
4291; EG-NEXT:     BFE_INT T58.Y, PS, 0.0, literal.x,
4292; EG-NEXT:     BFE_INT T44.Z, T43.Y, 0.0, literal.x,
4293; EG-NEXT:     BFE_INT T59.W, T5.W, 0.0, literal.x,
4294; EG-NEXT:     LSHR * T5.W, T42.X, literal.x,
4295; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4296; EG-NEXT:     BFE_INT T44.X, T43.X, 0.0, literal.x,
4297; EG-NEXT:     BFE_INT T59.Y, PS, 0.0, literal.x,
4298; EG-NEXT:     BFE_INT T61.Z, T43.W, 0.0, literal.x,
4299; EG-NEXT:     BFE_INT T60.W, T4.Y, 0.0, literal.x,
4300; EG-NEXT:     LSHR * T5.W, T42.Z, literal.x,
4301; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4302; EG-NEXT:     BFE_INT T61.X, T43.Z, 0.0, literal.x,
4303; EG-NEXT:     BFE_INT T60.Y, PS, 0.0, literal.x,
4304; EG-NEXT:     BFE_INT T42.Z, T45.Y, 0.0, literal.x,
4305; EG-NEXT:     BFE_INT * T44.W, T4.W, 0.0, literal.x,
4306; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4307; EG-NEXT:    ALU clause starting at 132:
4308; EG-NEXT:     LSHR * T4.W, T43.X, literal.x,
4309; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4310; EG-NEXT:     BFE_INT T42.X, T45.X, 0.0, literal.x,
4311; EG-NEXT:     BFE_INT T44.Y, PV.W, 0.0, literal.x,
4312; EG-NEXT:     BFE_INT T62.Z, T45.W, 0.0, literal.x,
4313; EG-NEXT:     BFE_INT T61.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212
4314; EG-NEXT:     LSHR * T3.W, T43.Z, literal.x,
4315; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4316; EG-NEXT:     BFE_INT T62.X, T45.Z, 0.0, literal.x,
4317; EG-NEXT:     BFE_INT T61.Y, PS, 0.0, literal.x,
4318; EG-NEXT:     BFE_INT T43.Z, T46.Y, 0.0, literal.x,
4319; EG-NEXT:     BFE_INT T42.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4320; EG-NEXT:     LSHR * T3.W, T45.X, literal.x,
4321; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4322; EG-NEXT:     BFE_INT T43.X, T46.X, 0.0, literal.x,
4323; EG-NEXT:     BFE_INT T42.Y, PS, 0.0, literal.x,
4324; EG-NEXT:     BFE_INT T63.Z, T46.W, 0.0, literal.x,
4325; EG-NEXT:     BFE_INT T62.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212
4326; EG-NEXT:     LSHR * T2.W, T45.Z, literal.x,
4327; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4328; EG-NEXT:     BFE_INT T63.X, T46.Z, 0.0, literal.x,
4329; EG-NEXT:     BFE_INT T62.Y, PS, 0.0, literal.x,
4330; EG-NEXT:     BFE_INT T45.Z, T47.Y, 0.0, literal.x,
4331; EG-NEXT:     BFE_INT T43.W, T2.Z, 0.0, literal.x, BS:VEC_120/SCL_212
4332; EG-NEXT:     LSHR * T2.W, T46.X, literal.x,
4333; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4334; EG-NEXT:     BFE_INT T45.X, T47.X, 0.0, literal.x,
4335; EG-NEXT:     BFE_INT T43.Y, PS, 0.0, literal.x,
4336; EG-NEXT:     BFE_INT T64.Z, T47.W, 0.0, literal.x,
4337; EG-NEXT:     BFE_INT T63.W, T2.Y, 0.0, literal.x,
4338; EG-NEXT:     LSHR * T2.W, T46.Z, literal.x,
4339; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4340; EG-NEXT:     BFE_INT T64.X, T47.Z, 0.0, literal.x,
4341; EG-NEXT:     BFE_INT T63.Y, PS, 0.0, literal.x,
4342; EG-NEXT:     BFE_INT T46.Z, T48.Y, 0.0, literal.x,
4343; EG-NEXT:     BFE_INT T45.W, T1.W, 0.0, literal.x,
4344; EG-NEXT:     LSHR * T1.W, T47.X, literal.x,
4345; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4346; EG-NEXT:     BFE_INT T46.X, T48.X, 0.0, literal.x,
4347; EG-NEXT:     BFE_INT T45.Y, PS, 0.0, literal.x,
4348; EG-NEXT:     BFE_INT T65.Z, T48.W, 0.0, literal.x,
4349; EG-NEXT:     BFE_INT T64.W, T1.Z, 0.0, literal.x,
4350; EG-NEXT:     LSHR * T1.W, T47.Z, literal.x,
4351; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4352; EG-NEXT:     BFE_INT T65.X, T48.Z, 0.0, literal.x,
4353; EG-NEXT:     BFE_INT T64.Y, PS, 0.0, literal.x,
4354; EG-NEXT:     BFE_INT T47.Z, T41.Y, 0.0, literal.x,
4355; EG-NEXT:     BFE_INT T46.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4356; EG-NEXT:     LSHR * T1.W, T48.X, literal.x,
4357; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4358; EG-NEXT:     BFE_INT T47.X, T41.X, 0.0, literal.x,
4359; EG-NEXT:     BFE_INT T46.Y, PS, 0.0, literal.x,
4360; EG-NEXT:     BFE_INT T66.Z, T41.W, 0.0, literal.x,
4361; EG-NEXT:     BFE_INT T65.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212
4362; EG-NEXT:     LSHR * T0.W, T48.Z, literal.x,
4363; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4364; EG-NEXT:     BFE_INT T66.X, T41.Z, 0.0, literal.x,
4365; EG-NEXT:     BFE_INT T65.Y, PS, 0.0, literal.x,
4366; EG-NEXT:     LSHR T1.Z, T41.X, literal.x,
4367; EG-NEXT:     BFE_INT T47.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212
4368; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4369; EG-NEXT:    16(2.242078e-44), 224(3.138909e-43)
4370; EG-NEXT:     LSHR T41.X, PS, literal.x,
4371; EG-NEXT:     BFE_INT T47.Y, PV.Z, 0.0, literal.y,
4372; EG-NEXT:     LSHR T0.Z, T41.Z, literal.y,
4373; EG-NEXT:     BFE_INT T66.W, T0.Y, 0.0, literal.y,
4374; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
4375; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4376; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
4377; EG-NEXT:     LSHR T48.X, PS, literal.x,
4378; EG-NEXT:     BFE_INT * T66.Y, PV.Z, 0.0, literal.y,
4379; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4380  %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
4381  %ext = sext <64 x i16> %load to <64 x i32>
4382  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
4383  ret void
4384}
4385
; Scalar zero-extending load test: the kernel reads one i16 through %in (an
; addrspace(4) pointer), zero-extends it to i64 and writes the result through
; %out (an addrspace(1) pointer).
;
; What the expected output below pins down, per check prefix:
;  - the GCN-NOHSA-SI and GCN-HSA checks expect an unsigned 16-bit load
;    (buffer_load_ushort / flat_load_ushort) with the high dword of the stored
;    pair set to a constant 0;
;  - the GCN-NOHSA-VI checks expect the same, plus a v_and_b32 with 0xffff on
;    the loaded value before the store;
;  - the EG checks expect a VTX_READ_16 fetch, with T0.Y (the second stored
;    component) set to 0.0.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
4386define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
4387; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i64:
4388; GCN-NOHSA-SI:       ; %bb.0:
4389; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4390; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
4391; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
4392; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
4393; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
4394; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4395; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
4396; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
4397; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4398; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
4399; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
4400; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
4401; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4402; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4403; GCN-NOHSA-SI-NEXT:    s_endpgm
4404;
4405; GCN-HSA-LABEL: constant_zextload_i16_to_i64:
4406; GCN-HSA:       ; %bb.0:
4407; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4408; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4409; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
4410; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
4411; GCN-HSA-NEXT:    flat_load_ushort v2, v[2:3]
4412; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
4413; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
4414; GCN-HSA-NEXT:    v_mov_b32_e32 v3, 0
4415; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
4416; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
4417; GCN-HSA-NEXT:    s_endpgm
4418;
4419; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i64:
4420; GCN-NOHSA-VI:       ; %bb.0:
4421; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
4422; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
4423; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
4424; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
4425; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
4426; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4427; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
4428; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
4429; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4430; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
4431; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
4432; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
4433; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4434; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4435; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
4436; GCN-NOHSA-VI-NEXT:    s_endpgm
4437;
4438; EG-LABEL: constant_zextload_i16_to_i64:
4439; EG:       ; %bb.0:
4440; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4441; EG-NEXT:    TEX 0 @6
4442; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
4443; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4444; EG-NEXT:    CF_END
4445; EG-NEXT:    PAD
4446; EG-NEXT:    Fetch clause starting at 6:
4447; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
4448; EG-NEXT:    ALU clause starting at 8:
4449; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4450; EG-NEXT:    ALU clause starting at 9:
4451; EG-NEXT:     MOV * T0.Y, 0.0,
4452; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
4453; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4454  %a = load i16, i16 addrspace(4)* %in
4455  %ext = zext i16 %a to i64
4456  store i64 %ext, i64 addrspace(1)* %out
4457  ret void
4458}
4459
4460; FIXME: Need to optimize this sequence to avoid extra bfe:
4461;  t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64
4462;          t31: i64 = any_extend t28
4463;        t33: i64 = sign_extend_inreg t31, ValueType:ch:i16
4464; TODO: These could be expanded earlier using ASHR 15
; Scalar sign-extending load test: the kernel reads one i16 through %in (an
; addrspace(4) pointer), sign-extends it to i64 and writes the result through
; %out (an addrspace(1) pointer).
;
; What the expected output below pins down, per check prefix:
;  - the GCN-NOHSA-SI and GCN-HSA checks expect a signed 16-bit load
;    (buffer_load_sshort / flat_load_sshort); the high dword is produced by an
;    arithmetic shift right by 31 of the loaded value;
;  - the GCN-NOHSA-VI checks expect an unsigned load (buffer_load_ushort)
;    followed by v_bfe_i32 0, 16 and the same shift by 31 -- presumably the
;    "extra bfe" the FIXME above wants removed (NOTE(review): confirm);
;  - the EG checks expect VTX_READ_16, a BFE_INT with width literal 16, and an
;    ASHR by 31 to fill T0.Y.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
4465define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
4466; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i64:
4467; GCN-NOHSA-SI:       ; %bb.0:
4468; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4469; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
4470; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
4471; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
4472; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
4473; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4474; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
4475; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
4476; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
4477; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
4478; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
4479; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4480; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4481; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4482; GCN-NOHSA-SI-NEXT:    s_endpgm
4483;
4484; GCN-HSA-LABEL: constant_sextload_i16_to_i64:
4485; GCN-HSA:       ; %bb.0:
4486; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4487; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4488; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
4489; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
4490; GCN-HSA-NEXT:    flat_load_sshort v2, v[2:3]
4491; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
4492; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
4493; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
4494; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
4495; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
4496; GCN-HSA-NEXT:    s_endpgm
4497;
4498; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i64:
4499; GCN-NOHSA-VI:       ; %bb.0:
4500; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
4501; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
4502; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
4503; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4504; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
4505; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
4506; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
4507; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
4508; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
4509; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
4510; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
4511; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4512; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
4513; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4514; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
4515; GCN-NOHSA-VI-NEXT:    s_endpgm
4516;
4517; EG-LABEL: constant_sextload_i16_to_i64:
4518; EG:       ; %bb.0:
4519; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4520; EG-NEXT:    TEX 0 @6
4521; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
4522; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4523; EG-NEXT:    CF_END
4524; EG-NEXT:    PAD
4525; EG-NEXT:    Fetch clause starting at 6:
4526; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
4527; EG-NEXT:    ALU clause starting at 8:
4528; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4529; EG-NEXT:    ALU clause starting at 9:
4530; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
4531; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
4532; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
4533; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
4534; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4535  %a = load i16, i16 addrspace(4)* %in
4536  %ext = sext i16 %a to i64
4537  store i64 %ext, i64 addrspace(1)* %out
4538  ret void
4539}
4540
; Single-element-vector variant of the zero-extending load test: the kernel
; loads a <1 x i16> through %in (an addrspace(4) pointer), zero-extends it to
; <1 x i64> and stores it through %out (an addrspace(1) pointer).
;
; The expected instruction sequences below are the same as those checked for
; the scalar test constant_zextload_i16_to_i64; only the label names differ.
; That is, an unsigned 16-bit load with a zeroed high dword on the GCN
; configurations (plus a v_and_b32 with 0xffff under the GCN-NOHSA-VI prefix),
; and VTX_READ_16 with T0.Y set to 0.0 under the EG prefix.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
4541define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
4542; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i64:
4543; GCN-NOHSA-SI:       ; %bb.0:
4544; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4545; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
4546; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
4547; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
4548; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
4549; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4550; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
4551; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
4552; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4553; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
4554; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
4555; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
4556; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4557; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4558; GCN-NOHSA-SI-NEXT:    s_endpgm
4559;
4560; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i64:
4561; GCN-HSA:       ; %bb.0:
4562; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4563; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4564; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
4565; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
4566; GCN-HSA-NEXT:    flat_load_ushort v2, v[2:3]
4567; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
4568; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
4569; GCN-HSA-NEXT:    v_mov_b32_e32 v3, 0
4570; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
4571; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
4572; GCN-HSA-NEXT:    s_endpgm
4573;
4574; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i64:
4575; GCN-NOHSA-VI:       ; %bb.0:
4576; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
4577; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
4578; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
4579; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
4580; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
4581; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4582; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
4583; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
4584; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4585; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
4586; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
4587; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
4588; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4589; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4590; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
4591; GCN-NOHSA-VI-NEXT:    s_endpgm
4592;
4593; EG-LABEL: constant_zextload_v1i16_to_v1i64:
4594; EG:       ; %bb.0:
4595; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4596; EG-NEXT:    TEX 0 @6
4597; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
4598; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4599; EG-NEXT:    CF_END
4600; EG-NEXT:    PAD
4601; EG-NEXT:    Fetch clause starting at 6:
4602; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
4603; EG-NEXT:    ALU clause starting at 8:
4604; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4605; EG-NEXT:    ALU clause starting at 9:
4606; EG-NEXT:     MOV * T0.Y, 0.0,
4607; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
4608; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4609  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
4610  %ext = zext <1 x i16> %load to <1 x i64>
4611  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
4612  ret void
4613}
4614
; Kernel test: load a <1 x i16> through the addrspace(4) pointer %in,
; sign-extend it to <1 x i64>, and store it through the addrspace(1) pointer
; %out.
;
; The assertion lines inside the function are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
;
; What the expected output pins down:
;  - the SI and kaveri/amdhsa runs use a signed 16-bit load
;    (buffer_load_sshort / flat_load_sshort);
;  - the tonga run instead uses buffer_load_ushort followed by
;    v_bfe_i32 v0, v0, 0, 16;
;  - all three amdgcn runs then derive the upper dword with
;    v_ashrrev_i32 by 31 from the lower dword;
;  - the r600/redwood run uses VTX_READ_16, BFE_INT with literal 16, and
;    ASHR by 31 for T0.Y.
4615define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
4616; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i64:
4617; GCN-NOHSA-SI:       ; %bb.0:
4618; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4619; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
4620; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
4621; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
4622; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
4623; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4624; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
4625; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
4626; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
4627; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
4628; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
4629; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4630; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4631; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4632; GCN-NOHSA-SI-NEXT:    s_endpgm
4633;
4634; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i64:
4635; GCN-HSA:       ; %bb.0:
4636; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4637; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4638; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
4639; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
4640; GCN-HSA-NEXT:    flat_load_sshort v2, v[2:3]
4641; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
4642; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
4643; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
4644; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
4645; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
4646; GCN-HSA-NEXT:    s_endpgm
4647;
4648; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i64:
4649; GCN-NOHSA-VI:       ; %bb.0:
4650; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
4651; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
4652; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
4653; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4654; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
4655; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
4656; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
4657; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
4658; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
4659; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
4660; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
4661; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4662; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
4663; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4664; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
4665; GCN-NOHSA-VI-NEXT:    s_endpgm
4666;
4667; EG-LABEL: constant_sextload_v1i16_to_v1i64:
4668; EG:       ; %bb.0:
4669; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4670; EG-NEXT:    TEX 0 @6
4671; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
4672; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4673; EG-NEXT:    CF_END
4674; EG-NEXT:    PAD
4675; EG-NEXT:    Fetch clause starting at 6:
4676; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
4677; EG-NEXT:    ALU clause starting at 8:
4678; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4679; EG-NEXT:    ALU clause starting at 9:
4680; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
4681; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
4682; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
4683; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
4684; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
; IR under test: vector load, sext to 64-bit lanes, vector store.
4685  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
4686  %ext = sext <1 x i16> %load to <1 x i64>
4687  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
4688  ret void
4689}
4690
; Kernel test: load a <2 x i16> through the addrspace(4) pointer %in,
; zero-extend each element to i64, and store the <2 x i64> through the
; addrspace(1) pointer %out.
;
; The assertion lines inside the function are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
;
; What the expected output pins down:
;  - every amdgcn run fetches both elements with a single scalar s_load_dword
;    and splits it with s_and_b32 0xffff (lower half) and s_lshr_b32 by 16
;    (upper half);
;  - the two upper dwords of the result come from one zeroed VGPR that is
;    copied (v1 = 0, v3 = v1), and the whole result goes out in one dwordx4
;    store;
;  - the r600/redwood run reads 32 bits with VTX_READ_32, uses AND_INT with
;    65535 and LSHR by 16, and moves 0.0 into T4.Y and T4.W.
4691define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
4692; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i64:
4693; GCN-NOHSA-SI:       ; %bb.0:
4694; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4695; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4696; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
4697; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
4698; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
4699; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4700; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s2, 16
4701; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s2, 0xffff
4702; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
4703; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
4704; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
4705; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
4706; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4707; GCN-NOHSA-SI-NEXT:    s_endpgm
4708;
4709; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i64:
4710; GCN-HSA:       ; %bb.0:
4711; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4712; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
4713; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
4714; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4715; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4716; GCN-HSA-NEXT:    s_load_dword s0, s[2:3], 0x0
4717; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4718; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4719; GCN-HSA-NEXT:    s_lshr_b32 s1, s0, 16
4720; GCN-HSA-NEXT:    s_and_b32 s0, s0, 0xffff
4721; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
4722; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
4723; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4724; GCN-HSA-NEXT:    s_endpgm
4725;
4726; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i64:
4727; GCN-NOHSA-VI:       ; %bb.0:
4728; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
4729; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
4730; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
4731; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
4732; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
4733; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4734; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
4735; GCN-NOHSA-VI-NEXT:    s_load_dword s4, s[6:7], 0x0
4736; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
4737; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4738; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s4, 0xffff
4739; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
4740; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s5
4741; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
4742; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4743; GCN-NOHSA-VI-NEXT:    s_endpgm
4744;
4745; EG-LABEL: constant_zextload_v2i16_to_v2i64:
4746; EG:       ; %bb.0:
4747; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4748; EG-NEXT:    TEX 0 @6
4749; EG-NEXT:    ALU 6, @9, KC0[CB0:0-32], KC1[]
4750; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
4751; EG-NEXT:    CF_END
4752; EG-NEXT:    PAD
4753; EG-NEXT:    Fetch clause starting at 6:
4754; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
4755; EG-NEXT:    ALU clause starting at 8:
4756; EG-NEXT:     MOV * T4.X, KC0[2].Z,
4757; EG-NEXT:    ALU clause starting at 9:
4758; EG-NEXT:     LSHR * T4.Z, T4.X, literal.x,
4759; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4760; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
4761; EG-NEXT:     MOV T4.Y, 0.0,
4762; EG-NEXT:     MOV T4.W, 0.0,
4763; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
4764; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
; IR under test: vector load, zext to 64-bit lanes, vector store.
4765  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
4766  %ext = zext <2 x i16> %load to <2 x i64>
4767  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
4768  ret void
4769}
4770
; Kernel test: load a <2 x i16> through the addrspace(4) pointer %in,
; sign-extend each element to i64, and store the <2 x i64> through the
; addrspace(1) pointer %out.
;
; The assertion lines inside the function are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
;
; What the expected output pins down:
;  - every amdgcn run fetches both elements with a single scalar s_load_dword,
;    isolates the upper half with s_lshr_b32 by 16, and produces each 64-bit
;    result with s_bfe_i64 using the operand 0x100000 (presumably "width 16,
;    offset 0" - confirm against the ISA description of the operand);
;  - the four result dwords are copied to v0..v3 and written with one dwordx4
;    store;
;  - the r600/redwood run reads 32 bits with VTX_READ_32 and builds the result
;    from BFE_INT with literal 16 plus ASHR by 16 and by 31.
4771define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
4772; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i64:
4773; GCN-NOHSA-SI:       ; %bb.0:
4774; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4775; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4776; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
4777; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
4778; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4779; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s2, 16
4780; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[6:7], s[2:3], 0x100000
4781; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
4782; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
4783; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
4784; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s7
4785; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
4786; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
4787; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4788; GCN-NOHSA-SI-NEXT:    s_endpgm
4789;
4790; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i64:
4791; GCN-HSA:       ; %bb.0:
4792; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4793; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4794; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4795; GCN-HSA-NEXT:    s_load_dword s0, s[2:3], 0x0
4796; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4797; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4798; GCN-HSA-NEXT:    s_lshr_b32 s2, s0, 16
4799; GCN-HSA-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x100000
4800; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
4801; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
4802; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
4803; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
4804; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
4805; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4806; GCN-HSA-NEXT:    s_endpgm
4807;
4808; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i64:
4809; GCN-NOHSA-VI:       ; %bb.0:
4810; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
4811; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
4812; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
4813; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4814; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
4815; GCN-NOHSA-VI-NEXT:    s_load_dword s4, s[6:7], 0x0
4816; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
4817; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4818; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[4:5], 0x100000
4819; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
4820; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
4821; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
4822; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s7
4823; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
4824; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
4825; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4826; GCN-NOHSA-VI-NEXT:    s_endpgm
4827;
4828; EG-LABEL: constant_sextload_v2i16_to_v2i64:
4829; EG:       ; %bb.0:
4830; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4831; EG-NEXT:    TEX 0 @6
4832; EG-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
4833; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
4834; EG-NEXT:    CF_END
4835; EG-NEXT:    PAD
4836; EG-NEXT:    Fetch clause starting at 6:
4837; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
4838; EG-NEXT:    ALU clause starting at 8:
4839; EG-NEXT:     MOV * T4.X, KC0[2].Z,
4840; EG-NEXT:    ALU clause starting at 9:
4841; EG-NEXT:     ASHR * T4.W, T4.X, literal.x,
4842; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4843; EG-NEXT:     ASHR * T4.Z, T4.X, literal.x,
4844; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4845; EG-NEXT:     BFE_INT T4.X, T4.X, 0.0, literal.x,
4846; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
4847; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
4848; EG-NEXT:     ASHR * T4.Y, PV.X, literal.x,
4849; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
; IR under test: vector load, sext to 64-bit lanes, vector store.
4850  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
4851  %ext = sext <2 x i16> %load to <2 x i64>
4852  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
4853  ret void
4854}
4855
; Kernel test: load a <4 x i16> through the addrspace(4) pointer %in,
; zero-extend each element to i64, and store the <4 x i64> through the
; addrspace(1) pointer %out.
;
; The assertion lines inside the function are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
;
; What the expected output pins down:
;  - every amdgcn run fetches the four elements with one s_load_dwordx2, keeps
;    the 0xffff mask in an SGPR for the s_and_b32 instructions, and uses
;    s_lshr_b32 by 16 for the upper halves;
;  - the 32-byte result is written as two dwordx4 stores, the one at byte
;    offset 16 first; v1 and v3 hold 0 and are reused by both stores;
;  - the kaveri/amdhsa run forms the +16 address with s_add_u32 / s_addc_u32
;    rather than an offset field on the store;
;  - between the two stores the SI run has s_waitcnt expcnt(0) and the tonga
;    run has s_nop 0, ahead of v0 and v2 being overwritten;
;  - the r600/redwood run reads 64 bits with VTX_READ_64 and issues two
;    STORE_RAW instructions.
4856define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
4857; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i64:
4858; GCN-NOHSA-SI:       ; %bb.0:
4859; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4860; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
4861; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4862; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
4863; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
4864; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
4865; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, 0xffff
4866; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
4867; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
4868; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
4869; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4870; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s0, s3, 16
4871; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s1, s2, 16
4872; GCN-NOHSA-SI-NEXT:    s_and_b32 s3, s3, s8
4873; GCN-NOHSA-SI-NEXT:    s_and_b32 s2, s2, s8
4874; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s3
4875; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s0
4876; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
4877; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4878; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s2
4879; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s1
4880; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4881; GCN-NOHSA-SI-NEXT:    s_endpgm
4882;
4883; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i64:
4884; GCN-HSA:       ; %bb.0:
4885; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4886; GCN-HSA-NEXT:    s_mov_b32 s6, 0xffff
4887; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
4888; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
4889; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4890; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
4891; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4892; GCN-HSA-NEXT:    s_lshr_b32 s4, s3, 16
4893; GCN-HSA-NEXT:    s_lshr_b32 s5, s2, 16
4894; GCN-HSA-NEXT:    s_and_b32 s7, s2, s6
4895; GCN-HSA-NEXT:    s_and_b32 s2, s3, s6
4896; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4897; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
4898; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4899; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
4900; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
4901; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
4902; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4903; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4904; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s7
4905; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
4906; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4907; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4908; GCN-HSA-NEXT:    s_endpgm
4909;
4910; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i64:
4911; GCN-NOHSA-VI:       ; %bb.0:
4912; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
4913; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, 0xffff
4914; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
4915; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
4916; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
4917; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4918; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
4919; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
4920; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
4921; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
4922; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4923; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s5, s8
4924; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s5, s5, 16
4925; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s4, s8
4926; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
4927; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s7
4928; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
4929; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4930; GCN-NOHSA-VI-NEXT:    s_nop 0
4931; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
4932; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
4933; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4934; GCN-NOHSA-VI-NEXT:    s_endpgm
4935;
4936; EG-LABEL: constant_zextload_v4i16_to_v4i64:
4937; EG:       ; %bb.0:
4938; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4939; EG-NEXT:    TEX 0 @6
4940; EG-NEXT:    ALU 18, @9, KC0[CB0:0-32], KC1[]
4941; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0
4942; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T7.X, 1
4943; EG-NEXT:    CF_END
4944; EG-NEXT:    Fetch clause starting at 6:
4945; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
4946; EG-NEXT:    ALU clause starting at 8:
4947; EG-NEXT:     MOV * T5.X, KC0[2].Z,
4948; EG-NEXT:    ALU clause starting at 9:
4949; EG-NEXT:     MOV T2.X, T5.X,
4950; EG-NEXT:     MOV * T3.X, T5.Y,
4951; EG-NEXT:     MOV T0.Y, PV.X,
4952; EG-NEXT:     MOV * T0.Z, PS,
4953; EG-NEXT:     LSHR * T5.Z, PV.Z, literal.x,
4954; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4955; EG-NEXT:     AND_INT T5.X, T0.Z, literal.x,
4956; EG-NEXT:     MOV T5.Y, 0.0,
4957; EG-NEXT:     LSHR T6.Z, T0.Y, literal.y,
4958; EG-NEXT:     AND_INT * T6.X, T0.Y, literal.x,
4959; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
4960; EG-NEXT:     MOV T6.Y, 0.0,
4961; EG-NEXT:     MOV T5.W, 0.0,
4962; EG-NEXT:     MOV * T6.W, 0.0,
4963; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
4964; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4965; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4966; EG-NEXT:     LSHR * T8.X, PV.W, literal.x,
4967; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: vector load, zext to 64-bit lanes, vector store.
4968  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
4969  %ext = zext <4 x i16> %load to <4 x i64>
4970  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
4971  ret void
4972}
4973
; Kernel test: load a <4 x i16> through the addrspace(4) pointer %in,
; sign-extend each element to i64, and store the <4 x i64> through the
; addrspace(1) pointer %out.
;
; The assertion lines inside the function are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
;
; What the expected output pins down:
;  - every amdgcn run fetches the four elements with one s_load_dwordx2 and
;    sign-extends with s_bfe_i64 using the operand 0x100000 (presumably
;    "width 16, offset 0" - confirm against the ISA description);
;  - the SI and kaveri/amdhsa runs obtain one of the results with a single
;    s_ashr_i64 by 48 on the loaded 64-bit pair, so they need only three
;    s_bfe_i64; the tonga run has no s_ashr_i64 and uses four s_bfe_i64 with
;    two s_lshr_b32 by 16;
;  - the 32-byte result is written as two dwordx4 stores, the one at byte
;    offset 16 first; the kaveri/amdhsa run forms that address with
;    s_add_u32 / s_addc_u32;
;  - the r600/redwood run reads 64 bits with VTX_READ_64 and combines BFE_INT
;    with ASHR by 16 and by 31 before two STORE_RAW instructions.
4974define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
4975; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i64:
4976; GCN-NOHSA-SI:       ; %bb.0:
4977; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4978; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4979; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
4980; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
4981; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
4982; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4983; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, s5
4984; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s8, s4, 16
4985; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[10:11], s[4:5], 0x100000
4986; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
4987; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
4988; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
4989; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
4990; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s7
4991; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
4992; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
4993; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4994; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4995; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
4996; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
4997; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
4998; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
4999; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5000; GCN-NOHSA-SI-NEXT:    s_endpgm
5001;
5002; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i64:
5003; GCN-HSA:       ; %bb.0:
5004; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5005; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5006; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
5007; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5008; GCN-HSA-NEXT:    s_mov_b32 s4, s3
5009; GCN-HSA-NEXT:    s_lshr_b32 s6, s2, 16
5010; GCN-HSA-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
5011; GCN-HSA-NEXT:    s_bfe_i64 s[8:9], s[2:3], 0x100000
5012; GCN-HSA-NEXT:    s_ashr_i64 s[2:3], s[2:3], 48
5013; GCN-HSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
5014; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
5015; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5016; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
5017; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5018; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5019; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
5020; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
5021; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5022; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5023; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5024; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
5025; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
5026; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
5027; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
5028; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5029; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5030; GCN-HSA-NEXT:    s_endpgm
5031;
5032; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i64:
5033; GCN-NOHSA-VI:       ; %bb.0:
5034; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
5035; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5036; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5037; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5038; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
5039; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
5040; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
5041; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5042; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s5
5043; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s5, 16
5044; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
5045; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
5046; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[4:5], 0x100000
5047; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
5048; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
5049; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
5050; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
5051; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
5052; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s11
5053; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5054; GCN-NOHSA-VI-NEXT:    s_nop 0
5055; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
5056; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s7
5057; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
5058; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
5059; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5060; GCN-NOHSA-VI-NEXT:    s_endpgm
5061;
5062; EG-LABEL: constant_sextload_v4i16_to_v4i64:
5063; EG:       ; %bb.0:
5064; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5065; EG-NEXT:    TEX 0 @6
5066; EG-NEXT:    ALU 20, @9, KC0[CB0:0-32], KC1[]
5067; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0
5068; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
5069; EG-NEXT:    CF_END
5070; EG-NEXT:    Fetch clause starting at 6:
5071; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
5072; EG-NEXT:    ALU clause starting at 8:
5073; EG-NEXT:     MOV * T5.X, KC0[2].Z,
5074; EG-NEXT:    ALU clause starting at 9:
5075; EG-NEXT:     MOV T2.X, T5.X,
5076; EG-NEXT:     MOV * T3.X, T5.Y,
5077; EG-NEXT:     MOV T0.Y, PS,
5078; EG-NEXT:     MOV * T0.Z, PV.X,
5079; EG-NEXT:     ASHR * T5.W, PV.Z, literal.x,
5080; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5081; EG-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
5082; EG-NEXT:     ASHR T5.Z, T0.Z, literal.y,
5083; EG-NEXT:     ASHR * T7.W, T0.Y, literal.z,
5084; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5085; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5086; EG-NEXT:     BFE_INT T5.X, T0.Z, 0.0, literal.x,
5087; EG-NEXT:     ASHR * T7.Z, T0.Y, literal.x,
5088; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5089; EG-NEXT:     BFE_INT T7.X, T0.Y, 0.0, literal.x,
5090; EG-NEXT:     ASHR T5.Y, PV.X, literal.y,
5091; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
5092; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5093; EG-NEXT:     LSHR T8.X, PV.W, literal.x,
5094; EG-NEXT:     ASHR * T7.Y, PV.X, literal.y,
5095; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
; IR under test: vector load, sext to 64-bit lanes, vector store.
5096  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
5097  %ext = sext <4 x i16> %load to <4 x i64>
5098  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
5099  ret void
5100}
5101
; Kernel test: load an <8 x i16> through the addrspace(4) pointer %in,
; zero-extend each element to i64, and store the <8 x i64> through the
; addrspace(1) pointer %out.
;
; The assertion lines inside the function are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
;
; What the expected output pins down:
;  - every amdgcn run fetches the eight elements with one s_load_dwordx4,
;    keeps the 0xffff mask in an SGPR for the s_and_b32 instructions, and uses
;    s_lshr_b32 by 16 for the upper halves;
;  - the 64-byte result is written as four dwordx4 stores that share the
;    zeroed v1/v3; the store order is offsets 48, 16, 32, 0 on the SI and
;    kaveri/amdhsa runs and 48, 32, 16, 0 on the tonga run;
;  - the kaveri/amdhsa run forms each non-zero offset address with
;    s_add_u32 / s_addc_u32;
;  - the r600/redwood run reads 128 bits with VTX_READ_128 and issues four
;    STORE_RAW instructions.
5102define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
5103; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i64:
5104; GCN-NOHSA-SI:       ; %bb.0:
5105; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5106; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5107; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5108; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[8:11], s[2:3], 0x0
5109; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5110; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5111; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, 0xffff
5112; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
5113; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5114; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5115; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5116; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s0, s9, 16
5117; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s1, s11, 16
5118; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s3, s10, 16
5119; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s8, 16
5120; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, s2
5121; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, s2
5122; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, s2
5123; GCN-NOHSA-SI-NEXT:    s_and_b32 s2, s9, s2
5124; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s11
5125; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s1
5126; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:48
5127; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5128; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s2
5129; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s0
5130; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
5131; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5132; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
5133; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s3
5134; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:32
5135; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5136; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
5137; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
5138; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5139; GCN-NOHSA-SI-NEXT:    s_endpgm
5140;
5141; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i64:
5142; GCN-HSA:       ; %bb.0:
5143; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5144; GCN-HSA-NEXT:    s_mov_b32 s8, 0xffff
5145; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5146; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
5147; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5148; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
5149; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5150; GCN-HSA-NEXT:    s_lshr_b32 s9, s5, 16
5151; GCN-HSA-NEXT:    s_lshr_b32 s2, s7, 16
5152; GCN-HSA-NEXT:    s_lshr_b32 s10, s6, 16
5153; GCN-HSA-NEXT:    s_lshr_b32 s11, s4, 16
5154; GCN-HSA-NEXT:    s_and_b32 s3, s7, s8
5155; GCN-HSA-NEXT:    s_and_b32 s4, s4, s8
5156; GCN-HSA-NEXT:    s_and_b32 s6, s6, s8
5157; GCN-HSA-NEXT:    s_and_b32 s5, s5, s8
5158; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
5159; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
5160; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s3
5161; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5162; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5163; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5164; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5165; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5166; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5167; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5168; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5169; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
5170; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s5
5171; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
5172; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5173; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5174; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5175; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
5176; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
5177; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5178; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5179; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5180; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
5181; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
5182; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5183; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5184; GCN-HSA-NEXT:    s_endpgm
5185;
5186; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i64:
5187; GCN-NOHSA-VI:       ; %bb.0:
5188; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
5189; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, 0xffff
5190; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5191; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5192; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5193; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5194; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
5195; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
5196; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[6:7], 0x0
5197; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
5198; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5199; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s4, s8
5200; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s5, s8
5201; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s6, s8
5202; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s7, s8
5203; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s7, s7, 16
5204; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
5205; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
5206; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
5207; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5208; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s5, s5, 16
5209; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s11
5210; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
5211; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5212; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
5213; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
5214; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
5215; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5216; GCN-NOHSA-VI-NEXT:    s_nop 0
5217; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s9
5218; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
5219; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5220; GCN-NOHSA-VI-NEXT:    s_endpgm
5221;
5222; EG-LABEL: constant_zextload_v8i16_to_v8i64:
5223; EG:       ; %bb.0:
5224; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
5225; EG-NEXT:    TEX 0 @8
5226; EG-NEXT:    ALU 30, @11, KC0[CB0:0-32], KC1[]
5227; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0
5228; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0
5229; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0
5230; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1
5231; EG-NEXT:    CF_END
5232; EG-NEXT:    Fetch clause starting at 8:
5233; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
5234; EG-NEXT:    ALU clause starting at 10:
5235; EG-NEXT:     MOV * T7.X, KC0[2].Z,
5236; EG-NEXT:    ALU clause starting at 11:
5237; EG-NEXT:     LSHR * T8.Z, T7.W, literal.x,
5238; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5239; EG-NEXT:     AND_INT T8.X, T7.W, literal.x,
5240; EG-NEXT:     MOV T8.Y, 0.0,
5241; EG-NEXT:     LSHR T9.Z, T7.Z, literal.y,
5242; EG-NEXT:     AND_INT * T9.X, T7.Z, literal.x,
5243; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5244; EG-NEXT:     MOV T9.Y, 0.0,
5245; EG-NEXT:     LSHR * T10.Z, T7.Y, literal.x,
5246; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5247; EG-NEXT:     AND_INT T10.X, T7.Y, literal.x,
5248; EG-NEXT:     MOV T10.Y, 0.0,
5249; EG-NEXT:     LSHR T7.Z, T7.X, literal.y,
5250; EG-NEXT:     AND_INT * T7.X, T7.X, literal.x,
5251; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5252; EG-NEXT:     MOV T7.Y, 0.0,
5253; EG-NEXT:     MOV T8.W, 0.0,
5254; EG-NEXT:     MOV * T9.W, 0.0,
5255; EG-NEXT:     MOV T10.W, 0.0,
5256; EG-NEXT:     MOV * T7.W, 0.0,
5257; EG-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
5258; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5259; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5260; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
5261; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5262; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
5263; EG-NEXT:     LSHR T13.X, PV.W, literal.x,
5264; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5265; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
5266; EG-NEXT:     LSHR * T14.X, PV.W, literal.x,
5267; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: vector load, zext to 64-bit lanes, vector store.
5268  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
5269  %ext = zext <8 x i16> %load to <8 x i64>
5270  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
5271  ret void
5272}
5273
; Test case: sign-extending load of <8 x i16> from the constant address space
; (addrspace(4)), widened to <8 x i64> and stored through the addrspace(1)
; pointer %out.
;
; The per-target check blocks inside the function (SI, HSA, VI and EG variants,
; one per RUN line at the top of the file) are autogenerated by
; utils/update_llc_test_checks.py; regenerate them with that script rather than
; editing them by hand.
;
; What the expected output pins down, as visible in the checks:
;  * all three GCN variants fetch the vector with a scalar s_load_dwordx4 and
;    write the result with four 16-byte stores covering offsets 0, 16, 32, 48;
;  * the SI and HSA variants widen with s_bfe_i64 (operand 0x100000) for some
;    elements and s_ashr_i64 by 48 for others; the VI variant uses
;    s_lshr_b32 + s_bfe_i64 only;
;  * the EG variant uses one VTX_READ_128 and four MEM_RAT_CACHELESS stores.
; NOTE(review): 0x100000 presumably encodes "width 16, offset 0" for s_bfe --
; confirm against the ISA documentation.
; Attribute group #0 is defined outside this part of the file.
5274define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
5275; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i64:
5276; GCN-NOHSA-SI:       ; %bb.0:
5277; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5278; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5279; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
5280; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5281; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5282; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5283; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s7
5284; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s5
5285; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s6, 16
5286; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s4, 16
5287; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[4:5], 0x100000
5288; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[18:19], s[6:7], 0x100000
5289; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
5290; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
5291; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
5292; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
5293; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
5294; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
5295; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
5296; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s9
5297; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
5298; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
5299; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5300; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5301; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
5302; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
5303; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
5304; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
5305; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5306; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5307; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
5308; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
5309; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s16
5310; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s17
5311; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
5312; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s13
5313; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5314; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s14
5315; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s15
5316; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
5317; GCN-NOHSA-SI-NEXT:    s_endpgm
5318;
5319; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i64:
5320; GCN-HSA:       ; %bb.0:
5321; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5322; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5323; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
5324; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5325; GCN-HSA-NEXT:    s_mov_b32 s2, s7
5326; GCN-HSA-NEXT:    s_mov_b32 s8, s5
5327; GCN-HSA-NEXT:    s_lshr_b32 s10, s6, 16
5328; GCN-HSA-NEXT:    s_lshr_b32 s12, s4, 16
5329; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
5330; GCN-HSA-NEXT:    s_bfe_i64 s[14:15], s[4:5], 0x100000
5331; GCN-HSA-NEXT:    s_bfe_i64 s[16:17], s[6:7], 0x100000
5332; GCN-HSA-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
5333; GCN-HSA-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
5334; GCN-HSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
5335; GCN-HSA-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
5336; GCN-HSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
5337; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5338; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
5339; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5340; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5341; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5342; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5343; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5344; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
5345; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
5346; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5347; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5348; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5349; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5350; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
5351; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
5352; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
5353; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
5354; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
5355; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5356; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5357; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5358; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s16
5359; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s17
5360; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
5361; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s11
5362; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5363; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5364; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5365; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
5366; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s15
5367; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s12
5368; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s13
5369; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5370; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5371; GCN-HSA-NEXT:    s_endpgm
5372;
5373; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i64:
5374; GCN-NOHSA-VI:       ; %bb.0:
5375; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
5376; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5377; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5378; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5379; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
5380; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
5381; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[6:7], 0x0
5382; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5383; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[14:15], s[6:7], 0x100000
5384; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
5385; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[16:17], s[6:7], 0x100000
5386; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s7
5387; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[18:19], s[6:7], 0x100000
5388; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s7, 16
5389; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
5390; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s5
5391; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s12, s5, 16
5392; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
5393; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s19
5394; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
5395; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
5396; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5397; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[8:9], s[4:5], 0x100000
5398; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
5399; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
5400; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
5401; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
5402; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
5403; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s16
5404; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s17
5405; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5406; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
5407; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
5408; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s11
5409; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s12
5410; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s13
5411; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5412; GCN-NOHSA-VI-NEXT:    s_nop 0
5413; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
5414; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
5415; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
5416; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
5417; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5418; GCN-NOHSA-VI-NEXT:    s_endpgm
5419;
5420; EG-LABEL: constant_sextload_v8i16_to_v8i64:
5421; EG:       ; %bb.0:
5422; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
5423; EG-NEXT:    TEX 0 @8
5424; EG-NEXT:    ALU 33, @11, KC0[CB0:0-32], KC1[]
5425; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0
5426; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0
5427; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0
5428; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1
5429; EG-NEXT:    CF_END
5430; EG-NEXT:    Fetch clause starting at 8:
5431; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
5432; EG-NEXT:    ALU clause starting at 10:
5433; EG-NEXT:     MOV * T7.X, KC0[2].Z,
5434; EG-NEXT:    ALU clause starting at 11:
5435; EG-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
5436; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5437; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5438; EG-NEXT:     LSHR T9.X, PV.W, literal.x,
5439; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
5440; EG-NEXT:     ASHR * T10.W, T7.X, literal.z,
5441; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
5442; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5443; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
5444; EG-NEXT:     ASHR T10.Z, T7.X, literal.y,
5445; EG-NEXT:     ASHR * T12.W, T7.Y, literal.z,
5446; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5447; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5448; EG-NEXT:     BFE_INT T10.X, T7.X, 0.0, literal.x,
5449; EG-NEXT:     ASHR T12.Z, T7.Y, literal.x,
5450; EG-NEXT:     ASHR * T13.W, T7.Z, literal.y,
5451; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5452; EG-NEXT:     BFE_INT T12.X, T7.Y, 0.0, literal.x,
5453; EG-NEXT:     ASHR T10.Y, PV.X, literal.y,
5454; EG-NEXT:     ASHR T13.Z, T7.Z, literal.x,
5455; EG-NEXT:     ASHR * T14.W, T7.W, literal.y,
5456; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5457; EG-NEXT:     BFE_INT T13.X, T7.Z, 0.0, literal.x,
5458; EG-NEXT:     ASHR T12.Y, PV.X, literal.y,
5459; EG-NEXT:     ASHR * T14.Z, T7.W, literal.x,
5460; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5461; EG-NEXT:     BFE_INT T14.X, T7.W, 0.0, literal.x,
5462; EG-NEXT:     ASHR T13.Y, PV.X, literal.y,
5463; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
5464; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5465; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
5466; EG-NEXT:     LSHR T7.X, PV.W, literal.x,
5467; EG-NEXT:     ASHR * T14.Y, PV.X, literal.y,
5468; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
; IR under test: load the whole vector, sign-extend every lane, store it back.
; The check lines above do not reference the SSA value names used here.
5469  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
5470  %ext = sext <8 x i16> %load to <8 x i64>
5471  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
5472  ret void
5473}
5474
; Test case: zero-extending load of <16 x i16> from the constant address space
; (addrspace(4)), widened to <16 x i64> and stored through the addrspace(1)
; pointer %out.
;
; The per-target check blocks inside the function (SI, HSA, VI and EG variants,
; one per RUN line at the top of the file) are autogenerated by
; utils/update_llc_test_checks.py; regenerate them with that script rather than
; editing them by hand.
;
; What the expected output pins down, as visible in the checks:
;  * all three GCN variants fetch the vector with a scalar s_load_dwordx8,
;    split each loaded dword into a low half (s_and_b32 with 0xffff) and a
;    high half (s_lshr_b32 by 16), and take the zero words of each stored
;    quad from a VGPR set to 0 once (v1, copied to v3);
;  * the result is written with eight 16-byte stores covering offsets 0
;    through 112 (0x70);
;  * the EG variant uses two VTX_READ_128 fetches and eight
;    MEM_RAT_CACHELESS stores.
; Attribute group #0 is defined outside this part of the file.
5475define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
5476; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i64:
5477; GCN-NOHSA-SI:       ; %bb.0:
5478; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x9
5479; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5480; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5481; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[4:11], s[14:15], 0x0
5482; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5483; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5484; GCN-NOHSA-SI-NEXT:    s_mov_b32 s14, 0xffff
5485; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
5486; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s12
5487; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s13
5488; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5489; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s5, 16
5490; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s13, s7, 16
5491; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s15, s11, 16
5492; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s16, s9, 16
5493; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s17, s8, 16
5494; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s10, 16
5495; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s6, 16
5496; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s4, 16
5497; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, s14
5498; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, s14
5499; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, s14
5500; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, s14
5501; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s14
5502; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, s14
5503; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, s14
5504; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, s14
5505; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s9
5506; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s16
5507; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
5508; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5509; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s11
5510; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
5511; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
5512; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5513; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s7
5514; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
5515; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5516; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5517; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
5518; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
5519; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5520; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5521; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
5522; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s17
5523; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
5524; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5525; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
5526; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s18
5527; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
5528; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5529; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
5530; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s19
5531; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5532; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5533; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
5534; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
5535; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5536; GCN-NOHSA-SI-NEXT:    s_endpgm
5537;
5538; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i64:
5539; GCN-HSA:       ; %bb.0:
5540; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5541; GCN-HSA-NEXT:    s_mov_b32 s12, 0xffff
5542; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5543; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
5544; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5545; GCN-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
5546; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5547; GCN-HSA-NEXT:    s_lshr_b32 s13, s5, 16
5548; GCN-HSA-NEXT:    s_lshr_b32 s14, s7, 16
5549; GCN-HSA-NEXT:    s_lshr_b32 s15, s11, 16
5550; GCN-HSA-NEXT:    s_lshr_b32 s2, s9, 16
5551; GCN-HSA-NEXT:    s_lshr_b32 s16, s8, 16
5552; GCN-HSA-NEXT:    s_lshr_b32 s17, s10, 16
5553; GCN-HSA-NEXT:    s_lshr_b32 s18, s6, 16
5554; GCN-HSA-NEXT:    s_lshr_b32 s19, s4, 16
5555; GCN-HSA-NEXT:    s_and_b32 s3, s9, s12
5556; GCN-HSA-NEXT:    s_and_b32 s4, s4, s12
5557; GCN-HSA-NEXT:    s_and_b32 s6, s6, s12
5558; GCN-HSA-NEXT:    s_and_b32 s10, s10, s12
5559; GCN-HSA-NEXT:    s_and_b32 s8, s8, s12
5560; GCN-HSA-NEXT:    s_and_b32 s5, s5, s12
5561; GCN-HSA-NEXT:    s_and_b32 s7, s7, s12
5562; GCN-HSA-NEXT:    s_and_b32 s11, s11, s12
5563; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
5564; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
5565; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s3
5566; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5567; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5568; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5569; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
5570; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5571; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5572; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5573; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5574; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
5575; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s11
5576; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
5577; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5578; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5579; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5580; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5581; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5582; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s7
5583; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s14
5584; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5585; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5586; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5587; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5588; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
5589; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s5
5590; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s13
5591; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5592; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5593; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5594; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5595; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
5596; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
5597; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s16
5598; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5599; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5600; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5601; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5602; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
5603; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
5604; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s17
5605; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5606; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5607; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5608; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
5609; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s18
5610; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5611; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5612; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5613; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
5614; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s19
5615; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5616; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5617; GCN-HSA-NEXT:    s_endpgm
5618;
5619; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i64:
5620; GCN-NOHSA-VI:       ; %bb.0:
5621; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
5622; GCN-NOHSA-VI-NEXT:    s_mov_b32 s12, 0xffff
5623; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5624; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5625; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5626; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5627; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
5628; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
5629; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[4:11], s[6:7], 0x0
5630; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
5631; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5632; GCN-NOHSA-VI-NEXT:    s_and_b32 s13, s4, s12
5633; GCN-NOHSA-VI-NEXT:    s_and_b32 s14, s5, s12
5634; GCN-NOHSA-VI-NEXT:    s_and_b32 s15, s6, s12
5635; GCN-NOHSA-VI-NEXT:    s_and_b32 s16, s7, s12
5636; GCN-NOHSA-VI-NEXT:    s_and_b32 s17, s8, s12
5637; GCN-NOHSA-VI-NEXT:    s_and_b32 s18, s9, s12
5638; GCN-NOHSA-VI-NEXT:    s_and_b32 s19, s10, s12
5639; GCN-NOHSA-VI-NEXT:    s_and_b32 s12, s11, s12
5640; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s11, s11, 16
5641; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s10, 16
5642; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
5643; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
5644; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
5645; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s9, s9, 16
5646; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s19
5647; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
5648; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
5649; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s8, s8, 16
5650; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
5651; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
5652; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
5653; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s7, s7, 16
5654; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s17
5655; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s8
5656; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
5657; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
5658; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
5659; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
5660; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5661; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s5, s5, 16
5662; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s15
5663; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
5664; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5665; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
5666; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
5667; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
5668; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5669; GCN-NOHSA-VI-NEXT:    s_nop 0
5670; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s13
5671; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
5672; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5673; GCN-NOHSA-VI-NEXT:    s_endpgm
5674;
5675; EG-LABEL: constant_zextload_v16i16_to_v16i64:
5676; EG:       ; %bb.0:
5677; EG-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
5678; EG-NEXT:    TEX 1 @12
5679; EG-NEXT:    ALU 62, @17, KC0[CB0:0-32], KC1[]
5680; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0
5681; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0
5682; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0
5683; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0
5684; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0
5685; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0
5686; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0
5687; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1
5688; EG-NEXT:    CF_END
5689; EG-NEXT:    Fetch clause starting at 12:
5690; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
5691; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
5692; EG-NEXT:    ALU clause starting at 16:
5693; EG-NEXT:     MOV * T11.X, KC0[2].Z,
5694; EG-NEXT:    ALU clause starting at 17:
5695; EG-NEXT:     LSHR * T13.Z, T12.W, literal.x,
5696; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5697; EG-NEXT:     AND_INT T13.X, T12.W, literal.x,
5698; EG-NEXT:     MOV T13.Y, 0.0,
5699; EG-NEXT:     LSHR T14.Z, T12.Z, literal.y,
5700; EG-NEXT:     AND_INT * T14.X, T12.Z, literal.x,
5701; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5702; EG-NEXT:     MOV T14.Y, 0.0,
5703; EG-NEXT:     LSHR * T15.Z, T12.Y, literal.x,
5704; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5705; EG-NEXT:     AND_INT T15.X, T12.Y, literal.x,
5706; EG-NEXT:     MOV T15.Y, 0.0,
5707; EG-NEXT:     LSHR T12.Z, T12.X, literal.y,
5708; EG-NEXT:     AND_INT * T12.X, T12.X, literal.x,
5709; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5710; EG-NEXT:     MOV T12.Y, 0.0,
5711; EG-NEXT:     LSHR * T16.Z, T11.W, literal.x,
5712; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5713; EG-NEXT:     AND_INT T16.X, T11.W, literal.x,
5714; EG-NEXT:     MOV T16.Y, 0.0,
5715; EG-NEXT:     LSHR T17.Z, T11.Z, literal.y,
5716; EG-NEXT:     AND_INT * T17.X, T11.Z, literal.x,
5717; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5718; EG-NEXT:     MOV T17.Y, 0.0,
5719; EG-NEXT:     LSHR * T18.Z, T11.Y, literal.x,
5720; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5721; EG-NEXT:     AND_INT T18.X, T11.Y, literal.x,
5722; EG-NEXT:     MOV T18.Y, 0.0,
5723; EG-NEXT:     LSHR T11.Z, T11.X, literal.y,
5724; EG-NEXT:     AND_INT * T11.X, T11.X, literal.x,
5725; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5726; EG-NEXT:     MOV T11.Y, 0.0,
5727; EG-NEXT:     MOV T13.W, 0.0,
5728; EG-NEXT:     MOV * T14.W, 0.0,
5729; EG-NEXT:     MOV T15.W, 0.0,
5730; EG-NEXT:     MOV * T12.W, 0.0,
5731; EG-NEXT:     MOV T16.W, 0.0,
5732; EG-NEXT:     MOV * T17.W, 0.0,
5733; EG-NEXT:     MOV T18.W, 0.0,
5734; EG-NEXT:     MOV * T11.W, 0.0,
5735; EG-NEXT:     LSHR T19.X, KC0[2].Y, literal.x,
5736; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5737; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5738; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
5739; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5740; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
5741; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
5742; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5743; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
5744; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
5745; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5746; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
5747; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
5748; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5749; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
5750; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
5751; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5752; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
5753; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
5754; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5755; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
5756; EG-NEXT:     LSHR * T26.X, PV.W, literal.x,
5757; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: load the whole vector, zero-extend every lane, store it back.
; The check lines above do not reference the SSA value names used here.
5758  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
5759  %ext = zext <16 x i16> %load to <16 x i64>
5760  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
5761  ret void
5762}
5763
5764define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
5765; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i64:
5766; GCN-NOHSA-SI:       ; %bb.0:
5767; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5768; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5769; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
5770; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5771; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5772; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5773; GCN-NOHSA-SI-NEXT:    s_mov_b32 s12, s11
5774; GCN-NOHSA-SI-NEXT:    s_mov_b32 s14, s9
5775; GCN-NOHSA-SI-NEXT:    s_mov_b32 s16, s7
5776; GCN-NOHSA-SI-NEXT:    s_mov_b32 s18, s5
5777; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s10, 16
5778; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s8, 16
5779; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s6, 16
5780; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s4, 16
5781; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[28:29], s[4:5], 0x100000
5782; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[30:31], s[6:7], 0x100000
5783; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[34:35], s[8:9], 0x100000
5784; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[36:37], s[10:11], 0x100000
5785; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
5786; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
5787; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[8:9], s[8:9], 48
5788; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[10:11], s[10:11], 48
5789; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x100000
5790; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x100000
5791; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
5792; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
5793; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x100000
5794; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
5795; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x100000
5796; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x100000
5797; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
5798; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s13
5799; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s10
5800; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s11
5801; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
5802; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5803; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
5804; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s15
5805; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
5806; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
5807; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
5808; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5809; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s16
5810; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s17
5811; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
5812; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
5813; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5814; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5815; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
5816; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
5817; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
5818; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
5819; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5820; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5821; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s36
5822; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s37
5823; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s34
5824; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s35
5825; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s30
5826; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s31
5827; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s28
5828; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s29
5829; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
5830; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s21
5831; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
5832; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s22
5833; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s23
5834; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
5835; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s24
5836; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s25
5837; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
5838; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s26
5839; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s27
5840; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0
5841; GCN-NOHSA-SI-NEXT:    s_endpgm
5842;
5843; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i64:
5844; GCN-HSA:       ; %bb.0:
5845; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5846; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5847; GCN-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
5848; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5849; GCN-HSA-NEXT:    s_mov_b32 s2, s11
5850; GCN-HSA-NEXT:    s_mov_b32 s12, s9
5851; GCN-HSA-NEXT:    s_mov_b32 s14, s7
5852; GCN-HSA-NEXT:    s_mov_b32 s16, s5
5853; GCN-HSA-NEXT:    s_lshr_b32 s18, s10, 16
5854; GCN-HSA-NEXT:    s_lshr_b32 s20, s8, 16
5855; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
5856; GCN-HSA-NEXT:    s_bfe_i64 s[34:35], s[10:11], 0x100000
5857; GCN-HSA-NEXT:    s_ashr_i64 s[10:11], s[10:11], 48
5858; GCN-HSA-NEXT:    s_lshr_b32 s22, s6, 16
5859; GCN-HSA-NEXT:    s_lshr_b32 s24, s4, 16
5860; GCN-HSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
5861; GCN-HSA-NEXT:    s_bfe_i64 s[26:27], s[4:5], 0x100000
5862; GCN-HSA-NEXT:    s_bfe_i64 s[28:29], s[6:7], 0x100000
5863; GCN-HSA-NEXT:    s_bfe_i64 s[30:31], s[8:9], 0x100000
5864; GCN-HSA-NEXT:    s_ashr_i64 s[8:9], s[8:9], 48
5865; GCN-HSA-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
5866; GCN-HSA-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
5867; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5868; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5869; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
5870; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s11
5871; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[24:25], 0x100000
5872; GCN-HSA-NEXT:    s_bfe_i64 s[10:11], s[22:23], 0x100000
5873; GCN-HSA-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x100000
5874; GCN-HSA-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x100000
5875; GCN-HSA-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x100000
5876; GCN-HSA-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
5877; GCN-HSA-NEXT:    s_add_u32 s22, s0, 0x70
5878; GCN-HSA-NEXT:    s_addc_u32 s23, s1, 0
5879; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s8
5880; GCN-HSA-NEXT:    s_add_u32 s8, s0, 0x50
5881; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s22
5882; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s9
5883; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
5884; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s9
5885; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s23
5886; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s12
5887; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s13
5888; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s8
5889; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
5890; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
5891; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
5892; GCN-HSA-NEXT:    s_add_u32 s6, s0, 48
5893; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
5894; GCN-HSA-NEXT:    s_addc_u32 s7, s1, 0
5895; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s6
5896; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
5897; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s15
5898; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s7
5899; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5900; GCN-HSA-NEXT:    s_nop 0
5901; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
5902; GCN-HSA-NEXT:    s_add_u32 s4, s0, 16
5903; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
5904; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
5905; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
5906; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
5907; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0x60
5908; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s16
5909; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s17
5910; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5911; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
5912; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
5913; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
5914; GCN-HSA-NEXT:    s_add_u32 s4, s0, 64
5915; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s34
5916; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s35
5917; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s18
5918; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s19
5919; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5920; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
5921; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
5922; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
5923; GCN-HSA-NEXT:    s_add_u32 s4, s0, 32
5924; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s30
5925; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s31
5926; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s20
5927; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s21
5928; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5929; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
5930; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
5931; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s28
5932; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
5933; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
5934; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s11
5935; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
5936; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5937; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5938; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s26
5939; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s27
5940; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
5941; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
5942; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5943; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5944; GCN-HSA-NEXT:    s_endpgm
5945;
5946; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i64:
5947; GCN-NOHSA-VI:       ; %bb.0:
5948; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
5949; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5950; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5951; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5952; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
5953; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
5954; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[4:11], s[6:7], 0x0
5955; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5956; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[30:31], s[10:11], 0x100000
5957; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s10, 16
5958; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[34:35], s[10:11], 0x100000
5959; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s11
5960; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[36:37], s[10:11], 0x100000
5961; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s11, 16
5962; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[24:25], s[8:9], 0x100000
5963; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s8, s8, 16
5964; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
5965; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[26:27], s[8:9], 0x100000
5966; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s9
5967; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[18:19], s[6:7], 0x100000
5968; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
5969; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[28:29], s[8:9], 0x100000
5970; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s8, s9, 16
5971; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s36
5972; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s37
5973; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
5974; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s11
5975; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
5976; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[20:21], s[6:7], 0x100000
5977; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s7
5978; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
5979; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s30
5980; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s31
5981; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s34
5982; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
5983; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
5984; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[22:23], s[6:7], 0x100000
5985; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s7, 16
5986; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
5987; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s29
5988; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s8
5989; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s9
5990; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
5991; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
5992; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
5993; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s25
5994; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s26
5995; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s27
5996; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
5997; GCN-NOHSA-VI-NEXT:    s_mov_b32 s14, s5
5998; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s16, s5, 16
5999; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s22
6000; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s23
6001; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
6002; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
6003; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6004; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[12:13], s[4:5], 0x100000
6005; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
6006; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
6007; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x100000
6008; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
6009; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s19
6010; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s20
6011; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s21
6012; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6013; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
6014; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
6015; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
6016; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s16
6017; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s17
6018; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6019; GCN-NOHSA-VI-NEXT:    s_nop 0
6020; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
6021; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s13
6022; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
6023; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
6024; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6025; GCN-NOHSA-VI-NEXT:    s_endpgm
6026;
6027; EG-LABEL: constant_sextload_v16i16_to_v16i64:
6028; EG:       ; %bb.0:
6029; EG-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
6030; EG-NEXT:    TEX 1 @12
6031; EG-NEXT:    ALU 65, @17, KC0[CB0:0-32], KC1[]
6032; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0
6033; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0
6034; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0
6035; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0
6036; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0
6037; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0
6038; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0
6039; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1
6040; EG-NEXT:    CF_END
6041; EG-NEXT:    Fetch clause starting at 12:
6042; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
6043; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
6044; EG-NEXT:    ALU clause starting at 16:
6045; EG-NEXT:     MOV * T11.X, KC0[2].Z,
6046; EG-NEXT:    ALU clause starting at 17:
6047; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
6048; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6049; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6050; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
6051; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6052; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6053; EG-NEXT:     LSHR T15.X, PV.W, literal.x,
6054; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6055; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
6056; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
6057; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6058; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
6059; EG-NEXT:     LSHR T17.X, PV.W, literal.x,
6060; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6061; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
6062; EG-NEXT:     LSHR T18.X, PV.W, literal.x,
6063; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
6064; EG-NEXT:     ASHR * T19.W, T11.X, literal.z,
6065; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
6066; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6067; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
6068; EG-NEXT:     ASHR T19.Z, T11.X, literal.y,
6069; EG-NEXT:     ASHR * T21.W, T11.Y, literal.z,
6070; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6071; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6072; EG-NEXT:     BFE_INT T19.X, T11.X, 0.0, literal.x,
6073; EG-NEXT:     ASHR T21.Z, T11.Y, literal.x,
6074; EG-NEXT:     ASHR * T22.W, T11.Z, literal.y,
6075; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6076; EG-NEXT:     BFE_INT T21.X, T11.Y, 0.0, literal.x,
6077; EG-NEXT:     ASHR T19.Y, PV.X, literal.y,
6078; EG-NEXT:     ASHR T22.Z, T11.Z, literal.x,
6079; EG-NEXT:     ASHR * T23.W, T11.W, literal.y,
6080; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6081; EG-NEXT:     BFE_INT T22.X, T11.Z, 0.0, literal.x,
6082; EG-NEXT:     ASHR T21.Y, PV.X, literal.y,
6083; EG-NEXT:     ASHR T23.Z, T11.W, literal.x,
6084; EG-NEXT:     ASHR * T24.W, T12.X, literal.y,
6085; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6086; EG-NEXT:     BFE_INT T23.X, T11.W, 0.0, literal.x,
6087; EG-NEXT:     ASHR T22.Y, PV.X, literal.y,
6088; EG-NEXT:     ASHR T24.Z, T12.X, literal.x,
6089; EG-NEXT:     ASHR * T11.W, T12.Y, literal.y,
6090; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6091; EG-NEXT:     BFE_INT T24.X, T12.X, 0.0, literal.x,
6092; EG-NEXT:     ASHR T23.Y, PV.X, literal.y,
6093; EG-NEXT:     ASHR T11.Z, T12.Y, literal.x,
6094; EG-NEXT:     ASHR * T25.W, T12.Z, literal.y,
6095; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6096; EG-NEXT:     BFE_INT T11.X, T12.Y, 0.0, literal.x,
6097; EG-NEXT:     ASHR T24.Y, PV.X, literal.y,
6098; EG-NEXT:     ASHR T25.Z, T12.Z, literal.x,
6099; EG-NEXT:     ASHR * T26.W, T12.W, literal.y,
6100; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6101; EG-NEXT:     BFE_INT T25.X, T12.Z, 0.0, literal.x,
6102; EG-NEXT:     ASHR T11.Y, PV.X, literal.y,
6103; EG-NEXT:     ASHR * T26.Z, T12.W, literal.x,
6104; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6105; EG-NEXT:     BFE_INT T26.X, T12.W, 0.0, literal.x,
6106; EG-NEXT:     ASHR T25.Y, PV.X, literal.y,
6107; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
6108; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6109; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
6110; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
6111; EG-NEXT:     ASHR * T26.Y, PV.X, literal.y,
6112; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
6113  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
6114  %ext = sext <16 x i16> %load to <16 x i64>
6115  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
6116  ret void
6117}
6118
6119define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
6120; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i64:
6121; GCN-NOHSA-SI:       ; %bb.0:
6122; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
6123; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6124; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x0
6125; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, 0xffff
6126; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6127; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s5, 16
6128; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s21, s7, 16
6129; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s9, 16
6130; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s23, s11, 16
6131; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s13, 16
6132; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s25, s15, 16
6133; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s17, 16
6134; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s27, s19, 16
6135; GCN-NOHSA-SI-NEXT:    s_and_b32 s28, s4, s2
6136; GCN-NOHSA-SI-NEXT:    s_and_b32 s29, s6, s2
6137; GCN-NOHSA-SI-NEXT:    s_and_b32 s30, s8, s2
6138; GCN-NOHSA-SI-NEXT:    s_and_b32 s31, s10, s2
6139; GCN-NOHSA-SI-NEXT:    s_and_b32 s33, s12, s2
6140; GCN-NOHSA-SI-NEXT:    s_and_b32 s34, s14, s2
6141; GCN-NOHSA-SI-NEXT:    s_and_b32 s35, s16, s2
6142; GCN-NOHSA-SI-NEXT:    s_and_b32 s36, s18, s2
6143; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s2
6144; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, s2
6145; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, s2
6146; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, s2
6147; GCN-NOHSA-SI-NEXT:    s_and_b32 s13, s13, s2
6148; GCN-NOHSA-SI-NEXT:    s_and_b32 s15, s15, s2
6149; GCN-NOHSA-SI-NEXT:    s_and_b32 s17, s17, s2
6150; GCN-NOHSA-SI-NEXT:    s_and_b32 s19, s19, s2
6151; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s18, 16
6152; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s16, s16, 16
6153; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s14, 16
6154; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s12, 16
6155; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s10, s10, 16
6156; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s8, s8, 16
6157; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s6, s6, 16
6158; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s4, 16
6159; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
6160; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
6161; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
6162; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
6163; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s19
6164; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s27
6165; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
6166; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6167; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s17
6168; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s26
6169; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
6170; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6171; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s15
6172; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s25
6173; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
6174; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6175; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s13
6176; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s24
6177; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
6178; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6179; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s11
6180; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s23
6181; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
6182; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6183; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s9
6184; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s22
6185; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
6186; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6187; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s7
6188; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s21
6189; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6190; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6191; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
6192; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
6193; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6194; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6195; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s36
6196; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s18
6197; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
6198; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6199; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s35
6200; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s16
6201; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
6202; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6203; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s34
6204; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s14
6205; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
6206; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6207; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s33
6208; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
6209; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
6210; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6211; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s31
6212; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s10
6213; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
6214; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6215; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s30
6216; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
6217; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
6218; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6219; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s29
6220; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
6221; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6222; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6223; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s28
6224; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
6225; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6226; GCN-NOHSA-SI-NEXT:    s_endpgm
6227;
6228; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i64:
6229; GCN-HSA:       ; %bb.0:
6230; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
6231; GCN-HSA-NEXT:    s_mov_b32 s20, 0xffff
6232; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
6233; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
6234; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6235; GCN-HSA-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x0
6236; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6237; GCN-HSA-NEXT:    s_and_b32 s21, s4, s20
6238; GCN-HSA-NEXT:    s_and_b32 s22, s6, s20
6239; GCN-HSA-NEXT:    s_and_b32 s23, s8, s20
6240; GCN-HSA-NEXT:    s_and_b32 s24, s10, s20
6241; GCN-HSA-NEXT:    s_and_b32 s25, s12, s20
6242; GCN-HSA-NEXT:    s_and_b32 s26, s14, s20
6243; GCN-HSA-NEXT:    s_and_b32 s27, s16, s20
6244; GCN-HSA-NEXT:    s_and_b32 s28, s18, s20
6245; GCN-HSA-NEXT:    s_and_b32 s29, s5, s20
6246; GCN-HSA-NEXT:    s_and_b32 s30, s7, s20
6247; GCN-HSA-NEXT:    s_and_b32 s31, s9, s20
6248; GCN-HSA-NEXT:    s_and_b32 s33, s11, s20
6249; GCN-HSA-NEXT:    s_and_b32 s34, s13, s20
6250; GCN-HSA-NEXT:    s_and_b32 s35, s15, s20
6251; GCN-HSA-NEXT:    s_and_b32 s36, s17, s20
6252; GCN-HSA-NEXT:    s_and_b32 s20, s19, s20
6253; GCN-HSA-NEXT:    s_lshr_b32 s19, s19, 16
6254; GCN-HSA-NEXT:    s_lshr_b32 s5, s5, 16
6255; GCN-HSA-NEXT:    s_lshr_b32 s7, s7, 16
6256; GCN-HSA-NEXT:    s_lshr_b32 s9, s9, 16
6257; GCN-HSA-NEXT:    s_lshr_b32 s11, s11, 16
6258; GCN-HSA-NEXT:    s_lshr_b32 s13, s13, 16
6259; GCN-HSA-NEXT:    s_lshr_b32 s15, s15, 16
6260; GCN-HSA-NEXT:    s_lshr_b32 s17, s17, 16
6261; GCN-HSA-NEXT:    s_lshr_b32 s18, s18, 16
6262; GCN-HSA-NEXT:    s_lshr_b32 s16, s16, 16
6263; GCN-HSA-NEXT:    s_lshr_b32 s14, s14, 16
6264; GCN-HSA-NEXT:    s_lshr_b32 s12, s12, 16
6265; GCN-HSA-NEXT:    s_lshr_b32 s10, s10, 16
6266; GCN-HSA-NEXT:    s_lshr_b32 s8, s8, 16
6267; GCN-HSA-NEXT:    s_lshr_b32 s6, s6, 16
6268; GCN-HSA-NEXT:    s_lshr_b32 s4, s4, 16
6269; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xf0
6270; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6271; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6272; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6273; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xd0
6274; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6275; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s3
6276; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s2
6277; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xb0
6278; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6279; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
6280; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
6281; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x90
6282; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6283; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s3
6284; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s2
6285; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
6286; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s20
6287; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s19
6288; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6289; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6290; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s36
6291; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s17
6292; GCN-HSA-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
6293; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6294; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s35
6295; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
6296; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
6297; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6298; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
6299; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s34
6300; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s13
6301; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[0:3]
6302; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6303; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s33
6304; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
6305; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6306; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6307; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6308; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
6309; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s31
6310; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
6311; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6312; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6313; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6314; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6315; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
6316; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s30
6317; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
6318; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6319; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6320; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6321; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6322; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xe0
6323; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s29
6324; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
6325; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6326; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6327; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6328; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6329; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xc0
6330; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s28
6331; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s18
6332; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6333; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6334; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6335; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6336; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xa0
6337; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s27
6338; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s16
6339; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6340; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6341; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6342; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6343; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x80
6344; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s26
6345; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s14
6346; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6347; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6348; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6349; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6350; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
6351; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s25
6352; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s12
6353; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6354; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6355; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6356; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6357; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
6358; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s24
6359; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
6360; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6361; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6362; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6363; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6364; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
6365; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s23
6366; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s8
6367; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6368; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6369; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6370; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s22
6371; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
6372; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6373; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6374; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6375; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s21
6376; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
6377; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6378; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6379; GCN-HSA-NEXT:    s_endpgm
6380;
6381; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i64:
6382; GCN-NOHSA-VI:       ; %bb.0:
6383; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
6384; GCN-NOHSA-VI-NEXT:    s_mov_b32 s20, 0xffff
6385; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
6386; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
6387; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
6388; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6389; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
6390; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
6391; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[4:19], s[6:7], 0x0
6392; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
6393; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6394; GCN-NOHSA-VI-NEXT:    s_and_b32 s21, s4, s20
6395; GCN-NOHSA-VI-NEXT:    s_and_b32 s22, s5, s20
6396; GCN-NOHSA-VI-NEXT:    s_and_b32 s23, s6, s20
6397; GCN-NOHSA-VI-NEXT:    s_and_b32 s24, s7, s20
6398; GCN-NOHSA-VI-NEXT:    s_and_b32 s25, s8, s20
6399; GCN-NOHSA-VI-NEXT:    s_and_b32 s26, s9, s20
6400; GCN-NOHSA-VI-NEXT:    s_and_b32 s27, s10, s20
6401; GCN-NOHSA-VI-NEXT:    s_and_b32 s28, s11, s20
6402; GCN-NOHSA-VI-NEXT:    s_and_b32 s29, s12, s20
6403; GCN-NOHSA-VI-NEXT:    s_and_b32 s30, s13, s20
6404; GCN-NOHSA-VI-NEXT:    s_and_b32 s31, s14, s20
6405; GCN-NOHSA-VI-NEXT:    s_and_b32 s33, s15, s20
6406; GCN-NOHSA-VI-NEXT:    s_and_b32 s34, s16, s20
6407; GCN-NOHSA-VI-NEXT:    s_and_b32 s35, s17, s20
6408; GCN-NOHSA-VI-NEXT:    s_and_b32 s36, s18, s20
6409; GCN-NOHSA-VI-NEXT:    s_and_b32 s20, s19, s20
6410; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s19, s19, 16
6411; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s18, s18, 16
6412; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s20
6413; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s19
6414; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
6415; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s17, s17, 16
6416; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s36
6417; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s18
6418; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
6419; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s16, s16, 16
6420; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s35
6421; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s17
6422; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
6423; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s15, s15, 16
6424; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s34
6425; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s16
6426; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
6427; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s14, s14, 16
6428; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s33
6429; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
6430; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
6431; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s13, s13, 16
6432; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s31
6433; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s14
6434; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
6435; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s12, s12, 16
6436; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s30
6437; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
6438; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
6439; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s11, s11, 16
6440; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s29
6441; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s12
6442; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
6443; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s10, 16
6444; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
6445; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
6446; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
6447; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s9, s9, 16
6448; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s27
6449; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
6450; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
6451; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s8, s8, 16
6452; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s26
6453; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
6454; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
6455; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s7, s7, 16
6456; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s25
6457; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s8
6458; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
6459; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
6460; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
6461; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
6462; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6463; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s5, s5, 16
6464; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s23
6465; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
6466; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6467; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
6468; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s22
6469; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
6470; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6471; GCN-NOHSA-VI-NEXT:    s_nop 0
6472; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s21
6473; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
6474; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6475; GCN-NOHSA-VI-NEXT:    s_endpgm
6476;
6477; EG-LABEL: constant_zextload_v32i16_to_v32i64:
6478; EG:       ; %bb.0:
6479; EG-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
6480; EG-NEXT:    TEX 2 @22
6481; EG-NEXT:    ALU 33, @31, KC0[], KC1[]
6482; EG-NEXT:    TEX 0 @28
6483; EG-NEXT:    ALU 92, @65, KC0[CB0:0-32], KC1[]
6484; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0
6485; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0
6486; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0
6487; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T47.X, 0
6488; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T46.X, 0
6489; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0
6490; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0
6491; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0
6492; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0
6493; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0
6494; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0
6495; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0
6496; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0
6497; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0
6498; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0
6499; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T35.X, 1
6500; EG-NEXT:    CF_END
6501; EG-NEXT:    Fetch clause starting at 22:
6502; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 48, #1
6503; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 16, #1
6504; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
6505; EG-NEXT:    Fetch clause starting at 28:
6506; EG-NEXT:     VTX_READ_128 T29.XYZW, T19.X, 0, #1
6507; EG-NEXT:    ALU clause starting at 30:
6508; EG-NEXT:     MOV * T19.X, KC0[2].Z,
6509; EG-NEXT:    ALU clause starting at 31:
6510; EG-NEXT:     LSHR * T23.Z, T20.W, literal.x,
6511; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6512; EG-NEXT:     AND_INT T23.X, T20.W, literal.x,
6513; EG-NEXT:     MOV T23.Y, 0.0,
6514; EG-NEXT:     LSHR T24.Z, T20.Z, literal.y,
6515; EG-NEXT:     AND_INT * T24.X, T20.Z, literal.x,
6516; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6517; EG-NEXT:     MOV T24.Y, 0.0,
6518; EG-NEXT:     LSHR * T25.Z, T20.Y, literal.x,
6519; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6520; EG-NEXT:     AND_INT T25.X, T20.Y, literal.x,
6521; EG-NEXT:     MOV T25.Y, 0.0,
6522; EG-NEXT:     LSHR T20.Z, T20.X, literal.y,
6523; EG-NEXT:     AND_INT * T20.X, T20.X, literal.x,
6524; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6525; EG-NEXT:     MOV T20.Y, 0.0,
6526; EG-NEXT:     LSHR * T26.Z, T22.W, literal.x,
6527; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6528; EG-NEXT:     AND_INT T26.X, T22.W, literal.x,
6529; EG-NEXT:     MOV T26.Y, 0.0,
6530; EG-NEXT:     LSHR T27.Z, T22.Z, literal.y,
6531; EG-NEXT:     AND_INT * T27.X, T22.Z, literal.x,
6532; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6533; EG-NEXT:     MOV T27.Y, 0.0,
6534; EG-NEXT:     LSHR * T28.Z, T22.Y, literal.x,
6535; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6536; EG-NEXT:     AND_INT T28.X, T22.Y, literal.x,
6537; EG-NEXT:     MOV T28.Y, 0.0,
6538; EG-NEXT:     LSHR T22.Z, T22.X, literal.y,
6539; EG-NEXT:     AND_INT * T22.X, T22.X, literal.x,
6540; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6541; EG-NEXT:     MOV T22.Y, 0.0,
6542; EG-NEXT:     LSHR * T19.Z, T21.W, literal.x,
6543; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6544; EG-NEXT:    ALU clause starting at 65:
6545; EG-NEXT:     AND_INT T19.X, T21.W, literal.x,
6546; EG-NEXT:     MOV T19.Y, 0.0,
6547; EG-NEXT:     LSHR T30.Z, T21.Z, literal.y,
6548; EG-NEXT:     AND_INT * T30.X, T21.Z, literal.x,
6549; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6550; EG-NEXT:     MOV T30.Y, 0.0,
6551; EG-NEXT:     LSHR * T31.Z, T21.Y, literal.x,
6552; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6553; EG-NEXT:     AND_INT T31.X, T21.Y, literal.x,
6554; EG-NEXT:     MOV T31.Y, 0.0,
6555; EG-NEXT:     LSHR T21.Z, T21.X, literal.y,
6556; EG-NEXT:     AND_INT * T21.X, T21.X, literal.x,
6557; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6558; EG-NEXT:     MOV T21.Y, 0.0,
6559; EG-NEXT:     LSHR * T32.Z, T29.W, literal.x,
6560; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6561; EG-NEXT:     AND_INT T32.X, T29.W, literal.x,
6562; EG-NEXT:     MOV T32.Y, 0.0,
6563; EG-NEXT:     LSHR T33.Z, T29.Z, literal.y,
6564; EG-NEXT:     AND_INT * T33.X, T29.Z, literal.x,
6565; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6566; EG-NEXT:     MOV T33.Y, 0.0,
6567; EG-NEXT:     LSHR * T34.Z, T29.Y, literal.x,
6568; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6569; EG-NEXT:     AND_INT T34.X, T29.Y, literal.x,
6570; EG-NEXT:     MOV T34.Y, 0.0,
6571; EG-NEXT:     LSHR T29.Z, T29.X, literal.y,
6572; EG-NEXT:     AND_INT * T29.X, T29.X, literal.x,
6573; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6574; EG-NEXT:     MOV T29.Y, 0.0,
6575; EG-NEXT:     MOV T23.W, 0.0,
6576; EG-NEXT:     MOV * T24.W, 0.0,
6577; EG-NEXT:     MOV T25.W, 0.0,
6578; EG-NEXT:     MOV * T20.W, 0.0,
6579; EG-NEXT:     MOV T26.W, 0.0,
6580; EG-NEXT:     MOV * T27.W, 0.0,
6581; EG-NEXT:     MOV T28.W, 0.0,
6582; EG-NEXT:     MOV * T22.W, 0.0,
6583; EG-NEXT:     MOV T19.W, 0.0,
6584; EG-NEXT:     MOV * T30.W, 0.0,
6585; EG-NEXT:     MOV T31.W, 0.0,
6586; EG-NEXT:     MOV * T21.W, 0.0,
6587; EG-NEXT:     MOV T32.W, 0.0,
6588; EG-NEXT:     MOV * T33.W, 0.0,
6589; EG-NEXT:     MOV T34.W, 0.0,
6590; EG-NEXT:     MOV * T29.W, 0.0,
6591; EG-NEXT:     LSHR T35.X, KC0[2].Y, literal.x,
6592; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6593; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6594; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
6595; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6596; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6597; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
6598; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6599; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
6600; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
6601; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6602; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
6603; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
6604; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6605; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
6606; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
6607; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6608; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
6609; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
6610; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6611; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
6612; EG-NEXT:     LSHR T42.X, PV.W, literal.x,
6613; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6614; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
6615; EG-NEXT:     LSHR T43.X, PV.W, literal.x,
6616; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6617; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
6618; EG-NEXT:     LSHR T44.X, PV.W, literal.x,
6619; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6620; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
6621; EG-NEXT:     LSHR T45.X, PV.W, literal.x,
6622; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6623; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
6624; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
6625; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6626; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
6627; EG-NEXT:     LSHR T47.X, PV.W, literal.x,
6628; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6629; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
6630; EG-NEXT:     LSHR T48.X, PV.W, literal.x,
6631; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6632; EG-NEXT:    2(2.802597e-45), 224(3.138909e-43)
6633; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
6634; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6635; EG-NEXT:    2(2.802597e-45), 240(3.363116e-43)
6636; EG-NEXT:     LSHR * T50.X, PV.W, literal.x,
6637; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6638  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
6639  %ext = zext <32 x i16> %load to <32 x i64>
6640  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
6641  ret void
6642}
6643
6644define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
6645; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i64:
6646; GCN-NOHSA-SI:       ; %bb.0:
6647; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
6648; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6649; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[8:23], s[2:3], 0x0
6650; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6651; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, s23
6652; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s21
6653; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, s19
6654; GCN-NOHSA-SI-NEXT:    s_mov_b32 s24, s17
6655; GCN-NOHSA-SI-NEXT:    s_mov_b32 s26, s15
6656; GCN-NOHSA-SI-NEXT:    s_mov_b32 s28, s13
6657; GCN-NOHSA-SI-NEXT:    s_mov_b32 s30, s11
6658; GCN-NOHSA-SI-NEXT:    s_mov_b32 s34, s9
6659; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s36, s22, 16
6660; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s38, s20, 16
6661; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s40, s18, 16
6662; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s42, s16, 16
6663; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[44:45], s[4:5], 0x100000
6664; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
6665; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s46, s14, 16
6666; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s48, s12, 16
6667; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s50, s10, 16
6668; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s52, s8, 16
6669; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[4:5], s[8:9], 0x100000
6670; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[54:55], s[10:11], 0x100000
6671; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[56:57], s[12:13], 0x100000
6672; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[58:59], s[14:15], 0x100000
6673; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[60:61], s[16:17], 0x100000
6674; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[62:63], s[18:19], 0x100000
6675; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[64:65], s[20:21], 0x100000
6676; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[66:67], s[22:23], 0x100000
6677; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[8:9], s[8:9], 48
6678; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[10:11], s[10:11], 48
6679; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[14:15], s[14:15], 48
6680; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[16:17], s[16:17], 48
6681; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[18:19], s[18:19], 48
6682; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[20:21], s[20:21], 48
6683; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[22:23], s[22:23], 48
6684; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[12:13], s[12:13], 48
6685; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s2
6686; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s3
6687; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s22
6688; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s23
6689; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s44
6690; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s45
6691; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s20
6692; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s21
6693; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
6694; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
6695; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[20:21], s[28:29], 0x100000
6696; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[22:23], s[26:27], 0x100000
6697; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
6698; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
6699; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[26:27], s[34:35], 0x100000
6700; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[28:29], s[30:31], 0x100000
6701; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s6
6702; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s7
6703; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s18
6704; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s19
6705; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s24
6706; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s25
6707; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s16
6708; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s17
6709; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s22
6710; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s23
6711; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s14
6712; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s15
6713; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s20
6714; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s21
6715; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s12
6716; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s13
6717; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
6718; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[6:7], s[52:53], 0x100000
6719; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[50:51], 0x100000
6720; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[48:49], 0x100000
6721; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[46:47], 0x100000
6722; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[18:19], s[42:43], 0x100000
6723; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[20:21], s[40:41], 0x100000
6724; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[22:23], s[38:39], 0x100000
6725; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[24:25], s[36:37], 0x100000
6726; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:208
6727; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:176
6728; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144
6729; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112
6730; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80
6731; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(5)
6732; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s28
6733; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s29
6734; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s10
6735; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s11
6736; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6737; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6738; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s26
6739; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s27
6740; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
6741; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
6742; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6743; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6744; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s66
6745; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s67
6746; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s64
6747; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s65
6748; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s62
6749; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s63
6750; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s60
6751; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s61
6752; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s58
6753; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s59
6754; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s56
6755; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s57
6756; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v24, s54
6757; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v25, s55
6758; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s24
6759; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s25
6760; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
6761; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6762; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
6763; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s5
6764; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s22
6765; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s23
6766; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192
6767; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s20
6768; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s21
6769; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160
6770; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s18
6771; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s19
6772; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128
6773; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s16
6774; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s17
6775; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96
6776; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s14
6777; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s15
6778; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64
6779; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v26, s12
6780; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v27, s13
6781; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32
6782; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
6783; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
6784; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6785; GCN-NOHSA-SI-NEXT:    s_endpgm
6786;
6787; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i64:
6788; GCN-HSA:       ; %bb.0:
6789; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
6790; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6791; GCN-HSA-NEXT:    s_load_dwordx16 s[36:51], s[2:3], 0x0
6792; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6793; GCN-HSA-NEXT:    s_mov_b32 s8, s51
6794; GCN-HSA-NEXT:    s_mov_b32 s34, s49
6795; GCN-HSA-NEXT:    s_mov_b32 s52, s47
6796; GCN-HSA-NEXT:    s_mov_b32 s54, s45
6797; GCN-HSA-NEXT:    s_mov_b32 s56, s43
6798; GCN-HSA-NEXT:    s_mov_b32 s58, s41
6799; GCN-HSA-NEXT:    s_mov_b32 s60, s39
6800; GCN-HSA-NEXT:    s_mov_b32 s62, s37
6801; GCN-HSA-NEXT:    s_lshr_b32 s30, s46, 16
6802; GCN-HSA-NEXT:    s_lshr_b32 s24, s44, 16
6803; GCN-HSA-NEXT:    s_lshr_b32 s20, s42, 16
6804; GCN-HSA-NEXT:    s_lshr_b32 s16, s40, 16
6805; GCN-HSA-NEXT:    s_lshr_b32 s12, s38, 16
6806; GCN-HSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
6807; GCN-HSA-NEXT:    s_lshr_b32 s64, s50, 16
6808; GCN-HSA-NEXT:    s_lshr_b32 s66, s48, 16
6809; GCN-HSA-NEXT:    s_lshr_b32 s68, s36, 16
6810; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[36:37], 0x100000
6811; GCN-HSA-NEXT:    s_ashr_i64 s[28:29], s[36:37], 48
6812; GCN-HSA-NEXT:    s_bfe_i64 s[4:5], s[38:39], 0x100000
6813; GCN-HSA-NEXT:    s_ashr_i64 s[36:37], s[38:39], 48
6814; GCN-HSA-NEXT:    s_bfe_i64 s[6:7], s[40:41], 0x100000
6815; GCN-HSA-NEXT:    s_ashr_i64 s[38:39], s[40:41], 48
6816; GCN-HSA-NEXT:    s_ashr_i64 s[40:41], s[42:43], 48
6817; GCN-HSA-NEXT:    s_bfe_i64 s[10:11], s[42:43], 0x100000
6818; GCN-HSA-NEXT:    s_ashr_i64 s[42:43], s[44:45], 48
6819; GCN-HSA-NEXT:    s_bfe_i64 s[14:15], s[44:45], 0x100000
6820; GCN-HSA-NEXT:    s_ashr_i64 s[44:45], s[46:47], 48
6821; GCN-HSA-NEXT:    s_bfe_i64 s[18:19], s[46:47], 0x100000
6822; GCN-HSA-NEXT:    s_ashr_i64 s[46:47], s[48:49], 48
6823; GCN-HSA-NEXT:    s_bfe_i64 s[22:23], s[48:49], 0x100000
6824; GCN-HSA-NEXT:    s_ashr_i64 s[48:49], s[50:51], 48
6825; GCN-HSA-NEXT:    s_bfe_i64 s[58:59], s[58:59], 0x100000
6826; GCN-HSA-NEXT:    s_bfe_i64 s[56:57], s[56:57], 0x100000
6827; GCN-HSA-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x100000
6828; GCN-HSA-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x100000
6829; GCN-HSA-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x100000
6830; GCN-HSA-NEXT:    s_bfe_i64 s[26:27], s[50:51], 0x100000
6831; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
6832; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
6833; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s48
6834; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s49
6835; GCN-HSA-NEXT:    s_bfe_i64 s[8:9], s[68:69], 0x100000
6836; GCN-HSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
6837; GCN-HSA-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x100000
6838; GCN-HSA-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x100000
6839; GCN-HSA-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
6840; GCN-HSA-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x100000
6841; GCN-HSA-NEXT:    s_bfe_i64 s[48:49], s[66:67], 0x100000
6842; GCN-HSA-NEXT:    s_bfe_i64 s[50:51], s[64:65], 0x100000
6843; GCN-HSA-NEXT:    s_bfe_i64 s[62:63], s[62:63], 0x100000
6844; GCN-HSA-NEXT:    s_bfe_i64 s[60:61], s[60:61], 0x100000
6845; GCN-HSA-NEXT:    s_add_u32 s64, s0, 0xf0
6846; GCN-HSA-NEXT:    s_addc_u32 s65, s1, 0
6847; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s34
6848; GCN-HSA-NEXT:    s_add_u32 s34, s0, 0xd0
6849; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s35
6850; GCN-HSA-NEXT:    s_addc_u32 s35, s1, 0
6851; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s34
6852; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s35
6853; GCN-HSA-NEXT:    s_add_u32 s34, s0, 0xb0
6854; GCN-HSA-NEXT:    s_addc_u32 s35, s1, 0
6855; GCN-HSA-NEXT:    v_mov_b32_e32 v26, s34
6856; GCN-HSA-NEXT:    v_mov_b32_e32 v27, s35
6857; GCN-HSA-NEXT:    s_add_u32 s34, s0, 0x90
6858; GCN-HSA-NEXT:    s_addc_u32 s35, s1, 0
6859; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s34
6860; GCN-HSA-NEXT:    v_mov_b32_e32 v29, s35
6861; GCN-HSA-NEXT:    s_add_u32 s34, s0, 0x70
6862; GCN-HSA-NEXT:    s_addc_u32 s35, s1, 0
6863; GCN-HSA-NEXT:    v_mov_b32_e32 v30, s34
6864; GCN-HSA-NEXT:    v_mov_b32_e32 v31, s35
6865; GCN-HSA-NEXT:    s_add_u32 s34, s0, 0x50
6866; GCN-HSA-NEXT:    s_addc_u32 s35, s1, 0
6867; GCN-HSA-NEXT:    v_mov_b32_e32 v32, s34
6868; GCN-HSA-NEXT:    v_mov_b32_e32 v33, s35
6869; GCN-HSA-NEXT:    s_add_u32 s34, s0, 48
6870; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s46
6871; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s47
6872; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[4:7]
6873; GCN-HSA-NEXT:    s_addc_u32 s35, s1, 0
6874; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s34
6875; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s35
6876; GCN-HSA-NEXT:    s_add_u32 s34, s0, 16
6877; GCN-HSA-NEXT:    s_addc_u32 s35, s1, 0
6878; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s52
6879; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s53
6880; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s44
6881; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s45
6882; GCN-HSA-NEXT:    flat_store_dwordx4 v[26:27], v[8:11]
6883; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s64
6884; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s26
6885; GCN-HSA-NEXT:    s_add_u32 s26, s0, 0xe0
6886; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s27
6887; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s54
6888; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s55
6889; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s42
6890; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s43
6891; GCN-HSA-NEXT:    s_addc_u32 s27, s1, 0
6892; GCN-HSA-NEXT:    flat_store_dwordx4 v[28:29], v[12:15]
6893; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s65
6894; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s22
6895; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s56
6896; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s57
6897; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s40
6898; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s41
6899; GCN-HSA-NEXT:    s_add_u32 s22, s0, 0xc0
6900; GCN-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[0:3]
6901; GCN-HSA-NEXT:    v_mov_b32_e32 v34, s34
6902; GCN-HSA-NEXT:    flat_store_dwordx4 v[30:31], v[16:19]
6903; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s23
6904; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s26
6905; GCN-HSA-NEXT:    s_addc_u32 s23, s1, 0
6906; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s22
6907; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s58
6908; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s59
6909; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s60
6910; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s38
6911; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s39
6912; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s61
6913; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s36
6914; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s37
6915; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s62
6916; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s63
6917; GCN-HSA-NEXT:    v_mov_b32_e32 v35, s35
6918; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s28
6919; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s29
6920; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s50
6921; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s51
6922; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s27
6923; GCN-HSA-NEXT:    flat_store_dwordx4 v[32:33], v[20:23]
6924; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s48
6925; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s49
6926; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s23
6927; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[0:3]
6928; GCN-HSA-NEXT:    flat_store_dwordx4 v[34:35], v[4:7]
6929; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
6930; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
6931; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s18
6932; GCN-HSA-NEXT:    s_add_u32 s18, s0, 0xa0
6933; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s19
6934; GCN-HSA-NEXT:    s_addc_u32 s19, s1, 0
6935; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s18
6936; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s30
6937; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s31
6938; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s19
6939; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6940; GCN-HSA-NEXT:    s_nop 0
6941; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
6942; GCN-HSA-NEXT:    s_add_u32 s14, s0, 0x80
6943; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s15
6944; GCN-HSA-NEXT:    s_addc_u32 s15, s1, 0
6945; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s14
6946; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s24
6947; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s25
6948; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s15
6949; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6950; GCN-HSA-NEXT:    s_nop 0
6951; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
6952; GCN-HSA-NEXT:    s_add_u32 s10, s0, 0x60
6953; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s11
6954; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
6955; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s10
6956; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s20
6957; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s21
6958; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s11
6959; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6960; GCN-HSA-NEXT:    s_nop 0
6961; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
6962; GCN-HSA-NEXT:    s_add_u32 s6, s0, 64
6963; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s7
6964; GCN-HSA-NEXT:    s_addc_u32 s7, s1, 0
6965; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s6
6966; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s16
6967; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s17
6968; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s7
6969; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6970; GCN-HSA-NEXT:    s_nop 0
6971; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
6972; GCN-HSA-NEXT:    s_add_u32 s4, s0, 32
6973; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
6974; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6975; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6976; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s12
6977; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s13
6978; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6979; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6980; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6981; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
6982; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
6983; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s8
6984; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s9
6985; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6986; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6987; GCN-HSA-NEXT:    s_endpgm
6988;
6989; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i64:
6990; GCN-NOHSA-VI:       ; %bb.0:
6991; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x24
6992; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6993; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[12:27], s[10:11], 0x0
6994; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, 0xf000
6995; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, -1
6996; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6997; GCN-NOHSA-VI-NEXT:    s_mov_b32 s66, s27
6998; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s68, s27, 16
6999; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[66:67], s[66:67], 0x100000
7000; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[68:69], s[68:69], 0x100000
7001; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[64:65], s[26:27], 0x100000
7002; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s26, s26, 16
7003; GCN-NOHSA-VI-NEXT:    s_mov_b32 s60, s25
7004; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s62, s25, 16
7005; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x100000
7006; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s66
7007; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s67
7008; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s68
7009; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s69
7010; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:240
7011; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[58:59], s[24:25], 0x100000
7012; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s24, s24, 16
7013; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[60:61], s[60:61], 0x100000
7014; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[62:63], s[62:63], 0x100000
7015; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s64
7016; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s65
7017; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s26
7018; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s27
7019; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:224
7020; GCN-NOHSA-VI-NEXT:    s_mov_b32 s54, s23
7021; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s56, s23, 16
7022; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
7023; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s60
7024; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s61
7025; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s62
7026; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s63
7027; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:208
7028; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[52:53], s[22:23], 0x100000
7029; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s22, s22, 16
7030; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x100000
7031; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[56:57], s[56:57], 0x100000
7032; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s58
7033; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s59
7034; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s24
7035; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s25
7036; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:192
7037; GCN-NOHSA-VI-NEXT:    s_mov_b32 s48, s21
7038; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s50, s21, 16
7039; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x100000
7040; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s54
7041; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s55
7042; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s56
7043; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s57
7044; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:176
7045; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[46:47], s[20:21], 0x100000
7046; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s20, s20, 16
7047; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x100000
7048; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x100000
7049; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s52
7050; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s53
7051; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s22
7052; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s23
7053; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:160
7054; GCN-NOHSA-VI-NEXT:    s_mov_b32 s42, s19
7055; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s44, s19, 16
7056; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x100000
7057; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s48
7058; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s49
7059; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s50
7060; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s51
7061; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:144
7062; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[40:41], s[18:19], 0x100000
7063; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s18, s18, 16
7064; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x100000
7065; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x100000
7066; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s46
7067; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s47
7068; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s20
7069; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s21
7070; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:128
7071; GCN-NOHSA-VI-NEXT:    s_mov_b32 s36, s17
7072; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s38, s17, 16
7073; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x100000
7074; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s42
7075; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s43
7076; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s44
7077; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s45
7078; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112
7079; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[16:17], 0x100000
7080; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s16, s16, 16
7081; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x100000
7082; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x100000
7083; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s40
7084; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s41
7085; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s18
7086; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s19
7087; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96
7088; GCN-NOHSA-VI-NEXT:    s_mov_b32 s30, s15
7089; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s34, s15, 16
7090; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x100000
7091; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s36
7092; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s37
7093; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s38
7094; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s39
7095; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:80
7096; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[2:3], s[14:15], 0x100000
7097; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s14, s14, 16
7098; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x100000
7099; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x100000
7100; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
7101; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
7102; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s16
7103; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s17
7104; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:64
7105; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s12, 16
7106; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s28, s13, 16
7107; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[0:1], s[12:13], 0x100000
7108; GCN-NOHSA-VI-NEXT:    s_mov_b32 s12, s13
7109; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
7110; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s30
7111; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s31
7112; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s34
7113; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
7114; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48
7115; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
7116; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x100000
7117; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
7118; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
7119; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s14
7120; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s15
7121; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32
7122; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
7123; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
7124; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s13
7125; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s28
7126; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s29
7127; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
7128; GCN-NOHSA-VI-NEXT:    s_nop 0
7129; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
7130; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
7131; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
7132; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
7133; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
7134; GCN-NOHSA-VI-NEXT:    s_endpgm
7135;
7136; EG-LABEL: constant_sextload_v32i16_to_v32i64:
7137; EG:       ; %bb.0:
7138; EG-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
7139; EG-NEXT:    TEX 0 @22
7140; EG-NEXT:    ALU 55, @31, KC0[CB0:0-32], KC1[]
7141; EG-NEXT:    TEX 2 @24
7142; EG-NEXT:    ALU 74, @87, KC0[CB0:0-32], KC1[]
7143; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0
7144; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T36.X, 0
7145; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T34.X, 0
7146; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T33.X, 0
7147; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0
7148; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0
7149; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T30.X, 0
7150; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T29.X, 0
7151; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0
7152; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0
7153; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0
7154; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T25.X, 0
7155; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0
7156; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0
7157; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0
7158; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1
7159; EG-NEXT:    CF_END
7160; EG-NEXT:    Fetch clause starting at 22:
7161; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
7162; EG-NEXT:    Fetch clause starting at 24:
7163; EG-NEXT:     VTX_READ_128 T38.XYZW, T19.X, 48, #1
7164; EG-NEXT:     VTX_READ_128 T39.XYZW, T19.X, 32, #1
7165; EG-NEXT:     VTX_READ_128 T40.XYZW, T19.X, 16, #1
7166; EG-NEXT:    ALU clause starting at 30:
7167; EG-NEXT:     MOV * T19.X, KC0[2].Z,
7168; EG-NEXT:    ALU clause starting at 31:
7169; EG-NEXT:     LSHR T21.X, KC0[2].Y, literal.x,
7170; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7171; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7172; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
7173; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7174; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7175; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
7176; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7177; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
7178; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
7179; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7180; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7181; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
7182; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7183; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
7184; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
7185; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7186; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7187; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
7188; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7189; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
7190; EG-NEXT:     LSHR T28.X, PV.W, literal.x,
7191; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7192; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
7193; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
7194; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7195; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
7196; EG-NEXT:     LSHR T30.X, PV.W, literal.x,
7197; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7198; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
7199; EG-NEXT:     LSHR T31.X, PV.W, literal.x,
7200; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7201; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
7202; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
7203; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7204; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
7205; EG-NEXT:     LSHR T33.X, PV.W, literal.x,
7206; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7207; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
7208; EG-NEXT:     LSHR T34.X, PV.W, literal.x,
7209; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
7210; EG-NEXT:     ASHR * T35.W, T20.X, literal.z,
7211; EG-NEXT:    2(2.802597e-45), 224(3.138909e-43)
7212; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7213; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
7214; EG-NEXT:     ASHR T35.Z, T20.X, literal.y,
7215; EG-NEXT:     ASHR * T37.W, T20.Y, literal.z,
7216; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7217; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7218; EG-NEXT:     BFE_INT T35.X, T20.X, 0.0, literal.x,
7219; EG-NEXT:     ASHR * T37.Z, T20.Y, literal.x,
7220; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7221; EG-NEXT:     BFE_INT T37.X, T20.Y, 0.0, literal.x,
7222; EG-NEXT:     ASHR T35.Y, PV.X, literal.y,
7223; EG-NEXT:     ASHR * T19.W, T20.Z, literal.y,
7224; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7225; EG-NEXT:    ALU clause starting at 87:
7226; EG-NEXT:     ASHR T19.Z, T20.Z, literal.x,
7227; EG-NEXT:     ASHR * T41.W, T20.W, literal.y,
7228; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7229; EG-NEXT:     BFE_INT T19.X, T20.Z, 0.0, literal.x,
7230; EG-NEXT:     ASHR T37.Y, T37.X, literal.y,
7231; EG-NEXT:     ASHR T41.Z, T20.W, literal.x,
7232; EG-NEXT:     ASHR * T42.W, T40.X, literal.y, BS:VEC_120/SCL_212
7233; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7234; EG-NEXT:     BFE_INT T41.X, T20.W, 0.0, literal.x,
7235; EG-NEXT:     ASHR T19.Y, PV.X, literal.y,
7236; EG-NEXT:     ASHR T42.Z, T40.X, literal.x,
7237; EG-NEXT:     ASHR * T20.W, T40.Y, literal.y,
7238; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7239; EG-NEXT:     BFE_INT T42.X, T40.X, 0.0, literal.x,
7240; EG-NEXT:     ASHR T41.Y, PV.X, literal.y,
7241; EG-NEXT:     ASHR T20.Z, T40.Y, literal.x,
7242; EG-NEXT:     ASHR * T43.W, T40.Z, literal.y,
7243; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7244; EG-NEXT:     BFE_INT T20.X, T40.Y, 0.0, literal.x,
7245; EG-NEXT:     ASHR T42.Y, PV.X, literal.y,
7246; EG-NEXT:     ASHR T43.Z, T40.Z, literal.x,
7247; EG-NEXT:     ASHR * T44.W, T40.W, literal.y,
7248; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7249; EG-NEXT:     BFE_INT T43.X, T40.Z, 0.0, literal.x,
7250; EG-NEXT:     ASHR T20.Y, PV.X, literal.y,
7251; EG-NEXT:     ASHR T44.Z, T40.W, literal.x,
7252; EG-NEXT:     ASHR * T45.W, T39.X, literal.y,
7253; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7254; EG-NEXT:     BFE_INT T44.X, T40.W, 0.0, literal.x,
7255; EG-NEXT:     ASHR T43.Y, PV.X, literal.y,
7256; EG-NEXT:     ASHR T45.Z, T39.X, literal.x,
7257; EG-NEXT:     ASHR * T40.W, T39.Y, literal.y,
7258; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7259; EG-NEXT:     BFE_INT T45.X, T39.X, 0.0, literal.x,
7260; EG-NEXT:     ASHR T44.Y, PV.X, literal.y,
7261; EG-NEXT:     ASHR T40.Z, T39.Y, literal.x,
7262; EG-NEXT:     ASHR * T46.W, T39.Z, literal.y,
7263; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7264; EG-NEXT:     BFE_INT T40.X, T39.Y, 0.0, literal.x,
7265; EG-NEXT:     ASHR T45.Y, PV.X, literal.y,
7266; EG-NEXT:     ASHR T46.Z, T39.Z, literal.x,
7267; EG-NEXT:     ASHR * T47.W, T39.W, literal.y,
7268; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7269; EG-NEXT:     BFE_INT T46.X, T39.Z, 0.0, literal.x,
7270; EG-NEXT:     ASHR T40.Y, PV.X, literal.y,
7271; EG-NEXT:     ASHR T47.Z, T39.W, literal.x,
7272; EG-NEXT:     ASHR * T48.W, T38.X, literal.y,
7273; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7274; EG-NEXT:     BFE_INT T47.X, T39.W, 0.0, literal.x,
7275; EG-NEXT:     ASHR T46.Y, PV.X, literal.y,
7276; EG-NEXT:     ASHR T48.Z, T38.X, literal.x,
7277; EG-NEXT:     ASHR * T39.W, T38.Y, literal.y,
7278; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7279; EG-NEXT:     BFE_INT T48.X, T38.X, 0.0, literal.x,
7280; EG-NEXT:     ASHR T47.Y, PV.X, literal.y,
7281; EG-NEXT:     ASHR T39.Z, T38.Y, literal.x,
7282; EG-NEXT:     ASHR * T49.W, T38.Z, literal.y,
7283; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7284; EG-NEXT:     BFE_INT T39.X, T38.Y, 0.0, literal.x,
7285; EG-NEXT:     ASHR T48.Y, PV.X, literal.y,
7286; EG-NEXT:     ASHR T49.Z, T38.Z, literal.x,
7287; EG-NEXT:     ASHR * T50.W, T38.W, literal.y,
7288; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7289; EG-NEXT:     BFE_INT T49.X, T38.Z, 0.0, literal.x,
7290; EG-NEXT:     ASHR T39.Y, PV.X, literal.y,
7291; EG-NEXT:     ASHR * T50.Z, T38.W, literal.x,
7292; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7293; EG-NEXT:     BFE_INT T50.X, T38.W, 0.0, literal.x,
7294; EG-NEXT:     ASHR T49.Y, PV.X, literal.y,
7295; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
7296; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7297; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
7298; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
7299; EG-NEXT:     ASHR * T50.Y, PV.X, literal.y,
7300; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
7301  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
7302  %ext = sext <32 x i16> %load to <32 x i64>
7303  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
7304  ret void
7305}
7306
7307; The two tests below are commented out because they trigger undefined-register machine verifier errors.
7308
7309; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
7310;   %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
7311;   %ext = zext <64 x i16> %load to <64 x i64>
7312;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
7313;   ret void
7314; }
7315
7316; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
7317;   %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
7318;   %ext = sext <64 x i16> %load to <64 x i64>
7319;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
7320;   ret void
7321; }
7322
; Attribute group #0: applies `nounwind` to every function whose signature ends in `#0` (e.g. the commented-out v64i16 kernels above).
7323attributes #0 = { nounwind }
7324