1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-UNPACKED %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-PACKED %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
6
; Loads a single half from a 1D image with dmask operand 1 and returns it.
; All four llc configurations expect the same shape of output: s2-s9 copied
; down into s0-s7, one d16 image_load into v0 using s[0:7], then a vmcnt(0)
; wait. Only the gfx1010 output carries a dim:SQ_RSRC_IMG_1D operand.
7define amdgpu_ps half @load_1d_f16_x(<8 x i32> inreg %rsrc, i32 %s) {
8; GFX8-UNPACKED-LABEL: load_1d_f16_x:
9; GFX8-UNPACKED:       ; %bb.0:
10; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
11; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
12; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
13; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
14; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
15; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
16; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
17; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
18; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
19; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
20; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
21;
22; GFX8-PACKED-LABEL: load_1d_f16_x:
23; GFX8-PACKED:       ; %bb.0:
24; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
25; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
26; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
27; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
28; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
29; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
30; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
31; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
32; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
33; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
34; GFX8-PACKED-NEXT:    ; return to shader part epilog
35;
36; GFX9-LABEL: load_1d_f16_x:
37; GFX9:       ; %bb.0:
38; GFX9-NEXT:    s_mov_b32 s0, s2
39; GFX9-NEXT:    s_mov_b32 s1, s3
40; GFX9-NEXT:    s_mov_b32 s2, s4
41; GFX9-NEXT:    s_mov_b32 s3, s5
42; GFX9-NEXT:    s_mov_b32 s4, s6
43; GFX9-NEXT:    s_mov_b32 s5, s7
44; GFX9-NEXT:    s_mov_b32 s6, s8
45; GFX9-NEXT:    s_mov_b32 s7, s9
46; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
47; GFX9-NEXT:    s_waitcnt vmcnt(0)
48; GFX9-NEXT:    ; return to shader part epilog
49;
50; GFX10-LABEL: load_1d_f16_x:
51; GFX10:       ; %bb.0:
52; GFX10-NEXT:    s_mov_b32 s0, s2
53; GFX10-NEXT:    s_mov_b32 s1, s3
54; GFX10-NEXT:    s_mov_b32 s2, s4
55; GFX10-NEXT:    s_mov_b32 s3, s5
56; GFX10-NEXT:    s_mov_b32 s4, s6
57; GFX10-NEXT:    s_mov_b32 s5, s7
58; GFX10-NEXT:    s_mov_b32 s6, s8
59; GFX10-NEXT:    s_mov_b32 s7, s9
60; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm d16
61; GFX10-NEXT:    s_waitcnt vmcnt(0)
62; GFX10-NEXT:    ; return to shader part epilog
63  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
64  ret half %v
65}
66
; Loads a single half from a 1D image with dmask operand 2 and returns it.
; The expected output for every llc configuration is one d16 image_load into
; v0 with dmask:0x2, preceded by the s2-s9 -> s0-s7 copies and followed by a
; vmcnt(0) wait. Only the gfx1010 output carries a dim:SQ_RSRC_IMG_1D operand.
67define amdgpu_ps half @load_1d_f16_y(<8 x i32> inreg %rsrc, i32 %s) {
68; GFX8-UNPACKED-LABEL: load_1d_f16_y:
69; GFX8-UNPACKED:       ; %bb.0:
70; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
71; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
72; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
73; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
74; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
75; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
76; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
77; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
78; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
79; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
80; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
81;
82; GFX8-PACKED-LABEL: load_1d_f16_y:
83; GFX8-PACKED:       ; %bb.0:
84; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
85; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
86; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
87; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
88; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
89; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
90; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
91; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
92; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
93; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
94; GFX8-PACKED-NEXT:    ; return to shader part epilog
95;
96; GFX9-LABEL: load_1d_f16_y:
97; GFX9:       ; %bb.0:
98; GFX9-NEXT:    s_mov_b32 s0, s2
99; GFX9-NEXT:    s_mov_b32 s1, s3
100; GFX9-NEXT:    s_mov_b32 s2, s4
101; GFX9-NEXT:    s_mov_b32 s3, s5
102; GFX9-NEXT:    s_mov_b32 s4, s6
103; GFX9-NEXT:    s_mov_b32 s5, s7
104; GFX9-NEXT:    s_mov_b32 s6, s8
105; GFX9-NEXT:    s_mov_b32 s7, s9
106; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
107; GFX9-NEXT:    s_waitcnt vmcnt(0)
108; GFX9-NEXT:    ; return to shader part epilog
109;
110; GFX10-LABEL: load_1d_f16_y:
111; GFX10:       ; %bb.0:
112; GFX10-NEXT:    s_mov_b32 s0, s2
113; GFX10-NEXT:    s_mov_b32 s1, s3
114; GFX10-NEXT:    s_mov_b32 s2, s4
115; GFX10-NEXT:    s_mov_b32 s3, s5
116; GFX10-NEXT:    s_mov_b32 s4, s6
117; GFX10-NEXT:    s_mov_b32 s5, s7
118; GFX10-NEXT:    s_mov_b32 s6, s8
119; GFX10-NEXT:    s_mov_b32 s7, s9
120; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm d16
121; GFX10-NEXT:    s_waitcnt vmcnt(0)
122; GFX10-NEXT:    ; return to shader part epilog
123  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
124  ret half %v
125}
126
; Loads a single half from a 1D image with dmask operand 4 and returns it.
; The expected output for every llc configuration is one d16 image_load into
; v0 with dmask:0x4, preceded by the s2-s9 -> s0-s7 copies and followed by a
; vmcnt(0) wait. Only the gfx1010 output carries a dim:SQ_RSRC_IMG_1D operand.
127define amdgpu_ps half @load_1d_f16_z(<8 x i32> inreg %rsrc, i32 %s) {
128; GFX8-UNPACKED-LABEL: load_1d_f16_z:
129; GFX8-UNPACKED:       ; %bb.0:
130; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
131; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
132; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
133; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
134; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
135; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
136; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
137; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
138; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
139; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
140; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
141;
142; GFX8-PACKED-LABEL: load_1d_f16_z:
143; GFX8-PACKED:       ; %bb.0:
144; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
145; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
146; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
147; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
148; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
149; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
150; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
151; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
152; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
153; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
154; GFX8-PACKED-NEXT:    ; return to shader part epilog
155;
156; GFX9-LABEL: load_1d_f16_z:
157; GFX9:       ; %bb.0:
158; GFX9-NEXT:    s_mov_b32 s0, s2
159; GFX9-NEXT:    s_mov_b32 s1, s3
160; GFX9-NEXT:    s_mov_b32 s2, s4
161; GFX9-NEXT:    s_mov_b32 s3, s5
162; GFX9-NEXT:    s_mov_b32 s4, s6
163; GFX9-NEXT:    s_mov_b32 s5, s7
164; GFX9-NEXT:    s_mov_b32 s6, s8
165; GFX9-NEXT:    s_mov_b32 s7, s9
166; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
167; GFX9-NEXT:    s_waitcnt vmcnt(0)
168; GFX9-NEXT:    ; return to shader part epilog
169;
170; GFX10-LABEL: load_1d_f16_z:
171; GFX10:       ; %bb.0:
172; GFX10-NEXT:    s_mov_b32 s0, s2
173; GFX10-NEXT:    s_mov_b32 s1, s3
174; GFX10-NEXT:    s_mov_b32 s2, s4
175; GFX10-NEXT:    s_mov_b32 s3, s5
176; GFX10-NEXT:    s_mov_b32 s4, s6
177; GFX10-NEXT:    s_mov_b32 s5, s7
178; GFX10-NEXT:    s_mov_b32 s6, s8
179; GFX10-NEXT:    s_mov_b32 s7, s9
180; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm d16
181; GFX10-NEXT:    s_waitcnt vmcnt(0)
182; GFX10-NEXT:    ; return to shader part epilog
183  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
184  ret half %v
185}
186
; Loads a single half from a 1D image with dmask operand 8 and returns it.
; The expected output for every llc configuration is one d16 image_load into
; v0 with dmask:0x8, preceded by the s2-s9 -> s0-s7 copies and followed by a
; vmcnt(0) wait. Only the gfx1010 output carries a dim:SQ_RSRC_IMG_1D operand.
187define amdgpu_ps half @load_1d_f16_w(<8 x i32> inreg %rsrc, i32 %s) {
188; GFX8-UNPACKED-LABEL: load_1d_f16_w:
189; GFX8-UNPACKED:       ; %bb.0:
190; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
191; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
192; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
193; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
194; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
195; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
196; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
197; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
198; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
199; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
200; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
201;
202; GFX8-PACKED-LABEL: load_1d_f16_w:
203; GFX8-PACKED:       ; %bb.0:
204; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
205; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
206; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
207; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
208; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
209; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
210; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
211; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
212; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
213; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
214; GFX8-PACKED-NEXT:    ; return to shader part epilog
215;
216; GFX9-LABEL: load_1d_f16_w:
217; GFX9:       ; %bb.0:
218; GFX9-NEXT:    s_mov_b32 s0, s2
219; GFX9-NEXT:    s_mov_b32 s1, s3
220; GFX9-NEXT:    s_mov_b32 s2, s4
221; GFX9-NEXT:    s_mov_b32 s3, s5
222; GFX9-NEXT:    s_mov_b32 s4, s6
223; GFX9-NEXT:    s_mov_b32 s5, s7
224; GFX9-NEXT:    s_mov_b32 s6, s8
225; GFX9-NEXT:    s_mov_b32 s7, s9
226; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
227; GFX9-NEXT:    s_waitcnt vmcnt(0)
228; GFX9-NEXT:    ; return to shader part epilog
229;
230; GFX10-LABEL: load_1d_f16_w:
231; GFX10:       ; %bb.0:
232; GFX10-NEXT:    s_mov_b32 s0, s2
233; GFX10-NEXT:    s_mov_b32 s1, s3
234; GFX10-NEXT:    s_mov_b32 s2, s4
235; GFX10-NEXT:    s_mov_b32 s3, s5
236; GFX10-NEXT:    s_mov_b32 s4, s6
237; GFX10-NEXT:    s_mov_b32 s5, s7
238; GFX10-NEXT:    s_mov_b32 s6, s8
239; GFX10-NEXT:    s_mov_b32 s7, s9
240; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm d16
241; GFX10-NEXT:    s_waitcnt vmcnt(0)
242; GFX10-NEXT:    ; return to shader part epilog
243  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
244  ret half %v
245}
246
; Loads <2 x half> from a 1D image with dmask operand 3 and returns it.
; In the tonga output the load writes two registers, v[0:1], and the low 16
; bits of each are merged into v0 (and / shift-left by 16 / or). The gfx810,
; gfx900 and gfx1010 outputs load straight into v0 with no repacking.
247define amdgpu_ps <2 x half> @load_1d_v2f16_xy(<8 x i32> inreg %rsrc, i32 %s) {
248; GFX8-UNPACKED-LABEL: load_1d_v2f16_xy:
249; GFX8-UNPACKED:       ; %bb.0:
250; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
251; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
252; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
253; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
254; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
255; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
256; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
257; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
258; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm d16
259; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
260; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
261; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
262; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
263; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
264;
265; GFX8-PACKED-LABEL: load_1d_v2f16_xy:
266; GFX8-PACKED:       ; %bb.0:
267; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
268; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
269; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
270; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
271; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
272; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
273; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
274; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
275; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 unorm d16
276; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
277; GFX8-PACKED-NEXT:    ; return to shader part epilog
278;
279; GFX9-LABEL: load_1d_v2f16_xy:
280; GFX9:       ; %bb.0:
281; GFX9-NEXT:    s_mov_b32 s0, s2
282; GFX9-NEXT:    s_mov_b32 s1, s3
283; GFX9-NEXT:    s_mov_b32 s2, s4
284; GFX9-NEXT:    s_mov_b32 s3, s5
285; GFX9-NEXT:    s_mov_b32 s4, s6
286; GFX9-NEXT:    s_mov_b32 s5, s7
287; GFX9-NEXT:    s_mov_b32 s6, s8
288; GFX9-NEXT:    s_mov_b32 s7, s9
289; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 unorm d16
290; GFX9-NEXT:    s_waitcnt vmcnt(0)
291; GFX9-NEXT:    ; return to shader part epilog
292;
293; GFX10-LABEL: load_1d_v2f16_xy:
294; GFX10:       ; %bb.0:
295; GFX10-NEXT:    s_mov_b32 s0, s2
296; GFX10-NEXT:    s_mov_b32 s1, s3
297; GFX10-NEXT:    s_mov_b32 s2, s4
298; GFX10-NEXT:    s_mov_b32 s3, s5
299; GFX10-NEXT:    s_mov_b32 s4, s6
300; GFX10-NEXT:    s_mov_b32 s5, s7
301; GFX10-NEXT:    s_mov_b32 s6, s8
302; GFX10-NEXT:    s_mov_b32 s7, s9
303; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm d16
304; GFX10-NEXT:    s_waitcnt vmcnt(0)
305; GFX10-NEXT:    ; return to shader part epilog
306  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
307  ret <2 x half> %v
308}
309
; Loads <2 x half> from a 1D image with dmask operand 5 (two non-adjacent
; mask bits set) and returns it. In the tonga output the load writes v[0:1]
; and the low 16 bits of each register are merged into v0; the gfx810, gfx900
; and gfx1010 outputs load straight into v0 with no repacking.
310define amdgpu_ps <2 x half> @load_1d_v2f16_xz(<8 x i32> inreg %rsrc, i32 %s) {
311; GFX8-UNPACKED-LABEL: load_1d_v2f16_xz:
312; GFX8-UNPACKED:       ; %bb.0:
313; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
314; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
315; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
316; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
317; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
318; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
319; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
320; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
321; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 unorm d16
322; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
323; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
324; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
325; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
326; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
327;
328; GFX8-PACKED-LABEL: load_1d_v2f16_xz:
329; GFX8-PACKED:       ; %bb.0:
330; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
331; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
332; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
333; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
334; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
335; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
336; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
337; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
338; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 unorm d16
339; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
340; GFX8-PACKED-NEXT:    ; return to shader part epilog
341;
342; GFX9-LABEL: load_1d_v2f16_xz:
343; GFX9:       ; %bb.0:
344; GFX9-NEXT:    s_mov_b32 s0, s2
345; GFX9-NEXT:    s_mov_b32 s1, s3
346; GFX9-NEXT:    s_mov_b32 s2, s4
347; GFX9-NEXT:    s_mov_b32 s3, s5
348; GFX9-NEXT:    s_mov_b32 s4, s6
349; GFX9-NEXT:    s_mov_b32 s5, s7
350; GFX9-NEXT:    s_mov_b32 s6, s8
351; GFX9-NEXT:    s_mov_b32 s7, s9
352; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 unorm d16
353; GFX9-NEXT:    s_waitcnt vmcnt(0)
354; GFX9-NEXT:    ; return to shader part epilog
355;
356; GFX10-LABEL: load_1d_v2f16_xz:
357; GFX10:       ; %bb.0:
358; GFX10-NEXT:    s_mov_b32 s0, s2
359; GFX10-NEXT:    s_mov_b32 s1, s3
360; GFX10-NEXT:    s_mov_b32 s2, s4
361; GFX10-NEXT:    s_mov_b32 s3, s5
362; GFX10-NEXT:    s_mov_b32 s4, s6
363; GFX10-NEXT:    s_mov_b32 s5, s7
364; GFX10-NEXT:    s_mov_b32 s6, s8
365; GFX10-NEXT:    s_mov_b32 s7, s9
366; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm d16
367; GFX10-NEXT:    s_waitcnt vmcnt(0)
368; GFX10-NEXT:    ; return to shader part epilog
369  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
370  ret <2 x half> %v
371}
372
; Loads <2 x half> from a 1D image with dmask operand 9 (lowest and highest
; of the four mask bits set) and returns it. In the tonga output the load
; writes v[0:1] and the low 16 bits of each register are merged into v0; the
; gfx810, gfx900 and gfx1010 outputs load straight into v0 with no repacking.
373define amdgpu_ps <2 x half> @load_1d_v2f16_xw(<8 x i32> inreg %rsrc, i32 %s) {
374; GFX8-UNPACKED-LABEL: load_1d_v2f16_xw:
375; GFX8-UNPACKED:       ; %bb.0:
376; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
377; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
378; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
379; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
380; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
381; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
382; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
383; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
384; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm d16
385; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
386; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
387; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
388; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
389; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
390;
391; GFX8-PACKED-LABEL: load_1d_v2f16_xw:
392; GFX8-PACKED:       ; %bb.0:
393; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
394; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
395; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
396; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
397; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
398; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
399; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
400; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
401; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 unorm d16
402; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
403; GFX8-PACKED-NEXT:    ; return to shader part epilog
404;
405; GFX9-LABEL: load_1d_v2f16_xw:
406; GFX9:       ; %bb.0:
407; GFX9-NEXT:    s_mov_b32 s0, s2
408; GFX9-NEXT:    s_mov_b32 s1, s3
409; GFX9-NEXT:    s_mov_b32 s2, s4
410; GFX9-NEXT:    s_mov_b32 s3, s5
411; GFX9-NEXT:    s_mov_b32 s4, s6
412; GFX9-NEXT:    s_mov_b32 s5, s7
413; GFX9-NEXT:    s_mov_b32 s6, s8
414; GFX9-NEXT:    s_mov_b32 s7, s9
415; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 unorm d16
416; GFX9-NEXT:    s_waitcnt vmcnt(0)
417; GFX9-NEXT:    ; return to shader part epilog
418;
419; GFX10-LABEL: load_1d_v2f16_xw:
420; GFX10:       ; %bb.0:
421; GFX10-NEXT:    s_mov_b32 s0, s2
422; GFX10-NEXT:    s_mov_b32 s1, s3
423; GFX10-NEXT:    s_mov_b32 s2, s4
424; GFX10-NEXT:    s_mov_b32 s3, s5
425; GFX10-NEXT:    s_mov_b32 s4, s6
426; GFX10-NEXT:    s_mov_b32 s5, s7
427; GFX10-NEXT:    s_mov_b32 s6, s8
428; GFX10-NEXT:    s_mov_b32 s7, s9
429; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm d16
430; GFX10-NEXT:    s_waitcnt vmcnt(0)
431; GFX10-NEXT:    ; return to shader part epilog
432  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
433  ret <2 x half> %v
434}
435
; Loads <2 x half> from a 1D image with dmask operand 6 (lowest mask bit
; clear) and returns it. In the tonga output the load writes v[0:1] and the
; low 16 bits of each register are merged into v0; the gfx810, gfx900 and
; gfx1010 outputs load straight into v0 with no repacking.
436define amdgpu_ps <2 x half> @load_1d_v2f16_yz(<8 x i32> inreg %rsrc, i32 %s) {
437; GFX8-UNPACKED-LABEL: load_1d_v2f16_yz:
438; GFX8-UNPACKED:       ; %bb.0:
439; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
440; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
441; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
442; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
443; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
444; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
445; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
446; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
447; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 unorm d16
448; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
449; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
450; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
451; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
452; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
453;
454; GFX8-PACKED-LABEL: load_1d_v2f16_yz:
455; GFX8-PACKED:       ; %bb.0:
456; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
457; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
458; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
459; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
460; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
461; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
462; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
463; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
464; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 unorm d16
465; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
466; GFX8-PACKED-NEXT:    ; return to shader part epilog
467;
468; GFX9-LABEL: load_1d_v2f16_yz:
469; GFX9:       ; %bb.0:
470; GFX9-NEXT:    s_mov_b32 s0, s2
471; GFX9-NEXT:    s_mov_b32 s1, s3
472; GFX9-NEXT:    s_mov_b32 s2, s4
473; GFX9-NEXT:    s_mov_b32 s3, s5
474; GFX9-NEXT:    s_mov_b32 s4, s6
475; GFX9-NEXT:    s_mov_b32 s5, s7
476; GFX9-NEXT:    s_mov_b32 s6, s8
477; GFX9-NEXT:    s_mov_b32 s7, s9
478; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 unorm d16
479; GFX9-NEXT:    s_waitcnt vmcnt(0)
480; GFX9-NEXT:    ; return to shader part epilog
481;
482; GFX10-LABEL: load_1d_v2f16_yz:
483; GFX10:       ; %bb.0:
484; GFX10-NEXT:    s_mov_b32 s0, s2
485; GFX10-NEXT:    s_mov_b32 s1, s3
486; GFX10-NEXT:    s_mov_b32 s2, s4
487; GFX10-NEXT:    s_mov_b32 s3, s5
488; GFX10-NEXT:    s_mov_b32 s4, s6
489; GFX10-NEXT:    s_mov_b32 s5, s7
490; GFX10-NEXT:    s_mov_b32 s6, s8
491; GFX10-NEXT:    s_mov_b32 s7, s9
492; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm d16
493; GFX10-NEXT:    s_waitcnt vmcnt(0)
494; GFX10-NEXT:    ; return to shader part epilog
495  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
496  ret <2 x half> %v
497}
498
; Loads <3 x half> from a 1D image with dmask operand 7 and returns it.
; The tonga output loads three registers, v[0:2]; the other three outputs
; load two, v[0:1]. Unlike the 2- and 4-element tests in this file, every
; configuration here emits extra mask/shift/or instructions after the load to
; build the returned v0 and v1.
; NOTE(review): several expected lines read s0 after it was last written as
; part of the s[0:7] resource operand (e.g. "s_lshl_b32 s0, s0, 16" in the
; gfx900 and gfx1010 outputs) and fold it into the upper half of v1. This is
; presumably filler for the unused fourth 16-bit lane - confirm before relying
; on it when regenerating the assertions.
499define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
500; GFX8-UNPACKED-LABEL: load_1d_v3f16_xyz:
501; GFX8-UNPACKED:       ; %bb.0:
502; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
503; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
504; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
505; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
506; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
507; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
508; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
509; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
510; GFX8-UNPACKED-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 unorm d16
511; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, 0xffff
512; GFX8-UNPACKED-NEXT:    s_and_b32 s1, s0, s0
513; GFX8-UNPACKED-NEXT:    s_lshl_b32 s1, s1, 16
514; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v3, s1
515; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
516; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v4, s0, v1
517; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
518; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
519; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
520; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
521;
522; GFX8-PACKED-LABEL: load_1d_v3f16_xyz:
523; GFX8-PACKED:       ; %bb.0:
524; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
525; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
526; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
527; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
528; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
529; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
530; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
531; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
532; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
533; GFX8-PACKED-NEXT:    s_mov_b32 s0, 0xffff
534; GFX8-PACKED-NEXT:    s_and_b32 s0, s0, s0
535; GFX8-PACKED-NEXT:    s_lshl_b32 s0, s0, 16
536; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, s0
537; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
538; GFX8-PACKED-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
539; GFX8-PACKED-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
540; GFX8-PACKED-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
541; GFX8-PACKED-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
542; GFX8-PACKED-NEXT:    ; return to shader part epilog
543;
544; GFX9-LABEL: load_1d_v3f16_xyz:
545; GFX9:       ; %bb.0:
546; GFX9-NEXT:    s_mov_b32 s0, s2
547; GFX9-NEXT:    s_mov_b32 s1, s3
548; GFX9-NEXT:    s_mov_b32 s2, s4
549; GFX9-NEXT:    s_mov_b32 s3, s5
550; GFX9-NEXT:    s_mov_b32 s4, s6
551; GFX9-NEXT:    s_mov_b32 s5, s7
552; GFX9-NEXT:    s_mov_b32 s6, s8
553; GFX9-NEXT:    s_mov_b32 s7, s9
554; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
555; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
556; GFX9-NEXT:    s_lshl_b32 s0, s0, 16
557; GFX9-NEXT:    s_waitcnt vmcnt(0)
558; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
559; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
560; GFX9-NEXT:    v_and_or_b32 v1, v1, v2, s0
561; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v3
562; GFX9-NEXT:    ; return to shader part epilog
563;
564; GFX10-LABEL: load_1d_v3f16_xyz:
565; GFX10:       ; %bb.0:
566; GFX10-NEXT:    s_mov_b32 s0, s2
567; GFX10-NEXT:    s_mov_b32 s1, s3
568; GFX10-NEXT:    s_mov_b32 s2, s4
569; GFX10-NEXT:    s_mov_b32 s3, s5
570; GFX10-NEXT:    s_mov_b32 s4, s6
571; GFX10-NEXT:    s_mov_b32 s5, s7
572; GFX10-NEXT:    s_mov_b32 s6, s8
573; GFX10-NEXT:    s_mov_b32 s7, s9
574; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
575; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
576; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
577; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
578; GFX10-NEXT:    s_waitcnt vmcnt(0)
579; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
580; GFX10-NEXT:    v_and_or_b32 v1, v1, v3, s0
581; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
582; GFX10-NEXT:    v_and_or_b32 v0, v0, v3, v2
583; GFX10-NEXT:    ; return to shader part epilog
584  %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
585  ret <3 x half> %v
586}
587
; Loads <4 x half> from a 1D image with dmask operand 15 (all four mask bits
; set) and returns it. The tonga output loads four registers, v[0:3], then
; merges the low 16 bits of v0/v1 into v0 and of v2/v3 into v1. The gfx810,
; gfx900 and gfx1010 outputs load v[0:1] directly with no repacking.
588define amdgpu_ps <4 x half> @load_1d_v4f16_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
589; GFX8-UNPACKED-LABEL: load_1d_v4f16_xyzw:
590; GFX8-UNPACKED:       ; %bb.0:
591; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
592; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
593; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
594; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
595; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
596; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
597; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
598; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
599; GFX8-UNPACKED-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm d16
600; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, 0xffff
601; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
602; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, s0, v1
603; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v3, s0, v3
604; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
605; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
606; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
607; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
608; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
609;
610; GFX8-PACKED-LABEL: load_1d_v4f16_xyzw:
611; GFX8-PACKED:       ; %bb.0:
612; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
613; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
614; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
615; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
616; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
617; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
618; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
619; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
620; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
621; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
622; GFX8-PACKED-NEXT:    ; return to shader part epilog
623;
624; GFX9-LABEL: load_1d_v4f16_xyzw:
625; GFX9:       ; %bb.0:
626; GFX9-NEXT:    s_mov_b32 s0, s2
627; GFX9-NEXT:    s_mov_b32 s1, s3
628; GFX9-NEXT:    s_mov_b32 s2, s4
629; GFX9-NEXT:    s_mov_b32 s3, s5
630; GFX9-NEXT:    s_mov_b32 s4, s6
631; GFX9-NEXT:    s_mov_b32 s5, s7
632; GFX9-NEXT:    s_mov_b32 s6, s8
633; GFX9-NEXT:    s_mov_b32 s7, s9
634; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
635; GFX9-NEXT:    s_waitcnt vmcnt(0)
636; GFX9-NEXT:    ; return to shader part epilog
637;
638; GFX10-LABEL: load_1d_v4f16_xyzw:
639; GFX10:       ; %bb.0:
640; GFX10-NEXT:    s_mov_b32 s0, s2
641; GFX10-NEXT:    s_mov_b32 s1, s3
642; GFX10-NEXT:    s_mov_b32 s2, s4
643; GFX10-NEXT:    s_mov_b32 s3, s5
644; GFX10-NEXT:    s_mov_b32 s4, s6
645; GFX10-NEXT:    s_mov_b32 s5, s7
646; GFX10-NEXT:    s_mov_b32 s6, s8
647; GFX10-NEXT:    s_mov_b32 s7, s9
648; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16
649; GFX10-NEXT:    s_waitcnt vmcnt(0)
650; GFX10-NEXT:    ; return to shader part epilog
651  %v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
652  ret <4 x half> %v
653}
654
; Calls the struct-returning { half, i32 } form of the 1D image load with
; dmask operand 1 and with the second-to-last operand set to 1 (it is 0 in
; the plain half/vector tests in this file; presumably the texfailctrl
; operand - confirm against the intrinsic documentation).
; Only the i32 element (struct index 1) is used: it is bitcast to float and
; returned. Every expected output has the tfe modifier on the image_load, a
; two-register v[0:1] destination, and a move of v1 into v0 after the wait.
655define amdgpu_ps float @load_1d_f16_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
656; GFX8-UNPACKED-LABEL: load_1d_f16_tfe_dmask_x:
657; GFX8-UNPACKED:       ; %bb.0:
658; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
659; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
660; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
661; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
662; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
663; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
664; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
665; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
666; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe d16
667; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
668; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v1
669; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
670;
671; GFX8-PACKED-LABEL: load_1d_f16_tfe_dmask_x:
672; GFX8-PACKED:       ; %bb.0:
673; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
674; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
675; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
676; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
677; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
678; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
679; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
680; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
681; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe d16
682; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
683; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v1
684; GFX8-PACKED-NEXT:    ; return to shader part epilog
685;
686; GFX9-LABEL: load_1d_f16_tfe_dmask_x:
687; GFX9:       ; %bb.0:
688; GFX9-NEXT:    s_mov_b32 s0, s2
689; GFX9-NEXT:    s_mov_b32 s1, s3
690; GFX9-NEXT:    s_mov_b32 s2, s4
691; GFX9-NEXT:    s_mov_b32 s3, s5
692; GFX9-NEXT:    s_mov_b32 s4, s6
693; GFX9-NEXT:    s_mov_b32 s5, s7
694; GFX9-NEXT:    s_mov_b32 s6, s8
695; GFX9-NEXT:    s_mov_b32 s7, s9
696; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe d16
697; GFX9-NEXT:    s_waitcnt vmcnt(0)
698; GFX9-NEXT:    v_mov_b32_e32 v0, v1
699; GFX9-NEXT:    ; return to shader part epilog
700;
701; GFX10-LABEL: load_1d_f16_tfe_dmask_x:
702; GFX10:       ; %bb.0:
703; GFX10-NEXT:    s_mov_b32 s0, s2
704; GFX10-NEXT:    s_mov_b32 s1, s3
705; GFX10-NEXT:    s_mov_b32 s2, s4
706; GFX10-NEXT:    s_mov_b32 s3, s5
707; GFX10-NEXT:    s_mov_b32 s4, s6
708; GFX10-NEXT:    s_mov_b32 s5, s7
709; GFX10-NEXT:    s_mov_b32 s6, s8
710; GFX10-NEXT:    s_mov_b32 s7, s9
711; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
712; GFX10-NEXT:    s_waitcnt vmcnt(0)
713; GFX10-NEXT:    v_mov_b32_e32 v0, v1
714; GFX10-NEXT:    ; return to shader part epilog
715  %v = call { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
716  %v.err = extractvalue { half, i32 } %v, 1
717  %vv = bitcast i32 %v.err to float
718  ret float %vv
719}
720
; D16 image load of <2 x half> with dmask 0x3 through the struct-returning
; intrinsic. Only the i32 struct member is consumed; it is bitcast to float and
; returned. The checks expect the `tfe` modifier on the load and a copy of the
; last result register into v0 (v2 for GFX8-UNPACKED, v1 for the other targets).
721define amdgpu_ps float @load_1d_v2f16_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
722; GFX8-UNPACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
723; GFX8-UNPACKED:       ; %bb.0:
724; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
725; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
726; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
727; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
728; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
729; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
730; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
731; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
732; GFX8-UNPACKED-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x3 unorm tfe d16
733; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
734; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v2
735; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
736;
737; GFX8-PACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
738; GFX8-PACKED:       ; %bb.0:
739; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
740; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
741; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
742; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
743; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
744; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
745; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
746; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
747; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm tfe d16
748; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
749; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v1
750; GFX8-PACKED-NEXT:    ; return to shader part epilog
751;
752; GFX9-LABEL: load_1d_v2f16_tfe_dmask_xy:
753; GFX9:       ; %bb.0:
754; GFX9-NEXT:    s_mov_b32 s0, s2
755; GFX9-NEXT:    s_mov_b32 s1, s3
756; GFX9-NEXT:    s_mov_b32 s2, s4
757; GFX9-NEXT:    s_mov_b32 s3, s5
758; GFX9-NEXT:    s_mov_b32 s4, s6
759; GFX9-NEXT:    s_mov_b32 s5, s7
760; GFX9-NEXT:    s_mov_b32 s6, s8
761; GFX9-NEXT:    s_mov_b32 s7, s9
762; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm tfe d16
763; GFX9-NEXT:    s_waitcnt vmcnt(0)
764; GFX9-NEXT:    v_mov_b32_e32 v0, v1
765; GFX9-NEXT:    ; return to shader part epilog
766;
767; GFX10-LABEL: load_1d_v2f16_tfe_dmask_xy:
768; GFX10:       ; %bb.0:
769; GFX10-NEXT:    s_mov_b32 s0, s2
770; GFX10-NEXT:    s_mov_b32 s1, s3
771; GFX10-NEXT:    s_mov_b32 s2, s4
772; GFX10-NEXT:    s_mov_b32 s3, s5
773; GFX10-NEXT:    s_mov_b32 s4, s6
774; GFX10-NEXT:    s_mov_b32 s5, s7
775; GFX10-NEXT:    s_mov_b32 s6, s8
776; GFX10-NEXT:    s_mov_b32 s7, s9
777; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe d16
778; GFX10-NEXT:    s_waitcnt vmcnt(0)
779; GFX10-NEXT:    v_mov_b32_e32 v0, v1
780; GFX10-NEXT:    ; return to shader part epilog
781  %load = call { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
782  %status = extractvalue { <2 x half>, i32 } %load, 1
783  %status.f32 = bitcast i32 %status to float
784  ret float %status.f32
785}
786
; D16 image load of <3 x half> with dmask 0x7 through the struct-returning
; intrinsic. Only the i32 struct member is consumed; it is bitcast to float and
; returned. The checks expect the `tfe` modifier on the load and a copy of the
; last result register into v0 (v3 for GFX8-UNPACKED, v2 for the other targets).
787define amdgpu_ps float @load_1d_v3f16_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
788; GFX8-UNPACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
789; GFX8-UNPACKED:       ; %bb.0:
790; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
791; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
792; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
793; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
794; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
795; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
796; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
797; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
798; GFX8-UNPACKED-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0x7 unorm tfe d16
799; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
800; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v3
801; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
802;
803; GFX8-PACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
804; GFX8-PACKED:       ; %bb.0:
805; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
806; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
807; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
808; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
809; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
810; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
811; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
812; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
813; GFX8-PACKED-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 unorm tfe d16
814; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
815; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v2
816; GFX8-PACKED-NEXT:    ; return to shader part epilog
817;
818; GFX9-LABEL: load_1d_v3f16_tfe_dmask_xyz:
819; GFX9:       ; %bb.0:
820; GFX9-NEXT:    s_mov_b32 s0, s2
821; GFX9-NEXT:    s_mov_b32 s1, s3
822; GFX9-NEXT:    s_mov_b32 s2, s4
823; GFX9-NEXT:    s_mov_b32 s3, s5
824; GFX9-NEXT:    s_mov_b32 s4, s6
825; GFX9-NEXT:    s_mov_b32 s5, s7
826; GFX9-NEXT:    s_mov_b32 s6, s8
827; GFX9-NEXT:    s_mov_b32 s7, s9
828; GFX9-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 unorm tfe d16
829; GFX9-NEXT:    s_waitcnt vmcnt(0)
830; GFX9-NEXT:    v_mov_b32_e32 v0, v2
831; GFX9-NEXT:    ; return to shader part epilog
832;
833; GFX10-LABEL: load_1d_v3f16_tfe_dmask_xyz:
834; GFX10:       ; %bb.0:
835; GFX10-NEXT:    s_mov_b32 s0, s2
836; GFX10-NEXT:    s_mov_b32 s1, s3
837; GFX10-NEXT:    s_mov_b32 s2, s4
838; GFX10-NEXT:    s_mov_b32 s3, s5
839; GFX10-NEXT:    s_mov_b32 s4, s6
840; GFX10-NEXT:    s_mov_b32 s5, s7
841; GFX10-NEXT:    s_mov_b32 s6, s8
842; GFX10-NEXT:    s_mov_b32 s7, s9
843; GFX10-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe d16
844; GFX10-NEXT:    s_waitcnt vmcnt(0)
845; GFX10-NEXT:    v_mov_b32_e32 v0, v2
846; GFX10-NEXT:    ; return to shader part epilog
847  %load = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
848  %status = extractvalue { <3 x half>, i32 } %load, 1
849  %status.f32 = bitcast i32 %status to float
850  ret float %status.f32
851}
852
; D16 image load of <4 x half> with all four components enabled (dmask 0xf)
; through the struct-returning intrinsic. Only the i32 struct member is
; consumed; it is bitcast to float and returned.
;
; The dmask immediate was previously 16 (0x10), which does not match the xyzw
; name or the 1/3/7 progression of the sibling tests, and the checks showed only
; a two-register result, so the four-component case was never exercised.
;
; NOTE(review): the assertions below were adjusted by hand to follow the
; register pattern of the sibling tests (one register per half on
; GFX8-UNPACKED, two halves per register elsewhere, plus one trailing register
; that is copied to v0). Re-run utils/update_llc_test_checks.py to confirm.
853define amdgpu_ps float @load_1d_v4f16_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
854; GFX8-UNPACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
855; GFX8-UNPACKED:       ; %bb.0:
856; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
857; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
858; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
859; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
860; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
861; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
862; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
863; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
864; GFX8-UNPACKED-NEXT:    image_load v[0:4], v0, s[0:7] dmask:0xf unorm tfe d16
865; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
866; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v4
867; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
868;
869; GFX8-PACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
870; GFX8-PACKED:       ; %bb.0:
871; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
872; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
873; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
874; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
875; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
876; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
877; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
878; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
879; GFX8-PACKED-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0xf unorm tfe d16
880; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
881; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v2
882; GFX8-PACKED-NEXT:    ; return to shader part epilog
883;
884; GFX9-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
885; GFX9:       ; %bb.0:
886; GFX9-NEXT:    s_mov_b32 s0, s2
887; GFX9-NEXT:    s_mov_b32 s1, s3
888; GFX9-NEXT:    s_mov_b32 s2, s4
889; GFX9-NEXT:    s_mov_b32 s3, s5
890; GFX9-NEXT:    s_mov_b32 s4, s6
891; GFX9-NEXT:    s_mov_b32 s5, s7
892; GFX9-NEXT:    s_mov_b32 s6, s8
893; GFX9-NEXT:    s_mov_b32 s7, s9
894; GFX9-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0xf unorm tfe d16
895; GFX9-NEXT:    s_waitcnt vmcnt(0)
896; GFX9-NEXT:    v_mov_b32_e32 v0, v2
897; GFX9-NEXT:    ; return to shader part epilog
898;
899; GFX10-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
900; GFX10:       ; %bb.0:
901; GFX10-NEXT:    s_mov_b32 s0, s2
902; GFX10-NEXT:    s_mov_b32 s1, s3
903; GFX10-NEXT:    s_mov_b32 s2, s4
904; GFX10-NEXT:    s_mov_b32 s3, s5
905; GFX10-NEXT:    s_mov_b32 s4, s6
906; GFX10-NEXT:    s_mov_b32 s5, s7
907; GFX10-NEXT:    s_mov_b32 s6, s8
908; GFX10-NEXT:    s_mov_b32 s7, s9
909; GFX10-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe d16
910; GFX10-NEXT:    s_waitcnt vmcnt(0)
911; GFX10-NEXT:    v_mov_b32_e32 v0, v2
912; GFX10-NEXT:    ; return to shader part epilog
913  %v = call { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
914  %v.err = extractvalue { <4 x half>, i32 } %v, 1
915  %vv = bitcast i32 %v.err to float
916  ret float %vv
917}
918
; 1D image-load intrinsic declarations that return only the loaded half data
; (scalar and 2/3/4-element vectors).
919declare half @llvm.amdgcn.image.load.1d.half.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
920declare <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
921declare <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
922declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
923
; Variants returning a { data, i32 } struct. The *_tfe_* functions in this file
; call these and return the i32 member; their expected output carries the `tfe`
; modifier on the image_load.
924declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
925declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
926declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
927declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
928
; Attribute group #0, referenced by every intrinsic declaration above.
929attributes #0 = { nounwind readonly }
930