1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
4
; Pixel-shader test: plain image sample of a 1D image with one half
; coordinate (%s). The expected output on both targets switches exec to
; whole quad mode (s_wqm), ANDs it back with the saved exec mask, and then
; issues a single image_sample with the a16 modifier; the coordinate is used
; directly from v0 with no packing instructions.
5define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
6; GFX9-LABEL: sample_1d:
7; GFX9:       ; %bb.0: ; %main_body
8; GFX9-NEXT:    s_mov_b64 s[12:13], exec
9; GFX9-NEXT:    s_wqm_b64 exec, exec
10; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
11; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
12; GFX9-NEXT:    s_waitcnt vmcnt(0)
13; GFX9-NEXT:    ; return to shader part epilog
14;
15; GFX10-LABEL: sample_1d:
16; GFX10:       ; %bb.0: ; %main_body
17; GFX10-NEXT:    s_mov_b32 s12, exec_lo
18; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
19; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
20; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
21; GFX10-NEXT:    s_waitcnt vmcnt(0)
22; GFX10-NEXT:    ; return to shader part epilog
23main_body:
24  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
25  ret <4 x float> %v
26}
27
; Pixel-shader test: 2D image sample with two half coordinates (%s, %t).
; The expected output packs both halves into v0 (v_and_b32 with 0xffff on
; the first, then v_lshl_or_b32 shifting the second left by 16) so that the
; a16 image_sample takes a single address register.
28define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
29; GFX9-LABEL: sample_2d:
30; GFX9:       ; %bb.0: ; %main_body
31; GFX9-NEXT:    s_mov_b64 s[12:13], exec
32; GFX9-NEXT:    s_wqm_b64 exec, exec
33; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
34; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
35; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
36; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
37; GFX9-NEXT:    s_waitcnt vmcnt(0)
38; GFX9-NEXT:    ; return to shader part epilog
39;
40; GFX10-LABEL: sample_2d:
41; GFX10:       ; %bb.0: ; %main_body
42; GFX10-NEXT:    s_mov_b32 s12, exec_lo
43; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
44; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
45; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
46; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
47; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
48; GFX10-NEXT:    s_waitcnt vmcnt(0)
49; GFX10-NEXT:    ; return to shader part epilog
50main_body:
51  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
52  ret <4 x float> %v
53}
54
; Pixel-shader test: 3D image sample with three half coordinates
; (%s, %t, %r). The expected output packs %s/%t into v1 and leaves the third
; coordinate in v2, so the a16 image_sample address operand is v[1:2].
55define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
56; GFX9-LABEL: sample_3d:
57; GFX9:       ; %bb.0: ; %main_body
58; GFX9-NEXT:    s_mov_b64 s[12:13], exec
59; GFX9-NEXT:    s_wqm_b64 exec, exec
60; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
61; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
62; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
63; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
64; GFX9-NEXT:    s_waitcnt vmcnt(0)
65; GFX9-NEXT:    ; return to shader part epilog
66;
67; GFX10-LABEL: sample_3d:
68; GFX10:       ; %bb.0: ; %main_body
69; GFX10-NEXT:    s_mov_b32 s12, exec_lo
70; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
71; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
72; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
73; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
74; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
75; GFX10-NEXT:    s_waitcnt vmcnt(0)
76; GFX10-NEXT:    ; return to shader part epilog
77main_body:
78  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
79  ret <4 x float> %v
80}
81
; Pixel-shader test: cube image sample with half coordinates (%s, %t) and a
; half %face operand. The address layout in the expected output matches the
; 3D case (packed pair in v1, third operand in v2). The gfx900 check line
; additionally carries the `da` modifier; the gfx1010 line instead selects
; dim:SQ_RSRC_IMG_CUBE.
82define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
83; GFX9-LABEL: sample_cube:
84; GFX9:       ; %bb.0: ; %main_body
85; GFX9-NEXT:    s_mov_b64 s[12:13], exec
86; GFX9-NEXT:    s_wqm_b64 exec, exec
87; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
88; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
89; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
90; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
91; GFX9-NEXT:    s_waitcnt vmcnt(0)
92; GFX9-NEXT:    ; return to shader part epilog
93;
94; GFX10-LABEL: sample_cube:
95; GFX10:       ; %bb.0: ; %main_body
96; GFX10-NEXT:    s_mov_b32 s12, exec_lo
97; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
98; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
99; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
100; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
101; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16
102; GFX10-NEXT:    s_waitcnt vmcnt(0)
103; GFX10-NEXT:    ; return to shader part epilog
104main_body:
105  %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
106  ret <4 x float> %v
107}
108
; Pixel-shader test: 1D-array image sample with a half coordinate (%s) and a
; half %slice operand. Both halves are expected to be packed into v0. The
; gfx900 check line carries `da`; the gfx1010 line uses
; dim:SQ_RSRC_IMG_1D_ARRAY.
109define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
110; GFX9-LABEL: sample_1darray:
111; GFX9:       ; %bb.0: ; %main_body
112; GFX9-NEXT:    s_mov_b64 s[12:13], exec
113; GFX9-NEXT:    s_wqm_b64 exec, exec
114; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
115; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
116; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
117; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da
118; GFX9-NEXT:    s_waitcnt vmcnt(0)
119; GFX9-NEXT:    ; return to shader part epilog
120;
121; GFX10-LABEL: sample_1darray:
122; GFX10:       ; %bb.0: ; %main_body
123; GFX10-NEXT:    s_mov_b32 s12, exec_lo
124; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
125; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
126; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
127; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
128; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY a16
129; GFX10-NEXT:    s_waitcnt vmcnt(0)
130; GFX10-NEXT:    ; return to shader part epilog
131main_body:
132  %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
133  ret <4 x float> %v
134}
135
; Pixel-shader test: 2D-array image sample with half coordinates (%s, %t)
; and a half %slice operand. Expected output packs %s/%t into v1 and keeps
; the third operand in v2 (address v[1:2]). The gfx900 check line carries
; `da`; the gfx1010 line uses dim:SQ_RSRC_IMG_2D_ARRAY.
136define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
137; GFX9-LABEL: sample_2darray:
138; GFX9:       ; %bb.0: ; %main_body
139; GFX9-NEXT:    s_mov_b64 s[12:13], exec
140; GFX9-NEXT:    s_wqm_b64 exec, exec
141; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
142; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
143; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
144; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
145; GFX9-NEXT:    s_waitcnt vmcnt(0)
146; GFX9-NEXT:    ; return to shader part epilog
147;
148; GFX10-LABEL: sample_2darray:
149; GFX10:       ; %bb.0: ; %main_body
150; GFX10-NEXT:    s_mov_b32 s12, exec_lo
151; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
152; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
153; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
154; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
155; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16
156; GFX10-NEXT:    s_waitcnt vmcnt(0)
157; GFX10-NEXT:    ; return to shader part epilog
158main_body:
159  %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
160  ret <4 x float> %v
161}
162
; Pixel-shader test: image_sample_c on a 1D image. %zcompare is a full float
; and precedes the half coordinate %s in the intrinsic operand list. The
; expected output needs no packing instructions: the address operand is
; v[0:1] as passed in.
163define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
164; GFX9-LABEL: sample_c_1d:
165; GFX9:       ; %bb.0: ; %main_body
166; GFX9-NEXT:    s_mov_b64 s[12:13], exec
167; GFX9-NEXT:    s_wqm_b64 exec, exec
168; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
169; GFX9-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
170; GFX9-NEXT:    s_waitcnt vmcnt(0)
171; GFX9-NEXT:    ; return to shader part epilog
172;
173; GFX10-LABEL: sample_c_1d:
174; GFX10:       ; %bb.0: ; %main_body
175; GFX10-NEXT:    s_mov_b32 s12, exec_lo
176; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
177; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
178; GFX10-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
179; GFX10-NEXT:    s_waitcnt vmcnt(0)
180; GFX10-NEXT:    ; return to shader part epilog
181main_body:
182  %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
183  ret <4 x float> %v
184}
185
; Pixel-shader test: image_sample_c on a 2D image. The float %zcompare stays
; in v0 while the two half coordinates (%s, %t) are expected to be packed
; into v1, giving the address operand v[0:1].
186define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
187; GFX9-LABEL: sample_c_2d:
188; GFX9:       ; %bb.0: ; %main_body
189; GFX9-NEXT:    s_mov_b64 s[12:13], exec
190; GFX9-NEXT:    s_wqm_b64 exec, exec
191; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
192; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
193; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
194; GFX9-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
195; GFX9-NEXT:    s_waitcnt vmcnt(0)
196; GFX9-NEXT:    ; return to shader part epilog
197;
198; GFX10-LABEL: sample_c_2d:
199; GFX10:       ; %bb.0: ; %main_body
200; GFX10-NEXT:    s_mov_b32 s12, exec_lo
201; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
202; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
203; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
204; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
205; GFX10-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
206; GFX10-NEXT:    s_waitcnt vmcnt(0)
207; GFX10-NEXT:    ; return to shader part epilog
208main_body:
209  %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
210  ret <4 x float> %v
211}
212
; Pixel-shader test: image_sample_cl on a 1D image with a half coordinate
; (%s) and a half %clamp operand. Both halves are expected to be packed into
; v0, which is the sole address register of the a16 instruction.
213define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
214; GFX9-LABEL: sample_cl_1d:
215; GFX9:       ; %bb.0: ; %main_body
216; GFX9-NEXT:    s_mov_b64 s[12:13], exec
217; GFX9-NEXT:    s_wqm_b64 exec, exec
218; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
219; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
220; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
221; GFX9-NEXT:    image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
222; GFX9-NEXT:    s_waitcnt vmcnt(0)
223; GFX9-NEXT:    ; return to shader part epilog
224;
225; GFX10-LABEL: sample_cl_1d:
226; GFX10:       ; %bb.0: ; %main_body
227; GFX10-NEXT:    s_mov_b32 s12, exec_lo
228; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
229; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
230; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
231; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
232; GFX10-NEXT:    image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
233; GFX10-NEXT:    s_waitcnt vmcnt(0)
234; GFX10-NEXT:    ; return to shader part epilog
235main_body:
236  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
237  ret <4 x float> %v
238}
239
; Pixel-shader test: image_sample_cl on a 2D image with half coordinates
; (%s, %t) and a half %clamp operand. Expected output packs %s/%t into v1
; and leaves the third operand in v2 (address v[1:2]).
240define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
241; GFX9-LABEL: sample_cl_2d:
242; GFX9:       ; %bb.0: ; %main_body
243; GFX9-NEXT:    s_mov_b64 s[12:13], exec
244; GFX9-NEXT:    s_wqm_b64 exec, exec
245; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
246; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
247; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
248; GFX9-NEXT:    image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
249; GFX9-NEXT:    s_waitcnt vmcnt(0)
250; GFX9-NEXT:    ; return to shader part epilog
251;
252; GFX10-LABEL: sample_cl_2d:
253; GFX10:       ; %bb.0: ; %main_body
254; GFX10-NEXT:    s_mov_b32 s12, exec_lo
255; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
256; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
257; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
258; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
259; GFX10-NEXT:    image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
260; GFX10-NEXT:    s_waitcnt vmcnt(0)
261; GFX10-NEXT:    ; return to shader part epilog
262main_body:
263  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
264  ret <4 x float> %v
265}
266
; Pixel-shader test: image_sample_c_cl on a 1D image. The float %zcompare
; stays in v0; the half coordinate %s and half %clamp are expected to be
; packed together into v1 (address v[0:1]).
267define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
268; GFX9-LABEL: sample_c_cl_1d:
269; GFX9:       ; %bb.0: ; %main_body
270; GFX9-NEXT:    s_mov_b64 s[12:13], exec
271; GFX9-NEXT:    s_wqm_b64 exec, exec
272; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
273; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
274; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
275; GFX9-NEXT:    image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
276; GFX9-NEXT:    s_waitcnt vmcnt(0)
277; GFX9-NEXT:    ; return to shader part epilog
278;
279; GFX10-LABEL: sample_c_cl_1d:
280; GFX10:       ; %bb.0: ; %main_body
281; GFX10-NEXT:    s_mov_b32 s12, exec_lo
282; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
283; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
284; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
285; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
286; GFX10-NEXT:    image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
287; GFX10-NEXT:    s_waitcnt vmcnt(0)
288; GFX10-NEXT:    ; return to shader part epilog
289main_body:
290  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
291  ret <4 x float> %v
292}
293
; Pixel-shader test: image_sample_c_cl on a 2D image (float %zcompare, half
; %s/%t, half %clamp). The two targets differ in how the three address
; registers are formed: the gfx900 checks copy the operands into the
; contiguous range v[3:5], while the gfx1010 checks pack %s/%t in place and
; pass the bracketed register list [v0, v1, v3] without any copies.
294define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
295; GFX9-LABEL: sample_c_cl_2d:
296; GFX9:       ; %bb.0: ; %main_body
297; GFX9-NEXT:    s_mov_b64 s[12:13], exec
298; GFX9-NEXT:    s_wqm_b64 exec, exec
299; GFX9-NEXT:    v_mov_b32_e32 v5, v3
300; GFX9-NEXT:    v_mov_b32_e32 v3, v0
301; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
302; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
303; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
304; GFX9-NEXT:    image_sample_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
305; GFX9-NEXT:    s_waitcnt vmcnt(0)
306; GFX9-NEXT:    ; return to shader part epilog
307;
308; GFX10-LABEL: sample_c_cl_2d:
309; GFX10:       ; %bb.0: ; %main_body
310; GFX10-NEXT:    s_mov_b32 s12, exec_lo
311; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
312; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
313; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
314; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
315; GFX10-NEXT:    image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
316; GFX10-NEXT:    s_waitcnt vmcnt(0)
317; GFX10-NEXT:    ; return to shader part epilog
318main_body:
319  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
320  ret <4 x float> %v
321}
322
; Pixel-shader test: image_sample_b on a 1D image. %bias is a full float
; (the intrinsic name is mangled with both .f32 and .f16) followed by the
; half coordinate %s. No packing instructions are expected; the address
; operand is v[0:1] as passed in.
323define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
324; GFX9-LABEL: sample_b_1d:
325; GFX9:       ; %bb.0: ; %main_body
326; GFX9-NEXT:    s_mov_b64 s[12:13], exec
327; GFX9-NEXT:    s_wqm_b64 exec, exec
328; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
329; GFX9-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
330; GFX9-NEXT:    s_waitcnt vmcnt(0)
331; GFX9-NEXT:    ; return to shader part epilog
332;
333; GFX10-LABEL: sample_b_1d:
334; GFX10:       ; %bb.0: ; %main_body
335; GFX10-NEXT:    s_mov_b32 s12, exec_lo
336; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
337; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
338; GFX10-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
339; GFX10-NEXT:    s_waitcnt vmcnt(0)
340; GFX10-NEXT:    ; return to shader part epilog
341main_body:
342  %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
343  ret <4 x float> %v
344}
345
; Pixel-shader test: image_sample_b on a 2D image. The float %bias stays in
; v0; the half coordinates (%s, %t) are expected to be packed into v1,
; giving the address operand v[0:1].
346define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
347; GFX9-LABEL: sample_b_2d:
348; GFX9:       ; %bb.0: ; %main_body
349; GFX9-NEXT:    s_mov_b64 s[12:13], exec
350; GFX9-NEXT:    s_wqm_b64 exec, exec
351; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
352; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
353; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
354; GFX9-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
355; GFX9-NEXT:    s_waitcnt vmcnt(0)
356; GFX9-NEXT:    ; return to shader part epilog
357;
358; GFX10-LABEL: sample_b_2d:
359; GFX10:       ; %bb.0: ; %main_body
360; GFX10-NEXT:    s_mov_b32 s12, exec_lo
361; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
362; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
363; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
364; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
365; GFX10-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
366; GFX10-NEXT:    s_waitcnt vmcnt(0)
367; GFX10-NEXT:    ; return to shader part epilog
368main_body:
369  %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
370  ret <4 x float> %v
371}
372
; Pixel-shader test: image_sample_c_b on a 1D image with float %bias, float
; %zcompare and a single half coordinate %s. No packing instructions are
; expected; the address operand is v[0:2] as passed in.
373define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
374; GFX9-LABEL: sample_c_b_1d:
375; GFX9:       ; %bb.0: ; %main_body
376; GFX9-NEXT:    s_mov_b64 s[12:13], exec
377; GFX9-NEXT:    s_wqm_b64 exec, exec
378; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
379; GFX9-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
380; GFX9-NEXT:    s_waitcnt vmcnt(0)
381; GFX9-NEXT:    ; return to shader part epilog
382;
383; GFX10-LABEL: sample_c_b_1d:
384; GFX10:       ; %bb.0: ; %main_body
385; GFX10-NEXT:    s_mov_b32 s12, exec_lo
386; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
387; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
388; GFX10-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
389; GFX10-NEXT:    s_waitcnt vmcnt(0)
390; GFX10-NEXT:    ; return to shader part epilog
391main_body:
392  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
393  ret <4 x float> %v
394}
395
; Pixel-shader test: image_sample_c_b on a 2D image. The two float operands
; (%bias, %zcompare) stay in v0/v1; the half coordinates (%s, %t) are
; expected to be packed into v2, giving the address operand v[0:2].
396define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
397; GFX9-LABEL: sample_c_b_2d:
398; GFX9:       ; %bb.0: ; %main_body
399; GFX9-NEXT:    s_mov_b64 s[12:13], exec
400; GFX9-NEXT:    s_wqm_b64 exec, exec
401; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
402; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
403; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
404; GFX9-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
405; GFX9-NEXT:    s_waitcnt vmcnt(0)
406; GFX9-NEXT:    ; return to shader part epilog
407;
408; GFX10-LABEL: sample_c_b_2d:
409; GFX10:       ; %bb.0: ; %main_body
410; GFX10-NEXT:    s_mov_b32 s12, exec_lo
411; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
412; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
413; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
414; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
415; GFX10-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
416; GFX10-NEXT:    s_waitcnt vmcnt(0)
417; GFX10-NEXT:    ; return to shader part epilog
418main_body:
419  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
420  ret <4 x float> %v
421}
422
; Pixel-shader test: image_sample_b_cl on a 1D image. The float %bias stays
; in v0; the half coordinate %s and half %clamp are expected to be packed
; into v1 (address v[0:1]).
423define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
424; GFX9-LABEL: sample_b_cl_1d:
425; GFX9:       ; %bb.0: ; %main_body
426; GFX9-NEXT:    s_mov_b64 s[12:13], exec
427; GFX9-NEXT:    s_wqm_b64 exec, exec
428; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
429; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
430; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
431; GFX9-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
432; GFX9-NEXT:    s_waitcnt vmcnt(0)
433; GFX9-NEXT:    ; return to shader part epilog
434;
435; GFX10-LABEL: sample_b_cl_1d:
436; GFX10:       ; %bb.0: ; %main_body
437; GFX10-NEXT:    s_mov_b32 s12, exec_lo
438; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
439; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
440; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
441; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
442; GFX10-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
443; GFX10-NEXT:    s_waitcnt vmcnt(0)
444; GFX10-NEXT:    ; return to shader part epilog
445main_body:
446  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
447  ret <4 x float> %v
448}
449
; Pixel-shader test: image_sample_b_cl on a 2D image (float %bias, half
; %s/%t, half %clamp). The gfx900 checks copy the operands into the
; contiguous range v[3:5]; the gfx1010 checks pack %s/%t in place and pass
; the bracketed register list [v0, v1, v3] without any copies.
450define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
451; GFX9-LABEL: sample_b_cl_2d:
452; GFX9:       ; %bb.0: ; %main_body
453; GFX9-NEXT:    s_mov_b64 s[12:13], exec
454; GFX9-NEXT:    s_wqm_b64 exec, exec
455; GFX9-NEXT:    v_mov_b32_e32 v5, v3
456; GFX9-NEXT:    v_mov_b32_e32 v3, v0
457; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
458; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
459; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
460; GFX9-NEXT:    image_sample_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
461; GFX9-NEXT:    s_waitcnt vmcnt(0)
462; GFX9-NEXT:    ; return to shader part epilog
463;
464; GFX10-LABEL: sample_b_cl_2d:
465; GFX10:       ; %bb.0: ; %main_body
466; GFX10-NEXT:    s_mov_b32 s12, exec_lo
467; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
468; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
469; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
470; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
471; GFX10-NEXT:    image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
472; GFX10-NEXT:    s_waitcnt vmcnt(0)
473; GFX10-NEXT:    ; return to shader part epilog
474main_body:
475  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
476  ret <4 x float> %v
477}
478
; Pixel-shader test: image_sample_c_b_cl on a 1D image. The float operands
; (%bias, %zcompare) stay in v0/v1; the half coordinate %s and half %clamp
; are expected to be packed into v2 (address v[0:2]).
479define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
480; GFX9-LABEL: sample_c_b_cl_1d:
481; GFX9:       ; %bb.0: ; %main_body
482; GFX9-NEXT:    s_mov_b64 s[12:13], exec
483; GFX9-NEXT:    s_wqm_b64 exec, exec
484; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
485; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
486; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
487; GFX9-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
488; GFX9-NEXT:    s_waitcnt vmcnt(0)
489; GFX9-NEXT:    ; return to shader part epilog
490;
491; GFX10-LABEL: sample_c_b_cl_1d:
492; GFX10:       ; %bb.0: ; %main_body
493; GFX10-NEXT:    s_mov_b32 s12, exec_lo
494; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
495; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
496; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
497; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
498; GFX10-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
499; GFX10-NEXT:    s_waitcnt vmcnt(0)
500; GFX10-NEXT:    ; return to shader part epilog
501main_body:
502  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
503  ret <4 x float> %v
504}
505
; Pixel-shader test: image_sample_c_b_cl on a 2D image (float %bias, float
; %zcompare, half %s/%t, half %clamp). The gfx900 checks copy the operands
; into the contiguous range v[4:7]; the gfx1010 checks pack %s/%t into v2 in
; place and pass the bracketed register list [v0, v1, v2, v4].
506define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
507; GFX9-LABEL: sample_c_b_cl_2d:
508; GFX9:       ; %bb.0: ; %main_body
509; GFX9-NEXT:    s_mov_b64 s[12:13], exec
510; GFX9-NEXT:    s_wqm_b64 exec, exec
511; GFX9-NEXT:    v_mov_b32_e32 v7, v4
512; GFX9-NEXT:    v_mov_b32_e32 v4, v0
513; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
514; GFX9-NEXT:    v_mov_b32_e32 v5, v1
515; GFX9-NEXT:    v_lshl_or_b32 v6, v3, 16, v0
516; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
517; GFX9-NEXT:    image_sample_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0xf a16
518; GFX9-NEXT:    s_waitcnt vmcnt(0)
519; GFX9-NEXT:    ; return to shader part epilog
520;
521; GFX10-LABEL: sample_c_b_cl_2d:
522; GFX10:       ; %bb.0: ; %main_body
523; GFX10-NEXT:    s_mov_b32 s12, exec_lo
524; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
525; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
526; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
527; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
528; GFX10-NEXT:    image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
529; GFX10-NEXT:    s_waitcnt vmcnt(0)
530; GFX10-NEXT:    ; return to shader part epilog
531main_body:
532  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
533  ret <4 x float> %v
534}
535
; Pixel-shader test: image_sample_d on a 1D image with half derivatives
; (%dsdh, %dsdv) and a half coordinate %s. Unlike the non-derivative tests
; in this file, the expected output contains no s_wqm / exec manipulation
; and no packing instructions: the three operands are consumed as v[0:2].
536define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
537; GFX9-LABEL: sample_d_1d:
538; GFX9:       ; %bb.0: ; %main_body
539; GFX9-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
540; GFX9-NEXT:    s_waitcnt vmcnt(0)
541; GFX9-NEXT:    ; return to shader part epilog
542;
543; GFX10-LABEL: sample_d_1d:
544; GFX10:       ; %bb.0: ; %main_body
545; GFX10-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
546; GFX10-NEXT:    s_waitcnt vmcnt(0)
547; GFX10-NEXT:    ; return to shader part epilog
548main_body:
549  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
550  ret <4 x float> %v
551}
552
; Pixel-shader test: image_sample_d on a 2D image with four half derivatives
; and two half coordinates. The expected output materializes the 0xffff mask
; in a VGPR, then packs the six halves pairwise (v_and_b32 + v_lshl_or_b32)
; into three registers consumed as v[2:4]. No s_wqm / exec manipulation is
; expected.
553define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
554; GFX9-LABEL: sample_d_2d:
555; GFX9:       ; %bb.0: ; %main_body
556; GFX9-NEXT:    v_mov_b32_e32 v6, 0xffff
557; GFX9-NEXT:    v_and_b32_e32 v4, v6, v4
558; GFX9-NEXT:    v_and_b32_e32 v2, v6, v2
559; GFX9-NEXT:    v_and_b32_e32 v0, v6, v0
560; GFX9-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
561; GFX9-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
562; GFX9-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
563; GFX9-NEXT:    image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
564; GFX9-NEXT:    s_waitcnt vmcnt(0)
565; GFX9-NEXT:    ; return to shader part epilog
566;
567; GFX10-LABEL: sample_d_2d:
568; GFX10:       ; %bb.0: ; %main_body
569; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
570; GFX10-NEXT:    v_and_b32_e32 v4, v7, v4
571; GFX10-NEXT:    v_and_b32_e32 v2, v7, v2
572; GFX10-NEXT:    v_and_b32_e32 v0, v7, v0
573; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
574; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
575; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
576; GFX10-NEXT:    image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
577; GFX10-NEXT:    s_waitcnt vmcnt(0)
578; GFX10-NEXT:    ; return to shader part epilog
579main_body:
580  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
581  ret <4 x float> %v
582}
583
; Pixel-shader test: image_sample_d on a 3D image with six half derivatives
; and three half coordinates. Each triple is expected to occupy two
; registers (a packed pair plus the third element on its own).
; The gfx900 checks gather the six address dwords into v7-v12 and name the
; eight-register operand v[7:14]; only v7-v12 are written by the preceding
; instructions. The gfx1010 checks pack in place and pass the bracketed
; six-register list [v0, v2, v3, v5, v6, v8].
584define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
585; GFX9-LABEL: sample_d_3d:
586; GFX9:       ; %bb.0: ; %main_body
587; GFX9-NEXT:    v_mov_b32_e32 v12, v8
588; GFX9-NEXT:    v_mov_b32_e32 v8, v2
589; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
590; GFX9-NEXT:    v_mov_b32_e32 v10, v5
591; GFX9-NEXT:    v_and_b32_e32 v5, v2, v6
592; GFX9-NEXT:    v_and_b32_e32 v3, v2, v3
593; GFX9-NEXT:    v_and_b32_e32 v0, v2, v0
594; GFX9-NEXT:    v_lshl_or_b32 v11, v7, 16, v5
595; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
596; GFX9-NEXT:    v_lshl_or_b32 v7, v1, 16, v0
597; GFX9-NEXT:    image_sample_d v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
598; GFX9-NEXT:    s_waitcnt vmcnt(0)
599; GFX9-NEXT:    ; return to shader part epilog
600;
601; GFX10-LABEL: sample_d_3d:
602; GFX10:       ; %bb.0: ; %main_body
603; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
604; GFX10-NEXT:    v_and_b32_e32 v6, v9, v6
605; GFX10-NEXT:    v_and_b32_e32 v3, v9, v3
606; GFX10-NEXT:    v_and_b32_e32 v0, v9, v0
607; GFX10-NEXT:    v_lshl_or_b32 v6, v7, 16, v6
608; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
609; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
610; GFX10-NEXT:    image_sample_d v[0:3], [v0, v2, v3, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
611; GFX10-NEXT:    s_waitcnt vmcnt(0)
612; GFX10-NEXT:    ; return to shader part epilog
613main_body:
614  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
615  ret <4 x float> %v
616}
617
; Pixel-shader test: image_sample_c_d on a 1D image with float %zcompare,
; half derivatives (%dsdh, %dsdv) and a half coordinate %s. The expected
; output has no packing instructions and no s_wqm / exec manipulation; the
; four operands are consumed as v[0:3].
618define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
619; GFX9-LABEL: sample_c_d_1d:
620; GFX9:       ; %bb.0: ; %main_body
621; GFX9-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
622; GFX9-NEXT:    s_waitcnt vmcnt(0)
623; GFX9-NEXT:    ; return to shader part epilog
624;
625; GFX10-LABEL: sample_c_d_1d:
626; GFX10:       ; %bb.0: ; %main_body
627; GFX10-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
628; GFX10-NEXT:    s_waitcnt vmcnt(0)
629; GFX10-NEXT:    ; return to shader part epilog
630main_body:
631  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
632  ret <4 x float> %v
633}
634
; Pixel-shader test: image_sample_c_d on a 2D image with float %zcompare,
; four half derivatives and two half coordinates. The float stays in v0 and
; the six halves are expected to be packed pairwise into three registers.
; The gfx900 checks arrange the result in the contiguous range v[0:3]; the
; gfx1010 checks pass the bracketed register list [v0, v2, v3, v5].
635define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
636; GFX9-LABEL: sample_c_d_2d:
637; GFX9:       ; %bb.0: ; %main_body
638; GFX9-NEXT:    v_mov_b32_e32 v9, 0xffff
639; GFX9-NEXT:    v_mov_b32_e32 v8, v2
640; GFX9-NEXT:    v_mov_b32_e32 v7, v3
641; GFX9-NEXT:    v_and_b32_e32 v2, v9, v5
642; GFX9-NEXT:    v_and_b32_e32 v1, v9, v1
643; GFX9-NEXT:    v_lshl_or_b32 v3, v6, 16, v2
644; GFX9-NEXT:    v_and_b32_e32 v2, v9, v7
645; GFX9-NEXT:    v_lshl_or_b32 v2, v4, 16, v2
646; GFX9-NEXT:    v_lshl_or_b32 v1, v8, 16, v1
647; GFX9-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
648; GFX9-NEXT:    s_waitcnt vmcnt(0)
649; GFX9-NEXT:    ; return to shader part epilog
650;
651; GFX10-LABEL: sample_c_d_2d:
652; GFX10:       ; %bb.0: ; %main_body
653; GFX10-NEXT:    v_mov_b32_e32 v10, 0xffff
654; GFX10-NEXT:    v_and_b32_e32 v5, v10, v5
655; GFX10-NEXT:    v_and_b32_e32 v3, v10, v3
656; GFX10-NEXT:    v_and_b32_e32 v1, v10, v1
657; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
658; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
659; GFX10-NEXT:    v_lshl_or_b32 v2, v2, 16, v1
660; GFX10-NEXT:    image_sample_c_d v[0:3], [v0, v2, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
661; GFX10-NEXT:    s_waitcnt vmcnt(0)
662; GFX10-NEXT:    ; return to shader part epilog
663main_body:
664  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
665  ret <4 x float> %v
666}
667
; sample.d.cl, 1D, a16, all four address operands are half.
; The checks pack only v2/v3 into one dword (presumably %s and %clamp, going
; by argument order) and pass v[0:2] to image_sample_d_cl.
668define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
669; GFX9-LABEL: sample_d_cl_1d:
670; GFX9:       ; %bb.0: ; %main_body
671; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
672; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
673; GFX9-NEXT:    image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
674; GFX9-NEXT:    s_waitcnt vmcnt(0)
675; GFX9-NEXT:    ; return to shader part epilog
676;
677; GFX10-LABEL: sample_d_cl_1d:
678; GFX10:       ; %bb.0: ; %main_body
679; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
680; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
681; GFX10-NEXT:    image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
682; GFX10-NEXT:    s_waitcnt vmcnt(0)
683; GFX10-NEXT:    ; return to shader part epilog
684main_body:
685  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
686  ret <4 x float> %v
687}
688
; sample.d.cl, 2D, a16, seven half operands.
; Three pairs are packed with v_and/v_lshl_or; v6 is passed through unpacked
; as the last address register (presumably %clamp, going by argument order).
; The GFX9 checks use the range v[3:6]; the GFX10 checks use a bracketed list.
689define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
690; GFX9-LABEL: sample_d_cl_2d:
691; GFX9:       ; %bb.0: ; %main_body
692; GFX9-NEXT:    v_mov_b32_e32 v7, 0xffff
693; GFX9-NEXT:    v_and_b32_e32 v4, v7, v4
694; GFX9-NEXT:    v_and_b32_e32 v2, v7, v2
695; GFX9-NEXT:    v_and_b32_e32 v0, v7, v0
696; GFX9-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
697; GFX9-NEXT:    v_lshl_or_b32 v4, v3, 16, v2
698; GFX9-NEXT:    v_lshl_or_b32 v3, v1, 16, v0
699; GFX9-NEXT:    image_sample_d_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
700; GFX9-NEXT:    s_waitcnt vmcnt(0)
701; GFX9-NEXT:    ; return to shader part epilog
702;
703; GFX10-LABEL: sample_d_cl_2d:
704; GFX10:       ; %bb.0: ; %main_body
705; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
706; GFX10-NEXT:    v_and_b32_e32 v4, v7, v4
707; GFX10-NEXT:    v_and_b32_e32 v2, v7, v2
708; GFX10-NEXT:    v_and_b32_e32 v0, v7, v0
709; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
710; GFX10-NEXT:    v_lshl_or_b32 v5, v3, 16, v2
711; GFX10-NEXT:    v_lshl_or_b32 v3, v1, 16, v0
712; GFX10-NEXT:    image_sample_d_cl v[0:3], [v3, v5, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
713; GFX10-NEXT:    s_waitcnt vmcnt(0)
714; GFX10-NEXT:    ; return to shader part epilog
715main_body:
716  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
717  ret <4 x float> %v
718}
719
; sample.c.d.cl, 1D, a16: a float %zcompare plus four half operands.
; The checks pack only v3/v4 into one dword (presumably %s and %clamp, going
; by argument order) and pass v[0:3] to image_sample_c_d_cl.
720define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
721; GFX9-LABEL: sample_c_d_cl_1d:
722; GFX9:       ; %bb.0: ; %main_body
723; GFX9-NEXT:    v_and_b32_e32 v3, 0xffff, v3
724; GFX9-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
725; GFX9-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
726; GFX9-NEXT:    s_waitcnt vmcnt(0)
727; GFX9-NEXT:    ; return to shader part epilog
728;
729; GFX10-LABEL: sample_c_d_cl_1d:
730; GFX10:       ; %bb.0: ; %main_body
731; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
732; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
733; GFX10-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
734; GFX10-NEXT:    s_waitcnt vmcnt(0)
735; GFX10-NEXT:    ; return to shader part epilog
736main_body:
737  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
738  ret <4 x float> %v
739}
740
; sample.c.d.cl, 2D, a16: a float %zcompare plus seven half operands.
; The GFX9 checks move/pack the operands into v7..v11 and pass the
; eight-register range v[7:14]; the GFX10 checks pack in place and pass a
; five-entry bracketed register list.
741define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
742; GFX9-LABEL: sample_c_d_cl_2d:
743; GFX9:       ; %bb.0: ; %main_body
744; GFX9-NEXT:    v_mov_b32_e32 v11, v7
745; GFX9-NEXT:    v_mov_b32_e32 v7, v0
746; GFX9-NEXT:    v_mov_b32_e32 v0, 0xffff
747; GFX9-NEXT:    v_and_b32_e32 v5, v0, v5
748; GFX9-NEXT:    v_and_b32_e32 v3, v0, v3
749; GFX9-NEXT:    v_and_b32_e32 v0, v0, v1
750; GFX9-NEXT:    v_lshl_or_b32 v10, v6, 16, v5
751; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
752; GFX9-NEXT:    v_lshl_or_b32 v8, v2, 16, v0
753; GFX9-NEXT:    image_sample_c_d_cl v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
754; GFX9-NEXT:    s_waitcnt vmcnt(0)
755; GFX9-NEXT:    ; return to shader part epilog
756;
757; GFX10-LABEL: sample_c_d_cl_2d:
758; GFX10:       ; %bb.0: ; %main_body
759; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
760; GFX10-NEXT:    v_and_b32_e32 v5, v8, v5
761; GFX10-NEXT:    v_and_b32_e32 v3, v8, v3
762; GFX10-NEXT:    v_and_b32_e32 v1, v8, v1
763; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
764; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
765; GFX10-NEXT:    v_lshl_or_b32 v2, v2, 16, v1
766; GFX10-NEXT:    image_sample_c_d_cl v[0:3], [v0, v2, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
767; GFX10-NEXT:    s_waitcnt vmcnt(0)
768; GFX10-NEXT:    ; return to shader part epilog
769main_body:
770  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
771  ret <4 x float> %v
772}
773
; sample.cd, 1D, a16, three half operands.
; No packing code is expected on either target; v[0:2] goes straight to
; image_sample_cd with the a16 modifier set.
774define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
775; GFX9-LABEL: sample_cd_1d:
776; GFX9:       ; %bb.0: ; %main_body
777; GFX9-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
778; GFX9-NEXT:    s_waitcnt vmcnt(0)
779; GFX9-NEXT:    ; return to shader part epilog
780;
781; GFX10-LABEL: sample_cd_1d:
782; GFX10:       ; %bb.0: ; %main_body
783; GFX10-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
784; GFX10-NEXT:    s_waitcnt vmcnt(0)
785; GFX10-NEXT:    ; return to shader part epilog
786main_body:
787  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
788  ret <4 x float> %v
789}
790
; sample.cd, 2D, a16, six half operands.
; The checks pack them pairwise into v2..v4 with v_and/v_lshl_or, and both
; targets then pass the contiguous range v[2:4] to image_sample_cd.
791define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
792; GFX9-LABEL: sample_cd_2d:
793; GFX9:       ; %bb.0: ; %main_body
794; GFX9-NEXT:    v_mov_b32_e32 v6, 0xffff
795; GFX9-NEXT:    v_and_b32_e32 v4, v6, v4
796; GFX9-NEXT:    v_and_b32_e32 v2, v6, v2
797; GFX9-NEXT:    v_and_b32_e32 v0, v6, v0
798; GFX9-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
799; GFX9-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
800; GFX9-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
801; GFX9-NEXT:    image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
802; GFX9-NEXT:    s_waitcnt vmcnt(0)
803; GFX9-NEXT:    ; return to shader part epilog
804;
805; GFX10-LABEL: sample_cd_2d:
806; GFX10:       ; %bb.0: ; %main_body
807; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
808; GFX10-NEXT:    v_and_b32_e32 v4, v7, v4
809; GFX10-NEXT:    v_and_b32_e32 v2, v7, v2
810; GFX10-NEXT:    v_and_b32_e32 v0, v7, v0
811; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
812; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
813; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
814; GFX10-NEXT:    image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
815; GFX10-NEXT:    s_waitcnt vmcnt(0)
816; GFX10-NEXT:    ; return to shader part epilog
817main_body:
818  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
819  ret <4 x float> %v
820}
821
; sample.c.cd, 1D, a16: a float %zcompare followed by three half operands.
; No packing code is expected on either target; v[0:3] is passed to
; image_sample_c_cd unchanged, with the a16 modifier set.
822define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
823; GFX9-LABEL: sample_c_cd_1d:
824; GFX9:       ; %bb.0: ; %main_body
825; GFX9-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
826; GFX9-NEXT:    s_waitcnt vmcnt(0)
827; GFX9-NEXT:    ; return to shader part epilog
828;
829; GFX10-LABEL: sample_c_cd_1d:
830; GFX10:       ; %bb.0: ; %main_body
831; GFX10-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
832; GFX10-NEXT:    s_waitcnt vmcnt(0)
833; GFX10-NEXT:    ; return to shader part epilog
834main_body:
835  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
836  ret <4 x float> %v
837}
838
; sample.c.cd, 2D, a16: a float %zcompare plus six half operands.
; The six halves are packed pairwise (v_and with 0xffff, then v_lshl_or by 16)
; into three dwords following v0. The GFX9 checks pass a contiguous v[0:3];
; the GFX10 checks pass a bracketed list of individual registers.
839define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
840; GFX9-LABEL: sample_c_cd_2d:
841; GFX9:       ; %bb.0: ; %main_body
842; GFX9-NEXT:    v_mov_b32_e32 v9, 0xffff
843; GFX9-NEXT:    v_mov_b32_e32 v8, v2
844; GFX9-NEXT:    v_mov_b32_e32 v7, v3
845; GFX9-NEXT:    v_and_b32_e32 v2, v9, v5
846; GFX9-NEXT:    v_and_b32_e32 v1, v9, v1
847; GFX9-NEXT:    v_lshl_or_b32 v3, v6, 16, v2
848; GFX9-NEXT:    v_and_b32_e32 v2, v9, v7
849; GFX9-NEXT:    v_lshl_or_b32 v2, v4, 16, v2
850; GFX9-NEXT:    v_lshl_or_b32 v1, v8, 16, v1
851; GFX9-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
852; GFX9-NEXT:    s_waitcnt vmcnt(0)
853; GFX9-NEXT:    ; return to shader part epilog
854;
855; GFX10-LABEL: sample_c_cd_2d:
856; GFX10:       ; %bb.0: ; %main_body
857; GFX10-NEXT:    v_mov_b32_e32 v10, 0xffff
858; GFX10-NEXT:    v_and_b32_e32 v5, v10, v5
859; GFX10-NEXT:    v_and_b32_e32 v3, v10, v3
860; GFX10-NEXT:    v_and_b32_e32 v1, v10, v1
861; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
862; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
863; GFX10-NEXT:    v_lshl_or_b32 v2, v2, 16, v1
864; GFX10-NEXT:    image_sample_c_cd v[0:3], [v0, v2, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
865; GFX10-NEXT:    s_waitcnt vmcnt(0)
866; GFX10-NEXT:    ; return to shader part epilog
867main_body:
868  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
869  ret <4 x float> %v
870}
871
; sample.cd.cl, 1D, a16, four half operands.
; The checks pack only v2/v3 into one dword (presumably %s and %clamp, going
; by argument order) and pass v[0:2] to image_sample_cd_cl.
872define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
873; GFX9-LABEL: sample_cd_cl_1d:
874; GFX9:       ; %bb.0: ; %main_body
875; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
876; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
877; GFX9-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
878; GFX9-NEXT:    s_waitcnt vmcnt(0)
879; GFX9-NEXT:    ; return to shader part epilog
880;
881; GFX10-LABEL: sample_cd_cl_1d:
882; GFX10:       ; %bb.0: ; %main_body
883; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
884; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
885; GFX10-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
886; GFX10-NEXT:    s_waitcnt vmcnt(0)
887; GFX10-NEXT:    ; return to shader part epilog
888main_body:
889  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
890  ret <4 x float> %v
891}
892
; sample.cd.cl, 2D, a16, seven half operands.
; Three pairs are packed with v_and/v_lshl_or; v6 is passed through unpacked
; as the last address register (presumably %clamp, going by argument order).
; The GFX9 checks use the range v[3:6]; the GFX10 checks use a bracketed list.
893define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
894; GFX9-LABEL: sample_cd_cl_2d:
895; GFX9:       ; %bb.0: ; %main_body
896; GFX9-NEXT:    v_mov_b32_e32 v7, 0xffff
897; GFX9-NEXT:    v_and_b32_e32 v4, v7, v4
898; GFX9-NEXT:    v_and_b32_e32 v2, v7, v2
899; GFX9-NEXT:    v_and_b32_e32 v0, v7, v0
900; GFX9-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
901; GFX9-NEXT:    v_lshl_or_b32 v4, v3, 16, v2
902; GFX9-NEXT:    v_lshl_or_b32 v3, v1, 16, v0
903; GFX9-NEXT:    image_sample_cd_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
904; GFX9-NEXT:    s_waitcnt vmcnt(0)
905; GFX9-NEXT:    ; return to shader part epilog
906;
907; GFX10-LABEL: sample_cd_cl_2d:
908; GFX10:       ; %bb.0: ; %main_body
909; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
910; GFX10-NEXT:    v_and_b32_e32 v4, v7, v4
911; GFX10-NEXT:    v_and_b32_e32 v2, v7, v2
912; GFX10-NEXT:    v_and_b32_e32 v0, v7, v0
913; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
914; GFX10-NEXT:    v_lshl_or_b32 v5, v3, 16, v2
915; GFX10-NEXT:    v_lshl_or_b32 v3, v1, 16, v0
916; GFX10-NEXT:    image_sample_cd_cl v[0:3], [v3, v5, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
917; GFX10-NEXT:    s_waitcnt vmcnt(0)
918; GFX10-NEXT:    ; return to shader part epilog
919main_body:
920  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
921  ret <4 x float> %v
922}
923
; sample.c.cd.cl, 1D, a16: a float %zcompare plus four half operands.
; The checks pack only v3/v4 into one dword (presumably %s and %clamp, going
; by argument order) and pass v[0:3] to image_sample_c_cd_cl.
924define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
925; GFX9-LABEL: sample_c_cd_cl_1d:
926; GFX9:       ; %bb.0: ; %main_body
927; GFX9-NEXT:    v_and_b32_e32 v3, 0xffff, v3
928; GFX9-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
929; GFX9-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
930; GFX9-NEXT:    s_waitcnt vmcnt(0)
931; GFX9-NEXT:    ; return to shader part epilog
932;
933; GFX10-LABEL: sample_c_cd_cl_1d:
934; GFX10:       ; %bb.0: ; %main_body
935; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
936; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
937; GFX10-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
938; GFX10-NEXT:    s_waitcnt vmcnt(0)
939; GFX10-NEXT:    ; return to shader part epilog
940main_body:
941  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
942  ret <4 x float> %v
943}
944
; sample.c.cd.cl, 2D, a16: a float %zcompare plus seven half operands.
; The GFX9 checks move/pack the operands into v7..v11 and pass the
; eight-register range v[7:14]; the GFX10 checks pack in place and pass a
; five-entry bracketed register list.
945define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
946; GFX9-LABEL: sample_c_cd_cl_2d:
947; GFX9:       ; %bb.0: ; %main_body
948; GFX9-NEXT:    v_mov_b32_e32 v11, v7
949; GFX9-NEXT:    v_mov_b32_e32 v7, v0
950; GFX9-NEXT:    v_mov_b32_e32 v0, 0xffff
951; GFX9-NEXT:    v_and_b32_e32 v5, v0, v5
952; GFX9-NEXT:    v_and_b32_e32 v3, v0, v3
953; GFX9-NEXT:    v_and_b32_e32 v0, v0, v1
954; GFX9-NEXT:    v_lshl_or_b32 v10, v6, 16, v5
955; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
956; GFX9-NEXT:    v_lshl_or_b32 v8, v2, 16, v0
957; GFX9-NEXT:    image_sample_c_cd_cl v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
958; GFX9-NEXT:    s_waitcnt vmcnt(0)
959; GFX9-NEXT:    ; return to shader part epilog
960;
961; GFX10-LABEL: sample_c_cd_cl_2d:
962; GFX10:       ; %bb.0: ; %main_body
963; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
964; GFX10-NEXT:    v_and_b32_e32 v5, v8, v5
965; GFX10-NEXT:    v_and_b32_e32 v3, v8, v3
966; GFX10-NEXT:    v_and_b32_e32 v1, v8, v1
967; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
968; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
969; GFX10-NEXT:    v_lshl_or_b32 v2, v2, 16, v1
970; GFX10-NEXT:    image_sample_c_cd_cl v[0:3], [v0, v2, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
971; GFX10-NEXT:    s_waitcnt vmcnt(0)
972; GFX10-NEXT:    ; return to shader part epilog
973main_body:
974  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
975  ret <4 x float> %v
976}
977
; sample.l, 1D, a16: half %s and half %lod.
; The checks merge v0 and v1 into a single dword (v_and with 0xffff, then
; v_lshl_or by 16) and pass just v0 to image_sample_l.
978define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
979; GFX9-LABEL: sample_l_1d:
980; GFX9:       ; %bb.0: ; %main_body
981; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
982; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
983; GFX9-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
984; GFX9-NEXT:    s_waitcnt vmcnt(0)
985; GFX9-NEXT:    ; return to shader part epilog
986;
987; GFX10-LABEL: sample_l_1d:
988; GFX10:       ; %bb.0: ; %main_body
989; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
990; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
991; GFX10-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
992; GFX10-NEXT:    s_waitcnt vmcnt(0)
993; GFX10-NEXT:    ; return to shader part epilog
994main_body:
995  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
996  ret <4 x float> %v
997}
998
; sample.l, 2D, a16: half %s, %t and %lod.
; The checks pack v0/v1 into v1 and pass v[1:2], so v2 (presumably %lod, going
; by argument order) occupies its own dword.
999define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
1000; GFX9-LABEL: sample_l_2d:
1001; GFX9:       ; %bb.0: ; %main_body
1002; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1003; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
1004; GFX9-NEXT:    image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
1005; GFX9-NEXT:    s_waitcnt vmcnt(0)
1006; GFX9-NEXT:    ; return to shader part epilog
1007;
1008; GFX10-LABEL: sample_l_2d:
1009; GFX10:       ; %bb.0: ; %main_body
1010; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1011; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
1012; GFX10-NEXT:    image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
1013; GFX10-NEXT:    s_waitcnt vmcnt(0)
1014; GFX10-NEXT:    ; return to shader part epilog
1015main_body:
1016  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1017  ret <4 x float> %v
1018}
1019
; sample.c.l, 1D, a16: a float %zcompare followed by half %s and half %lod.
; The checks leave v0 alone, pack v1/v2 into v1, and pass v[0:1] to
; image_sample_c_l.
1020define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
1021; GFX9-LABEL: sample_c_l_1d:
1022; GFX9:       ; %bb.0: ; %main_body
1023; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1024; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
1025; GFX9-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
1026; GFX9-NEXT:    s_waitcnt vmcnt(0)
1027; GFX9-NEXT:    ; return to shader part epilog
1028;
1029; GFX10-LABEL: sample_c_l_1d:
1030; GFX10:       ; %bb.0: ; %main_body
1031; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1032; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
1033; GFX10-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
1034; GFX10-NEXT:    s_waitcnt vmcnt(0)
1035; GFX10-NEXT:    ; return to shader part epilog
1036main_body:
1037  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1038  ret <4 x float> %v
1039}
1040
; sample.c.l, 2D, a16: a float %zcompare followed by half %s, %t and %lod.
; The GFX9 checks copy v3 to v5 and v0 to v3, pack v1/v2 into v4, and pass the
; contiguous range v[3:5]. The GFX10 checks pack v1/v2 in place and pass the
; bracketed list [v0, v1, v3], so no register copies are expected there.
1041define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
1042; GFX9-LABEL: sample_c_l_2d:
1043; GFX9:       ; %bb.0: ; %main_body
1044; GFX9-NEXT:    v_mov_b32_e32 v5, v3
1045; GFX9-NEXT:    v_mov_b32_e32 v3, v0
1046; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
1047; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
1048; GFX9-NEXT:    image_sample_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
1049; GFX9-NEXT:    s_waitcnt vmcnt(0)
1050; GFX9-NEXT:    ; return to shader part epilog
1051;
1052; GFX10-LABEL: sample_c_l_2d:
1053; GFX10:       ; %bb.0: ; %main_body
1054; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1055; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
1056; GFX10-NEXT:    image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
1057; GFX10-NEXT:    s_waitcnt vmcnt(0)
1058; GFX10-NEXT:    ; return to shader part epilog
1059main_body:
1060  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1061  ret <4 x float> %v
1062}
1063
; sample.lz, 1D, a16: a single half coordinate %s.
; No packing code is expected; v0 alone is passed to image_sample_lz with the
; a16 modifier set.
1064define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
1065; GFX9-LABEL: sample_lz_1d:
1066; GFX9:       ; %bb.0: ; %main_body
1067; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
1068; GFX9-NEXT:    s_waitcnt vmcnt(0)
1069; GFX9-NEXT:    ; return to shader part epilog
1070;
1071; GFX10-LABEL: sample_lz_1d:
1072; GFX10:       ; %bb.0: ; %main_body
1073; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
1074; GFX10-NEXT:    s_waitcnt vmcnt(0)
1075; GFX10-NEXT:    ; return to shader part epilog
1076main_body:
1077  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1078  ret <4 x float> %v
1079}
1080
; sample.lz, 2D, a16: half %s and half %t.
; The checks merge v0 and v1 into a single dword (v_and with 0xffff, then
; v_lshl_or by 16) and pass just v0 to image_sample_lz.
1081define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
1082; GFX9-LABEL: sample_lz_2d:
1083; GFX9:       ; %bb.0: ; %main_body
1084; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1085; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
1086; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
1087; GFX9-NEXT:    s_waitcnt vmcnt(0)
1088; GFX9-NEXT:    ; return to shader part epilog
1089;
1090; GFX10-LABEL: sample_lz_2d:
1091; GFX10:       ; %bb.0: ; %main_body
1092; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1093; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
1094; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
1095; GFX10-NEXT:    s_waitcnt vmcnt(0)
1096; GFX10-NEXT:    ; return to shader part epilog
1097main_body:
1098  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1099  ret <4 x float> %v
1100}
1101
; sample.c.lz, 1D, a16: a float %zcompare followed by a single half %s.
; No packing code is expected; v[0:1] is passed to image_sample_c_lz
; unchanged, with the a16 modifier set.
1102define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
1103; GFX9-LABEL: sample_c_lz_1d:
1104; GFX9:       ; %bb.0: ; %main_body
1105; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
1106; GFX9-NEXT:    s_waitcnt vmcnt(0)
1107; GFX9-NEXT:    ; return to shader part epilog
1108;
1109; GFX10-LABEL: sample_c_lz_1d:
1110; GFX10:       ; %bb.0: ; %main_body
1111; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
1112; GFX10-NEXT:    s_waitcnt vmcnt(0)
1113; GFX10-NEXT:    ; return to shader part epilog
1114main_body:
1115  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1116  ret <4 x float> %v
1117}
1118
; sample.c.lz, 2D, a16: a float %zcompare followed by half %s and half %t.
; The checks leave v0 alone, pack v1/v2 into v1, and pass v[0:1] to
; image_sample_c_lz.
1119define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
1120; GFX9-LABEL: sample_c_lz_2d:
1121; GFX9:       ; %bb.0: ; %main_body
1122; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1123; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
1124; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
1125; GFX9-NEXT:    s_waitcnt vmcnt(0)
1126; GFX9-NEXT:    ; return to shader part epilog
1127;
1128; GFX10-LABEL: sample_c_lz_2d:
1129; GFX10:       ; %bb.0: ; %main_body
1130; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1131; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
1132; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
1133; GFX10-NEXT:    s_waitcnt vmcnt(0)
1134; GFX10-NEXT:    ; return to shader part epilog
1135main_body:
1136  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1137  ret <4 x float> %v
1138}
1139
; sample.c.d.o on a 2D array, a16, returning a single float.
; Operands are an i32 %offset, a float %zcompare and seven half values. The
; call passes 4 as its first operand and the expected instruction carries
; dmask:0x4 with a one-register destination (v0). The GFX9 checks build the
; address in v8..v13 and pass v[8:15] with the da modifier; the GFX10 checks
; pass a six-entry bracketed list with dim:SQ_RSRC_IMG_2D_ARRAY.
; NOTE(review): this call's type suffix is .f32.f16.f16 while the V2 variant of
; the same intrinsic uses .v2f32.f32.f16 for identical operand types; confirm
; which suffix is intended.
1140define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
1141; GFX9-LABEL: sample_c_d_o_2darray_V1:
1142; GFX9:       ; %bb.0: ; %main_body
1143; GFX9-NEXT:    v_mov_b32_e32 v13, v8
1144; GFX9-NEXT:    v_mov_b32_e32 v8, v0
1145; GFX9-NEXT:    v_mov_b32_e32 v0, 0xffff
1146; GFX9-NEXT:    v_mov_b32_e32 v9, v1
1147; GFX9-NEXT:    v_and_b32_e32 v1, v0, v6
1148; GFX9-NEXT:    v_lshl_or_b32 v12, v7, 16, v1
1149; GFX9-NEXT:    v_and_b32_e32 v1, v0, v4
1150; GFX9-NEXT:    v_and_b32_e32 v0, v0, v2
1151; GFX9-NEXT:    v_lshl_or_b32 v11, v5, 16, v1
1152; GFX9-NEXT:    v_lshl_or_b32 v10, v3, 16, v0
1153; GFX9-NEXT:    image_sample_c_d_o v0, v[8:15], s[0:7], s[8:11] dmask:0x4 a16 da
1154; GFX9-NEXT:    s_waitcnt vmcnt(0)
1155; GFX9-NEXT:    ; return to shader part epilog
1156;
1157; GFX10-LABEL: sample_c_d_o_2darray_V1:
1158; GFX10:       ; %bb.0: ; %main_body
1159; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
1160; GFX10-NEXT:    v_and_b32_e32 v6, v9, v6
1161; GFX10-NEXT:    v_and_b32_e32 v4, v9, v4
1162; GFX10-NEXT:    v_and_b32_e32 v2, v9, v2
1163; GFX10-NEXT:    v_lshl_or_b32 v6, v7, 16, v6
1164; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
1165; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
1166; GFX10-NEXT:    image_sample_c_d_o v0, [v0, v1, v3, v4, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
1167; GFX10-NEXT:    s_waitcnt vmcnt(0)
1168; GFX10-NEXT:    ; return to shader part epilog
1169main_body:
1170  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1171  ret float %v
1172}
1173
; sample.c.d.o on a 2D array, a16, returning <2 x float>.
; Same operand list as the V1 variant, but the call passes 6 as its first
; operand; the expected instruction carries dmask:0x6 and a two-register
; destination (v[0:1]). Address setup in the checks matches V1: v[8:15] with
; the da modifier for GFX9, a six-entry bracketed list for GFX10.
; NOTE(review): this call's type suffix is .v2f32.f32.f16 while the V1 variant
; uses .f32.f16.f16 for identical operand types; confirm which suffix is
; intended.
1174define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
1175; GFX9-LABEL: sample_c_d_o_2darray_V2:
1176; GFX9:       ; %bb.0: ; %main_body
1177; GFX9-NEXT:    v_mov_b32_e32 v13, v8
1178; GFX9-NEXT:    v_mov_b32_e32 v8, v0
1179; GFX9-NEXT:    v_mov_b32_e32 v0, 0xffff
1180; GFX9-NEXT:    v_mov_b32_e32 v9, v1
1181; GFX9-NEXT:    v_and_b32_e32 v1, v0, v6
1182; GFX9-NEXT:    v_lshl_or_b32 v12, v7, 16, v1
1183; GFX9-NEXT:    v_and_b32_e32 v1, v0, v4
1184; GFX9-NEXT:    v_and_b32_e32 v0, v0, v2
1185; GFX9-NEXT:    v_lshl_or_b32 v11, v5, 16, v1
1186; GFX9-NEXT:    v_lshl_or_b32 v10, v3, 16, v0
1187; GFX9-NEXT:    image_sample_c_d_o v[0:1], v[8:15], s[0:7], s[8:11] dmask:0x6 a16 da
1188; GFX9-NEXT:    s_waitcnt vmcnt(0)
1189; GFX9-NEXT:    ; return to shader part epilog
1190;
1191; GFX10-LABEL: sample_c_d_o_2darray_V2:
1192; GFX10:       ; %bb.0: ; %main_body
1193; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
1194; GFX10-NEXT:    v_and_b32_e32 v6, v9, v6
1195; GFX10-NEXT:    v_and_b32_e32 v4, v9, v4
1196; GFX10-NEXT:    v_and_b32_e32 v2, v9, v2
1197; GFX10-NEXT:    v_lshl_or_b32 v6, v7, 16, v6
1198; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
1199; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
1200; GFX10-NEXT:    image_sample_c_d_o v[0:1], [v0, v1, v3, v4, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
1201; GFX10-NEXT:    s_waitcnt vmcnt(0)
1202; GFX10-NEXT:    ; return to shader part epilog
1203main_body:
1204  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1205  ret <2 x float> %v
1206}
1207
1208declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1209declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1210declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1211declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1212declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1213declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1214declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1215
1216declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1217declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1218declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1219declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1220declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1221declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1222
1223declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1224declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1225declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1226declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1227declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1228declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1229declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1230declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1231
; Declarations for the `d`, `c.d`, `d.cl` and `c.d.cl` sample intrinsics.
; All return <4 x float> and use attribute group #1.
; Half operand counts for the plain `d` forms: 3 (1d), 6 (2d), 9 (3d).
; The `.cl` forms add one half; the `c.` forms add one float before the halves.
; Only `d` has a 3d declaration in this group.
; TODO confirm: presumably the halves are the derivatives followed by the
; coordinates (and the clamp for `.cl`), and the float is the depth-compare
; reference.
; Review note: the `d`/`d.cl` names end in `.f16.f16` while the `c.d`/`c.d.cl`
; names end in `.f32.f16`, although every operand after the single float is
; half in both families. Confirm whether the `.f32.f16` suffix is intended
; before relying on the names literally.
1232declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1233declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1234declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1235declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1236declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1237declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1238declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1239declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1240declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1241
; Declarations for the `cd`, `c.cd`, `cd.cl` and `c.cd.cl` sample intrinsics,
; each in a 1d and a 2d form. All return <4 x float> and use attribute
; group #1.
; Half operand counts for the plain `cd` forms: 3 (1d), 6 (2d).
; The `.cl` forms add one half; the `c.` forms add one float before the halves.
; Review note: the `cd`/`cd.cl` names end in `.f16.f16` while the
; `c.cd`/`c.cd.cl` names end in `.f32.f16`, although every operand after the
; single float is half in both families. Confirm whether that suffix is
; intended.
1242declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1243declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1244declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1245declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1246declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1247declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1248declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1249declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1250
; Declarations for the `l` and `c.l` sample intrinsics, each in a 1d and a 2d
; form. All return <4 x float>, are suffixed `.v4f32.f16`, and use attribute
; group #1.
; `l` takes two (1d) or three (2d) half operands; `c.l` takes the same halves
; preceded by one float.
; TODO confirm: presumably the last half is the explicit LOD and the float is
; the depth-compare reference.
1251declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1252declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1253declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1254declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1255
; Declarations for the `lz` and `c.lz` sample intrinsics, each in a 1d and a
; 2d form. All return <4 x float>, are suffixed `.v4f32.f16`, and use
; attribute group #1.
; `lz` takes one (1d) or two (2d) half operands, one fewer than the matching
; `l` declaration; `c.lz` takes the same halves preceded by one float.
1256declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1257declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1258declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1259declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1260
; Two declarations of the `c.d.o.2darray` sample intrinsic that differ only in
; return type: float for the first, <2 x float> for the second.
; Both take two leading i32 operands (one more than the declarations above),
; then one float, seven half operands, and the usual <8 x i32>, <4 x i32>, i1,
; i32, i32 tail. Both use attribute group #1.
; Review note: the operand lists are identical, yet the first name ends in
; `.f16.f16` and the second in `.f32.f16`. Confirm which suffix is intended.
1261declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1262declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1263
; Attribute groups referenced by the `#N` suffixes in this file.
; Every intrinsic declaration in the section above uses #1 (nounwind readonly).
; Uses of #0 and #2 are not visible in this part of the file; check for
; references before removing either group.
1264attributes #0 = { nounwind }
1265attributes #1 = { nounwind readonly }
1266attributes #2 = { nounwind readnone }
1267