1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
4
; sample.l.1d with an f16 coordinate and a constant 0.0 in the LOD operand
; slot (the %lod argument is not referenced by the body). For both gfx900 and
; gfx1010 the expected output is a single image_sample_lz carrying the a16
; modifier; the gfx1010 form additionally carries dim:SQ_RSRC_IMG_1D.
5define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
6; GFX9-LABEL: sample_l_1d:
7; GFX9:       ; %bb.0: ; %main_body
8; GFX9-NEXT:    s_mov_b32 s0, s2
9; GFX9-NEXT:    s_mov_b32 s1, s3
10; GFX9-NEXT:    s_mov_b32 s2, s4
11; GFX9-NEXT:    s_mov_b32 s3, s5
12; GFX9-NEXT:    s_mov_b32 s4, s6
13; GFX9-NEXT:    s_mov_b32 s5, s7
14; GFX9-NEXT:    s_mov_b32 s6, s8
15; GFX9-NEXT:    s_mov_b32 s7, s9
16; GFX9-NEXT:    s_mov_b32 s8, s10
17; GFX9-NEXT:    s_mov_b32 s9, s11
18; GFX9-NEXT:    s_mov_b32 s10, s12
19; GFX9-NEXT:    s_mov_b32 s11, s13
20; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
21; GFX9-NEXT:    s_waitcnt vmcnt(0)
22; GFX9-NEXT:    ; return to shader part epilog
23;
24; GFX10-LABEL: sample_l_1d:
25; GFX10:       ; %bb.0: ; %main_body
26; GFX10-NEXT:    s_mov_b32 s0, s2
27; GFX10-NEXT:    s_mov_b32 s1, s3
28; GFX10-NEXT:    s_mov_b32 s2, s4
29; GFX10-NEXT:    s_mov_b32 s3, s5
30; GFX10-NEXT:    s_mov_b32 s4, s6
31; GFX10-NEXT:    s_mov_b32 s5, s7
32; GFX10-NEXT:    s_mov_b32 s6, s8
33; GFX10-NEXT:    s_mov_b32 s7, s9
34; GFX10-NEXT:    s_mov_b32 s8, s10
35; GFX10-NEXT:    s_mov_b32 s9, s11
36; GFX10-NEXT:    s_mov_b32 s10, s12
37; GFX10-NEXT:    s_mov_b32 s11, s13
38; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
39; GFX10-NEXT:    s_waitcnt vmcnt(0)
40; GFX10-NEXT:    ; return to shader part epilog
41main_body:
  ; Constant 0.0 in the LOD slot: the checks above expect the _lz opcode with
  ; only v0 as the address operand.
42  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
43  ret <4 x float> %v
44}
45
; sample.l.2d with f16 coordinates and a constant -0.0 in the LOD operand slot
; (the %lod argument is not referenced by the body). The expected output packs
; the two half coordinates into v0 (v_lshlrev_b32 by 16 followed by
; v_and_or_b32 with a 0xffff mask) and then issues image_sample_lz with the
; a16 modifier; the gfx1010 form additionally carries dim:SQ_RSRC_IMG_2D.
46define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
47; GFX9-LABEL: sample_l_2d:
48; GFX9:       ; %bb.0: ; %main_body
49; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
50; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
51; GFX9-NEXT:    s_mov_b32 s0, s2
52; GFX9-NEXT:    s_mov_b32 s1, s3
53; GFX9-NEXT:    s_mov_b32 s2, s4
54; GFX9-NEXT:    s_mov_b32 s3, s5
55; GFX9-NEXT:    s_mov_b32 s4, s6
56; GFX9-NEXT:    s_mov_b32 s5, s7
57; GFX9-NEXT:    s_mov_b32 s6, s8
58; GFX9-NEXT:    s_mov_b32 s7, s9
59; GFX9-NEXT:    s_mov_b32 s8, s10
60; GFX9-NEXT:    s_mov_b32 s9, s11
61; GFX9-NEXT:    s_mov_b32 s10, s12
62; GFX9-NEXT:    s_mov_b32 s11, s13
63; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
64; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
65; GFX9-NEXT:    s_waitcnt vmcnt(0)
66; GFX9-NEXT:    ; return to shader part epilog
67;
68; GFX10-LABEL: sample_l_2d:
69; GFX10:       ; %bb.0: ; %main_body
70; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
71; GFX10-NEXT:    s_mov_b32 s0, s2
72; GFX10-NEXT:    s_mov_b32 s1, s3
73; GFX10-NEXT:    s_mov_b32 s2, s4
74; GFX10-NEXT:    s_mov_b32 s3, s5
75; GFX10-NEXT:    v_and_or_b32 v0, v0, 0xffff, v1
76; GFX10-NEXT:    s_mov_b32 s4, s6
77; GFX10-NEXT:    s_mov_b32 s5, s7
78; GFX10-NEXT:    s_mov_b32 s6, s8
79; GFX10-NEXT:    s_mov_b32 s7, s9
80; GFX10-NEXT:    s_mov_b32 s8, s10
81; GFX10-NEXT:    s_mov_b32 s9, s11
82; GFX10-NEXT:    s_mov_b32 s10, s12
83; GFX10-NEXT:    s_mov_b32 s11, s13
84; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
85; GFX10-NEXT:    s_waitcnt vmcnt(0)
86; GFX10-NEXT:    ; return to shader part epilog
87main_body:
  ; Negative zero in the LOD slot: the checks above still expect the _lz
  ; opcode, exactly as for positive zero.
88  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half -0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
89  ret <4 x float> %v
90}
91
; sample.c.l.1d with a float compare value, one f16 coordinate and a constant
; -2.0 in the LOD operand slot (the %lod argument is not referenced by the
; body). The expected output is image_sample_c_lz over the address v[0:1] with
; the a16 modifier; the gfx1010 form additionally carries dim:SQ_RSRC_IMG_1D.
; NOTE(review): the expected code fills the upper 16 bits of v1 from an SGPR
; shift (s_lshl_b32 s12, s0, 16); presumably that half is a don't-care because
; there is only one coordinate - confirm before relying on it.
92define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
93; GFX9-LABEL: sample_c_l_1d:
94; GFX9:       ; %bb.0: ; %main_body
95; GFX9-NEXT:    s_mov_b32 s0, s2
96; GFX9-NEXT:    s_mov_b32 s2, s4
97; GFX9-NEXT:    s_mov_b32 s4, s6
98; GFX9-NEXT:    s_mov_b32 s6, s8
99; GFX9-NEXT:    s_mov_b32 s8, s10
100; GFX9-NEXT:    s_mov_b32 s10, s12
101; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
102; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
103; GFX9-NEXT:    s_mov_b32 s1, s3
104; GFX9-NEXT:    s_mov_b32 s3, s5
105; GFX9-NEXT:    s_mov_b32 s5, s7
106; GFX9-NEXT:    s_mov_b32 s7, s9
107; GFX9-NEXT:    s_mov_b32 s9, s11
108; GFX9-NEXT:    s_mov_b32 s11, s13
109; GFX9-NEXT:    v_and_or_b32 v1, v1, v2, s12
110; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
111; GFX9-NEXT:    s_waitcnt vmcnt(0)
112; GFX9-NEXT:    ; return to shader part epilog
113;
114; GFX10-LABEL: sample_c_l_1d:
115; GFX10:       ; %bb.0: ; %main_body
116; GFX10-NEXT:    s_mov_b32 s0, s2
117; GFX10-NEXT:    s_mov_b32 s2, s4
118; GFX10-NEXT:    s_mov_b32 s4, s6
119; GFX10-NEXT:    s_mov_b32 s6, s8
120; GFX10-NEXT:    s_mov_b32 s8, s10
121; GFX10-NEXT:    s_mov_b32 s10, s12
122; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
123; GFX10-NEXT:    s_mov_b32 s1, s3
124; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, s12
125; GFX10-NEXT:    s_mov_b32 s3, s5
126; GFX10-NEXT:    s_mov_b32 s5, s7
127; GFX10-NEXT:    s_mov_b32 s7, s9
128; GFX10-NEXT:    s_mov_b32 s9, s11
129; GFX10-NEXT:    s_mov_b32 s11, s13
130; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
131; GFX10-NEXT:    s_waitcnt vmcnt(0)
132; GFX10-NEXT:    ; return to shader part epilog
133main_body:
  ; Strictly negative constant (-2.0) in the LOD slot: the checks above still
  ; expect the _lz opcode.
134  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half -2.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
135  ret <4 x float> %v
136}
137
; sample.c.l.2d with a float compare value, two f16 coordinates and a constant
; 0.0 in the LOD operand slot (the %lod argument is not referenced by the
; body). The expected output packs the two half coordinates into v1 and issues
; image_sample_c_lz over the address v[0:1] with the a16 modifier; the gfx1010
; form additionally carries dim:SQ_RSRC_IMG_2D.
138define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
139; GFX9-LABEL: sample_c_l_2d:
140; GFX9:       ; %bb.0: ; %main_body
141; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
142; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
143; GFX9-NEXT:    s_mov_b32 s0, s2
144; GFX9-NEXT:    s_mov_b32 s1, s3
145; GFX9-NEXT:    s_mov_b32 s2, s4
146; GFX9-NEXT:    s_mov_b32 s3, s5
147; GFX9-NEXT:    s_mov_b32 s4, s6
148; GFX9-NEXT:    s_mov_b32 s5, s7
149; GFX9-NEXT:    s_mov_b32 s6, s8
150; GFX9-NEXT:    s_mov_b32 s7, s9
151; GFX9-NEXT:    s_mov_b32 s8, s10
152; GFX9-NEXT:    s_mov_b32 s9, s11
153; GFX9-NEXT:    s_mov_b32 s10, s12
154; GFX9-NEXT:    s_mov_b32 s11, s13
155; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
156; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
157; GFX9-NEXT:    s_waitcnt vmcnt(0)
158; GFX9-NEXT:    ; return to shader part epilog
159;
160; GFX10-LABEL: sample_c_l_2d:
161; GFX10:       ; %bb.0: ; %main_body
162; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
163; GFX10-NEXT:    s_mov_b32 s0, s2
164; GFX10-NEXT:    s_mov_b32 s1, s3
165; GFX10-NEXT:    s_mov_b32 s2, s4
166; GFX10-NEXT:    s_mov_b32 s3, s5
167; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
168; GFX10-NEXT:    s_mov_b32 s4, s6
169; GFX10-NEXT:    s_mov_b32 s5, s7
170; GFX10-NEXT:    s_mov_b32 s6, s8
171; GFX10-NEXT:    s_mov_b32 s7, s9
172; GFX10-NEXT:    s_mov_b32 s8, s10
173; GFX10-NEXT:    s_mov_b32 s9, s11
174; GFX10-NEXT:    s_mov_b32 s10, s12
175; GFX10-NEXT:    s_mov_b32 s11, s13
176; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
177; GFX10-NEXT:    s_waitcnt vmcnt(0)
178; GFX10-NEXT:    ; return to shader part epilog
179main_body:
  ; Constant 0.0 in the LOD slot: the checks above expect the _c_lz opcode.
180  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
181  ret <4 x float> %v
182}
183
; sample.l.o.1d with an i32 offset, one f16 coordinate and a constant 0.0 in
; the LOD operand slot (the %lod argument is not referenced by the body). The
; expected output is image_sample_lz_o over the address v[0:1] with the a16
; modifier; the gfx1010 form additionally carries dim:SQ_RSRC_IMG_1D.
; NOTE(review): the expected code fills the upper 16 bits of v1 from an SGPR
; shift (s_lshl_b32 s12, s0, 16); presumably that half is a don't-care because
; there is only one coordinate - confirm before relying on it.
184define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, half %s, half %lod) {
185; GFX9-LABEL: sample_l_o_1d:
186; GFX9:       ; %bb.0: ; %main_body
187; GFX9-NEXT:    s_mov_b32 s0, s2
188; GFX9-NEXT:    s_mov_b32 s2, s4
189; GFX9-NEXT:    s_mov_b32 s4, s6
190; GFX9-NEXT:    s_mov_b32 s6, s8
191; GFX9-NEXT:    s_mov_b32 s8, s10
192; GFX9-NEXT:    s_mov_b32 s10, s12
193; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
194; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
195; GFX9-NEXT:    s_mov_b32 s1, s3
196; GFX9-NEXT:    s_mov_b32 s3, s5
197; GFX9-NEXT:    s_mov_b32 s5, s7
198; GFX9-NEXT:    s_mov_b32 s7, s9
199; GFX9-NEXT:    s_mov_b32 s9, s11
200; GFX9-NEXT:    s_mov_b32 s11, s13
201; GFX9-NEXT:    v_and_or_b32 v1, v1, v2, s12
202; GFX9-NEXT:    image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
203; GFX9-NEXT:    s_waitcnt vmcnt(0)
204; GFX9-NEXT:    ; return to shader part epilog
205;
206; GFX10-LABEL: sample_l_o_1d:
207; GFX10:       ; %bb.0: ; %main_body
208; GFX10-NEXT:    s_mov_b32 s0, s2
209; GFX10-NEXT:    s_mov_b32 s2, s4
210; GFX10-NEXT:    s_mov_b32 s4, s6
211; GFX10-NEXT:    s_mov_b32 s6, s8
212; GFX10-NEXT:    s_mov_b32 s8, s10
213; GFX10-NEXT:    s_mov_b32 s10, s12
214; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
215; GFX10-NEXT:    s_mov_b32 s1, s3
216; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, s12
217; GFX10-NEXT:    s_mov_b32 s3, s5
218; GFX10-NEXT:    s_mov_b32 s5, s7
219; GFX10-NEXT:    s_mov_b32 s7, s9
220; GFX10-NEXT:    s_mov_b32 s9, s11
221; GFX10-NEXT:    s_mov_b32 s11, s13
222; GFX10-NEXT:    image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
223; GFX10-NEXT:    s_waitcnt vmcnt(0)
224; GFX10-NEXT:    ; return to shader part epilog
225main_body:
  ; Constant 0.0 in the LOD slot: the checks above expect the _lz_o opcode.
226  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f16(i32 15, i32 %offset, half %s, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
227  ret <4 x float> %v
228}
229
; sample.l.o.2d with an i32 offset, two f16 coordinates and a constant 0.0 in
; the LOD operand slot (the %lod argument is not referenced by the body). The
; expected output packs the two half coordinates into v1 and issues
; image_sample_lz_o over the address v[0:1] with the a16 modifier; the gfx1010
; form additionally carries dim:SQ_RSRC_IMG_2D.
230define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, half %s, half %t, half %lod) {
231; GFX9-LABEL: sample_l_o_2d:
232; GFX9:       ; %bb.0: ; %main_body
233; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
234; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
235; GFX9-NEXT:    s_mov_b32 s0, s2
236; GFX9-NEXT:    s_mov_b32 s1, s3
237; GFX9-NEXT:    s_mov_b32 s2, s4
238; GFX9-NEXT:    s_mov_b32 s3, s5
239; GFX9-NEXT:    s_mov_b32 s4, s6
240; GFX9-NEXT:    s_mov_b32 s5, s7
241; GFX9-NEXT:    s_mov_b32 s6, s8
242; GFX9-NEXT:    s_mov_b32 s7, s9
243; GFX9-NEXT:    s_mov_b32 s8, s10
244; GFX9-NEXT:    s_mov_b32 s9, s11
245; GFX9-NEXT:    s_mov_b32 s10, s12
246; GFX9-NEXT:    s_mov_b32 s11, s13
247; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
248; GFX9-NEXT:    image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
249; GFX9-NEXT:    s_waitcnt vmcnt(0)
250; GFX9-NEXT:    ; return to shader part epilog
251;
252; GFX10-LABEL: sample_l_o_2d:
253; GFX10:       ; %bb.0: ; %main_body
254; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
255; GFX10-NEXT:    s_mov_b32 s0, s2
256; GFX10-NEXT:    s_mov_b32 s1, s3
257; GFX10-NEXT:    s_mov_b32 s2, s4
258; GFX10-NEXT:    s_mov_b32 s3, s5
259; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
260; GFX10-NEXT:    s_mov_b32 s4, s6
261; GFX10-NEXT:    s_mov_b32 s5, s7
262; GFX10-NEXT:    s_mov_b32 s6, s8
263; GFX10-NEXT:    s_mov_b32 s7, s9
264; GFX10-NEXT:    s_mov_b32 s8, s10
265; GFX10-NEXT:    s_mov_b32 s9, s11
266; GFX10-NEXT:    s_mov_b32 s10, s12
267; GFX10-NEXT:    s_mov_b32 s11, s13
268; GFX10-NEXT:    image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
269; GFX10-NEXT:    s_waitcnt vmcnt(0)
270; GFX10-NEXT:    ; return to shader part epilog
271main_body:
  ; Constant 0.0 in the LOD slot: the checks above expect the _lz_o opcode.
272  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f16(i32 15, i32 %offset, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
273  ret <4 x float> %v
274}
275
; sample.c.l.o.1d with an i32 offset, a float compare value, one f16
; coordinate and a constant 0.0 in the LOD operand slot (the %lod argument is
; not referenced by the body). The expected output is image_sample_c_lz_o over
; the address v[0:2] with the a16 modifier; the gfx1010 form additionally
; carries dim:SQ_RSRC_IMG_1D.
; NOTE(review): the expected code fills the upper 16 bits of v2 from an SGPR
; shift (s_lshl_b32 s12, s0, 16); presumably that half is a don't-care because
; there is only one coordinate - confirm before relying on it.
276define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %lod) {
277; GFX9-LABEL: sample_c_l_o_1d:
278; GFX9:       ; %bb.0: ; %main_body
279; GFX9-NEXT:    s_mov_b32 s0, s2
280; GFX9-NEXT:    s_mov_b32 s2, s4
281; GFX9-NEXT:    s_mov_b32 s4, s6
282; GFX9-NEXT:    s_mov_b32 s6, s8
283; GFX9-NEXT:    s_mov_b32 s8, s10
284; GFX9-NEXT:    s_mov_b32 s10, s12
285; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
286; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
287; GFX9-NEXT:    s_mov_b32 s1, s3
288; GFX9-NEXT:    s_mov_b32 s3, s5
289; GFX9-NEXT:    s_mov_b32 s5, s7
290; GFX9-NEXT:    s_mov_b32 s7, s9
291; GFX9-NEXT:    s_mov_b32 s9, s11
292; GFX9-NEXT:    s_mov_b32 s11, s13
293; GFX9-NEXT:    v_and_or_b32 v2, v2, v3, s12
294; GFX9-NEXT:    image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
295; GFX9-NEXT:    s_waitcnt vmcnt(0)
296; GFX9-NEXT:    ; return to shader part epilog
297;
298; GFX10-LABEL: sample_c_l_o_1d:
299; GFX10:       ; %bb.0: ; %main_body
300; GFX10-NEXT:    s_mov_b32 s0, s2
301; GFX10-NEXT:    s_mov_b32 s2, s4
302; GFX10-NEXT:    s_mov_b32 s4, s6
303; GFX10-NEXT:    s_mov_b32 s6, s8
304; GFX10-NEXT:    s_mov_b32 s8, s10
305; GFX10-NEXT:    s_mov_b32 s10, s12
306; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
307; GFX10-NEXT:    s_mov_b32 s1, s3
308; GFX10-NEXT:    v_and_or_b32 v2, v2, 0xffff, s12
309; GFX10-NEXT:    s_mov_b32 s3, s5
310; GFX10-NEXT:    s_mov_b32 s5, s7
311; GFX10-NEXT:    s_mov_b32 s7, s9
312; GFX10-NEXT:    s_mov_b32 s9, s11
313; GFX10-NEXT:    s_mov_b32 s11, s13
314; GFX10-NEXT:    image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
315; GFX10-NEXT:    s_waitcnt vmcnt(0)
316; GFX10-NEXT:    ; return to shader part epilog
317main_body:
  ; Constant 0.0 in the LOD slot: the checks above expect the _c_lz_o opcode.
318  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f16(i32 15, i32 %offset, float %zcompare, half %s, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
319  ret <4 x float> %v
320}
321
; sample.c.l.o.2d with an i32 offset, a float compare value, two f16
; coordinates and a constant 0.0 in the LOD operand slot (the %lod argument is
; not referenced by the body). The expected output packs the two half
; coordinates into v2 and issues image_sample_c_lz_o over the address v[0:2]
; with the a16 modifier; the gfx1010 form additionally carries
; dim:SQ_RSRC_IMG_2D.
322define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t, half %lod) {
323; GFX9-LABEL: sample_c_l_o_2d:
324; GFX9:       ; %bb.0: ; %main_body
325; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
326; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
327; GFX9-NEXT:    s_mov_b32 s0, s2
328; GFX9-NEXT:    s_mov_b32 s1, s3
329; GFX9-NEXT:    s_mov_b32 s2, s4
330; GFX9-NEXT:    s_mov_b32 s3, s5
331; GFX9-NEXT:    s_mov_b32 s4, s6
332; GFX9-NEXT:    s_mov_b32 s5, s7
333; GFX9-NEXT:    s_mov_b32 s6, s8
334; GFX9-NEXT:    s_mov_b32 s7, s9
335; GFX9-NEXT:    s_mov_b32 s8, s10
336; GFX9-NEXT:    s_mov_b32 s9, s11
337; GFX9-NEXT:    s_mov_b32 s10, s12
338; GFX9-NEXT:    s_mov_b32 s11, s13
339; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v3
340; GFX9-NEXT:    image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
341; GFX9-NEXT:    s_waitcnt vmcnt(0)
342; GFX9-NEXT:    ; return to shader part epilog
343;
344; GFX10-LABEL: sample_c_l_o_2d:
345; GFX10:       ; %bb.0: ; %main_body
346; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
347; GFX10-NEXT:    s_mov_b32 s0, s2
348; GFX10-NEXT:    s_mov_b32 s1, s3
349; GFX10-NEXT:    s_mov_b32 s2, s4
350; GFX10-NEXT:    s_mov_b32 s3, s5
351; GFX10-NEXT:    v_and_or_b32 v2, v2, 0xffff, v3
352; GFX10-NEXT:    s_mov_b32 s4, s6
353; GFX10-NEXT:    s_mov_b32 s5, s7
354; GFX10-NEXT:    s_mov_b32 s6, s8
355; GFX10-NEXT:    s_mov_b32 s7, s9
356; GFX10-NEXT:    s_mov_b32 s8, s10
357; GFX10-NEXT:    s_mov_b32 s9, s11
358; GFX10-NEXT:    s_mov_b32 s10, s12
359; GFX10-NEXT:    s_mov_b32 s11, s13
360; GFX10-NEXT:    image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
361; GFX10-NEXT:    s_waitcnt vmcnt(0)
362; GFX10-NEXT:    ; return to shader part epilog
363main_body:
  ; Constant 0.0 in the LOD slot: the checks above expect the _c_lz_o opcode.
364  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f16(i32 15, i32 %offset, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
365  ret <4 x float> %v
366}
367
; gather4.l.2d with two f16 coordinates and a constant 0.0 in the LOD operand
; slot (the %lod argument is not referenced by the body). The expected output
; packs the two half coordinates into v0 and issues image_gather4_lz with the
; a16 modifier; the gfx1010 form additionally carries dim:SQ_RSRC_IMG_2D.
368define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
369; GFX9-LABEL: gather4_l_2d:
370; GFX9:       ; %bb.0: ; %main_body
371; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
372; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
373; GFX9-NEXT:    s_mov_b32 s0, s2
374; GFX9-NEXT:    s_mov_b32 s1, s3
375; GFX9-NEXT:    s_mov_b32 s2, s4
376; GFX9-NEXT:    s_mov_b32 s3, s5
377; GFX9-NEXT:    s_mov_b32 s4, s6
378; GFX9-NEXT:    s_mov_b32 s5, s7
379; GFX9-NEXT:    s_mov_b32 s6, s8
380; GFX9-NEXT:    s_mov_b32 s7, s9
381; GFX9-NEXT:    s_mov_b32 s8, s10
382; GFX9-NEXT:    s_mov_b32 s9, s11
383; GFX9-NEXT:    s_mov_b32 s10, s12
384; GFX9-NEXT:    s_mov_b32 s11, s13
385; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
386; GFX9-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
387; GFX9-NEXT:    s_waitcnt vmcnt(0)
388; GFX9-NEXT:    ; return to shader part epilog
389;
390; GFX10-LABEL: gather4_l_2d:
391; GFX10:       ; %bb.0: ; %main_body
392; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
393; GFX10-NEXT:    s_mov_b32 s0, s2
394; GFX10-NEXT:    s_mov_b32 s1, s3
395; GFX10-NEXT:    s_mov_b32 s2, s4
396; GFX10-NEXT:    s_mov_b32 s3, s5
397; GFX10-NEXT:    v_and_or_b32 v0, v0, 0xffff, v1
398; GFX10-NEXT:    s_mov_b32 s4, s6
399; GFX10-NEXT:    s_mov_b32 s5, s7
400; GFX10-NEXT:    s_mov_b32 s6, s8
401; GFX10-NEXT:    s_mov_b32 s7, s9
402; GFX10-NEXT:    s_mov_b32 s8, s10
403; GFX10-NEXT:    s_mov_b32 s9, s11
404; GFX10-NEXT:    s_mov_b32 s10, s12
405; GFX10-NEXT:    s_mov_b32 s11, s13
406; GFX10-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
407; GFX10-NEXT:    s_waitcnt vmcnt(0)
408; GFX10-NEXT:    ; return to shader part epilog
409main_body:
  ; Constant 0.0 in the LOD slot: the checks above expect the gather4 _lz
  ; opcode.
410  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 15, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
411  ret <4 x float> %v
412}
413
; gather4.c.l.2d with a float compare value, two f16 coordinates and a
; constant 0.0 in the LOD operand slot (the %lod argument is not referenced by
; the body). The expected output packs the two half coordinates into v1 and
; issues image_gather4_c_lz over the address v[0:1] with the a16 modifier; the
; gfx1010 form additionally carries dim:SQ_RSRC_IMG_2D.
414define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
415; GFX9-LABEL: gather4_c_l_2d:
416; GFX9:       ; %bb.0: ; %main_body
417; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
418; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
419; GFX9-NEXT:    s_mov_b32 s0, s2
420; GFX9-NEXT:    s_mov_b32 s1, s3
421; GFX9-NEXT:    s_mov_b32 s2, s4
422; GFX9-NEXT:    s_mov_b32 s3, s5
423; GFX9-NEXT:    s_mov_b32 s4, s6
424; GFX9-NEXT:    s_mov_b32 s5, s7
425; GFX9-NEXT:    s_mov_b32 s6, s8
426; GFX9-NEXT:    s_mov_b32 s7, s9
427; GFX9-NEXT:    s_mov_b32 s8, s10
428; GFX9-NEXT:    s_mov_b32 s9, s11
429; GFX9-NEXT:    s_mov_b32 s10, s12
430; GFX9-NEXT:    s_mov_b32 s11, s13
431; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
432; GFX9-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
433; GFX9-NEXT:    s_waitcnt vmcnt(0)
434; GFX9-NEXT:    ; return to shader part epilog
435;
436; GFX10-LABEL: gather4_c_l_2d:
437; GFX10:       ; %bb.0: ; %main_body
438; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
439; GFX10-NEXT:    s_mov_b32 s0, s2
440; GFX10-NEXT:    s_mov_b32 s1, s3
441; GFX10-NEXT:    s_mov_b32 s2, s4
442; GFX10-NEXT:    s_mov_b32 s3, s5
443; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
444; GFX10-NEXT:    s_mov_b32 s4, s6
445; GFX10-NEXT:    s_mov_b32 s5, s7
446; GFX10-NEXT:    s_mov_b32 s6, s8
447; GFX10-NEXT:    s_mov_b32 s7, s9
448; GFX10-NEXT:    s_mov_b32 s8, s10
449; GFX10-NEXT:    s_mov_b32 s9, s11
450; GFX10-NEXT:    s_mov_b32 s10, s12
451; GFX10-NEXT:    s_mov_b32 s11, s13
452; GFX10-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
453; GFX10-NEXT:    s_waitcnt vmcnt(0)
454; GFX10-NEXT:    ; return to shader part epilog
455main_body:
  ; Constant 0.0 in the LOD slot: the checks above expect the gather4 _c_lz
  ; opcode.
456  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
457  ret <4 x float> %v
458}
459
; gather4.l.o.2d with an i32 offset, two f16 coordinates and a constant 0.0 in
; the LOD operand slot (the %lod argument is not referenced by the body). The
; expected output packs the two half coordinates into v1 and issues
; image_gather4_lz_o over the address v[0:1] with the a16 modifier; the
; gfx1010 form additionally carries dim:SQ_RSRC_IMG_2D.
460define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, half %s, half %t, half %lod) {
461; GFX9-LABEL: gather4_l_o_2d:
462; GFX9:       ; %bb.0: ; %main_body
463; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
464; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
465; GFX9-NEXT:    s_mov_b32 s0, s2
466; GFX9-NEXT:    s_mov_b32 s1, s3
467; GFX9-NEXT:    s_mov_b32 s2, s4
468; GFX9-NEXT:    s_mov_b32 s3, s5
469; GFX9-NEXT:    s_mov_b32 s4, s6
470; GFX9-NEXT:    s_mov_b32 s5, s7
471; GFX9-NEXT:    s_mov_b32 s6, s8
472; GFX9-NEXT:    s_mov_b32 s7, s9
473; GFX9-NEXT:    s_mov_b32 s8, s10
474; GFX9-NEXT:    s_mov_b32 s9, s11
475; GFX9-NEXT:    s_mov_b32 s10, s12
476; GFX9-NEXT:    s_mov_b32 s11, s13
477; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
478; GFX9-NEXT:    image_gather4_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
479; GFX9-NEXT:    s_waitcnt vmcnt(0)
480; GFX9-NEXT:    ; return to shader part epilog
481;
482; GFX10-LABEL: gather4_l_o_2d:
483; GFX10:       ; %bb.0: ; %main_body
484; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
485; GFX10-NEXT:    s_mov_b32 s0, s2
486; GFX10-NEXT:    s_mov_b32 s1, s3
487; GFX10-NEXT:    s_mov_b32 s2, s4
488; GFX10-NEXT:    s_mov_b32 s3, s5
489; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
490; GFX10-NEXT:    s_mov_b32 s4, s6
491; GFX10-NEXT:    s_mov_b32 s5, s7
492; GFX10-NEXT:    s_mov_b32 s6, s8
493; GFX10-NEXT:    s_mov_b32 s7, s9
494; GFX10-NEXT:    s_mov_b32 s8, s10
495; GFX10-NEXT:    s_mov_b32 s9, s11
496; GFX10-NEXT:    s_mov_b32 s10, s12
497; GFX10-NEXT:    s_mov_b32 s11, s13
498; GFX10-NEXT:    image_gather4_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
499; GFX10-NEXT:    s_waitcnt vmcnt(0)
500; GFX10-NEXT:    ; return to shader part epilog
501main_body:
  ; Constant 0.0 in the LOD slot: the checks above expect the gather4 _lz_o
  ; opcode.
502  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f16(i32 15, i32 %offset, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
503  ret <4 x float> %v
504}
505
; gather4.c.l.o.2d with an i32 offset, a float compare value, two f16
; coordinates and a constant 0.0 in the LOD operand slot (the %lod argument is
; not referenced by the body). The expected output packs the two half
; coordinates into v2 and issues image_gather4_c_lz_o over the address v[0:2]
; with the a16 modifier; the gfx1010 form additionally carries
; dim:SQ_RSRC_IMG_2D.
506define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t, half %lod) {
507; GFX9-LABEL: gather4_c_l_o_2d:
508; GFX9:       ; %bb.0: ; %main_body
509; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
510; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
511; GFX9-NEXT:    s_mov_b32 s0, s2
512; GFX9-NEXT:    s_mov_b32 s1, s3
513; GFX9-NEXT:    s_mov_b32 s2, s4
514; GFX9-NEXT:    s_mov_b32 s3, s5
515; GFX9-NEXT:    s_mov_b32 s4, s6
516; GFX9-NEXT:    s_mov_b32 s5, s7
517; GFX9-NEXT:    s_mov_b32 s6, s8
518; GFX9-NEXT:    s_mov_b32 s7, s9
519; GFX9-NEXT:    s_mov_b32 s8, s10
520; GFX9-NEXT:    s_mov_b32 s9, s11
521; GFX9-NEXT:    s_mov_b32 s10, s12
522; GFX9-NEXT:    s_mov_b32 s11, s13
523; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v3
524; GFX9-NEXT:    image_gather4_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
525; GFX9-NEXT:    s_waitcnt vmcnt(0)
526; GFX9-NEXT:    ; return to shader part epilog
527;
528; GFX10-LABEL: gather4_c_l_o_2d:
529; GFX10:       ; %bb.0: ; %main_body
530; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
531; GFX10-NEXT:    s_mov_b32 s0, s2
532; GFX10-NEXT:    s_mov_b32 s1, s3
533; GFX10-NEXT:    s_mov_b32 s2, s4
534; GFX10-NEXT:    s_mov_b32 s3, s5
535; GFX10-NEXT:    v_and_or_b32 v2, v2, 0xffff, v3
536; GFX10-NEXT:    s_mov_b32 s4, s6
537; GFX10-NEXT:    s_mov_b32 s5, s7
538; GFX10-NEXT:    s_mov_b32 s6, s8
539; GFX10-NEXT:    s_mov_b32 s7, s9
540; GFX10-NEXT:    s_mov_b32 s8, s10
541; GFX10-NEXT:    s_mov_b32 s9, s11
542; GFX10-NEXT:    s_mov_b32 s10, s12
543; GFX10-NEXT:    s_mov_b32 s11, s13
544; GFX10-NEXT:    image_gather4_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
545; GFX10-NEXT:    s_waitcnt vmcnt(0)
546; GFX10-NEXT:    ; return to shader part epilog
547main_body:
  ; Constant 0.0 in the LOD slot: the checks above expect the gather4 _c_lz_o
  ; opcode.
548  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f16(i32 15, i32 %offset, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
549  ret <4 x float> %v
550}
551
; Declarations of the image intrinsics called by the tests in this file, all
; returning <4 x float> and overloaded on f16 (.v4f32.f16). In every
; signature the leading i32 and the trailing i1/i32/i32 operands are immarg;
; between them come the optional i32 offset (.o variants), the optional float
; compare value (.c variants), the half operands, then the <8 x i32> and
; <4 x i32> operands that the tests pass as %rsrc and %samp.
552declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
553declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
554declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
555declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
556declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f16(i32 immarg, i32, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
557declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f16(i32 immarg, i32, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
558declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f16(i32 immarg, i32, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
559declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f16(i32 immarg, i32, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
560declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
561declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
562declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f16(i32 immarg, i32, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
563declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f16(i32 immarg, i32, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
564
; Attribute group shared by every intrinsic declaration above.
565attributes #0 = { nounwind readonly }
566