1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
4
; gather4_o_2d: calls llvm.amdgcn.image.gather4.o.2d with dmask 1 and the
; address operands (offset, s, t). The checks for both run lines expect exec to
; be saved, s_wqm to be applied and the saved mask to be and-ed back in, with a
; single image_gather4_o whose address operand is v[0:2].
5define amdgpu_ps <4 x float> @gather4_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
6; GFX6-LABEL: gather4_o_2d:
7; GFX6:       ; %bb.0: ; %main_body
8; GFX6-NEXT:    s_mov_b32 s0, s2
9; GFX6-NEXT:    s_mov_b32 s1, s3
10; GFX6-NEXT:    s_mov_b32 s2, s4
11; GFX6-NEXT:    s_mov_b32 s3, s5
12; GFX6-NEXT:    s_mov_b32 s4, s6
13; GFX6-NEXT:    s_mov_b32 s5, s7
14; GFX6-NEXT:    s_mov_b32 s6, s8
15; GFX6-NEXT:    s_mov_b32 s7, s9
16; GFX6-NEXT:    s_mov_b32 s8, s10
17; GFX6-NEXT:    s_mov_b32 s9, s11
18; GFX6-NEXT:    s_mov_b64 s[14:15], exec
19; GFX6-NEXT:    s_mov_b32 s10, s12
20; GFX6-NEXT:    s_mov_b32 s11, s13
21; GFX6-NEXT:    s_wqm_b64 exec, exec
22; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
23; GFX6-NEXT:    image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
24; GFX6-NEXT:    s_waitcnt vmcnt(0)
25; GFX6-NEXT:    ; return to shader part epilog
26;
27; GFX10-LABEL: gather4_o_2d:
28; GFX10:       ; %bb.0: ; %main_body
29; GFX10-NEXT:    s_mov_b32 s1, exec_lo
30; GFX10-NEXT:    s_mov_b32 s0, s2
31; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
32; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
33; GFX10-NEXT:    s_mov_b32 s1, s3
34; GFX10-NEXT:    s_mov_b32 s2, s4
35; GFX10-NEXT:    s_mov_b32 s3, s5
36; GFX10-NEXT:    s_mov_b32 s4, s6
37; GFX10-NEXT:    s_mov_b32 s5, s7
38; GFX10-NEXT:    s_mov_b32 s6, s8
39; GFX10-NEXT:    s_mov_b32 s7, s9
40; GFX10-NEXT:    s_mov_b32 s8, s10
41; GFX10-NEXT:    s_mov_b32 s9, s11
42; GFX10-NEXT:    s_mov_b32 s10, s12
43; GFX10-NEXT:    s_mov_b32 s11, s13
44; GFX10-NEXT:    image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
45; GFX10-NEXT:    s_waitcnt vmcnt(0)
46; GFX10-NEXT:    ; return to shader part epilog
47main_body:
48  %v = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
49  ret <4 x float> %v
50}
51
; gather4_c_o_2d: calls llvm.amdgcn.image.gather4.c.o.2d with dmask 1 and the
; address operands (offset, zcompare, s, t). The checks for both run lines
; expect the exec save / s_wqm / s_and sequence and a single image_gather4_c_o
; whose address operand is v[0:3].
52define amdgpu_ps <4 x float> @gather4_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
53; GFX6-LABEL: gather4_c_o_2d:
54; GFX6:       ; %bb.0: ; %main_body
55; GFX6-NEXT:    s_mov_b32 s0, s2
56; GFX6-NEXT:    s_mov_b32 s1, s3
57; GFX6-NEXT:    s_mov_b32 s2, s4
58; GFX6-NEXT:    s_mov_b32 s3, s5
59; GFX6-NEXT:    s_mov_b32 s4, s6
60; GFX6-NEXT:    s_mov_b32 s5, s7
61; GFX6-NEXT:    s_mov_b32 s6, s8
62; GFX6-NEXT:    s_mov_b32 s7, s9
63; GFX6-NEXT:    s_mov_b32 s8, s10
64; GFX6-NEXT:    s_mov_b32 s9, s11
65; GFX6-NEXT:    s_mov_b64 s[14:15], exec
66; GFX6-NEXT:    s_mov_b32 s10, s12
67; GFX6-NEXT:    s_mov_b32 s11, s13
68; GFX6-NEXT:    s_wqm_b64 exec, exec
69; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
70; GFX6-NEXT:    image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
71; GFX6-NEXT:    s_waitcnt vmcnt(0)
72; GFX6-NEXT:    ; return to shader part epilog
73;
74; GFX10-LABEL: gather4_c_o_2d:
75; GFX10:       ; %bb.0: ; %main_body
76; GFX10-NEXT:    s_mov_b32 s1, exec_lo
77; GFX10-NEXT:    s_mov_b32 s0, s2
78; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
79; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
80; GFX10-NEXT:    s_mov_b32 s1, s3
81; GFX10-NEXT:    s_mov_b32 s2, s4
82; GFX10-NEXT:    s_mov_b32 s3, s5
83; GFX10-NEXT:    s_mov_b32 s4, s6
84; GFX10-NEXT:    s_mov_b32 s5, s7
85; GFX10-NEXT:    s_mov_b32 s6, s8
86; GFX10-NEXT:    s_mov_b32 s7, s9
87; GFX10-NEXT:    s_mov_b32 s8, s10
88; GFX10-NEXT:    s_mov_b32 s9, s11
89; GFX10-NEXT:    s_mov_b32 s10, s12
90; GFX10-NEXT:    s_mov_b32 s11, s13
91; GFX10-NEXT:    image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
92; GFX10-NEXT:    s_waitcnt vmcnt(0)
93; GFX10-NEXT:    ; return to shader part epilog
94main_body:
95  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
96  ret <4 x float> %v
97}
98
; gather4_cl_o_2d: calls llvm.amdgcn.image.gather4.cl.o.2d with dmask 1 and the
; address operands (offset, s, t, clamp). The checks for both run lines expect
; the exec save / s_wqm / s_and sequence and a single image_gather4_cl_o whose
; address operand is v[0:3].
99define amdgpu_ps <4 x float> @gather4_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) {
100; GFX6-LABEL: gather4_cl_o_2d:
101; GFX6:       ; %bb.0: ; %main_body
102; GFX6-NEXT:    s_mov_b32 s0, s2
103; GFX6-NEXT:    s_mov_b32 s1, s3
104; GFX6-NEXT:    s_mov_b32 s2, s4
105; GFX6-NEXT:    s_mov_b32 s3, s5
106; GFX6-NEXT:    s_mov_b32 s4, s6
107; GFX6-NEXT:    s_mov_b32 s5, s7
108; GFX6-NEXT:    s_mov_b32 s6, s8
109; GFX6-NEXT:    s_mov_b32 s7, s9
110; GFX6-NEXT:    s_mov_b32 s8, s10
111; GFX6-NEXT:    s_mov_b32 s9, s11
112; GFX6-NEXT:    s_mov_b64 s[14:15], exec
113; GFX6-NEXT:    s_mov_b32 s10, s12
114; GFX6-NEXT:    s_mov_b32 s11, s13
115; GFX6-NEXT:    s_wqm_b64 exec, exec
116; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
117; GFX6-NEXT:    image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
118; GFX6-NEXT:    s_waitcnt vmcnt(0)
119; GFX6-NEXT:    ; return to shader part epilog
120;
121; GFX10-LABEL: gather4_cl_o_2d:
122; GFX10:       ; %bb.0: ; %main_body
123; GFX10-NEXT:    s_mov_b32 s1, exec_lo
124; GFX10-NEXT:    s_mov_b32 s0, s2
125; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
126; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
127; GFX10-NEXT:    s_mov_b32 s1, s3
128; GFX10-NEXT:    s_mov_b32 s2, s4
129; GFX10-NEXT:    s_mov_b32 s3, s5
130; GFX10-NEXT:    s_mov_b32 s4, s6
131; GFX10-NEXT:    s_mov_b32 s5, s7
132; GFX10-NEXT:    s_mov_b32 s6, s8
133; GFX10-NEXT:    s_mov_b32 s7, s9
134; GFX10-NEXT:    s_mov_b32 s8, s10
135; GFX10-NEXT:    s_mov_b32 s9, s11
136; GFX10-NEXT:    s_mov_b32 s10, s12
137; GFX10-NEXT:    s_mov_b32 s11, s13
138; GFX10-NEXT:    image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
139; GFX10-NEXT:    s_waitcnt vmcnt(0)
140; GFX10-NEXT:    ; return to shader part epilog
141main_body:
142  %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
143  ret <4 x float> %v
144}
145
; gather4_c_cl_o_2d: calls llvm.amdgcn.image.gather4.c.cl.o.2d with dmask 1 and
; the address operands (offset, zcompare, s, t, clamp). The checks for both run
; lines expect the exec save / s_wqm / s_and sequence and a single
; image_gather4_c_cl_o whose address operand is the wider v[0:7] range.
146define amdgpu_ps <4 x float> @gather4_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) {
147; GFX6-LABEL: gather4_c_cl_o_2d:
148; GFX6:       ; %bb.0: ; %main_body
149; GFX6-NEXT:    s_mov_b32 s0, s2
150; GFX6-NEXT:    s_mov_b32 s1, s3
151; GFX6-NEXT:    s_mov_b32 s2, s4
152; GFX6-NEXT:    s_mov_b32 s3, s5
153; GFX6-NEXT:    s_mov_b32 s4, s6
154; GFX6-NEXT:    s_mov_b32 s5, s7
155; GFX6-NEXT:    s_mov_b32 s6, s8
156; GFX6-NEXT:    s_mov_b32 s7, s9
157; GFX6-NEXT:    s_mov_b32 s8, s10
158; GFX6-NEXT:    s_mov_b32 s9, s11
159; GFX6-NEXT:    s_mov_b64 s[14:15], exec
160; GFX6-NEXT:    s_mov_b32 s10, s12
161; GFX6-NEXT:    s_mov_b32 s11, s13
162; GFX6-NEXT:    s_wqm_b64 exec, exec
163; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
164; GFX6-NEXT:    image_gather4_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
165; GFX6-NEXT:    s_waitcnt vmcnt(0)
166; GFX6-NEXT:    ; return to shader part epilog
167;
168; GFX10-LABEL: gather4_c_cl_o_2d:
169; GFX10:       ; %bb.0: ; %main_body
170; GFX10-NEXT:    s_mov_b32 s1, exec_lo
171; GFX10-NEXT:    s_mov_b32 s0, s2
172; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
173; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
174; GFX10-NEXT:    s_mov_b32 s1, s3
175; GFX10-NEXT:    s_mov_b32 s2, s4
176; GFX10-NEXT:    s_mov_b32 s3, s5
177; GFX10-NEXT:    s_mov_b32 s4, s6
178; GFX10-NEXT:    s_mov_b32 s5, s7
179; GFX10-NEXT:    s_mov_b32 s6, s8
180; GFX10-NEXT:    s_mov_b32 s7, s9
181; GFX10-NEXT:    s_mov_b32 s8, s10
182; GFX10-NEXT:    s_mov_b32 s9, s11
183; GFX10-NEXT:    s_mov_b32 s10, s12
184; GFX10-NEXT:    s_mov_b32 s11, s13
185; GFX10-NEXT:    image_gather4_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
186; GFX10-NEXT:    s_waitcnt vmcnt(0)
187; GFX10-NEXT:    ; return to shader part epilog
188main_body:
189  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
190  ret <4 x float> %v
191}
192
; gather4_b_o_2d: calls llvm.amdgcn.image.gather4.b.o.2d with dmask 1 and the
; address operands (offset, bias, s, t). The checks for both run lines expect
; the exec save / s_wqm / s_and sequence and a single image_gather4_b_o whose
; address operand is v[0:3].
193define amdgpu_ps <4 x float> @gather4_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) {
194; GFX6-LABEL: gather4_b_o_2d:
195; GFX6:       ; %bb.0: ; %main_body
196; GFX6-NEXT:    s_mov_b32 s0, s2
197; GFX6-NEXT:    s_mov_b32 s1, s3
198; GFX6-NEXT:    s_mov_b32 s2, s4
199; GFX6-NEXT:    s_mov_b32 s3, s5
200; GFX6-NEXT:    s_mov_b32 s4, s6
201; GFX6-NEXT:    s_mov_b32 s5, s7
202; GFX6-NEXT:    s_mov_b32 s6, s8
203; GFX6-NEXT:    s_mov_b32 s7, s9
204; GFX6-NEXT:    s_mov_b32 s8, s10
205; GFX6-NEXT:    s_mov_b32 s9, s11
206; GFX6-NEXT:    s_mov_b64 s[14:15], exec
207; GFX6-NEXT:    s_mov_b32 s10, s12
208; GFX6-NEXT:    s_mov_b32 s11, s13
209; GFX6-NEXT:    s_wqm_b64 exec, exec
210; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
211; GFX6-NEXT:    image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
212; GFX6-NEXT:    s_waitcnt vmcnt(0)
213; GFX6-NEXT:    ; return to shader part epilog
214;
215; GFX10-LABEL: gather4_b_o_2d:
216; GFX10:       ; %bb.0: ; %main_body
217; GFX10-NEXT:    s_mov_b32 s1, exec_lo
218; GFX10-NEXT:    s_mov_b32 s0, s2
219; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
220; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
221; GFX10-NEXT:    s_mov_b32 s1, s3
222; GFX10-NEXT:    s_mov_b32 s2, s4
223; GFX10-NEXT:    s_mov_b32 s3, s5
224; GFX10-NEXT:    s_mov_b32 s4, s6
225; GFX10-NEXT:    s_mov_b32 s5, s7
226; GFX10-NEXT:    s_mov_b32 s6, s8
227; GFX10-NEXT:    s_mov_b32 s7, s9
228; GFX10-NEXT:    s_mov_b32 s8, s10
229; GFX10-NEXT:    s_mov_b32 s9, s11
230; GFX10-NEXT:    s_mov_b32 s10, s12
231; GFX10-NEXT:    s_mov_b32 s11, s13
232; GFX10-NEXT:    image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
233; GFX10-NEXT:    s_waitcnt vmcnt(0)
234; GFX10-NEXT:    ; return to shader part epilog
235main_body:
236  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
237  ret <4 x float> %v
238}
239
; gather4_c_b_o_2d: calls llvm.amdgcn.image.gather4.c.b.o.2d with dmask 1 and
; the address operands (offset, bias, zcompare, s, t). The checks for both run
; lines expect the exec save / s_wqm / s_and sequence and a single
; image_gather4_c_b_o whose address operand is the wider v[0:7] range.
240define amdgpu_ps <4 x float> @gather4_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) {
241; GFX6-LABEL: gather4_c_b_o_2d:
242; GFX6:       ; %bb.0: ; %main_body
243; GFX6-NEXT:    s_mov_b32 s0, s2
244; GFX6-NEXT:    s_mov_b32 s1, s3
245; GFX6-NEXT:    s_mov_b32 s2, s4
246; GFX6-NEXT:    s_mov_b32 s3, s5
247; GFX6-NEXT:    s_mov_b32 s4, s6
248; GFX6-NEXT:    s_mov_b32 s5, s7
249; GFX6-NEXT:    s_mov_b32 s6, s8
250; GFX6-NEXT:    s_mov_b32 s7, s9
251; GFX6-NEXT:    s_mov_b32 s8, s10
252; GFX6-NEXT:    s_mov_b32 s9, s11
253; GFX6-NEXT:    s_mov_b64 s[14:15], exec
254; GFX6-NEXT:    s_mov_b32 s10, s12
255; GFX6-NEXT:    s_mov_b32 s11, s13
256; GFX6-NEXT:    s_wqm_b64 exec, exec
257; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
258; GFX6-NEXT:    image_gather4_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
259; GFX6-NEXT:    s_waitcnt vmcnt(0)
260; GFX6-NEXT:    ; return to shader part epilog
261;
262; GFX10-LABEL: gather4_c_b_o_2d:
263; GFX10:       ; %bb.0: ; %main_body
264; GFX10-NEXT:    s_mov_b32 s1, exec_lo
265; GFX10-NEXT:    s_mov_b32 s0, s2
266; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
267; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
268; GFX10-NEXT:    s_mov_b32 s1, s3
269; GFX10-NEXT:    s_mov_b32 s2, s4
270; GFX10-NEXT:    s_mov_b32 s3, s5
271; GFX10-NEXT:    s_mov_b32 s4, s6
272; GFX10-NEXT:    s_mov_b32 s5, s7
273; GFX10-NEXT:    s_mov_b32 s6, s8
274; GFX10-NEXT:    s_mov_b32 s7, s9
275; GFX10-NEXT:    s_mov_b32 s8, s10
276; GFX10-NEXT:    s_mov_b32 s9, s11
277; GFX10-NEXT:    s_mov_b32 s10, s12
278; GFX10-NEXT:    s_mov_b32 s11, s13
279; GFX10-NEXT:    image_gather4_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
280; GFX10-NEXT:    s_waitcnt vmcnt(0)
281; GFX10-NEXT:    ; return to shader part epilog
282main_body:
283  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
284  ret <4 x float> %v
285}
286
; gather4_b_cl_o_2d: calls llvm.amdgcn.image.gather4.b.cl.o.2d with dmask 1 and
; the address operands (offset, bias, s, t, clamp), which occupy v[0:7] in the
; checked image_gather4_b_cl_o.
; Like every other variant in this file that takes no explicit lod, the
; expected output saves exec, enters whole quad mode with s_wqm and then
; re-applies the saved mask; only the _l and _lz variants omit that sequence.
287define amdgpu_ps <4 x float> @gather4_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) {
288; GFX6-LABEL: gather4_b_cl_o_2d:
289; GFX6:       ; %bb.0: ; %main_body
290; GFX6-NEXT:    s_mov_b32 s0, s2
291; GFX6-NEXT:    s_mov_b32 s1, s3
292; GFX6-NEXT:    s_mov_b32 s2, s4
293; GFX6-NEXT:    s_mov_b32 s3, s5
294; GFX6-NEXT:    s_mov_b32 s4, s6
295; GFX6-NEXT:    s_mov_b32 s5, s7
296; GFX6-NEXT:    s_mov_b32 s6, s8
297; GFX6-NEXT:    s_mov_b32 s7, s9
298; GFX6-NEXT:    s_mov_b32 s8, s10
299; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
300; GFX6-NEXT:    s_mov_b32 s10, s12
301; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
302; GFX6-NEXT:    image_gather4_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
303; GFX6-NEXT:    s_waitcnt vmcnt(0)
304; GFX6-NEXT:    ; return to shader part epilog
305;
306; GFX10-LABEL: gather4_b_cl_o_2d:
307; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s1, exec_lo
308; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
309; GFX10-NEXT:    s_mov_b32 s1, s3
310; GFX10-NEXT:    s_mov_b32 s2, s4
311; GFX10-NEXT:    s_mov_b32 s3, s5
312; GFX10-NEXT:    s_mov_b32 s4, s6
313; GFX10-NEXT:    s_mov_b32 s5, s7
314; GFX10-NEXT:    s_mov_b32 s6, s8
315; GFX10-NEXT:    s_mov_b32 s7, s9
316; GFX10-NEXT:    s_mov_b32 s8, s10
317; GFX10-NEXT:    s_mov_b32 s9, s11
318; GFX10-NEXT:    s_mov_b32 s10, s12
319; GFX10-NEXT:    s_mov_b32 s11, s13
320; GFX10-NEXT:    image_gather4_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
321; GFX10-NEXT:    s_waitcnt vmcnt(0)
322; GFX10-NEXT:    ; return to shader part epilog
323main_body:
324  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
325  ret <4 x float> %v
326}
327
; gather4_c_b_cl_o_2d: calls llvm.amdgcn.image.gather4.c.b.cl.o.2d with dmask 1
; and the address operands (offset, bias, zcompare, s, t, clamp). The checks for
; both run lines expect the exec save / s_wqm / s_and sequence and a single
; image_gather4_c_b_cl_o whose address operand is v[0:7].
328define amdgpu_ps <4 x float> @gather4_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
329; GFX6-LABEL: gather4_c_b_cl_o_2d:
330; GFX6:       ; %bb.0: ; %main_body
331; GFX6-NEXT:    s_mov_b32 s0, s2
332; GFX6-NEXT:    s_mov_b32 s1, s3
333; GFX6-NEXT:    s_mov_b32 s2, s4
334; GFX6-NEXT:    s_mov_b32 s3, s5
335; GFX6-NEXT:    s_mov_b32 s4, s6
336; GFX6-NEXT:    s_mov_b32 s5, s7
337; GFX6-NEXT:    s_mov_b32 s6, s8
338; GFX6-NEXT:    s_mov_b32 s7, s9
339; GFX6-NEXT:    s_mov_b32 s8, s10
340; GFX6-NEXT:    s_mov_b32 s9, s11
341; GFX6-NEXT:    s_mov_b64 s[14:15], exec
342; GFX6-NEXT:    s_mov_b32 s10, s12
343; GFX6-NEXT:    s_mov_b32 s11, s13
344; GFX6-NEXT:    s_wqm_b64 exec, exec
345; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
346; GFX6-NEXT:    image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
347; GFX6-NEXT:    s_waitcnt vmcnt(0)
348; GFX6-NEXT:    ; return to shader part epilog
349;
350; GFX10-LABEL: gather4_c_b_cl_o_2d:
351; GFX10:       ; %bb.0: ; %main_body
352; GFX10-NEXT:    s_mov_b32 s1, exec_lo
353; GFX10-NEXT:    s_mov_b32 s0, s2
354; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
355; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
356; GFX10-NEXT:    s_mov_b32 s1, s3
357; GFX10-NEXT:    s_mov_b32 s2, s4
358; GFX10-NEXT:    s_mov_b32 s3, s5
359; GFX10-NEXT:    s_mov_b32 s4, s6
360; GFX10-NEXT:    s_mov_b32 s5, s7
361; GFX10-NEXT:    s_mov_b32 s6, s8
362; GFX10-NEXT:    s_mov_b32 s7, s9
363; GFX10-NEXT:    s_mov_b32 s8, s10
364; GFX10-NEXT:    s_mov_b32 s9, s11
365; GFX10-NEXT:    s_mov_b32 s10, s12
366; GFX10-NEXT:    s_mov_b32 s11, s13
367; GFX10-NEXT:    image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
368; GFX10-NEXT:    s_waitcnt vmcnt(0)
369; GFX10-NEXT:    ; return to shader part epilog
370main_body:
371  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
372  ret <4 x float> %v
373}
374
; gather4_l_o_2d: calls llvm.amdgcn.image.gather4.l.o.2d with dmask 1 and the
; address operands (offset, s, t, lod). The checks for both run lines contain
; only the scalar register copies followed by a single image_gather4_l_o whose
; address operand is v[0:3]; no s_wqm or exec manipulation is expected.
375define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
376; GFX6-LABEL: gather4_l_o_2d:
377; GFX6:       ; %bb.0: ; %main_body
378; GFX6-NEXT:    s_mov_b32 s0, s2
379; GFX6-NEXT:    s_mov_b32 s1, s3
380; GFX6-NEXT:    s_mov_b32 s2, s4
381; GFX6-NEXT:    s_mov_b32 s3, s5
382; GFX6-NEXT:    s_mov_b32 s4, s6
383; GFX6-NEXT:    s_mov_b32 s5, s7
384; GFX6-NEXT:    s_mov_b32 s6, s8
385; GFX6-NEXT:    s_mov_b32 s7, s9
386; GFX6-NEXT:    s_mov_b32 s8, s10
387; GFX6-NEXT:    s_mov_b32 s9, s11
388; GFX6-NEXT:    s_mov_b32 s10, s12
389; GFX6-NEXT:    s_mov_b32 s11, s13
390; GFX6-NEXT:    image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
391; GFX6-NEXT:    s_waitcnt vmcnt(0)
392; GFX6-NEXT:    ; return to shader part epilog
393;
394; GFX10-LABEL: gather4_l_o_2d:
395; GFX10:       ; %bb.0: ; %main_body
396; GFX10-NEXT:    s_mov_b32 s0, s2
397; GFX10-NEXT:    s_mov_b32 s1, s3
398; GFX10-NEXT:    s_mov_b32 s2, s4
399; GFX10-NEXT:    s_mov_b32 s3, s5
400; GFX10-NEXT:    s_mov_b32 s4, s6
401; GFX10-NEXT:    s_mov_b32 s5, s7
402; GFX10-NEXT:    s_mov_b32 s6, s8
403; GFX10-NEXT:    s_mov_b32 s7, s9
404; GFX10-NEXT:    s_mov_b32 s8, s10
405; GFX10-NEXT:    s_mov_b32 s9, s11
406; GFX10-NEXT:    s_mov_b32 s10, s12
407; GFX10-NEXT:    s_mov_b32 s11, s13
408; GFX10-NEXT:    image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
409; GFX10-NEXT:    s_waitcnt vmcnt(0)
410; GFX10-NEXT:    ; return to shader part epilog
411main_body:
412  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
413  ret <4 x float> %v
414}
415
; gather4_c_l_o_2d: calls llvm.amdgcn.image.gather4.c.l.o.2d with dmask 1 and
; the address operands (offset, zcompare, s, t, lod). The checks for both run
; lines contain only the scalar register copies followed by a single
; image_gather4_c_l_o whose address operand is v[0:7]; no s_wqm or exec
; manipulation is expected.
416define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
417; GFX6-LABEL: gather4_c_l_o_2d:
418; GFX6:       ; %bb.0: ; %main_body
419; GFX6-NEXT:    s_mov_b32 s0, s2
420; GFX6-NEXT:    s_mov_b32 s1, s3
421; GFX6-NEXT:    s_mov_b32 s2, s4
422; GFX6-NEXT:    s_mov_b32 s3, s5
423; GFX6-NEXT:    s_mov_b32 s4, s6
424; GFX6-NEXT:    s_mov_b32 s5, s7
425; GFX6-NEXT:    s_mov_b32 s6, s8
426; GFX6-NEXT:    s_mov_b32 s7, s9
427; GFX6-NEXT:    s_mov_b32 s8, s10
428; GFX6-NEXT:    s_mov_b32 s9, s11
429; GFX6-NEXT:    s_mov_b32 s10, s12
430; GFX6-NEXT:    s_mov_b32 s11, s13
431; GFX6-NEXT:    image_gather4_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
432; GFX6-NEXT:    s_waitcnt vmcnt(0)
433; GFX6-NEXT:    ; return to shader part epilog
434;
435; GFX10-LABEL: gather4_c_l_o_2d:
436; GFX10:       ; %bb.0: ; %main_body
437; GFX10-NEXT:    s_mov_b32 s0, s2
438; GFX10-NEXT:    s_mov_b32 s1, s3
439; GFX10-NEXT:    s_mov_b32 s2, s4
440; GFX10-NEXT:    s_mov_b32 s3, s5
441; GFX10-NEXT:    s_mov_b32 s4, s6
442; GFX10-NEXT:    s_mov_b32 s5, s7
443; GFX10-NEXT:    s_mov_b32 s6, s8
444; GFX10-NEXT:    s_mov_b32 s7, s9
445; GFX10-NEXT:    s_mov_b32 s8, s10
446; GFX10-NEXT:    s_mov_b32 s9, s11
447; GFX10-NEXT:    s_mov_b32 s10, s12
448; GFX10-NEXT:    s_mov_b32 s11, s13
449; GFX10-NEXT:    image_gather4_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
450; GFX10-NEXT:    s_waitcnt vmcnt(0)
451; GFX10-NEXT:    ; return to shader part epilog
452main_body:
453  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
454  ret <4 x float> %v
455}
456
; gather4_lz_o_2d: calls llvm.amdgcn.image.gather4.lz.o.2d with dmask 1 and the
; address operands (offset, s, t). The checks for both run lines contain only
; the scalar register copies followed by a single image_gather4_lz_o whose
; address operand is v[0:2]; no s_wqm or exec manipulation is expected.
457define amdgpu_ps <4 x float> @gather4_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
458; GFX6-LABEL: gather4_lz_o_2d:
459; GFX6:       ; %bb.0: ; %main_body
460; GFX6-NEXT:    s_mov_b32 s0, s2
461; GFX6-NEXT:    s_mov_b32 s1, s3
462; GFX6-NEXT:    s_mov_b32 s2, s4
463; GFX6-NEXT:    s_mov_b32 s3, s5
464; GFX6-NEXT:    s_mov_b32 s4, s6
465; GFX6-NEXT:    s_mov_b32 s5, s7
466; GFX6-NEXT:    s_mov_b32 s6, s8
467; GFX6-NEXT:    s_mov_b32 s7, s9
468; GFX6-NEXT:    s_mov_b32 s8, s10
469; GFX6-NEXT:    s_mov_b32 s9, s11
470; GFX6-NEXT:    s_mov_b32 s10, s12
471; GFX6-NEXT:    s_mov_b32 s11, s13
472; GFX6-NEXT:    image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
473; GFX6-NEXT:    s_waitcnt vmcnt(0)
474; GFX6-NEXT:    ; return to shader part epilog
475;
476; GFX10-LABEL: gather4_lz_o_2d:
477; GFX10:       ; %bb.0: ; %main_body
478; GFX10-NEXT:    s_mov_b32 s0, s2
479; GFX10-NEXT:    s_mov_b32 s1, s3
480; GFX10-NEXT:    s_mov_b32 s2, s4
481; GFX10-NEXT:    s_mov_b32 s3, s5
482; GFX10-NEXT:    s_mov_b32 s4, s6
483; GFX10-NEXT:    s_mov_b32 s5, s7
484; GFX10-NEXT:    s_mov_b32 s6, s8
485; GFX10-NEXT:    s_mov_b32 s7, s9
486; GFX10-NEXT:    s_mov_b32 s8, s10
487; GFX10-NEXT:    s_mov_b32 s9, s11
488; GFX10-NEXT:    s_mov_b32 s10, s12
489; GFX10-NEXT:    s_mov_b32 s11, s13
490; GFX10-NEXT:    image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
491; GFX10-NEXT:    s_waitcnt vmcnt(0)
492; GFX10-NEXT:    ; return to shader part epilog
493main_body:
494  %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
495  ret <4 x float> %v
496}
497
; gather4_c_lz_o_2d: calls llvm.amdgcn.image.gather4.c.lz.o.2d with dmask 1 and
; the address operands (offset, zcompare, s, t). The checks for both run lines
; contain only the scalar register copies followed by a single
; image_gather4_c_lz_o whose address operand is v[0:3]; no s_wqm or exec
; manipulation is expected.
498define amdgpu_ps <4 x float> @gather4_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
499; GFX6-LABEL: gather4_c_lz_o_2d:
500; GFX6:       ; %bb.0: ; %main_body
501; GFX6-NEXT:    s_mov_b32 s0, s2
502; GFX6-NEXT:    s_mov_b32 s1, s3
503; GFX6-NEXT:    s_mov_b32 s2, s4
504; GFX6-NEXT:    s_mov_b32 s3, s5
505; GFX6-NEXT:    s_mov_b32 s4, s6
506; GFX6-NEXT:    s_mov_b32 s5, s7
507; GFX6-NEXT:    s_mov_b32 s6, s8
508; GFX6-NEXT:    s_mov_b32 s7, s9
509; GFX6-NEXT:    s_mov_b32 s8, s10
510; GFX6-NEXT:    s_mov_b32 s9, s11
511; GFX6-NEXT:    s_mov_b32 s10, s12
512; GFX6-NEXT:    s_mov_b32 s11, s13
513; GFX6-NEXT:    image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
514; GFX6-NEXT:    s_waitcnt vmcnt(0)
515; GFX6-NEXT:    ; return to shader part epilog
516;
517; GFX10-LABEL: gather4_c_lz_o_2d:
518; GFX10:       ; %bb.0: ; %main_body
519; GFX10-NEXT:    s_mov_b32 s0, s2
520; GFX10-NEXT:    s_mov_b32 s1, s3
521; GFX10-NEXT:    s_mov_b32 s2, s4
522; GFX10-NEXT:    s_mov_b32 s3, s5
523; GFX10-NEXT:    s_mov_b32 s4, s6
524; GFX10-NEXT:    s_mov_b32 s5, s7
525; GFX10-NEXT:    s_mov_b32 s6, s8
526; GFX10-NEXT:    s_mov_b32 s7, s9
527; GFX10-NEXT:    s_mov_b32 s8, s10
528; GFX10-NEXT:    s_mov_b32 s9, s11
529; GFX10-NEXT:    s_mov_b32 s10, s12
530; GFX10-NEXT:    s_mov_b32 s11, s13
531; GFX10-NEXT:    image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
532; GFX10-NEXT:    s_waitcnt vmcnt(0)
533; GFX10-NEXT:    ; return to shader part epilog
534main_body:
535  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
536  ret <4 x float> %v
537}
538
; Declarations of the twelve gather4 ".o" 2D intrinsics called by the test
; functions in this file. Every signature has the same shape: an immarg i32
; (passed as the constant 1 by each caller), the i32 offset, the float address
; operands for that variant, the <8 x i32> and <4 x i32> operands the callers
; pass as %rsrc and %samp, and three trailing immarg operands (i1, i32, i32).
; All declarations share attribute group #0 (nounwind readonly).
539declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
540declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
541declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
542declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
543declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
544declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
545declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
546declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
547declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
548declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
549declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
550declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
551
552attributes #0 = { nounwind readonly }
553