1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
4
; Test: image atomic swap on a 1D image with i32 data and a single i16 coordinate.
; Both check blocks expect s2-s9 to be copied down to s0-s7 (the s[0:7] operand of
; the image instruction), then one image_atomic_swap with dmask:0x1 and the a16
; modifier, then s_waitcnt vmcnt(0). The gfx1010 form additionally carries
; dim:SQ_RSRC_IMG_1D. The i32 result is bitcast to float to match the return type.
5define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
6; GFX9-LABEL: atomic_swap_i32_1d:
7; GFX9:       ; %bb.0: ; %main_body
8; GFX9-NEXT:    s_mov_b32 s0, s2
9; GFX9-NEXT:    s_mov_b32 s1, s3
10; GFX9-NEXT:    s_mov_b32 s2, s4
11; GFX9-NEXT:    s_mov_b32 s3, s5
12; GFX9-NEXT:    s_mov_b32 s4, s6
13; GFX9-NEXT:    s_mov_b32 s5, s7
14; GFX9-NEXT:    s_mov_b32 s6, s8
15; GFX9-NEXT:    s_mov_b32 s7, s9
16; GFX9-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc a16
17; GFX9-NEXT:    s_waitcnt vmcnt(0)
18; GFX9-NEXT:    ; return to shader part epilog
19;
20; GFX10-LABEL: atomic_swap_i32_1d:
21; GFX10:       ; %bb.0: ; %main_body
22; GFX10-NEXT:    s_mov_b32 s0, s2
23; GFX10-NEXT:    s_mov_b32 s1, s3
24; GFX10-NEXT:    s_mov_b32 s2, s4
25; GFX10-NEXT:    s_mov_b32 s3, s5
26; GFX10-NEXT:    s_mov_b32 s4, s6
27; GFX10-NEXT:    s_mov_b32 s5, s7
28; GFX10-NEXT:    s_mov_b32 s6, s8
29; GFX10-NEXT:    s_mov_b32 s7, s9
30; GFX10-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
31; GFX10-NEXT:    s_waitcnt vmcnt(0)
32; GFX10-NEXT:    ; return to shader part epilog
33main_body:
34  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
35  %out = bitcast i32 %v to float
36  ret float %out
37}
38
; Test: image atomic add on a 1D image with i32 data and a single i16 coordinate.
; Expected output for both targets is the s2-s9 -> s0-s7 copy sequence, a single
; image_atomic_add with dmask:0x1 and a16 (plus dim:SQ_RSRC_IMG_1D on gfx1010),
; and a wait on vmcnt(0). The i32 result is returned as a float via bitcast.
39define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
40; GFX9-LABEL: atomic_add_i32_1d:
41; GFX9:       ; %bb.0: ; %main_body
42; GFX9-NEXT:    s_mov_b32 s0, s2
43; GFX9-NEXT:    s_mov_b32 s1, s3
44; GFX9-NEXT:    s_mov_b32 s2, s4
45; GFX9-NEXT:    s_mov_b32 s3, s5
46; GFX9-NEXT:    s_mov_b32 s4, s6
47; GFX9-NEXT:    s_mov_b32 s5, s7
48; GFX9-NEXT:    s_mov_b32 s6, s8
49; GFX9-NEXT:    s_mov_b32 s7, s9
50; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16
51; GFX9-NEXT:    s_waitcnt vmcnt(0)
52; GFX9-NEXT:    ; return to shader part epilog
53;
54; GFX10-LABEL: atomic_add_i32_1d:
55; GFX10:       ; %bb.0: ; %main_body
56; GFX10-NEXT:    s_mov_b32 s0, s2
57; GFX10-NEXT:    s_mov_b32 s1, s3
58; GFX10-NEXT:    s_mov_b32 s2, s4
59; GFX10-NEXT:    s_mov_b32 s3, s5
60; GFX10-NEXT:    s_mov_b32 s4, s6
61; GFX10-NEXT:    s_mov_b32 s5, s7
62; GFX10-NEXT:    s_mov_b32 s6, s8
63; GFX10-NEXT:    s_mov_b32 s7, s9
64; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
65; GFX10-NEXT:    s_waitcnt vmcnt(0)
66; GFX10-NEXT:    ; return to shader part epilog
67main_body:
68  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
69  %out = bitcast i32 %v to float
70  ret float %out
71}
72
; Test: image atomic sub on a 1D image with i32 data and a single i16 coordinate.
; Expected output for both targets is the s2-s9 -> s0-s7 copy sequence, a single
; image_atomic_sub with dmask:0x1 and a16 (plus dim:SQ_RSRC_IMG_1D on gfx1010),
; and a wait on vmcnt(0). The i32 result is returned as a float via bitcast.
73define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
74; GFX9-LABEL: atomic_sub_i32_1d:
75; GFX9:       ; %bb.0: ; %main_body
76; GFX9-NEXT:    s_mov_b32 s0, s2
77; GFX9-NEXT:    s_mov_b32 s1, s3
78; GFX9-NEXT:    s_mov_b32 s2, s4
79; GFX9-NEXT:    s_mov_b32 s3, s5
80; GFX9-NEXT:    s_mov_b32 s4, s6
81; GFX9-NEXT:    s_mov_b32 s5, s7
82; GFX9-NEXT:    s_mov_b32 s6, s8
83; GFX9-NEXT:    s_mov_b32 s7, s9
84; GFX9-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc a16
85; GFX9-NEXT:    s_waitcnt vmcnt(0)
86; GFX9-NEXT:    ; return to shader part epilog
87;
88; GFX10-LABEL: atomic_sub_i32_1d:
89; GFX10:       ; %bb.0: ; %main_body
90; GFX10-NEXT:    s_mov_b32 s0, s2
91; GFX10-NEXT:    s_mov_b32 s1, s3
92; GFX10-NEXT:    s_mov_b32 s2, s4
93; GFX10-NEXT:    s_mov_b32 s3, s5
94; GFX10-NEXT:    s_mov_b32 s4, s6
95; GFX10-NEXT:    s_mov_b32 s5, s7
96; GFX10-NEXT:    s_mov_b32 s6, s8
97; GFX10-NEXT:    s_mov_b32 s7, s9
98; GFX10-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
99; GFX10-NEXT:    s_waitcnt vmcnt(0)
100; GFX10-NEXT:    ; return to shader part epilog
101main_body:
102  %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
103  %out = bitcast i32 %v to float
104  ret float %out
105}
106
; Test: image atomic smin on a 1D image with i32 data and a single i16 coordinate.
; Expected output for both targets is the s2-s9 -> s0-s7 copy sequence, a single
; image_atomic_smin with dmask:0x1 and a16 (plus dim:SQ_RSRC_IMG_1D on gfx1010),
; and a wait on vmcnt(0). The i32 result is returned as a float via bitcast.
107define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
108; GFX9-LABEL: atomic_smin_i32_1d:
109; GFX9:       ; %bb.0: ; %main_body
110; GFX9-NEXT:    s_mov_b32 s0, s2
111; GFX9-NEXT:    s_mov_b32 s1, s3
112; GFX9-NEXT:    s_mov_b32 s2, s4
113; GFX9-NEXT:    s_mov_b32 s3, s5
114; GFX9-NEXT:    s_mov_b32 s4, s6
115; GFX9-NEXT:    s_mov_b32 s5, s7
116; GFX9-NEXT:    s_mov_b32 s6, s8
117; GFX9-NEXT:    s_mov_b32 s7, s9
118; GFX9-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc a16
119; GFX9-NEXT:    s_waitcnt vmcnt(0)
120; GFX9-NEXT:    ; return to shader part epilog
121;
122; GFX10-LABEL: atomic_smin_i32_1d:
123; GFX10:       ; %bb.0: ; %main_body
124; GFX10-NEXT:    s_mov_b32 s0, s2
125; GFX10-NEXT:    s_mov_b32 s1, s3
126; GFX10-NEXT:    s_mov_b32 s2, s4
127; GFX10-NEXT:    s_mov_b32 s3, s5
128; GFX10-NEXT:    s_mov_b32 s4, s6
129; GFX10-NEXT:    s_mov_b32 s5, s7
130; GFX10-NEXT:    s_mov_b32 s6, s8
131; GFX10-NEXT:    s_mov_b32 s7, s9
132; GFX10-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
133; GFX10-NEXT:    s_waitcnt vmcnt(0)
134; GFX10-NEXT:    ; return to shader part epilog
135main_body:
136  %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
137  %out = bitcast i32 %v to float
138  ret float %out
139}
140
; Test: image atomic umin on a 1D image with i32 data and a single i16 coordinate.
; Expected output for both targets is the s2-s9 -> s0-s7 copy sequence, a single
; image_atomic_umin with dmask:0x1 and a16 (plus dim:SQ_RSRC_IMG_1D on gfx1010),
; and a wait on vmcnt(0). The i32 result is returned as a float via bitcast.
141define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
142; GFX9-LABEL: atomic_umin_i32_1d:
143; GFX9:       ; %bb.0: ; %main_body
144; GFX9-NEXT:    s_mov_b32 s0, s2
145; GFX9-NEXT:    s_mov_b32 s1, s3
146; GFX9-NEXT:    s_mov_b32 s2, s4
147; GFX9-NEXT:    s_mov_b32 s3, s5
148; GFX9-NEXT:    s_mov_b32 s4, s6
149; GFX9-NEXT:    s_mov_b32 s5, s7
150; GFX9-NEXT:    s_mov_b32 s6, s8
151; GFX9-NEXT:    s_mov_b32 s7, s9
152; GFX9-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc a16
153; GFX9-NEXT:    s_waitcnt vmcnt(0)
154; GFX9-NEXT:    ; return to shader part epilog
155;
156; GFX10-LABEL: atomic_umin_i32_1d:
157; GFX10:       ; %bb.0: ; %main_body
158; GFX10-NEXT:    s_mov_b32 s0, s2
159; GFX10-NEXT:    s_mov_b32 s1, s3
160; GFX10-NEXT:    s_mov_b32 s2, s4
161; GFX10-NEXT:    s_mov_b32 s3, s5
162; GFX10-NEXT:    s_mov_b32 s4, s6
163; GFX10-NEXT:    s_mov_b32 s5, s7
164; GFX10-NEXT:    s_mov_b32 s6, s8
165; GFX10-NEXT:    s_mov_b32 s7, s9
166; GFX10-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
167; GFX10-NEXT:    s_waitcnt vmcnt(0)
168; GFX10-NEXT:    ; return to shader part epilog
169main_body:
170  %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
171  %out = bitcast i32 %v to float
172  ret float %out
173}
174
; Test: image atomic smax on a 1D image with i32 data and a single i16 coordinate.
; Expected output for both targets is the s2-s9 -> s0-s7 copy sequence, a single
; image_atomic_smax with dmask:0x1 and a16 (plus dim:SQ_RSRC_IMG_1D on gfx1010),
; and a wait on vmcnt(0). The i32 result is returned as a float via bitcast.
175define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
176; GFX9-LABEL: atomic_smax_i32_1d:
177; GFX9:       ; %bb.0: ; %main_body
178; GFX9-NEXT:    s_mov_b32 s0, s2
179; GFX9-NEXT:    s_mov_b32 s1, s3
180; GFX9-NEXT:    s_mov_b32 s2, s4
181; GFX9-NEXT:    s_mov_b32 s3, s5
182; GFX9-NEXT:    s_mov_b32 s4, s6
183; GFX9-NEXT:    s_mov_b32 s5, s7
184; GFX9-NEXT:    s_mov_b32 s6, s8
185; GFX9-NEXT:    s_mov_b32 s7, s9
186; GFX9-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc a16
187; GFX9-NEXT:    s_waitcnt vmcnt(0)
188; GFX9-NEXT:    ; return to shader part epilog
189;
190; GFX10-LABEL: atomic_smax_i32_1d:
191; GFX10:       ; %bb.0: ; %main_body
192; GFX10-NEXT:    s_mov_b32 s0, s2
193; GFX10-NEXT:    s_mov_b32 s1, s3
194; GFX10-NEXT:    s_mov_b32 s2, s4
195; GFX10-NEXT:    s_mov_b32 s3, s5
196; GFX10-NEXT:    s_mov_b32 s4, s6
197; GFX10-NEXT:    s_mov_b32 s5, s7
198; GFX10-NEXT:    s_mov_b32 s6, s8
199; GFX10-NEXT:    s_mov_b32 s7, s9
200; GFX10-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
201; GFX10-NEXT:    s_waitcnt vmcnt(0)
202; GFX10-NEXT:    ; return to shader part epilog
203main_body:
204  %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
205  %out = bitcast i32 %v to float
206  ret float %out
207}
208
; Test: image atomic umax on a 1D image with i32 data and a single i16 coordinate.
; Expected output for both targets is the s2-s9 -> s0-s7 copy sequence, a single
; image_atomic_umax with dmask:0x1 and a16 (plus dim:SQ_RSRC_IMG_1D on gfx1010),
; and a wait on vmcnt(0). The i32 result is returned as a float via bitcast.
209define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
210; GFX9-LABEL: atomic_umax_i32_1d:
211; GFX9:       ; %bb.0: ; %main_body
212; GFX9-NEXT:    s_mov_b32 s0, s2
213; GFX9-NEXT:    s_mov_b32 s1, s3
214; GFX9-NEXT:    s_mov_b32 s2, s4
215; GFX9-NEXT:    s_mov_b32 s3, s5
216; GFX9-NEXT:    s_mov_b32 s4, s6
217; GFX9-NEXT:    s_mov_b32 s5, s7
218; GFX9-NEXT:    s_mov_b32 s6, s8
219; GFX9-NEXT:    s_mov_b32 s7, s9
220; GFX9-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc a16
221; GFX9-NEXT:    s_waitcnt vmcnt(0)
222; GFX9-NEXT:    ; return to shader part epilog
223;
224; GFX10-LABEL: atomic_umax_i32_1d:
225; GFX10:       ; %bb.0: ; %main_body
226; GFX10-NEXT:    s_mov_b32 s0, s2
227; GFX10-NEXT:    s_mov_b32 s1, s3
228; GFX10-NEXT:    s_mov_b32 s2, s4
229; GFX10-NEXT:    s_mov_b32 s3, s5
230; GFX10-NEXT:    s_mov_b32 s4, s6
231; GFX10-NEXT:    s_mov_b32 s5, s7
232; GFX10-NEXT:    s_mov_b32 s6, s8
233; GFX10-NEXT:    s_mov_b32 s7, s9
234; GFX10-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
235; GFX10-NEXT:    s_waitcnt vmcnt(0)
236; GFX10-NEXT:    ; return to shader part epilog
237main_body:
238  %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
239  %out = bitcast i32 %v to float
240  ret float %out
241}
242
; Test: image atomic and on a 1D image with i32 data and a single i16 coordinate.
; Expected output for both targets is the s2-s9 -> s0-s7 copy sequence, a single
; image_atomic_and with dmask:0x1 and a16 (plus dim:SQ_RSRC_IMG_1D on gfx1010),
; and a wait on vmcnt(0). The i32 result is returned as a float via bitcast.
; NOTE(review): this function is spelled "atomic_and_i321d", without the '_'
; before "1d" that sibling tests such as atomic_or_i32_1d use; if it is renamed,
; both LABEL check lines below must be updated to match.
243define amdgpu_ps float @atomic_and_i321d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
244; GFX9-LABEL: atomic_and_i321d:
245; GFX9:       ; %bb.0: ; %main_body
246; GFX9-NEXT:    s_mov_b32 s0, s2
247; GFX9-NEXT:    s_mov_b32 s1, s3
248; GFX9-NEXT:    s_mov_b32 s2, s4
249; GFX9-NEXT:    s_mov_b32 s3, s5
250; GFX9-NEXT:    s_mov_b32 s4, s6
251; GFX9-NEXT:    s_mov_b32 s5, s7
252; GFX9-NEXT:    s_mov_b32 s6, s8
253; GFX9-NEXT:    s_mov_b32 s7, s9
254; GFX9-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc a16
255; GFX9-NEXT:    s_waitcnt vmcnt(0)
256; GFX9-NEXT:    ; return to shader part epilog
257;
258; GFX10-LABEL: atomic_and_i321d:
259; GFX10:       ; %bb.0: ; %main_body
260; GFX10-NEXT:    s_mov_b32 s0, s2
261; GFX10-NEXT:    s_mov_b32 s1, s3
262; GFX10-NEXT:    s_mov_b32 s2, s4
263; GFX10-NEXT:    s_mov_b32 s3, s5
264; GFX10-NEXT:    s_mov_b32 s4, s6
265; GFX10-NEXT:    s_mov_b32 s5, s7
266; GFX10-NEXT:    s_mov_b32 s6, s8
267; GFX10-NEXT:    s_mov_b32 s7, s9
268; GFX10-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
269; GFX10-NEXT:    s_waitcnt vmcnt(0)
270; GFX10-NEXT:    ; return to shader part epilog
271main_body:
272  %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
273  %out = bitcast i32 %v to float
274  ret float %out
275}
276
; Test: image atomic or on a 1D image with i32 data and a single i16 coordinate.
; Expected output for both targets is the s2-s9 -> s0-s7 copy sequence, a single
; image_atomic_or with dmask:0x1 and a16 (plus dim:SQ_RSRC_IMG_1D on gfx1010),
; and a wait on vmcnt(0). The i32 result is returned as a float via bitcast.
277define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
278; GFX9-LABEL: atomic_or_i32_1d:
279; GFX9:       ; %bb.0: ; %main_body
280; GFX9-NEXT:    s_mov_b32 s0, s2
281; GFX9-NEXT:    s_mov_b32 s1, s3
282; GFX9-NEXT:    s_mov_b32 s2, s4
283; GFX9-NEXT:    s_mov_b32 s3, s5
284; GFX9-NEXT:    s_mov_b32 s4, s6
285; GFX9-NEXT:    s_mov_b32 s5, s7
286; GFX9-NEXT:    s_mov_b32 s6, s8
287; GFX9-NEXT:    s_mov_b32 s7, s9
288; GFX9-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc a16
289; GFX9-NEXT:    s_waitcnt vmcnt(0)
290; GFX9-NEXT:    ; return to shader part epilog
291;
292; GFX10-LABEL: atomic_or_i32_1d:
293; GFX10:       ; %bb.0: ; %main_body
294; GFX10-NEXT:    s_mov_b32 s0, s2
295; GFX10-NEXT:    s_mov_b32 s1, s3
296; GFX10-NEXT:    s_mov_b32 s2, s4
297; GFX10-NEXT:    s_mov_b32 s3, s5
298; GFX10-NEXT:    s_mov_b32 s4, s6
299; GFX10-NEXT:    s_mov_b32 s5, s7
300; GFX10-NEXT:    s_mov_b32 s6, s8
301; GFX10-NEXT:    s_mov_b32 s7, s9
302; GFX10-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
303; GFX10-NEXT:    s_waitcnt vmcnt(0)
304; GFX10-NEXT:    ; return to shader part epilog
305main_body:
306  %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
307  %out = bitcast i32 %v to float
308  ret float %out
309}
310
; Test: image atomic xor on a 1D image with i32 data and a single i16 coordinate.
; Expected output for both targets is the s2-s9 -> s0-s7 copy sequence, a single
; image_atomic_xor with dmask:0x1 and a16 (plus dim:SQ_RSRC_IMG_1D on gfx1010),
; and a wait on vmcnt(0). The i32 result is returned as a float via bitcast.
311define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
312; GFX9-LABEL: atomic_xor_i32_1d:
313; GFX9:       ; %bb.0: ; %main_body
314; GFX9-NEXT:    s_mov_b32 s0, s2
315; GFX9-NEXT:    s_mov_b32 s1, s3
316; GFX9-NEXT:    s_mov_b32 s2, s4
317; GFX9-NEXT:    s_mov_b32 s3, s5
318; GFX9-NEXT:    s_mov_b32 s4, s6
319; GFX9-NEXT:    s_mov_b32 s5, s7
320; GFX9-NEXT:    s_mov_b32 s6, s8
321; GFX9-NEXT:    s_mov_b32 s7, s9
322; GFX9-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc a16
323; GFX9-NEXT:    s_waitcnt vmcnt(0)
324; GFX9-NEXT:    ; return to shader part epilog
325;
326; GFX10-LABEL: atomic_xor_i32_1d:
327; GFX10:       ; %bb.0: ; %main_body
328; GFX10-NEXT:    s_mov_b32 s0, s2
329; GFX10-NEXT:    s_mov_b32 s1, s3
330; GFX10-NEXT:    s_mov_b32 s2, s4
331; GFX10-NEXT:    s_mov_b32 s3, s5
332; GFX10-NEXT:    s_mov_b32 s4, s6
333; GFX10-NEXT:    s_mov_b32 s5, s7
334; GFX10-NEXT:    s_mov_b32 s6, s8
335; GFX10-NEXT:    s_mov_b32 s7, s9
336; GFX10-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
337; GFX10-NEXT:    s_waitcnt vmcnt(0)
338; GFX10-NEXT:    ; return to shader part epilog
339main_body:
340  %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
341  %out = bitcast i32 %v to float
342  ret float %out
343}
344
; Test: image atomic inc on a 1D image with i32 data and a single i16 coordinate.
; Expected output for both targets is the s2-s9 -> s0-s7 copy sequence, a single
; image_atomic_inc with dmask:0x1 and a16 (plus dim:SQ_RSRC_IMG_1D on gfx1010),
; and a wait on vmcnt(0). The i32 result is returned as a float via bitcast.
345define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
346; GFX9-LABEL: atomic_inc_i32_1d:
347; GFX9:       ; %bb.0: ; %main_body
348; GFX9-NEXT:    s_mov_b32 s0, s2
349; GFX9-NEXT:    s_mov_b32 s1, s3
350; GFX9-NEXT:    s_mov_b32 s2, s4
351; GFX9-NEXT:    s_mov_b32 s3, s5
352; GFX9-NEXT:    s_mov_b32 s4, s6
353; GFX9-NEXT:    s_mov_b32 s5, s7
354; GFX9-NEXT:    s_mov_b32 s6, s8
355; GFX9-NEXT:    s_mov_b32 s7, s9
356; GFX9-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc a16
357; GFX9-NEXT:    s_waitcnt vmcnt(0)
358; GFX9-NEXT:    ; return to shader part epilog
359;
360; GFX10-LABEL: atomic_inc_i32_1d:
361; GFX10:       ; %bb.0: ; %main_body
362; GFX10-NEXT:    s_mov_b32 s0, s2
363; GFX10-NEXT:    s_mov_b32 s1, s3
364; GFX10-NEXT:    s_mov_b32 s2, s4
365; GFX10-NEXT:    s_mov_b32 s3, s5
366; GFX10-NEXT:    s_mov_b32 s4, s6
367; GFX10-NEXT:    s_mov_b32 s5, s7
368; GFX10-NEXT:    s_mov_b32 s6, s8
369; GFX10-NEXT:    s_mov_b32 s7, s9
370; GFX10-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
371; GFX10-NEXT:    s_waitcnt vmcnt(0)
372; GFX10-NEXT:    ; return to shader part epilog
373main_body:
374  %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
375  %out = bitcast i32 %v to float
376  ret float %out
377}
378
; Test: image atomic dec on a 1D image with i32 data and a single i16 coordinate.
; Expected output for both targets is the s2-s9 -> s0-s7 copy sequence, a single
; image_atomic_dec with dmask:0x1 and a16 (plus dim:SQ_RSRC_IMG_1D on gfx1010),
; and a wait on vmcnt(0). The i32 result is returned as a float via bitcast.
379define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
380; GFX9-LABEL: atomic_dec_i32_1d:
381; GFX9:       ; %bb.0: ; %main_body
382; GFX9-NEXT:    s_mov_b32 s0, s2
383; GFX9-NEXT:    s_mov_b32 s1, s3
384; GFX9-NEXT:    s_mov_b32 s2, s4
385; GFX9-NEXT:    s_mov_b32 s3, s5
386; GFX9-NEXT:    s_mov_b32 s4, s6
387; GFX9-NEXT:    s_mov_b32 s5, s7
388; GFX9-NEXT:    s_mov_b32 s6, s8
389; GFX9-NEXT:    s_mov_b32 s7, s9
390; GFX9-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc a16
391; GFX9-NEXT:    s_waitcnt vmcnt(0)
392; GFX9-NEXT:    ; return to shader part epilog
393;
394; GFX10-LABEL: atomic_dec_i32_1d:
395; GFX10:       ; %bb.0: ; %main_body
396; GFX10-NEXT:    s_mov_b32 s0, s2
397; GFX10-NEXT:    s_mov_b32 s1, s3
398; GFX10-NEXT:    s_mov_b32 s2, s4
399; GFX10-NEXT:    s_mov_b32 s3, s5
400; GFX10-NEXT:    s_mov_b32 s4, s6
401; GFX10-NEXT:    s_mov_b32 s5, s7
402; GFX10-NEXT:    s_mov_b32 s6, s8
403; GFX10-NEXT:    s_mov_b32 s7, s9
404; GFX10-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
405; GFX10-NEXT:    s_waitcnt vmcnt(0)
406; GFX10-NEXT:    ; return to shader part epilog
407main_body:
408  %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
409  %out = bitcast i32 %v to float
410  ret float %out
411}
412
; Test: image atomic cmpswap on a 1D image with two i32 data operands (%cmp and
; %swap) and a single i16 coordinate. Unlike the single-data i32 tests, the
; expected instruction takes a two-register data operand v[0:1] with dmask:0x3,
; and the address operand is v2. The a16 modifier is expected on both targets,
; with dim:SQ_RSRC_IMG_1D added for gfx1010. The i32 result is bitcast to float.
413define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s) {
414; GFX9-LABEL: atomic_cmpswap_i32_1d:
415; GFX9:       ; %bb.0: ; %main_body
416; GFX9-NEXT:    s_mov_b32 s0, s2
417; GFX9-NEXT:    s_mov_b32 s1, s3
418; GFX9-NEXT:    s_mov_b32 s2, s4
419; GFX9-NEXT:    s_mov_b32 s3, s5
420; GFX9-NEXT:    s_mov_b32 s4, s6
421; GFX9-NEXT:    s_mov_b32 s5, s7
422; GFX9-NEXT:    s_mov_b32 s6, s8
423; GFX9-NEXT:    s_mov_b32 s7, s9
424; GFX9-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
425; GFX9-NEXT:    s_waitcnt vmcnt(0)
426; GFX9-NEXT:    ; return to shader part epilog
427;
428; GFX10-LABEL: atomic_cmpswap_i32_1d:
429; GFX10:       ; %bb.0: ; %main_body
430; GFX10-NEXT:    s_mov_b32 s0, s2
431; GFX10-NEXT:    s_mov_b32 s1, s3
432; GFX10-NEXT:    s_mov_b32 s2, s4
433; GFX10-NEXT:    s_mov_b32 s3, s5
434; GFX10-NEXT:    s_mov_b32 s4, s6
435; GFX10-NEXT:    s_mov_b32 s5, s7
436; GFX10-NEXT:    s_mov_b32 s6, s8
437; GFX10-NEXT:    s_mov_b32 s7, s9
438; GFX10-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
439; GFX10-NEXT:    s_waitcnt vmcnt(0)
440; GFX10-NEXT:    ; return to shader part epilog
441main_body:
442  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
443  %out = bitcast i32 %v to float
444  ret float %out
445}
446
; Test: image atomic add on a 2D image with i32 data and two i16 coordinates
; (%s, %t). The expected code shifts v2 left by 16 and merges it with v1 masked
; by 0xffff (v_and_or_b32), so a single VGPR, v1, is the address operand of the
; a16 image instruction. The gfx900 checks materialize the 0xffff mask in v3
; first, while the gfx1010 checks use it as an inline literal. gfx1010 also
; carries dim:SQ_RSRC_IMG_2D. The i32 result is bitcast to float for return.
447define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t) {
448; GFX9-LABEL: atomic_add_i32_2d:
449; GFX9:       ; %bb.0: ; %main_body
450; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
451; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
452; GFX9-NEXT:    s_mov_b32 s0, s2
453; GFX9-NEXT:    s_mov_b32 s1, s3
454; GFX9-NEXT:    s_mov_b32 s2, s4
455; GFX9-NEXT:    s_mov_b32 s3, s5
456; GFX9-NEXT:    s_mov_b32 s4, s6
457; GFX9-NEXT:    s_mov_b32 s5, s7
458; GFX9-NEXT:    s_mov_b32 s6, s8
459; GFX9-NEXT:    s_mov_b32 s7, s9
460; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
461; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16
462; GFX9-NEXT:    s_waitcnt vmcnt(0)
463; GFX9-NEXT:    ; return to shader part epilog
464;
465; GFX10-LABEL: atomic_add_i32_2d:
466; GFX10:       ; %bb.0: ; %main_body
467; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
468; GFX10-NEXT:    s_mov_b32 s0, s2
469; GFX10-NEXT:    s_mov_b32 s1, s3
470; GFX10-NEXT:    s_mov_b32 s2, s4
471; GFX10-NEXT:    s_mov_b32 s3, s5
472; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
473; GFX10-NEXT:    s_mov_b32 s4, s6
474; GFX10-NEXT:    s_mov_b32 s5, s7
475; GFX10-NEXT:    s_mov_b32 s6, s8
476; GFX10-NEXT:    s_mov_b32 s7, s9
477; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc a16
478; GFX10-NEXT:    s_waitcnt vmcnt(0)
479; GFX10-NEXT:    ; return to shader part epilog
480main_body:
481  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
482  %out = bitcast i32 %v to float
483  ret float %out
484}
485
; Test: image atomic add on a 3D image with i32 data and three i16 coordinates
; (%s, %t, %r). The expected a16 instruction takes a two-register address
; operand v[1:2]: v1 is built from v1 masked by 0xffff OR'd with v2 << 16, and
; v2 is built from v3 masked by 0xffff OR'd with s8, where s8 = s0 << 16.
; NOTE(review): s0 holds a copied resource dword at that point, so the upper
; half of the second address dword is presumably a don't-care filler - confirm.
; gfx1010 carries dim:SQ_RSRC_IMG_3D. The i32 result is bitcast to float.
486define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %r) {
487; GFX9-LABEL: atomic_add_i32_3d:
488; GFX9:       ; %bb.0: ; %main_body
489; GFX9-NEXT:    s_mov_b32 s0, s2
490; GFX9-NEXT:    s_mov_b32 s2, s4
491; GFX9-NEXT:    s_mov_b32 s4, s6
492; GFX9-NEXT:    s_mov_b32 s6, s8
493; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
494; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
495; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
496; GFX9-NEXT:    s_mov_b32 s1, s3
497; GFX9-NEXT:    s_mov_b32 s3, s5
498; GFX9-NEXT:    s_mov_b32 s5, s7
499; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
500; GFX9-NEXT:    s_mov_b32 s7, s9
501; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
502; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16
503; GFX9-NEXT:    s_waitcnt vmcnt(0)
504; GFX9-NEXT:    ; return to shader part epilog
505;
506; GFX10-LABEL: atomic_add_i32_3d:
507; GFX10:       ; %bb.0: ; %main_body
508; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
509; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
510; GFX10-NEXT:    s_mov_b32 s0, s2
511; GFX10-NEXT:    s_mov_b32 s2, s4
512; GFX10-NEXT:    s_mov_b32 s4, s6
513; GFX10-NEXT:    s_mov_b32 s6, s8
514; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
515; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
516; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
517; GFX10-NEXT:    s_mov_b32 s1, s3
518; GFX10-NEXT:    s_mov_b32 s3, s5
519; GFX10-NEXT:    s_mov_b32 s5, s7
520; GFX10-NEXT:    s_mov_b32 s7, s9
521; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc a16
522; GFX10-NEXT:    s_waitcnt vmcnt(0)
523; GFX10-NEXT:    ; return to shader part epilog
524main_body:
525  %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
526  %out = bitcast i32 %v to float
527  ret float %out
528}
529
; Test: image atomic add on a cube image with i32 data and three i16 operands
; (%s, %t, %face). The expected a16 instruction takes a two-register address
; operand v[1:2]: v1 is built from v1 masked by 0xffff OR'd with v2 << 16, and
; v2 is built from v3 masked by 0xffff OR'd with s8, where s8 = s0 << 16.
; The gfx900 checks expect the trailing "da" modifier; the gfx1010 checks
; expect dim:SQ_RSRC_IMG_CUBE instead. The i32 result is bitcast to float.
530define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %face) {
531; GFX9-LABEL: atomic_add_i32_cube:
532; GFX9:       ; %bb.0: ; %main_body
533; GFX9-NEXT:    s_mov_b32 s0, s2
534; GFX9-NEXT:    s_mov_b32 s2, s4
535; GFX9-NEXT:    s_mov_b32 s4, s6
536; GFX9-NEXT:    s_mov_b32 s6, s8
537; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
538; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
539; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
540; GFX9-NEXT:    s_mov_b32 s1, s3
541; GFX9-NEXT:    s_mov_b32 s3, s5
542; GFX9-NEXT:    s_mov_b32 s5, s7
543; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
544; GFX9-NEXT:    s_mov_b32 s7, s9
545; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
546; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
547; GFX9-NEXT:    s_waitcnt vmcnt(0)
548; GFX9-NEXT:    ; return to shader part epilog
549;
550; GFX10-LABEL: atomic_add_i32_cube:
551; GFX10:       ; %bb.0: ; %main_body
552; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
553; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
554; GFX10-NEXT:    s_mov_b32 s0, s2
555; GFX10-NEXT:    s_mov_b32 s2, s4
556; GFX10-NEXT:    s_mov_b32 s4, s6
557; GFX10-NEXT:    s_mov_b32 s6, s8
558; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
559; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
560; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
561; GFX10-NEXT:    s_mov_b32 s1, s3
562; GFX10-NEXT:    s_mov_b32 s3, s5
563; GFX10-NEXT:    s_mov_b32 s5, s7
564; GFX10-NEXT:    s_mov_b32 s7, s9
565; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc a16
566; GFX10-NEXT:    s_waitcnt vmcnt(0)
567; GFX10-NEXT:    ; return to shader part epilog
568main_body:
569  %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0)
570  %out = bitcast i32 %v to float
571  ret float %out
572}
573
; Test: image atomic add on a 1D array image with i32 data and two i16
; operands (%s, %slice). The expected code shifts v2 left by 16 and merges it
; with v1 masked by 0xffff (v_and_or_b32), so a single VGPR, v1, is the address
; operand. The gfx900 checks expect the trailing "da" modifier; the gfx1010
; checks expect dim:SQ_RSRC_IMG_1D_ARRAY. The i32 result is bitcast to float.
574define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %slice) {
575; GFX9-LABEL: atomic_add_i32_1darray:
576; GFX9:       ; %bb.0: ; %main_body
577; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
578; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
579; GFX9-NEXT:    s_mov_b32 s0, s2
580; GFX9-NEXT:    s_mov_b32 s1, s3
581; GFX9-NEXT:    s_mov_b32 s2, s4
582; GFX9-NEXT:    s_mov_b32 s3, s5
583; GFX9-NEXT:    s_mov_b32 s4, s6
584; GFX9-NEXT:    s_mov_b32 s5, s7
585; GFX9-NEXT:    s_mov_b32 s6, s8
586; GFX9-NEXT:    s_mov_b32 s7, s9
587; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
588; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16 da
589; GFX9-NEXT:    s_waitcnt vmcnt(0)
590; GFX9-NEXT:    ; return to shader part epilog
591;
592; GFX10-LABEL: atomic_add_i32_1darray:
593; GFX10:       ; %bb.0: ; %main_body
594; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
595; GFX10-NEXT:    s_mov_b32 s0, s2
596; GFX10-NEXT:    s_mov_b32 s1, s3
597; GFX10-NEXT:    s_mov_b32 s2, s4
598; GFX10-NEXT:    s_mov_b32 s3, s5
599; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
600; GFX10-NEXT:    s_mov_b32 s4, s6
601; GFX10-NEXT:    s_mov_b32 s5, s7
602; GFX10-NEXT:    s_mov_b32 s6, s8
603; GFX10-NEXT:    s_mov_b32 s7, s9
604; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc a16
605; GFX10-NEXT:    s_waitcnt vmcnt(0)
606; GFX10-NEXT:    ; return to shader part epilog
607main_body:
608  %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
609  %out = bitcast i32 %v to float
610  ret float %out
611}
612
; Test: image atomic add on a 2D array image with i32 data and three i16
; operands (%s, %t, %slice). The expected a16 instruction takes a two-register
; address operand v[1:2]: v1 is built from v1 masked by 0xffff OR'd with
; v2 << 16, and v2 is built from v3 masked by 0xffff OR'd with s8 = s0 << 16.
; The gfx900 checks expect the trailing "da" modifier; the gfx1010 checks
; expect dim:SQ_RSRC_IMG_2D_ARRAY. The i32 result is bitcast to float.
613define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice) {
614; GFX9-LABEL: atomic_add_i32_2darray:
615; GFX9:       ; %bb.0: ; %main_body
616; GFX9-NEXT:    s_mov_b32 s0, s2
617; GFX9-NEXT:    s_mov_b32 s2, s4
618; GFX9-NEXT:    s_mov_b32 s4, s6
619; GFX9-NEXT:    s_mov_b32 s6, s8
620; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
621; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
622; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
623; GFX9-NEXT:    s_mov_b32 s1, s3
624; GFX9-NEXT:    s_mov_b32 s3, s5
625; GFX9-NEXT:    s_mov_b32 s5, s7
626; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
627; GFX9-NEXT:    s_mov_b32 s7, s9
628; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
629; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
630; GFX9-NEXT:    s_waitcnt vmcnt(0)
631; GFX9-NEXT:    ; return to shader part epilog
632;
633; GFX10-LABEL: atomic_add_i32_2darray:
634; GFX10:       ; %bb.0: ; %main_body
635; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
636; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
637; GFX10-NEXT:    s_mov_b32 s0, s2
638; GFX10-NEXT:    s_mov_b32 s2, s4
639; GFX10-NEXT:    s_mov_b32 s4, s6
640; GFX10-NEXT:    s_mov_b32 s6, s8
641; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
642; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
643; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
644; GFX10-NEXT:    s_mov_b32 s1, s3
645; GFX10-NEXT:    s_mov_b32 s3, s5
646; GFX10-NEXT:    s_mov_b32 s5, s7
647; GFX10-NEXT:    s_mov_b32 s7, s9
648; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc a16
649; GFX10-NEXT:    s_waitcnt vmcnt(0)
650; GFX10-NEXT:    ; return to shader part epilog
651main_body:
652  %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
653  %out = bitcast i32 %v to float
654  ret float %out
655}
656
; Test: image atomic add on a 2D MSAA image with i32 data and three i16
; operands (%s, %t, %fragid). The expected a16 instruction takes a two-register
; address operand v[1:2]: v1 is built from v1 masked by 0xffff OR'd with
; v2 << 16, and v2 is built from v3 masked by 0xffff OR'd with s8 = s0 << 16.
; No "da" modifier is expected for gfx900 here; the gfx1010 checks expect
; dim:SQ_RSRC_IMG_2D_MSAA. The i32 result is bitcast to float for return.
657define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %fragid) {
658; GFX9-LABEL: atomic_add_i32_2dmsaa:
659; GFX9:       ; %bb.0: ; %main_body
660; GFX9-NEXT:    s_mov_b32 s0, s2
661; GFX9-NEXT:    s_mov_b32 s2, s4
662; GFX9-NEXT:    s_mov_b32 s4, s6
663; GFX9-NEXT:    s_mov_b32 s6, s8
664; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
665; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
666; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
667; GFX9-NEXT:    s_mov_b32 s1, s3
668; GFX9-NEXT:    s_mov_b32 s3, s5
669; GFX9-NEXT:    s_mov_b32 s5, s7
670; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
671; GFX9-NEXT:    s_mov_b32 s7, s9
672; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
673; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16
674; GFX9-NEXT:    s_waitcnt vmcnt(0)
675; GFX9-NEXT:    ; return to shader part epilog
676;
677; GFX10-LABEL: atomic_add_i32_2dmsaa:
678; GFX10:       ; %bb.0: ; %main_body
679; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
680; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
681; GFX10-NEXT:    s_mov_b32 s0, s2
682; GFX10-NEXT:    s_mov_b32 s2, s4
683; GFX10-NEXT:    s_mov_b32 s4, s6
684; GFX10-NEXT:    s_mov_b32 s6, s8
685; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
686; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
687; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
688; GFX10-NEXT:    s_mov_b32 s1, s3
689; GFX10-NEXT:    s_mov_b32 s3, s5
690; GFX10-NEXT:    s_mov_b32 s5, s7
691; GFX10-NEXT:    s_mov_b32 s7, s9
692; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc a16
693; GFX10-NEXT:    s_waitcnt vmcnt(0)
694; GFX10-NEXT:    ; return to shader part epilog
695main_body:
696  %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
697  %out = bitcast i32 %v to float
698  ret float %out
699}
700
; Test: image atomic add on a 2D array MSAA image with i32 data and four i16
; operands (%s, %t, %slice, %fragid). The expected code forms two packed
; address dwords, each via a shift-left-by-16 plus v_and_or_b32 with a 0xffff
; mask held in v5: v1 from v1/v2 and v2 from v3/v4. The a16 instruction then
; takes v[1:2] as its address operand. The gfx900 checks expect the trailing
; "da" modifier; the gfx1010 checks expect dim:SQ_RSRC_IMG_2D_MSAA_ARRAY.
; The i32 result is bitcast to float for return.
701define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
702; GFX9-LABEL: atomic_add_i32_2darraymsaa:
703; GFX9:       ; %bb.0: ; %main_body
704; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
705; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
706; GFX9-NEXT:    v_and_or_b32 v1, v1, v5, v2
707; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
708; GFX9-NEXT:    s_mov_b32 s0, s2
709; GFX9-NEXT:    s_mov_b32 s1, s3
710; GFX9-NEXT:    s_mov_b32 s2, s4
711; GFX9-NEXT:    s_mov_b32 s3, s5
712; GFX9-NEXT:    s_mov_b32 s4, s6
713; GFX9-NEXT:    s_mov_b32 s5, s7
714; GFX9-NEXT:    s_mov_b32 s6, s8
715; GFX9-NEXT:    s_mov_b32 s7, s9
716; GFX9-NEXT:    v_and_or_b32 v2, v3, v5, v2
717; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
718; GFX9-NEXT:    s_waitcnt vmcnt(0)
719; GFX9-NEXT:    ; return to shader part epilog
720;
721; GFX10-LABEL: atomic_add_i32_2darraymsaa:
722; GFX10:       ; %bb.0: ; %main_body
723; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
724; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
725; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
726; GFX10-NEXT:    s_mov_b32 s0, s2
727; GFX10-NEXT:    s_mov_b32 s1, s3
728; GFX10-NEXT:    s_mov_b32 s2, s4
729; GFX10-NEXT:    v_and_or_b32 v1, v1, v5, v2
730; GFX10-NEXT:    v_and_or_b32 v2, v3, v5, v4
731; GFX10-NEXT:    s_mov_b32 s3, s5
732; GFX10-NEXT:    s_mov_b32 s4, s6
733; GFX10-NEXT:    s_mov_b32 s5, s7
734; GFX10-NEXT:    s_mov_b32 s6, s8
735; GFX10-NEXT:    s_mov_b32 s7, s9
736; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc a16
737; GFX10-NEXT:    s_waitcnt vmcnt(0)
738; GFX10-NEXT:    ; return to shader part epilog
739main_body:
740  %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
741  %out = bitcast i32 %v to float
742  ret float %out
743}
744
; Test: same shape as atomic_add_i32_1d, except the intrinsic's final i32
; operand is 2 instead of 0. The only difference in the expected output is the
; additional "slc" modifier after "glc" on the image_atomic_add instruction.
; NOTE(review): presumably the final operand is a cache-policy bitfield in
; which value 2 selects slc - confirm against the intrinsic definition.
745define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
746; GFX9-LABEL: atomic_add_i32_1d_slc:
747; GFX9:       ; %bb.0: ; %main_body
748; GFX9-NEXT:    s_mov_b32 s0, s2
749; GFX9-NEXT:    s_mov_b32 s1, s3
750; GFX9-NEXT:    s_mov_b32 s2, s4
751; GFX9-NEXT:    s_mov_b32 s3, s5
752; GFX9-NEXT:    s_mov_b32 s4, s6
753; GFX9-NEXT:    s_mov_b32 s5, s7
754; GFX9-NEXT:    s_mov_b32 s6, s8
755; GFX9-NEXT:    s_mov_b32 s7, s9
756; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc a16
757; GFX9-NEXT:    s_waitcnt vmcnt(0)
758; GFX9-NEXT:    ; return to shader part epilog
759;
760; GFX10-LABEL: atomic_add_i32_1d_slc:
761; GFX10:       ; %bb.0: ; %main_body
762; GFX10-NEXT:    s_mov_b32 s0, s2
763; GFX10-NEXT:    s_mov_b32 s1, s3
764; GFX10-NEXT:    s_mov_b32 s2, s4
765; GFX10-NEXT:    s_mov_b32 s3, s5
766; GFX10-NEXT:    s_mov_b32 s4, s6
767; GFX10-NEXT:    s_mov_b32 s5, s7
768; GFX10-NEXT:    s_mov_b32 s6, s8
769; GFX10-NEXT:    s_mov_b32 s7, s9
770; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc a16
771; GFX10-NEXT:    s_waitcnt vmcnt(0)
772; GFX10-NEXT:    ; return to shader part epilog
773main_body:
774  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
775  %out = bitcast i32 %v to float
776  ret float %out
777}
778
; 1D i64 image atomic swap with a single 16-bit coordinate.
; The checks expect the 64-bit data operand in v[0:1] (dmask:0x3), the
; coordinate in v2, and the resource descriptor copied from s[2:9] to s[0:7].
; The i64 result is bitcast to <2 x float> to form the shader return value.
779define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
780; GFX9-LABEL: atomic_swap_i64_1d:
781; GFX9:       ; %bb.0: ; %main_body
782; GFX9-NEXT:    s_mov_b32 s0, s2
783; GFX9-NEXT:    s_mov_b32 s1, s3
784; GFX9-NEXT:    s_mov_b32 s2, s4
785; GFX9-NEXT:    s_mov_b32 s3, s5
786; GFX9-NEXT:    s_mov_b32 s4, s6
787; GFX9-NEXT:    s_mov_b32 s5, s7
788; GFX9-NEXT:    s_mov_b32 s6, s8
789; GFX9-NEXT:    s_mov_b32 s7, s9
790; GFX9-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
791; GFX9-NEXT:    s_waitcnt vmcnt(0)
792; GFX9-NEXT:    ; return to shader part epilog
793;
794; GFX10-LABEL: atomic_swap_i64_1d:
795; GFX10:       ; %bb.0: ; %main_body
796; GFX10-NEXT:    s_mov_b32 s0, s2
797; GFX10-NEXT:    s_mov_b32 s1, s3
798; GFX10-NEXT:    s_mov_b32 s2, s4
799; GFX10-NEXT:    s_mov_b32 s3, s5
800; GFX10-NEXT:    s_mov_b32 s4, s6
801; GFX10-NEXT:    s_mov_b32 s5, s7
802; GFX10-NEXT:    s_mov_b32 s6, s8
803; GFX10-NEXT:    s_mov_b32 s7, s9
804; GFX10-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
805; GFX10-NEXT:    s_waitcnt vmcnt(0)
806; GFX10-NEXT:    ; return to shader part epilog
807main_body:
808  %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
809  %out = bitcast i64 %v to <2 x float>
810  ret <2 x float> %out
811}
812
; 1D i64 image atomic add with a single 16-bit coordinate.
; The checks expect the 64-bit data operand in v[0:1] (dmask:0x3), the
; coordinate in v2, and the resource descriptor copied from s[2:9] to s[0:7].
; The i64 result is bitcast to <2 x float> to form the shader return value.
813define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
814; GFX9-LABEL: atomic_add_i64_1d:
815; GFX9:       ; %bb.0: ; %main_body
816; GFX9-NEXT:    s_mov_b32 s0, s2
817; GFX9-NEXT:    s_mov_b32 s1, s3
818; GFX9-NEXT:    s_mov_b32 s2, s4
819; GFX9-NEXT:    s_mov_b32 s3, s5
820; GFX9-NEXT:    s_mov_b32 s4, s6
821; GFX9-NEXT:    s_mov_b32 s5, s7
822; GFX9-NEXT:    s_mov_b32 s6, s8
823; GFX9-NEXT:    s_mov_b32 s7, s9
824; GFX9-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
825; GFX9-NEXT:    s_waitcnt vmcnt(0)
826; GFX9-NEXT:    ; return to shader part epilog
827;
828; GFX10-LABEL: atomic_add_i64_1d:
829; GFX10:       ; %bb.0: ; %main_body
830; GFX10-NEXT:    s_mov_b32 s0, s2
831; GFX10-NEXT:    s_mov_b32 s1, s3
832; GFX10-NEXT:    s_mov_b32 s2, s4
833; GFX10-NEXT:    s_mov_b32 s3, s5
834; GFX10-NEXT:    s_mov_b32 s4, s6
835; GFX10-NEXT:    s_mov_b32 s5, s7
836; GFX10-NEXT:    s_mov_b32 s6, s8
837; GFX10-NEXT:    s_mov_b32 s7, s9
838; GFX10-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
839; GFX10-NEXT:    s_waitcnt vmcnt(0)
840; GFX10-NEXT:    ; return to shader part epilog
841main_body:
842  %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
843  %out = bitcast i64 %v to <2 x float>
844  ret <2 x float> %out
845}
846
; 1D i64 image atomic sub with a single 16-bit coordinate.
; The checks expect the 64-bit data operand in v[0:1] (dmask:0x3), the
; coordinate in v2, and the resource descriptor copied from s[2:9] to s[0:7].
; The i64 result is bitcast to <2 x float> to form the shader return value.
847define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
848; GFX9-LABEL: atomic_sub_i64_1d:
849; GFX9:       ; %bb.0: ; %main_body
850; GFX9-NEXT:    s_mov_b32 s0, s2
851; GFX9-NEXT:    s_mov_b32 s1, s3
852; GFX9-NEXT:    s_mov_b32 s2, s4
853; GFX9-NEXT:    s_mov_b32 s3, s5
854; GFX9-NEXT:    s_mov_b32 s4, s6
855; GFX9-NEXT:    s_mov_b32 s5, s7
856; GFX9-NEXT:    s_mov_b32 s6, s8
857; GFX9-NEXT:    s_mov_b32 s7, s9
858; GFX9-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
859; GFX9-NEXT:    s_waitcnt vmcnt(0)
860; GFX9-NEXT:    ; return to shader part epilog
861;
862; GFX10-LABEL: atomic_sub_i64_1d:
863; GFX10:       ; %bb.0: ; %main_body
864; GFX10-NEXT:    s_mov_b32 s0, s2
865; GFX10-NEXT:    s_mov_b32 s1, s3
866; GFX10-NEXT:    s_mov_b32 s2, s4
867; GFX10-NEXT:    s_mov_b32 s3, s5
868; GFX10-NEXT:    s_mov_b32 s4, s6
869; GFX10-NEXT:    s_mov_b32 s5, s7
870; GFX10-NEXT:    s_mov_b32 s6, s8
871; GFX10-NEXT:    s_mov_b32 s7, s9
872; GFX10-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
873; GFX10-NEXT:    s_waitcnt vmcnt(0)
874; GFX10-NEXT:    ; return to shader part epilog
875main_body:
876  %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
877  %out = bitcast i64 %v to <2 x float>
878  ret <2 x float> %out
879}
880
; 1D i64 image atomic signed-min with a single 16-bit coordinate.
; The checks expect the 64-bit data operand in v[0:1] (dmask:0x3), the
; coordinate in v2, and the resource descriptor copied from s[2:9] to s[0:7].
; The i64 result is bitcast to <2 x float> to form the shader return value.
881define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
882; GFX9-LABEL: atomic_smin_i64_1d:
883; GFX9:       ; %bb.0: ; %main_body
884; GFX9-NEXT:    s_mov_b32 s0, s2
885; GFX9-NEXT:    s_mov_b32 s1, s3
886; GFX9-NEXT:    s_mov_b32 s2, s4
887; GFX9-NEXT:    s_mov_b32 s3, s5
888; GFX9-NEXT:    s_mov_b32 s4, s6
889; GFX9-NEXT:    s_mov_b32 s5, s7
890; GFX9-NEXT:    s_mov_b32 s6, s8
891; GFX9-NEXT:    s_mov_b32 s7, s9
892; GFX9-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
893; GFX9-NEXT:    s_waitcnt vmcnt(0)
894; GFX9-NEXT:    ; return to shader part epilog
895;
896; GFX10-LABEL: atomic_smin_i64_1d:
897; GFX10:       ; %bb.0: ; %main_body
898; GFX10-NEXT:    s_mov_b32 s0, s2
899; GFX10-NEXT:    s_mov_b32 s1, s3
900; GFX10-NEXT:    s_mov_b32 s2, s4
901; GFX10-NEXT:    s_mov_b32 s3, s5
902; GFX10-NEXT:    s_mov_b32 s4, s6
903; GFX10-NEXT:    s_mov_b32 s5, s7
904; GFX10-NEXT:    s_mov_b32 s6, s8
905; GFX10-NEXT:    s_mov_b32 s7, s9
906; GFX10-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
907; GFX10-NEXT:    s_waitcnt vmcnt(0)
908; GFX10-NEXT:    ; return to shader part epilog
909main_body:
910  %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
911  %out = bitcast i64 %v to <2 x float>
912  ret <2 x float> %out
913}
914
; 1D i64 image atomic unsigned-min with a single 16-bit coordinate.
; The checks expect the 64-bit data operand in v[0:1] (dmask:0x3), the
; coordinate in v2, and the resource descriptor copied from s[2:9] to s[0:7].
; The i64 result is bitcast to <2 x float> to form the shader return value.
915define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
916; GFX9-LABEL: atomic_umin_i64_1d:
917; GFX9:       ; %bb.0: ; %main_body
918; GFX9-NEXT:    s_mov_b32 s0, s2
919; GFX9-NEXT:    s_mov_b32 s1, s3
920; GFX9-NEXT:    s_mov_b32 s2, s4
921; GFX9-NEXT:    s_mov_b32 s3, s5
922; GFX9-NEXT:    s_mov_b32 s4, s6
923; GFX9-NEXT:    s_mov_b32 s5, s7
924; GFX9-NEXT:    s_mov_b32 s6, s8
925; GFX9-NEXT:    s_mov_b32 s7, s9
926; GFX9-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
927; GFX9-NEXT:    s_waitcnt vmcnt(0)
928; GFX9-NEXT:    ; return to shader part epilog
929;
930; GFX10-LABEL: atomic_umin_i64_1d:
931; GFX10:       ; %bb.0: ; %main_body
932; GFX10-NEXT:    s_mov_b32 s0, s2
933; GFX10-NEXT:    s_mov_b32 s1, s3
934; GFX10-NEXT:    s_mov_b32 s2, s4
935; GFX10-NEXT:    s_mov_b32 s3, s5
936; GFX10-NEXT:    s_mov_b32 s4, s6
937; GFX10-NEXT:    s_mov_b32 s5, s7
938; GFX10-NEXT:    s_mov_b32 s6, s8
939; GFX10-NEXT:    s_mov_b32 s7, s9
940; GFX10-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
941; GFX10-NEXT:    s_waitcnt vmcnt(0)
942; GFX10-NEXT:    ; return to shader part epilog
943main_body:
944  %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
945  %out = bitcast i64 %v to <2 x float>
946  ret <2 x float> %out
947}
948
; 1D i64 image atomic signed-max with a single 16-bit coordinate.
; The checks expect the 64-bit data operand in v[0:1] (dmask:0x3), the
; coordinate in v2, and the resource descriptor copied from s[2:9] to s[0:7].
; The i64 result is bitcast to <2 x float> to form the shader return value.
949define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
950; GFX9-LABEL: atomic_smax_i64_1d:
951; GFX9:       ; %bb.0: ; %main_body
952; GFX9-NEXT:    s_mov_b32 s0, s2
953; GFX9-NEXT:    s_mov_b32 s1, s3
954; GFX9-NEXT:    s_mov_b32 s2, s4
955; GFX9-NEXT:    s_mov_b32 s3, s5
956; GFX9-NEXT:    s_mov_b32 s4, s6
957; GFX9-NEXT:    s_mov_b32 s5, s7
958; GFX9-NEXT:    s_mov_b32 s6, s8
959; GFX9-NEXT:    s_mov_b32 s7, s9
960; GFX9-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
961; GFX9-NEXT:    s_waitcnt vmcnt(0)
962; GFX9-NEXT:    ; return to shader part epilog
963;
964; GFX10-LABEL: atomic_smax_i64_1d:
965; GFX10:       ; %bb.0: ; %main_body
966; GFX10-NEXT:    s_mov_b32 s0, s2
967; GFX10-NEXT:    s_mov_b32 s1, s3
968; GFX10-NEXT:    s_mov_b32 s2, s4
969; GFX10-NEXT:    s_mov_b32 s3, s5
970; GFX10-NEXT:    s_mov_b32 s4, s6
971; GFX10-NEXT:    s_mov_b32 s5, s7
972; GFX10-NEXT:    s_mov_b32 s6, s8
973; GFX10-NEXT:    s_mov_b32 s7, s9
974; GFX10-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
975; GFX10-NEXT:    s_waitcnt vmcnt(0)
976; GFX10-NEXT:    ; return to shader part epilog
977main_body:
978  %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
979  %out = bitcast i64 %v to <2 x float>
980  ret <2 x float> %out
981}
982
; 1D i64 image atomic unsigned-max with a single 16-bit coordinate.
; The checks expect the 64-bit data operand in v[0:1] (dmask:0x3), the
; coordinate in v2, and the resource descriptor copied from s[2:9] to s[0:7].
; The i64 result is bitcast to <2 x float> to form the shader return value.
983define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
984; GFX9-LABEL: atomic_umax_i64_1d:
985; GFX9:       ; %bb.0: ; %main_body
986; GFX9-NEXT:    s_mov_b32 s0, s2
987; GFX9-NEXT:    s_mov_b32 s1, s3
988; GFX9-NEXT:    s_mov_b32 s2, s4
989; GFX9-NEXT:    s_mov_b32 s3, s5
990; GFX9-NEXT:    s_mov_b32 s4, s6
991; GFX9-NEXT:    s_mov_b32 s5, s7
992; GFX9-NEXT:    s_mov_b32 s6, s8
993; GFX9-NEXT:    s_mov_b32 s7, s9
994; GFX9-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
995; GFX9-NEXT:    s_waitcnt vmcnt(0)
996; GFX9-NEXT:    ; return to shader part epilog
997;
998; GFX10-LABEL: atomic_umax_i64_1d:
999; GFX10:       ; %bb.0: ; %main_body
1000; GFX10-NEXT:    s_mov_b32 s0, s2
1001; GFX10-NEXT:    s_mov_b32 s1, s3
1002; GFX10-NEXT:    s_mov_b32 s2, s4
1003; GFX10-NEXT:    s_mov_b32 s3, s5
1004; GFX10-NEXT:    s_mov_b32 s4, s6
1005; GFX10-NEXT:    s_mov_b32 s5, s7
1006; GFX10-NEXT:    s_mov_b32 s6, s8
1007; GFX10-NEXT:    s_mov_b32 s7, s9
1008; GFX10-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1009; GFX10-NEXT:    s_waitcnt vmcnt(0)
1010; GFX10-NEXT:    ; return to shader part epilog
1011main_body:
1012  %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1013  %out = bitcast i64 %v to <2 x float>
1014  ret <2 x float> %out
1015}
1016
; 1D i64 image atomic bitwise-and with a single 16-bit coordinate.
; The checks expect the 64-bit data operand in v[0:1] (dmask:0x3), the
; coordinate in v2, and the resource descriptor copied from s[2:9] to s[0:7].
; The i64 result is bitcast to <2 x float> to form the shader return value.
1017define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1018; GFX9-LABEL: atomic_and_i64_1d:
1019; GFX9:       ; %bb.0: ; %main_body
1020; GFX9-NEXT:    s_mov_b32 s0, s2
1021; GFX9-NEXT:    s_mov_b32 s1, s3
1022; GFX9-NEXT:    s_mov_b32 s2, s4
1023; GFX9-NEXT:    s_mov_b32 s3, s5
1024; GFX9-NEXT:    s_mov_b32 s4, s6
1025; GFX9-NEXT:    s_mov_b32 s5, s7
1026; GFX9-NEXT:    s_mov_b32 s6, s8
1027; GFX9-NEXT:    s_mov_b32 s7, s9
1028; GFX9-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1029; GFX9-NEXT:    s_waitcnt vmcnt(0)
1030; GFX9-NEXT:    ; return to shader part epilog
1031;
1032; GFX10-LABEL: atomic_and_i64_1d:
1033; GFX10:       ; %bb.0: ; %main_body
1034; GFX10-NEXT:    s_mov_b32 s0, s2
1035; GFX10-NEXT:    s_mov_b32 s1, s3
1036; GFX10-NEXT:    s_mov_b32 s2, s4
1037; GFX10-NEXT:    s_mov_b32 s3, s5
1038; GFX10-NEXT:    s_mov_b32 s4, s6
1039; GFX10-NEXT:    s_mov_b32 s5, s7
1040; GFX10-NEXT:    s_mov_b32 s6, s8
1041; GFX10-NEXT:    s_mov_b32 s7, s9
1042; GFX10-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1043; GFX10-NEXT:    s_waitcnt vmcnt(0)
1044; GFX10-NEXT:    ; return to shader part epilog
1045main_body:
1046  %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1047  %out = bitcast i64 %v to <2 x float>
1048  ret <2 x float> %out
1049}
1050
; 1D i64 image atomic bitwise-or with a single 16-bit coordinate.
; The checks expect the 64-bit data operand in v[0:1] (dmask:0x3), the
; coordinate in v2, and the resource descriptor copied from s[2:9] to s[0:7].
; The i64 result is bitcast to <2 x float> to form the shader return value.
1051define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1052; GFX9-LABEL: atomic_or_i64_1d:
1053; GFX9:       ; %bb.0: ; %main_body
1054; GFX9-NEXT:    s_mov_b32 s0, s2
1055; GFX9-NEXT:    s_mov_b32 s1, s3
1056; GFX9-NEXT:    s_mov_b32 s2, s4
1057; GFX9-NEXT:    s_mov_b32 s3, s5
1058; GFX9-NEXT:    s_mov_b32 s4, s6
1059; GFX9-NEXT:    s_mov_b32 s5, s7
1060; GFX9-NEXT:    s_mov_b32 s6, s8
1061; GFX9-NEXT:    s_mov_b32 s7, s9
1062; GFX9-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1063; GFX9-NEXT:    s_waitcnt vmcnt(0)
1064; GFX9-NEXT:    ; return to shader part epilog
1065;
1066; GFX10-LABEL: atomic_or_i64_1d:
1067; GFX10:       ; %bb.0: ; %main_body
1068; GFX10-NEXT:    s_mov_b32 s0, s2
1069; GFX10-NEXT:    s_mov_b32 s1, s3
1070; GFX10-NEXT:    s_mov_b32 s2, s4
1071; GFX10-NEXT:    s_mov_b32 s3, s5
1072; GFX10-NEXT:    s_mov_b32 s4, s6
1073; GFX10-NEXT:    s_mov_b32 s5, s7
1074; GFX10-NEXT:    s_mov_b32 s6, s8
1075; GFX10-NEXT:    s_mov_b32 s7, s9
1076; GFX10-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1077; GFX10-NEXT:    s_waitcnt vmcnt(0)
1078; GFX10-NEXT:    ; return to shader part epilog
1079main_body:
1080  %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1081  %out = bitcast i64 %v to <2 x float>
1082  ret <2 x float> %out
1083}
1084
; 1D i64 image atomic bitwise-xor with a single 16-bit coordinate.
; The checks expect the 64-bit data operand in v[0:1] (dmask:0x3), the
; coordinate in v2, and the resource descriptor copied from s[2:9] to s[0:7].
; The i64 result is bitcast to <2 x float> to form the shader return value.
1085define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1086; GFX9-LABEL: atomic_xor_i64_1d:
1087; GFX9:       ; %bb.0: ; %main_body
1088; GFX9-NEXT:    s_mov_b32 s0, s2
1089; GFX9-NEXT:    s_mov_b32 s1, s3
1090; GFX9-NEXT:    s_mov_b32 s2, s4
1091; GFX9-NEXT:    s_mov_b32 s3, s5
1092; GFX9-NEXT:    s_mov_b32 s4, s6
1093; GFX9-NEXT:    s_mov_b32 s5, s7
1094; GFX9-NEXT:    s_mov_b32 s6, s8
1095; GFX9-NEXT:    s_mov_b32 s7, s9
1096; GFX9-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1097; GFX9-NEXT:    s_waitcnt vmcnt(0)
1098; GFX9-NEXT:    ; return to shader part epilog
1099;
1100; GFX10-LABEL: atomic_xor_i64_1d:
1101; GFX10:       ; %bb.0: ; %main_body
1102; GFX10-NEXT:    s_mov_b32 s0, s2
1103; GFX10-NEXT:    s_mov_b32 s1, s3
1104; GFX10-NEXT:    s_mov_b32 s2, s4
1105; GFX10-NEXT:    s_mov_b32 s3, s5
1106; GFX10-NEXT:    s_mov_b32 s4, s6
1107; GFX10-NEXT:    s_mov_b32 s5, s7
1108; GFX10-NEXT:    s_mov_b32 s6, s8
1109; GFX10-NEXT:    s_mov_b32 s7, s9
1110; GFX10-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1111; GFX10-NEXT:    s_waitcnt vmcnt(0)
1112; GFX10-NEXT:    ; return to shader part epilog
1113main_body:
1114  %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1115  %out = bitcast i64 %v to <2 x float>
1116  ret <2 x float> %out
1117}
1118
; 1D i64 image atomic inc with a single 16-bit coordinate.
; The checks expect the 64-bit data operand in v[0:1] (dmask:0x3), the
; coordinate in v2, and the resource descriptor copied from s[2:9] to s[0:7].
; The i64 result is bitcast to <2 x float> to form the shader return value.
1119define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1120; GFX9-LABEL: atomic_inc_i64_1d:
1121; GFX9:       ; %bb.0: ; %main_body
1122; GFX9-NEXT:    s_mov_b32 s0, s2
1123; GFX9-NEXT:    s_mov_b32 s1, s3
1124; GFX9-NEXT:    s_mov_b32 s2, s4
1125; GFX9-NEXT:    s_mov_b32 s3, s5
1126; GFX9-NEXT:    s_mov_b32 s4, s6
1127; GFX9-NEXT:    s_mov_b32 s5, s7
1128; GFX9-NEXT:    s_mov_b32 s6, s8
1129; GFX9-NEXT:    s_mov_b32 s7, s9
1130; GFX9-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1131; GFX9-NEXT:    s_waitcnt vmcnt(0)
1132; GFX9-NEXT:    ; return to shader part epilog
1133;
1134; GFX10-LABEL: atomic_inc_i64_1d:
1135; GFX10:       ; %bb.0: ; %main_body
1136; GFX10-NEXT:    s_mov_b32 s0, s2
1137; GFX10-NEXT:    s_mov_b32 s1, s3
1138; GFX10-NEXT:    s_mov_b32 s2, s4
1139; GFX10-NEXT:    s_mov_b32 s3, s5
1140; GFX10-NEXT:    s_mov_b32 s4, s6
1141; GFX10-NEXT:    s_mov_b32 s5, s7
1142; GFX10-NEXT:    s_mov_b32 s6, s8
1143; GFX10-NEXT:    s_mov_b32 s7, s9
1144; GFX10-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1145; GFX10-NEXT:    s_waitcnt vmcnt(0)
1146; GFX10-NEXT:    ; return to shader part epilog
1147main_body:
1148  %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1149  %out = bitcast i64 %v to <2 x float>
1150  ret <2 x float> %out
1151}
1152
; 1D i64 image atomic dec with a single 16-bit coordinate.
; The checks expect the 64-bit data operand in v[0:1] (dmask:0x3), the
; coordinate in v2, and the resource descriptor copied from s[2:9] to s[0:7].
; The i64 result is bitcast to <2 x float> to form the shader return value.
1153define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1154; GFX9-LABEL: atomic_dec_i64_1d:
1155; GFX9:       ; %bb.0: ; %main_body
1156; GFX9-NEXT:    s_mov_b32 s0, s2
1157; GFX9-NEXT:    s_mov_b32 s1, s3
1158; GFX9-NEXT:    s_mov_b32 s2, s4
1159; GFX9-NEXT:    s_mov_b32 s3, s5
1160; GFX9-NEXT:    s_mov_b32 s4, s6
1161; GFX9-NEXT:    s_mov_b32 s5, s7
1162; GFX9-NEXT:    s_mov_b32 s6, s8
1163; GFX9-NEXT:    s_mov_b32 s7, s9
1164; GFX9-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1165; GFX9-NEXT:    s_waitcnt vmcnt(0)
1166; GFX9-NEXT:    ; return to shader part epilog
1167;
1168; GFX10-LABEL: atomic_dec_i64_1d:
1169; GFX10:       ; %bb.0: ; %main_body
1170; GFX10-NEXT:    s_mov_b32 s0, s2
1171; GFX10-NEXT:    s_mov_b32 s1, s3
1172; GFX10-NEXT:    s_mov_b32 s2, s4
1173; GFX10-NEXT:    s_mov_b32 s3, s5
1174; GFX10-NEXT:    s_mov_b32 s4, s6
1175; GFX10-NEXT:    s_mov_b32 s5, s7
1176; GFX10-NEXT:    s_mov_b32 s6, s8
1177; GFX10-NEXT:    s_mov_b32 s7, s9
1178; GFX10-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1179; GFX10-NEXT:    s_waitcnt vmcnt(0)
1180; GFX10-NEXT:    ; return to shader part epilog
1181main_body:
1182  %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1183  %out = bitcast i64 %v to <2 x float>
1184  ret <2 x float> %out
1185}
1186
; 1D i64 image atomic compare-and-swap with a single 16-bit coordinate.
; Unlike the single-operand atomics, this takes two i64 values (%cmp, %swap);
; the checks expect them as one four-dword data operand v[0:3] (dmask:0xf)
; with the coordinate in v4. The i64 result is bitcast to <2 x float>.
1187define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i16 %s) {
1188; GFX9-LABEL: atomic_cmpswap_i64_1d:
1189; GFX9:       ; %bb.0: ; %main_body
1190; GFX9-NEXT:    s_mov_b32 s0, s2
1191; GFX9-NEXT:    s_mov_b32 s1, s3
1192; GFX9-NEXT:    s_mov_b32 s2, s4
1193; GFX9-NEXT:    s_mov_b32 s3, s5
1194; GFX9-NEXT:    s_mov_b32 s4, s6
1195; GFX9-NEXT:    s_mov_b32 s5, s7
1196; GFX9-NEXT:    s_mov_b32 s6, s8
1197; GFX9-NEXT:    s_mov_b32 s7, s9
1198; GFX9-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc a16
1199; GFX9-NEXT:    s_waitcnt vmcnt(0)
1200; GFX9-NEXT:    ; return to shader part epilog
1201;
1202; GFX10-LABEL: atomic_cmpswap_i64_1d:
1203; GFX10:       ; %bb.0: ; %main_body
1204; GFX10-NEXT:    s_mov_b32 s0, s2
1205; GFX10-NEXT:    s_mov_b32 s1, s3
1206; GFX10-NEXT:    s_mov_b32 s2, s4
1207; GFX10-NEXT:    s_mov_b32 s3, s5
1208; GFX10-NEXT:    s_mov_b32 s4, s6
1209; GFX10-NEXT:    s_mov_b32 s5, s7
1210; GFX10-NEXT:    s_mov_b32 s6, s8
1211; GFX10-NEXT:    s_mov_b32 s7, s9
1212; GFX10-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16
1213; GFX10-NEXT:    s_waitcnt vmcnt(0)
1214; GFX10-NEXT:    ; return to shader part epilog
1215main_body:
1216  %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64 %cmp, i64 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1217  %out = bitcast i64 %v to <2 x float>
1218  ret <2 x float> %out
1219}
1220
; 2D i64 image atomic add with two 16-bit coordinates (%s, %t).
; The checks expect both coordinates packed into the single address dword v2:
; v3 is shifted left by 16 and OR-ed with v2 masked by 0xffff. The gfx9 output
; materializes the 0xffff mask in v4 first, while the gfx10 output uses it as
; a literal operand of v_and_or_b32. Data operand is v[0:1] (dmask:0x3).
1221define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t) {
1222; GFX9-LABEL: atomic_add_i64_2d:
1223; GFX9:       ; %bb.0: ; %main_body
1224; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
1225; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1226; GFX9-NEXT:    s_mov_b32 s0, s2
1227; GFX9-NEXT:    s_mov_b32 s1, s3
1228; GFX9-NEXT:    s_mov_b32 s2, s4
1229; GFX9-NEXT:    s_mov_b32 s3, s5
1230; GFX9-NEXT:    s_mov_b32 s4, s6
1231; GFX9-NEXT:    s_mov_b32 s5, s7
1232; GFX9-NEXT:    s_mov_b32 s6, s8
1233; GFX9-NEXT:    s_mov_b32 s7, s9
1234; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v3
1235; GFX9-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1236; GFX9-NEXT:    s_waitcnt vmcnt(0)
1237; GFX9-NEXT:    ; return to shader part epilog
1238;
1239; GFX10-LABEL: atomic_add_i64_2d:
1240; GFX10:       ; %bb.0: ; %main_body
1241; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1242; GFX10-NEXT:    s_mov_b32 s0, s2
1243; GFX10-NEXT:    s_mov_b32 s1, s3
1244; GFX10-NEXT:    s_mov_b32 s2, s4
1245; GFX10-NEXT:    s_mov_b32 s3, s5
1246; GFX10-NEXT:    v_and_or_b32 v2, v2, 0xffff, v3
1247; GFX10-NEXT:    s_mov_b32 s4, s6
1248; GFX10-NEXT:    s_mov_b32 s5, s7
1249; GFX10-NEXT:    s_mov_b32 s6, s8
1250; GFX10-NEXT:    s_mov_b32 s7, s9
1251; GFX10-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc a16
1252; GFX10-NEXT:    s_waitcnt vmcnt(0)
1253; GFX10-NEXT:    ; return to shader part epilog
1254main_body:
1255  %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
1256  %out = bitcast i64 %v to <2 x float>
1257  ret <2 x float> %out
1258}
1259
; 3D i64 image atomic add with three 16-bit coordinates (%s, %t, %r).
; The checks expect the coordinates packed into the address pair v[2:3]:
; v2 = (v2 & 0xffff) | (v3 << 16) and v3 = (v4 & 0xffff) | s8, where s8 was
; just set to s0 << 16. Data operand is v[0:1] (dmask:0x3).
; NOTE(review): the high half of v3 therefore comes from s0 (which at that
; point already holds a copied descriptor dword), not from any coordinate
; argument; presumably it is a don't-care padding half for the odd coordinate
; count -- confirm before relying on it.
1260define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %r) {
1261; GFX9-LABEL: atomic_add_i64_3d:
1262; GFX9:       ; %bb.0: ; %main_body
1263; GFX9-NEXT:    s_mov_b32 s0, s2
1264; GFX9-NEXT:    s_mov_b32 s2, s4
1265; GFX9-NEXT:    s_mov_b32 s4, s6
1266; GFX9-NEXT:    s_mov_b32 s6, s8
1267; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
1268; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1269; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
1270; GFX9-NEXT:    s_mov_b32 s1, s3
1271; GFX9-NEXT:    s_mov_b32 s3, s5
1272; GFX9-NEXT:    s_mov_b32 s5, s7
1273; GFX9-NEXT:    v_and_or_b32 v2, v2, v5, v3
1274; GFX9-NEXT:    s_mov_b32 s7, s9
1275; GFX9-NEXT:    v_and_or_b32 v3, v4, v5, s8
1276; GFX9-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16
1277; GFX9-NEXT:    s_waitcnt vmcnt(0)
1278; GFX9-NEXT:    ; return to shader part epilog
1279;
1280; GFX10-LABEL: atomic_add_i64_3d:
1281; GFX10:       ; %bb.0: ; %main_body
1282; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
1283; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1284; GFX10-NEXT:    s_mov_b32 s0, s2
1285; GFX10-NEXT:    s_mov_b32 s2, s4
1286; GFX10-NEXT:    s_mov_b32 s4, s6
1287; GFX10-NEXT:    s_mov_b32 s6, s8
1288; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
1289; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, v3
1290; GFX10-NEXT:    v_and_or_b32 v3, v4, v5, s8
1291; GFX10-NEXT:    s_mov_b32 s1, s3
1292; GFX10-NEXT:    s_mov_b32 s3, s5
1293; GFX10-NEXT:    s_mov_b32 s5, s7
1294; GFX10-NEXT:    s_mov_b32 s7, s9
1295; GFX10-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc a16
1296; GFX10-NEXT:    s_waitcnt vmcnt(0)
1297; GFX10-NEXT:    ; return to shader part epilog
1298main_body:
1299  %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
1300  %out = bitcast i64 %v to <2 x float>
1301  ret <2 x float> %out
1302}
1303
; Cube i64 image atomic add with three 16-bit coordinates (%s, %t, %face).
; The checks expect the coordinates packed into the address pair v[2:3]:
; v2 = (v2 & 0xffff) | (v3 << 16) and v3 = (v4 & 0xffff) | s8, where s8 was
; just set to s0 << 16. The gfx9 instruction carries the `da` modifier; the
; gfx10 instruction instead encodes dim:SQ_RSRC_IMG_CUBE.
; NOTE(review): the high half of v3 comes from s0 (already holding a copied
; descriptor dword), not from any coordinate argument; presumably a
; don't-care padding half for the odd coordinate count -- confirm.
1304define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %face) {
1305; GFX9-LABEL: atomic_add_i64_cube:
1306; GFX9:       ; %bb.0: ; %main_body
1307; GFX9-NEXT:    s_mov_b32 s0, s2
1308; GFX9-NEXT:    s_mov_b32 s2, s4
1309; GFX9-NEXT:    s_mov_b32 s4, s6
1310; GFX9-NEXT:    s_mov_b32 s6, s8
1311; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
1312; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1313; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
1314; GFX9-NEXT:    s_mov_b32 s1, s3
1315; GFX9-NEXT:    s_mov_b32 s3, s5
1316; GFX9-NEXT:    s_mov_b32 s5, s7
1317; GFX9-NEXT:    v_and_or_b32 v2, v2, v5, v3
1318; GFX9-NEXT:    s_mov_b32 s7, s9
1319; GFX9-NEXT:    v_and_or_b32 v3, v4, v5, s8
1320; GFX9-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
1321; GFX9-NEXT:    s_waitcnt vmcnt(0)
1322; GFX9-NEXT:    ; return to shader part epilog
1323;
1324; GFX10-LABEL: atomic_add_i64_cube:
1325; GFX10:       ; %bb.0: ; %main_body
1326; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
1327; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1328; GFX10-NEXT:    s_mov_b32 s0, s2
1329; GFX10-NEXT:    s_mov_b32 s2, s4
1330; GFX10-NEXT:    s_mov_b32 s4, s6
1331; GFX10-NEXT:    s_mov_b32 s6, s8
1332; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
1333; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, v3
1334; GFX10-NEXT:    v_and_or_b32 v3, v4, v5, s8
1335; GFX10-NEXT:    s_mov_b32 s1, s3
1336; GFX10-NEXT:    s_mov_b32 s3, s5
1337; GFX10-NEXT:    s_mov_b32 s5, s7
1338; GFX10-NEXT:    s_mov_b32 s7, s9
1339; GFX10-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc a16
1340; GFX10-NEXT:    s_waitcnt vmcnt(0)
1341; GFX10-NEXT:    ; return to shader part epilog
1342main_body:
1343  %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64 %data, i16 %s, i16 %t, i16 %face , <8 x i32> %rsrc, i32 0, i32 0)
1344  %out = bitcast i64 %v to <2 x float>
1345  ret <2 x float> %out
1346}
1347
; 1D-array i64 image atomic add with two 16-bit coordinates (%s, %slice).
; The checks expect both coordinates packed into the single address dword v2
; (v3 shifted left by 16, OR-ed with v2 masked by 0xffff). The gfx9
; instruction carries the `da` modifier; the gfx10 instruction instead
; encodes dim:SQ_RSRC_IMG_1D_ARRAY and uses 0xffff as a literal operand.
1348define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %slice) {
1349; GFX9-LABEL: atomic_add_i64_1darray:
1350; GFX9:       ; %bb.0: ; %main_body
1351; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
1352; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1353; GFX9-NEXT:    s_mov_b32 s0, s2
1354; GFX9-NEXT:    s_mov_b32 s1, s3
1355; GFX9-NEXT:    s_mov_b32 s2, s4
1356; GFX9-NEXT:    s_mov_b32 s3, s5
1357; GFX9-NEXT:    s_mov_b32 s4, s6
1358; GFX9-NEXT:    s_mov_b32 s5, s7
1359; GFX9-NEXT:    s_mov_b32 s6, s8
1360; GFX9-NEXT:    s_mov_b32 s7, s9
1361; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v3
1362; GFX9-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 da
1363; GFX9-NEXT:    s_waitcnt vmcnt(0)
1364; GFX9-NEXT:    ; return to shader part epilog
1365;
1366; GFX10-LABEL: atomic_add_i64_1darray:
1367; GFX10:       ; %bb.0: ; %main_body
1368; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1369; GFX10-NEXT:    s_mov_b32 s0, s2
1370; GFX10-NEXT:    s_mov_b32 s1, s3
1371; GFX10-NEXT:    s_mov_b32 s2, s4
1372; GFX10-NEXT:    s_mov_b32 s3, s5
1373; GFX10-NEXT:    v_and_or_b32 v2, v2, 0xffff, v3
1374; GFX10-NEXT:    s_mov_b32 s4, s6
1375; GFX10-NEXT:    s_mov_b32 s5, s7
1376; GFX10-NEXT:    s_mov_b32 s6, s8
1377; GFX10-NEXT:    s_mov_b32 s7, s9
1378; GFX10-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc a16
1379; GFX10-NEXT:    s_waitcnt vmcnt(0)
1380; GFX10-NEXT:    ; return to shader part epilog
1381main_body:
1382  %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1383  %out = bitcast i64 %v to <2 x float>
1384  ret <2 x float> %out
1385}
1386
; 2D-array i64 image atomic add with three 16-bit coordinates
; (%s, %t, %slice).
; The checks expect the coordinates packed into the address pair v[2:3]:
; v2 = (v2 & 0xffff) | (v3 << 16) and v3 = (v4 & 0xffff) | s8, where s8 was
; just set to s0 << 16. The gfx9 instruction carries the `da` modifier; the
; gfx10 instruction instead encodes dim:SQ_RSRC_IMG_2D_ARRAY.
; NOTE(review): the high half of v3 comes from s0 (already holding a copied
; descriptor dword), not from any coordinate argument; presumably a
; don't-care padding half for the odd coordinate count -- confirm.
1387define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice) {
1388; GFX9-LABEL: atomic_add_i64_2darray:
1389; GFX9:       ; %bb.0: ; %main_body
1390; GFX9-NEXT:    s_mov_b32 s0, s2
1391; GFX9-NEXT:    s_mov_b32 s2, s4
1392; GFX9-NEXT:    s_mov_b32 s4, s6
1393; GFX9-NEXT:    s_mov_b32 s6, s8
1394; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
1395; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1396; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
1397; GFX9-NEXT:    s_mov_b32 s1, s3
1398; GFX9-NEXT:    s_mov_b32 s3, s5
1399; GFX9-NEXT:    s_mov_b32 s5, s7
1400; GFX9-NEXT:    v_and_or_b32 v2, v2, v5, v3
1401; GFX9-NEXT:    s_mov_b32 s7, s9
1402; GFX9-NEXT:    v_and_or_b32 v3, v4, v5, s8
1403; GFX9-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
1404; GFX9-NEXT:    s_waitcnt vmcnt(0)
1405; GFX9-NEXT:    ; return to shader part epilog
1406;
1407; GFX10-LABEL: atomic_add_i64_2darray:
1408; GFX10:       ; %bb.0: ; %main_body
1409; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
1410; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1411; GFX10-NEXT:    s_mov_b32 s0, s2
1412; GFX10-NEXT:    s_mov_b32 s2, s4
1413; GFX10-NEXT:    s_mov_b32 s4, s6
1414; GFX10-NEXT:    s_mov_b32 s6, s8
1415; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
1416; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, v3
1417; GFX10-NEXT:    v_and_or_b32 v3, v4, v5, s8
1418; GFX10-NEXT:    s_mov_b32 s1, s3
1419; GFX10-NEXT:    s_mov_b32 s3, s5
1420; GFX10-NEXT:    s_mov_b32 s5, s7
1421; GFX10-NEXT:    s_mov_b32 s7, s9
1422; GFX10-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc a16
1423; GFX10-NEXT:    s_waitcnt vmcnt(0)
1424; GFX10-NEXT:    ; return to shader part epilog
1425main_body:
1426  %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1427  %out = bitcast i64 %v to <2 x float>
1428  ret <2 x float> %out
1429}
1430
; 2D-MSAA i64 image atomic add with three 16-bit coordinates
; (%s, %t, %fragid).
; The checks expect the coordinates packed into the address pair v[2:3]:
; v2 = (v2 & 0xffff) | (v3 << 16) and v3 = (v4 & 0xffff) | s8, where s8 was
; just set to s0 << 16. No `da` modifier is expected on gfx9; the gfx10
; instruction encodes dim:SQ_RSRC_IMG_2D_MSAA.
; NOTE(review): the high half of v3 comes from s0 (already holding a copied
; descriptor dword), not from any coordinate argument; presumably a
; don't-care padding half for the odd coordinate count -- confirm.
1431define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %fragid) {
1432; GFX9-LABEL: atomic_add_i64_2dmsaa:
1433; GFX9:       ; %bb.0: ; %main_body
1434; GFX9-NEXT:    s_mov_b32 s0, s2
1435; GFX9-NEXT:    s_mov_b32 s2, s4
1436; GFX9-NEXT:    s_mov_b32 s4, s6
1437; GFX9-NEXT:    s_mov_b32 s6, s8
1438; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
1439; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1440; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
1441; GFX9-NEXT:    s_mov_b32 s1, s3
1442; GFX9-NEXT:    s_mov_b32 s3, s5
1443; GFX9-NEXT:    s_mov_b32 s5, s7
1444; GFX9-NEXT:    v_and_or_b32 v2, v2, v5, v3
1445; GFX9-NEXT:    s_mov_b32 s7, s9
1446; GFX9-NEXT:    v_and_or_b32 v3, v4, v5, s8
1447; GFX9-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16
1448; GFX9-NEXT:    s_waitcnt vmcnt(0)
1449; GFX9-NEXT:    ; return to shader part epilog
1450;
1451; GFX10-LABEL: atomic_add_i64_2dmsaa:
1452; GFX10:       ; %bb.0: ; %main_body
1453; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
1454; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1455; GFX10-NEXT:    s_mov_b32 s0, s2
1456; GFX10-NEXT:    s_mov_b32 s2, s4
1457; GFX10-NEXT:    s_mov_b32 s4, s6
1458; GFX10-NEXT:    s_mov_b32 s6, s8
1459; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
1460; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, v3
1461; GFX10-NEXT:    v_and_or_b32 v3, v4, v5, s8
1462; GFX10-NEXT:    s_mov_b32 s1, s3
1463; GFX10-NEXT:    s_mov_b32 s3, s5
1464; GFX10-NEXT:    s_mov_b32 s5, s7
1465; GFX10-NEXT:    s_mov_b32 s7, s9
1466; GFX10-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc a16
1467; GFX10-NEXT:    s_waitcnt vmcnt(0)
1468; GFX10-NEXT:    ; return to shader part epilog
1469main_body:
1470  %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1471  %out = bitcast i64 %v to <2 x float>
1472  ret <2 x float> %out
1473}
1474
; 2D-array-MSAA i64 image atomic add with four 16-bit coordinates
; (%s, %t, %slice, %fragid).
; The checks expect the coordinates packed pairwise into v[2:3], both dwords
; built from VGPR inputs: v2 = (v2 & 0xffff) | (v3 << 16) and
; v3 = (v4 & 0xffff) | (v5 << 16). The gfx9 instruction carries the `da`
; modifier; the gfx10 instruction encodes dim:SQ_RSRC_IMG_2D_MSAA_ARRAY.
1475define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
1476; GFX9-LABEL: atomic_add_i64_2darraymsaa:
1477; GFX9:       ; %bb.0: ; %main_body
1478; GFX9-NEXT:    v_mov_b32_e32 v6, 0xffff
1479; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1480; GFX9-NEXT:    v_and_or_b32 v2, v2, v6, v3
1481; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
1482; GFX9-NEXT:    s_mov_b32 s0, s2
1483; GFX9-NEXT:    s_mov_b32 s1, s3
1484; GFX9-NEXT:    s_mov_b32 s2, s4
1485; GFX9-NEXT:    s_mov_b32 s3, s5
1486; GFX9-NEXT:    s_mov_b32 s4, s6
1487; GFX9-NEXT:    s_mov_b32 s5, s7
1488; GFX9-NEXT:    s_mov_b32 s6, s8
1489; GFX9-NEXT:    s_mov_b32 s7, s9
1490; GFX9-NEXT:    v_and_or_b32 v3, v4, v6, v3
1491; GFX9-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
1492; GFX9-NEXT:    s_waitcnt vmcnt(0)
1493; GFX9-NEXT:    ; return to shader part epilog
1494;
1495; GFX10-LABEL: atomic_add_i64_2darraymsaa:
1496; GFX10:       ; %bb.0: ; %main_body
1497; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
1498; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1499; GFX10-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
1500; GFX10-NEXT:    s_mov_b32 s0, s2
1501; GFX10-NEXT:    s_mov_b32 s1, s3
1502; GFX10-NEXT:    s_mov_b32 s2, s4
1503; GFX10-NEXT:    v_and_or_b32 v2, v2, v6, v3
1504; GFX10-NEXT:    v_and_or_b32 v3, v4, v6, v5
1505; GFX10-NEXT:    s_mov_b32 s3, s5
1506; GFX10-NEXT:    s_mov_b32 s4, s6
1507; GFX10-NEXT:    s_mov_b32 s5, s7
1508; GFX10-NEXT:    s_mov_b32 s6, s8
1509; GFX10-NEXT:    s_mov_b32 s7, s9
1510; GFX10-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc a16
1511; GFX10-NEXT:    s_waitcnt vmcnt(0)
1512; GFX10-NEXT:    ; return to shader part epilog
1513main_body:
1514  %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1515  %out = bitcast i64 %v to <2 x float>
1516  ret <2 x float> %out
1517}
1518
; 1D i64 image atomic add with a single 16-bit coordinate, called with 2 as the
; final immediate operand instead of the 0 used by the non-slc tests.
; The checks for both targets expect the `slc` modifier next to `glc` and
; `a16`, the 64-bit data operand in v[0:1] (dmask:0x3) and the coordinate
; in v2.
; NOTE(review): the final immediate is presumably the cache-policy operand with
; its slc bit set -- confirm against the intrinsic definition.
1519define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1520; GFX9-LABEL: atomic_add_i64_1d_slc:
1521; GFX9:       ; %bb.0: ; %main_body
1522; GFX9-NEXT:    s_mov_b32 s0, s2
1523; GFX9-NEXT:    s_mov_b32 s1, s3
1524; GFX9-NEXT:    s_mov_b32 s2, s4
1525; GFX9-NEXT:    s_mov_b32 s3, s5
1526; GFX9-NEXT:    s_mov_b32 s4, s6
1527; GFX9-NEXT:    s_mov_b32 s5, s7
1528; GFX9-NEXT:    s_mov_b32 s6, s8
1529; GFX9-NEXT:    s_mov_b32 s7, s9
1530; GFX9-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc a16
1531; GFX9-NEXT:    s_waitcnt vmcnt(0)
1532; GFX9-NEXT:    ; return to shader part epilog
1533;
1534; GFX10-LABEL: atomic_add_i64_1d_slc:
1535; GFX10:       ; %bb.0: ; %main_body
1536; GFX10-NEXT:    s_mov_b32 s0, s2
1537; GFX10-NEXT:    s_mov_b32 s1, s3
1538; GFX10-NEXT:    s_mov_b32 s2, s4
1539; GFX10-NEXT:    s_mov_b32 s3, s5
1540; GFX10-NEXT:    s_mov_b32 s4, s6
1541; GFX10-NEXT:    s_mov_b32 s5, s7
1542; GFX10-NEXT:    s_mov_b32 s6, s8
1543; GFX10-NEXT:    s_mov_b32 s7, s9
1544; GFX10-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc a16
1545; GFX10-NEXT:    s_waitcnt vmcnt(0)
1546; GFX10-NEXT:    ; return to shader part epilog
1547main_body:
1548  %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
1549  %out = bitcast i64 %v to <2 x float>
1550  ret <2 x float> %out
1551}
1552
; Declarations of the image atomic intrinsics with an i32 result. Each takes
; its i32 data operand(s) first (cmpswap takes two), then one to four i16
; operands depending on the dimension suffix in the name, then the <8 x i32>
; operand, and finally two i32 immediates (marked immarg).
1553declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1554declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1555declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1556declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1557declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1558declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1559declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1560declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1561declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1562declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1563declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1564declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1565declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1566declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1567declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1568declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1569declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1570declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1571declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1572declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1573
; The same set of operations and dimensions, declared with an i64 result and
; i64 data operand(s); the remaining operands match the i32 forms.
1574declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1575declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1576declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1577declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1578declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1579declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1580declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1581declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1582declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1583declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1584declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1585declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1586declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64, i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1587declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1588declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1589declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1590declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1591declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1592declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1593declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
1594
; Attribute group referenced as #0 by every declaration above.
1595attributes #0 = { nounwind }
1596