1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s
2; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI %s
3; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
4
; --- atomicrmw add (i32, global address space) ---
; Checks instruction selection for SI/VI (buffer_atomic/flat_atomic) and
; GFX9 (global_atomic) across immediate-offset, soffset, huge-offset,
; 64-bit-index (addr64), and value-returning (glc + store) variants.

; GCN-LABEL: {{^}}atomic_add_i32_offset:
; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; NOTE(review): only GFX9 is checked here — presumably SI/VI cannot fold the
; negative immediate offset into the instruction; confirm against the target.
; GCN-LABEL: {{^}}atomic_add_i32_max_neg_offset:
; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:-4096{{$}}
define amdgpu_kernel void @atomic_add_i32_max_neg_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 -1024
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; Byte offset 4 * 9000 = 36000 = 0x8ca0 is too large for the immediate field,
; so SI/VI materialize it in an SGPR used as soffset.
; GCN-LABEL: {{^}}atomic_add_i32_soffset:
; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0
; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}

; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; Byte offset 4 * 47224239175595 = 0xabcd0000deac; SI splits it into the
; lo/hi constants checked below and uses addr64 addressing.
; GCN-LABEL: {{^}}atomic_add_i32_huge_offset:
; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac
; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd
; SI: buffer_atomic_add v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}

; VI: flat_atomic_add

; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595

  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32:
; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret:
; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_addr64:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
139
; --- atomicrmw and (i32, global address space): offset variants ---

; GCN-LABEL: {{^}}atomic_and_i32_offset:
; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
176
; Returning and-with-offset through a 64-bit index; the returned old value is
; stored to %out2, so the atomic must carry glc and feed a store.
; Fixed: the VI and GFX9 [[RET]] captures were `v[0-9]` (single digit only);
; now `v[0-9]+` to match any VGPR number, consistent with every sibling check.
; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
191
; --- atomicrmw and (i32, global address space): no-offset variants ---

; GCN-LABEL: {{^}}atomic_and_i32:
; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_and v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret:
; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_addr64:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
239
; --- atomicrmw sub (i32, global address space) ---

; GCN-LABEL: {{^}}atomic_sub_i32_offset:
; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32:
; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret:
; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
339
; --- atomicrmw max (signed; selects buffer/flat/global_atomic_smax) ---

; GCN-LABEL: {{^}}atomic_max_i32_offset:
; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32:
; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret:
; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_addr64:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
439
; --- atomicrmw umax (unsigned; selects buffer/flat/global_atomic_umax) ---

; GCN-LABEL: {{^}}atomic_umax_i32_offset:
; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32:
; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret:
; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
537
; --- atomicrmw min (signed; selects buffer/flat/global_atomic_smin) ---

; GCN-LABEL: {{^}}atomic_min_i32_offset:
; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
561
; Signed-min atomic through a 64-bit index plus constant offset, no return use.
; Fixed: the GFX9 check was missing the end-of-line anchor {{$}} carried by
; every parallel addr64_offset check (add/and/umax/umin), so trailing
; modifiers such as glc would have gone undetected.
; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
573
; --- atomicrmw min (signed): remaining ret / addr64 variants ---

; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32:
; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret:
; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_addr64:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
635
636; GCN-LABEL: {{^}}atomic_umin_i32_offset:
637; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
638
639; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
640define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
641entry:
642  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
643  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
644  ret void
645}
646
647; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
648; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
649; SIVI: buffer_store_dword [[RET]]
650
651; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
652define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
653entry:
654  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
655  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
656  store i32 %val, i32 addrspace(1)* %out2
657  ret void
658}
659
660; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
661; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
662; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
663; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
; Unsigned-min RMW with runtime index plus constant offset; SI should fold the
; index as addr64 and keep offset:16 as an immediate. Result unused.
define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
671
672; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
673; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
674; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
675; SIVI: buffer_store_dword [[RET]]
676
677; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
; Unsigned-min RMW with runtime index + constant offset; old value stored to
; %out2, so the returning ('glc') form is expected.
define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
686
687; GCN-LABEL: {{^}}atomic_umin_i32:
688; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
689; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
; Base case: unsigned-min RMW directly on %out, no offset, result unused.
define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}
695
696; GCN-LABEL: {{^}}atomic_umin_i32_ret:
697; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
698; SIVI: buffer_store_dword [[RET]]
699
700; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
; Unsigned-min RMW on %out with the old value stored to %out2 ('glc' form).
define amdgpu_kernel void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
707
708; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
709; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
710; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
711; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
; Unsigned-min RMW at a runtime i64 index (SI addr64 mode); result unused.
define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}
718
719; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
720; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
721; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
722; SIVI: buffer_store_dword [[RET]]
723
724; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
; Unsigned-min RMW at a runtime index; old value stored to %out2 ('glc' form).
define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
732
733; GCN-LABEL: {{^}}atomic_or_i32_offset:
734; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
735
736; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
; Bitwise-or RMW at constant element 4 (16 bytes); result unused (no 'glc').
define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
743
744; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
745; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
746; SIVI: buffer_store_dword [[RET]]
747
748; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
; Bitwise-or RMW at a 16-byte immediate offset; old value stored to %out2,
; so the returning ('glc') form is expected.
define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
756
757; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
758; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
759; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
760; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16
; Bitwise-or RMW with runtime index plus constant offset (SI: addr64 +
; offset:16); result unused.
define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
768
769; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
770; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
771; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
772; SIVI: buffer_store_dword [[RET]]
773
774; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
; Bitwise-or RMW with runtime index + constant offset; old value stored to
; %out2 ('glc' form expected).
define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
783
784; GCN-LABEL: {{^}}atomic_or_i32:
785; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
786
787; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
; Base case: bitwise-or RMW directly on %out, no offset, result unused.
define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}
793
794; GCN-LABEL: {{^}}atomic_or_i32_ret:
795; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
796; SIVI: buffer_store_dword [[RET]]
797
798; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
; Bitwise-or RMW on %out with the old value stored to %out2 ('glc' form).
define amdgpu_kernel void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
805
806; GCN-LABEL: {{^}}atomic_or_i32_addr64:
807; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
808; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
809; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
; Bitwise-or RMW at a runtime i64 index (SI addr64 mode); result unused.
define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}
816
817; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
818; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
819; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
820; SIVI: buffer_store_dword [[RET]]
821
822; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
; Bitwise-or RMW at a runtime index; old value stored to %out2 ('glc' form).
define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
830
831; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
832; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
833
834; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
; Exchange (swap) RMW at constant element 4 (16 bytes); result unused, so the
; no-return (no 'glc') form is expected.
define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
841
842; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
843; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
844; SIVI: buffer_store_dword [[RET]]
845
846; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
; Exchange RMW at a 16-byte immediate offset; old value stored to %out2, so
; the returning ('glc') form is expected.
define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
854
855; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
856; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
857; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
858; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
; Exchange RMW with runtime index plus constant offset (SI: addr64 +
; offset:16); result unused.
define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
866
867; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
868; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
869; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
870; SIVI: buffer_store_dword [[RET]]
871
872; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
; Exchange RMW with runtime index + constant offset; old value stored to
; %out2 ('glc' form expected).
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
881
882; GCN-LABEL: {{^}}atomic_xchg_i32:
883; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
884; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
; Base case: exchange RMW directly on %out, no offset, result unused.
define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}
890
891; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
892; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
893; SIVI: buffer_store_dword [[RET]]
894
895; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
; Exchange RMW on %out with the old value stored to %out2 ('glc' form).
define amdgpu_kernel void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
902
903; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
904; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
905; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
906; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
; Exchange RMW at a runtime i64 index (SI addr64 mode); result unused.
define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}
913
914; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
915; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
917; SIVI: buffer_store_dword [[RET]]
918
; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
; Exchange RMW at a runtime index; old value stored to %out2 ('glc' form).
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
927
928; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
929; SIVI: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
930
931; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:16{{$}}
; cmpxchg at constant element 4 (16 bytes); both result fields unused, so the
; non-returning cmpswap (data pair, no 'glc') is expected.
define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
938
939; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
940; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
941; SIVI: buffer_store_dword v[[RET]]
942
; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:16 glc{{$}}
; cmpxchg at a 16-byte immediate offset; the loaded (old) value — field 0 of
; the {i32, i1} result — is stored to %out2, so the 'glc' form is expected.
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
  %extract0 = extractvalue { i32, i1 } %val, 0
  store i32 %extract0, i32 addrspace(1)* %out2
  ret void
}
952
953; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
954; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
955
956; VI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
957; GFX9: global_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16{{$}}
; cmpxchg with runtime index plus constant offset (SI: addr64 + offset:16);
; result unused.
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
965
966; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
967; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
968; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
969; SIVI: buffer_store_dword v[[RET]]
970
971; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
; cmpxchg with runtime index + constant offset; the loaded (old) value is
; stored to %out2 ('glc' form expected).
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
  %extract0 = extractvalue { i32, i1 } %val, 0
  store i32 %extract0, i32 addrspace(1)* %out2
  ret void
}
981
982; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
983; SIVI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
984
985; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
; Base case: cmpxchg directly on %out, no offset, result unused.
define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
991
992; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
993; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
994; SIVI: buffer_store_dword v[[RET]]
995
996; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off glc{{$}}
; cmpxchg on %out with the loaded (old) value stored to %out2 ('glc' form).
define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
entry:
  %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
  %extract0 = extractvalue { i32, i1 } %val, 0
  store i32 %extract0, i32 addrspace(1)* %out2
  ret void
}
1004
1005; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
1006; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1007; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
1008; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
; cmpxchg at a runtime i64 index (SI addr64 mode); result unused.
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
1015
1016; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
1017; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1018; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1019; SIVI: buffer_store_dword v[[RET]]
1020
1021; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off glc{{$}}
; cmpxchg at a runtime index; loaded (old) value stored to %out2 ('glc' form).
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
  %extract0 = extractvalue { i32, i1 } %val, 0
  store i32 %extract0, i32 addrspace(1)* %out2
  ret void
}
1030
1031; GCN-LABEL: {{^}}atomic_xor_i32_offset:
1032; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
1033
1034; GFX9: global_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
; Bitwise-xor RMW at constant element 4 (16 bytes); result unused (no 'glc').
define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
1041
1042; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
1043; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
1044; SIVI: buffer_store_dword [[RET]]
1045
1046; GFX9: global_atomic_xor v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
; Bitwise-xor RMW at a 16-byte immediate offset; old value stored to %out2,
; so the returning ('glc') form is expected.
define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
1054
1055; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
1056; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
1057; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
1058; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
; Bitwise-xor RMW with runtime index plus constant offset (SI: addr64 +
; offset:16); result unused.
define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
1066
1067; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
1068; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
1069; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
1070; SIVI: buffer_store_dword [[RET]]
1071
1072; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
; Bitwise-xor RMW with runtime index + constant offset; old value stored to
; %out2 ('glc' form expected).
define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
1081
1082; GCN-LABEL: {{^}}atomic_xor_i32:
1083; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1084; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
; Base case: bitwise-xor RMW directly on %out, no offset, result unused.
define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}
1090
1091; GCN-LABEL: {{^}}atomic_xor_i32_ret:
1092; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
1093; SIVI: buffer_store_dword [[RET]]
1094
1095; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
; Bitwise-xor RMW on %out with the old value stored to %out2 ('glc' form).
define amdgpu_kernel void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
1102
1103; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
1104; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1105; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
1106; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
; Bitwise-xor RMW at a runtime i64 index (SI addr64 mode); result unused.
define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}
1113
1114; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
1115; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1116; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
1117; SIVI: buffer_store_dword [[RET]]
1118
1119; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
; Bitwise-xor RMW at a runtime index; old value stored to %out2 ('glc' form).
define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
1127
1128; GCN-LABEL: {{^}}atomic_load_i32_offset:
1129; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
1130; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
1131; SIVI: buffer_store_dword [[RET]]
1132
1133; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16 glc{{$}}
; seq_cst atomic load at a 16-byte immediate offset; atomic loads must be
; selected as 'glc' (bypass L1) loads, value then stored non-atomically.
define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %in, i64 4
  %val = load atomic i32, i32 addrspace(1)* %gep  seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
1141
1142; GCN-LABEL: {{^}}atomic_load_i32:
1143; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
1144; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
1145; SIVI: buffer_store_dword [[RET]]
1146
1147; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], off glc
; Base case: seq_cst atomic load from %in, stored to %out ('glc' load).
define amdgpu_kernel void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
1154
1155; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
1156; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
1157; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1158; SIVI: buffer_store_dword [[RET]]
1159
1160; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
; seq_cst atomic load with runtime index plus constant offset (SI: addr64 +
; offset:16, 'glc'); loaded value stored to %out.
define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
1169
1170; GCN-LABEL: {{^}}atomic_load_i32_addr64:
1171; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1172; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1173; SIVI: buffer_store_dword [[RET]]
1174
1175; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off glc{{$}}
; seq_cst atomic load at a runtime i64 index ('glc'); value stored to %out.
define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
  %val = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
1183
1184; GCN-LABEL: {{^}}atomic_store_i32_offset:
1185; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
1186; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1187; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}
; seq_cst atomic store at a 16-byte immediate offset; selected as a plain
; store with the offset folded in.
define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  store atomic i32 %in, i32 addrspace(1)* %gep  seq_cst, align 4
  ret void
}
1194
1195; GCN-LABEL: {{^}}atomic_store_i32:
1196; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1197; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1198; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
; Base case: seq_cst atomic store of %in directly to %out.
define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
  ret void
}
1204
1205; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
1206; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
1207; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1208; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}
; seq_cst atomic store with runtime index plus constant offset (SI: addr64 +
; offset:16).
define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
  ret void
}
1216
1217; GCN-LABEL: {{^}}atomic_store_i32_addr64:
1218; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1219; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
1220; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
; seq_cst atomic store at a runtime i64 index (SI addr64 mode).
define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4
  ret void
}
1227