1; RUN: llc -march=amdgcn -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s
2; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-atomic-optimizations=false -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI %s
3; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
4
; seq_cst atomic add through a constant gep (element 4 = byte 16); the 16-byte
; displacement is expected to fold into the instruction's immediate offset.
; GCN-LABEL: {{^}}atomic_add_i32_offset:
; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
14
; gep by -1024 elements = -4096 bytes; only GFX9 output is checked here,
; presumably because only its global instructions take a negative immediate
; offset (the SIVI buffer forms above use unsigned offsets) — per function name
; this is the most negative encodable value.
; GCN-LABEL: {{^}}atomic_add_i32_max_neg_offset:
; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-4096{{$}}
define amdgpu_kernel void @atomic_add_i32_max_neg_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 -1024
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
23
; Byte offset 9000*4 = 36000 (0x8ca0) is too large for an immediate:
; SIVI materializes the whole value in an SGPR soffset; GFX9 splits it as
; 0x8000 in a VGPR plus an immediate remainder (0x8000 + 3232 = 36000).
; NOTE(review): the GFX9 base-register pattern previously read
; s{{\[[0-9]:[0-9]+\]}} (no '+' on the first digit run), which silently fails
; to match two-digit SGPRs; fixed to match the rest of the file.
; GCN-LABEL: {{^}}atomic_add_i32_soffset:
; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0
; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}

; GFX9: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8000{{$}}
; GFX9: global_atomic_add [[OFFSET]], v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:3232{{$}}
define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
36
; Offset far beyond any immediate range: SI folds it into the 64-bit VGPR
; address pair (0xabcd:0xdeac halves), VI falls back to a flat atomic, and
; GFX9 adds the bulk with a 64-bit carry chain (0xd000 + imm 3756 = 0xdeac
; in the low half) keeping only 3756 as the immediate offset.
; GCN-LABEL: {{^}}atomic_add_i32_huge_offset:
; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac
; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd
; SI: buffer_atomic_add v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}

; VI: flat_atomic_add

; GFX9: v_mov_b32_e32 [[HIGH_K:v[0-9]+]], 0xabcd
; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xd000,
; GFX9-NEXT: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, [[HIGH_K]], v{{[0-9]+}}, vcc
; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:3756{{$}}
define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595

  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
55
; Returned-value variant: because %val is used, the atomic must carry glc so
; the old value comes back, and that result is stored to %out2.
; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
68
; Variable 64-bit index plus constant gep: SI uses buffer addr64 with the
; offset folded, VI uses a flat atomic, GFX9 a global atomic off a VGPR pair.
; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
80
; Variable index + constant offset, with the old value used: glc form of the
; atomic, followed by a store of the returned value to %out2.
; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s
define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
96
; Atomic add directly on %out: no immediate offset expected on the instruction.
; GCN-LABEL: {{^}}atomic_add_i32:
; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}
105
; No-offset returned-value variant: glc atomic, result stored to %out2.
; GCN-LABEL: {{^}}atomic_add_i32_ret:
; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s
define amdgpu_kernel void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
118
; Variable 64-bit index, no extra constant offset.
; GCN-LABEL: {{^}}atomic_add_i32_addr64:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}
129
; Variable index, old value used: glc atomic and a store of the result.
; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
143
; seq_cst atomic and with constant 16-byte offset folded into the instruction.
; GCN-LABEL: {{^}}atomic_and_i32_offset:
; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
154
; Returned-value variant: glc atomic, result stored to %out2.
; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
167
; Variable 64-bit index plus constant gep: buffer addr64 on SI, flat on VI,
; global with VGPR address pair on GFX9.
; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
180
; Variable index + constant offset with the old value used: glc atomic, then
; a store of the returned value.
; NOTE(review): the VI and GFX9 [[RET]] captures previously used v[0-9]
; (no '+'), which only matches single-digit VGPRs and diverges from every
; sibling block (e.g. atomic_and_i32_ret_addr64); fixed to v[0-9]+.
; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
195
; Atomic and directly on %out, no offset.
; GCN-LABEL: {{^}}atomic_and_i32:
; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}
205
; No-offset returned-value variant: glc atomic, result stored to %out2.
; GCN-LABEL: {{^}}atomic_and_i32_ret:
; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
217
; Variable 64-bit index, no constant offset.
; GCN-LABEL: {{^}}atomic_and_i32_addr64:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}
229
; Variable index, old value used: glc atomic and a store of the result.
; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
243
; seq_cst atomic sub with constant 16-byte offset folded into the instruction.
; GCN-LABEL: {{^}}atomic_sub_i32_offset:
; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
254
; Returned-value variant: glc atomic, result stored to %out2.
; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
267
; Variable 64-bit index plus constant gep: buffer addr64 on SI, flat on VI,
; global with VGPR address pair on GFX9.
; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
280
; Variable index + constant offset, old value used: glc atomic, result stored.
; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
295
; Atomic sub directly on %out, no offset.
; GCN-LABEL: {{^}}atomic_sub_i32:
; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}
305
; No-offset returned-value variant: glc atomic, result stored to %out2.
; GCN-LABEL: {{^}}atomic_sub_i32_ret:
; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
317
; Variable 64-bit index, no constant offset.
; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}
329
; Variable index, old value used: glc atomic and a store of the result.
; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
343
; Signed max maps to the smax instruction; 16-byte offset folded.
; GCN-LABEL: {{^}}atomic_max_i32_offset:
; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
354
; Returned-value variant: glc smax, result stored to %out2.
; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
367
; Variable 64-bit index plus constant gep: buffer addr64 on SI, flat on VI,
; global with VGPR address pair on GFX9.
; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
380
; Variable index + constant offset, old value used: glc smax, result stored.
; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
395
; Atomic signed max directly on %out, no offset.
; GCN-LABEL: {{^}}atomic_max_i32:
; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}
405
; No-offset returned-value variant: glc smax, result stored to %out2.
; GCN-LABEL: {{^}}atomic_max_i32_ret:
; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
417
; Variable 64-bit index, no constant offset.
; GCN-LABEL: {{^}}atomic_max_i32_addr64:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}
429
; Variable index, old value used: glc smax and a store of the result.
; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
443
; Unsigned max maps to the umax instruction; 16-byte offset folded.
; GCN-LABEL: {{^}}atomic_umax_i32_offset:
; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
454
; Returned-value variant: glc umax, result stored to %out2.
; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
467
; Variable 64-bit index plus constant gep: buffer addr64 on SI, flat on VI,
; global with VGPR address pair on GFX9.
; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
479
; Variable index + constant offset, old value used: glc umax, result stored.
; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
494
; Atomic unsigned max directly on %out, no offset.
; GCN-LABEL: {{^}}atomic_umax_i32:
; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}
504
; No-offset returned-value variant: glc umax, result stored to %out2.
; GCN-LABEL: {{^}}atomic_umax_i32_ret:
; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
516
; Variable 64-bit index, no constant offset.
; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}
527
; Variable index, old value used: glc umax and a store of the result.
; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
541
; Signed min maps to the smin instruction; 16-byte offset folded.
; GCN-LABEL: {{^}}atomic_min_i32_offset:
; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
552
; Returned-value variant: glc smin, result stored to %out2.
; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
565
; Variable 64-bit index plus constant gep: buffer addr64 on SI, flat on VI,
; global with VGPR address pair on GFX9.
; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16
define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
577
; Variable index + constant offset, old value used: glc smin, result stored.
; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
592
; Atomic signed min directly on %out, no offset.
; GCN-LABEL: {{^}}atomic_min_i32:
; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}
602
; No-offset returned-value variant: glc smin, result stored to %out2.
; GCN-LABEL: {{^}}atomic_min_i32_ret:
; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
614
; Variable 64-bit index, no constant offset.
; GCN-LABEL: {{^}}atomic_min_i32_addr64:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}
625
; Variable index, old value used: glc smin and a store of the result.
; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
639
; Unsigned min maps to the umin instruction; 16-byte offset folded.
; GCN-LABEL: {{^}}atomic_umin_i32_offset:
; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
650
; Returned-value variant: glc umin, result stored to %out2.
; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
663
; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
; Unsigned-min atomic at runtime index plus constant +16 offset; result unused.
define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
675
; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
; Unsigned-min atomic at runtime index plus +16 offset, old value stored; expect glc.
define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
690
; GCN-LABEL: {{^}}atomic_umin_i32:
; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
; Unsigned-min atomic directly on the base pointer (no offset); result unused.
define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}
699
; GCN-LABEL: {{^}}atomic_umin_i32_ret:
; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; Unsigned-min atomic on the base pointer whose old value is stored; expect glc.
define amdgpu_kernel void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
711
; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
; Unsigned-min atomic at a runtime 64-bit index; result unused.
define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}
722
; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
; Unsigned-min atomic at a runtime index whose old value is stored; expect glc.
define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
736
; GCN-LABEL: {{^}}atomic_or_i32_offset:
; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
; OR atomic with a constant +16 byte offset folded into the instruction; result unused.
define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
747
; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; OR atomic with +16 offset whose old value is stored; expect glc.
define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
760
; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16
; OR atomic at runtime index plus constant +16 offset; result unused.
define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
772
; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
; OR atomic at runtime index plus +16 offset, old value stored; expect glc.
define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
787
; GCN-LABEL: {{^}}atomic_or_i32:
; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
; OR atomic directly on the base pointer (no offset); result unused.
define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}
797
; GCN-LABEL: {{^}}atomic_or_i32_ret:
; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
; OR atomic on the base pointer whose old value is stored; expect glc.
define amdgpu_kernel void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
809
; GCN-LABEL: {{^}}atomic_or_i32_addr64:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
; OR atomic at a runtime 64-bit index; result unused.
define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}
820
; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
; OR atomic at a runtime index whose old value is stored; expect glc.
define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
834
; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
; Exchange atomic (i32) with a constant +16 byte offset; result unused.
define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
845
; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
; Exchange atomic on float data uses the same swap instruction as the i32 case.
define amdgpu_kernel void @atomic_xchg_f32_offset(float addrspace(1)* %out, float %in) {
entry:
  %gep = getelementptr float, float addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xchg float addrspace(1)* %gep, float %in seq_cst
  ret void
}
856
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; Exchange atomic with +16 offset whose old value is stored; expect glc.
define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
869
; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
; Exchange atomic at runtime index plus constant +16 offset; result unused.
define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
881
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
; Exchange atomic at runtime index plus +16 offset, old value stored; expect glc.
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
896
; GCN-LABEL: {{^}}atomic_xchg_i32:
; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
; Exchange atomic directly on the base pointer (no offset); result unused.
define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}
905
; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
; Exchange atomic on the base pointer whose old value is stored; expect glc.
define amdgpu_kernel void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
917
; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
; Exchange atomic at a runtime 64-bit index; result unused.
define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}
928
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_swap [[RET:v[0-9]+]],  v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_swap [[RET:v[0-9]+]],  v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
; Exchange atomic at a runtime index whose old value is stored; expect glc.
; NOTE(review): the double spaces in the VI/GFX9 patterns are harmless — FileCheck canonicalizes horizontal whitespace.
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
942
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
; SIVI: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
; Compare-and-swap with +16 offset; cmpswap takes the data/compare pair as a register pair. Result unused.
define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
953
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword v[[RET]]

; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; Compare-and-swap with +16 offset; the loaded value (low register of the pair) is extracted and stored.
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
  %extract0 = extractvalue { i32, i1 } %val, 0
  store i32 %extract0, i32 addrspace(1)* %out2
  ret void
}
967
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}

; VI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: global_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16{{$}}
; Compare-and-swap at runtime index plus +16 offset; result unused.
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
980
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword v[[RET]]

; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
; Compare-and-swap at runtime index plus +16 offset; loaded value extracted and stored, so expect glc.
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
  %extract0 = extractvalue { i32, i1 } %val, 0
  store i32 %extract0, i32 addrspace(1)* %out2
  ret void
}
996
; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
; SIVI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}}
; Compare-and-swap directly on the base pointer (no offset); result unused.
define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
1006
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword v[[RET]]

; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; Compare-and-swap on the base pointer; loaded value extracted and stored, so expect glc.
define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
entry:
  %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
  %extract0 = extractvalue { i32, i1 } %val, 0
  store i32 %extract0, i32 addrspace(1)* %out2
  ret void
}
1019
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
; Compare-and-swap at a runtime 64-bit index; result unused.
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
1030
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword v[[RET]]

; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off glc{{$}}
; Compare-and-swap at a runtime index; loaded value extracted and stored, so expect glc.
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
  %extract0 = extractvalue { i32, i1 } %val, 0
  store i32 %extract0, i32 addrspace(1)* %out2
  ret void
}
1045
; GCN-LABEL: {{^}}atomic_xor_i32_offset:
; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
; XOR atomic with a constant +16 byte offset; result unused.
define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
1056
; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; XOR atomic with +16 offset whose old value is stored; expect glc.
define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
1069
; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
; XOR atomic at runtime index plus constant +16 offset; result unused.
define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}
1081
; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
; XOR atomic at runtime index plus +16 offset, old value stored; expect glc.
define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
1096
; GCN-LABEL: {{^}}atomic_xor_i32:
; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
; XOR atomic directly on the base pointer (no offset); result unused.
define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}
1105
; GCN-LABEL: {{^}}atomic_xor_i32_ret:
; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_xor [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; XOR atomic on the base pointer whose old value is stored; expect glc.
define amdgpu_kernel void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
1117
; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
; XOR atomic at a runtime 64-bit index; result unused.
define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}
1128
; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
; XOR atomic at a runtime index whose old value is stored; expect glc.
define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}
1142
; GCN-LABEL: {{^}}atomic_load_i32_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; seq_cst atomic load with +16 offset lowers to a plain load with glc to bypass the L1 cache.
define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %in, i64 4
  %val = load atomic i32, i32 addrspace(1)* %gep  seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
1156
; GCN-LABEL: {{^}}atomic_load_i32_negoffset:
; SI: buffer_load_dword [[RET:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}

; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xfffffe00
; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-512 glc{{$}}
; Negative -512 byte offset: GFX9 folds it into the instruction; VI must materialize it with a 64-bit scalar add; SI uses addr64.
define amdgpu_kernel void @atomic_load_i32_negoffset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %in, i64 -128
  %val = load atomic i32, i32 addrspace(1)* %gep  seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
1172
; GCN-LABEL: {{^}}atomic_load_f32_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; Float atomic load uses the same dword load as the i32 case.
define amdgpu_kernel void @atomic_load_f32_offset(float addrspace(1)* %in, float addrspace(1)* %out) {
entry:
  %gep = getelementptr float, float addrspace(1)* %in, i64 4
  %val = load atomic float, float addrspace(1)* %gep  seq_cst, align 4
  store float %val, float addrspace(1)* %out
  ret void
}
1186
; GCN-LABEL: {{^}}atomic_load_i32:
; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc
; seq_cst atomic load directly from the base pointer; expect a glc load.
define amdgpu_kernel void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
1199
; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
; seq_cst atomic load at runtime index plus constant +16 offset; expect a glc load.
define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
1214
; GCN-LABEL: {{^}}atomic_load_i32_addr64:
; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off glc{{$}}
; seq_cst atomic load at a runtime 64-bit index; expect a glc load.
define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
  %val = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
1228
; GCN-LABEL: {{^}}atomic_load_f32_addr64_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
; Float atomic load at runtime index plus +16 offset; same lowering as i32.
define amdgpu_kernel void @atomic_load_f32_addr64_offset(float addrspace(1)* %in, float addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr float, float addrspace(1)* %in, i64 %index
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4
  %val = load atomic float, float addrspace(1)* %gep seq_cst, align 4
  store float %val, float addrspace(1)* %out
  ret void
}
1243
; GCN-LABEL: {{^}}atomic_store_i32_offset:
; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
; seq_cst atomic store with +16 offset lowers to a plain store.
define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  store atomic i32 %in, i32 addrspace(1)* %gep  seq_cst, align 4
  ret void
}
1254
; GCN-LABEL: {{^}}atomic_store_i32:
; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
; seq_cst atomic store to the base pointer lowers to a plain store.
define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
  ret void
}
1264
; GCN-LABEL: {{^}}atomic_store_f32:
; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; Float atomic store uses the same dword store as the i32 case.
define amdgpu_kernel void @atomic_store_f32(float %in, float addrspace(1)* %out) {
entry:
  store atomic float %in, float addrspace(1)* %out seq_cst, align 4
  ret void
}
1274
; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}
; seq_cst atomic store at runtime index plus constant +16 offset.
define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
  ret void
}
1286
; GCN-LABEL: {{^}}atomic_store_f32_addr64_offset:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}
; Float atomic store at runtime index plus +16 offset; same lowering as i32.
define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, float addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr float, float addrspace(1)* %out, i64 %index
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4
  store atomic float %in, float addrspace(1)* %gep seq_cst, align 4
  ret void
}
1298
; GCN-LABEL: {{^}}atomic_store_i32_addr64:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
; seq_cst atomic store at a runtime 64-bit index.
define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4
  ret void
}
1309
; GCN-LABEL: {{^}}atomic_store_f32_addr64:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
; Float atomic store at a runtime 64-bit index; same lowering as i32.
define amdgpu_kernel void @atomic_store_f32_addr64(float %in, float addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr float, float addrspace(1)* %out, i64 %index
  store atomic float %in, float addrspace(1)* %ptr seq_cst, align 4
  ret void
}
1320