1; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
3; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
4
; atomicrmw add on %out advanced by 4 i32 elements (16 bytes); the result is unused.
; CIVI checks expect no offset operand; GFX9 checks expect offset:16.
5; GCN-LABEL: {{^}}atomic_add_i32_offset:
6; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
7; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
8define amdgpu_kernel void @atomic_add_i32_offset(i32* %out, i32 %in) {
9entry:
10  %gep = getelementptr i32, i32* %out, i32 4
11  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
12  ret void
13}
14
; atomicrmw add on %out advanced by 1023 i32 elements (4092 bytes); the result is unused.
; CIVI checks expect no offset operand; GFX9 checks expect offset:4092.
15; GCN-LABEL: {{^}}atomic_add_i32_max_offset:
16; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
17; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}}
18define amdgpu_kernel void @atomic_add_i32_max_offset(i32* %out, i32 %in) {
19entry:
20  %gep = getelementptr i32, i32* %out, i32 1023
21  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
22  ret void
23}
24
; atomicrmw add on %out advanced by 1024 i32 elements (4096 bytes), one element further
; than the max_offset test; the result is unused. A single shared GCN pattern expects
; no offset operand on any of the three run lines.
25; GCN-LABEL: {{^}}atomic_add_i32_max_offset_p1:
26; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
27define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32* %out, i32 %in) {
28entry:
29  %gep = getelementptr i32, i32* %out, i32 1024
30  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
31  ret void
32}
33
; atomicrmw add at a 4-element (16-byte) GEP from %out, with the result stored to %out2.
; The checks expect the glc form (offset:16 only on GFX9) and a flat_store_dword of the
; register captured as RET.
34; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
35; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
36; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
37; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
38define amdgpu_kernel void @atomic_add_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
39entry:
40  %gep = getelementptr i32, i32* %out, i32 4
41  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
42  store i32 %val, i32* %out2
43  ret void
44}
45
; atomicrmw add at %out indexed by the i64 %index plus 4 more i32 elements (16 bytes);
; the result is unused. CIVI checks expect no offset operand; GFX9 checks expect offset:16.
46; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
47; CIVI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
48; GFX9: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
49define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
50entry:
51  %ptr = getelementptr i32, i32* %out, i64 %index
52  %gep = getelementptr i32, i32* %ptr, i32 4
53  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
54  ret void
55}
56
; atomicrmw add at %out indexed by the i64 %index plus 4 more i32 elements (16 bytes),
; with the result stored to %out2. The checks expect the glc form (offset:16 only on
; GFX9) and a flat_store_dword of the register captured as RET.
57; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
58; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
59; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
60; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
61define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
62entry:
63  %ptr = getelementptr i32, i32* %out, i64 %index
64  %gep = getelementptr i32, i32* %ptr, i32 4
65  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
66  store i32 %val, i32* %out2
67  ret void
68}
69
; atomicrmw add directly on %out with the result unused; one shared GCN pattern
; expects the instruction to end right after the data register.
70; GCN-LABEL: {{^}}atomic_add_i32:
71; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
72define amdgpu_kernel void @atomic_add_i32(i32* %out, i32 %in) {
73entry:
74  %val = atomicrmw volatile add i32* %out, i32 %in seq_cst
75  ret void
76}
77
; atomicrmw add directly on %out with the result stored to %out2; the shared GCN
; checks expect the glc form and a flat_store_dword of the register captured as RET.
78; GCN-LABEL: {{^}}atomic_add_i32_ret:
79; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
80; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
81define amdgpu_kernel void @atomic_add_i32_ret(i32* %out, i32* %out2, i32 %in) {
82entry:
83  %val = atomicrmw volatile add i32* %out, i32 %in seq_cst
84  store i32 %val, i32* %out2
85  ret void
86}
87
; atomicrmw add at %out indexed by the i64 %index, result unused; one shared GCN
; pattern expects the instruction to end right after the data register.
88; GCN-LABEL: {{^}}atomic_add_i32_addr64:
89; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
90define amdgpu_kernel void @atomic_add_i32_addr64(i32* %out, i32 %in, i64 %index) {
91entry:
92  %ptr = getelementptr i32, i32* %out, i64 %index
93  %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst
94  ret void
95}
96
; atomicrmw add at %out indexed by the i64 %index, with the result stored to %out2;
; the shared GCN checks expect the glc form and a flat_store_dword of the register
; captured as RET.
97; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
98; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
99; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
100define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
101entry:
102  %ptr = getelementptr i32, i32* %out, i64 %index
103  %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst
104  store i32 %val, i32* %out2
105  ret void
106}
107
; atomicrmw and on %out advanced by 4 i32 elements (16 bytes); the result is unused.
; CIVI checks expect no offset operand; GFX9 checks expect offset:16.
108; GCN-LABEL: {{^}}atomic_and_i32_offset:
109; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
110; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
111define amdgpu_kernel void @atomic_and_i32_offset(i32* %out, i32 %in) {
112entry:
113  %gep = getelementptr i32, i32* %out, i32 4
114  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
115  ret void
116}
117
; atomicrmw and at a 4-element (16-byte) GEP from %out, with the result stored to %out2.
; The checks expect the glc form (offset:16 only on GFX9) and a flat_store_dword of the
; register captured as RET. The RET capture accepts a register number of any length,
; so the test does not depend on the allocator picking v0-v9.
118; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
119; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
120; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
121; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
122define amdgpu_kernel void @atomic_and_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
123entry:
124  %gep = getelementptr i32, i32* %out, i32 4
125  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
126  store i32 %val, i32* %out2
127  ret void
128}
129
; atomicrmw and at %out indexed by the i64 %index plus 4 more i32 elements (16 bytes);
; the result is unused. CIVI checks expect no offset operand; GFX9 checks expect offset:16.
130; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
131; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
132; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
133define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
134entry:
135  %ptr = getelementptr i32, i32* %out, i64 %index
136  %gep = getelementptr i32, i32* %ptr, i32 4
137  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
138  ret void
139}
140
; atomicrmw and at %out indexed by the i64 %index plus 4 more i32 elements (16 bytes),
; with the result stored to %out2. The checks expect the glc form (offset:16 only on
; GFX9) and a flat_store_dword of the register captured as RET. The RET capture accepts
; a register number of any length, so the test does not depend on the allocator
; picking v0-v9.
141; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
142; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
143; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
144; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
145define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
146entry:
147  %ptr = getelementptr i32, i32* %out, i64 %index
148  %gep = getelementptr i32, i32* %ptr, i32 4
149  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
150  store i32 %val, i32* %out2
151  ret void
152}
153
; atomicrmw and directly on %out with the result unused; one shared GCN pattern
; expects the instruction to end right after the data register.
154; GCN-LABEL: {{^}}atomic_and_i32:
155; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
156define amdgpu_kernel void @atomic_and_i32(i32* %out, i32 %in) {
157entry:
158  %val = atomicrmw volatile and i32* %out, i32 %in seq_cst
159  ret void
160}
161
; atomicrmw and directly on %out with the result stored to %out2; the shared GCN
; checks expect the glc form and a flat_store_dword of the register captured as RET.
162; GCN-LABEL: {{^}}atomic_and_i32_ret:
163; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
164; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
165define amdgpu_kernel void @atomic_and_i32_ret(i32* %out, i32* %out2, i32 %in) {
166entry:
167  %val = atomicrmw volatile and i32* %out, i32 %in seq_cst
168  store i32 %val, i32* %out2
169  ret void
170}
171
; atomicrmw and at %out indexed by the i64 %index, result unused; one shared GCN
; pattern expects the instruction to end right after the data register.
172; GCN-LABEL: {{^}}atomic_and_i32_addr64:
173; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
174define amdgpu_kernel void @atomic_and_i32_addr64(i32* %out, i32 %in, i64 %index) {
175entry:
176  %ptr = getelementptr i32, i32* %out, i64 %index
177  %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst
178  ret void
179}
180
; atomicrmw and at %out indexed by the i64 %index, with the result stored to %out2;
; the shared GCN checks expect the glc form and a flat_store_dword of the register
; captured as RET.
181; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
182; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
183; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
184define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
185entry:
186  %ptr = getelementptr i32, i32* %out, i64 %index
187  %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst
188  store i32 %val, i32* %out2
189  ret void
190}
191
; atomicrmw sub on %out advanced by 4 i32 elements (16 bytes); the result is unused.
; CIVI checks expect no offset operand; GFX9 checks expect offset:16.
192; GCN-LABEL: {{^}}atomic_sub_i32_offset:
193; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
194; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
195define amdgpu_kernel void @atomic_sub_i32_offset(i32* %out, i32 %in) {
196entry:
197  %gep = getelementptr i32, i32* %out, i32 4
198  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
199  ret void
200}
201
; atomicrmw sub at a 4-element (16-byte) GEP from %out, with the result stored to %out2.
; The checks expect the glc form (offset:16 only on GFX9) and a flat_store_dword of the
; register captured as RET.
202; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
203; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
204; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
205; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
206define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
207entry:
208  %gep = getelementptr i32, i32* %out, i32 4
209  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
210  store i32 %val, i32* %out2
211  ret void
212}
213
; atomicrmw sub at %out indexed by the i64 %index plus 4 more i32 elements (16 bytes);
; the result is unused. CIVI checks expect no offset operand; GFX9 checks expect offset:16.
214; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
215; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
216; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
217define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
218entry:
219  %ptr = getelementptr i32, i32* %out, i64 %index
220  %gep = getelementptr i32, i32* %ptr, i32 4
221  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
222  ret void
223}
224
; atomicrmw sub at %out indexed by the i64 %index plus 4 more i32 elements (16 bytes),
; with the result stored to %out2. The checks expect the glc form (offset:16 only on
; GFX9) and a flat_store_dword of the register captured as RET.
225; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
226; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
227; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
228; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
229define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
230entry:
231  %ptr = getelementptr i32, i32* %out, i64 %index
232  %gep = getelementptr i32, i32* %ptr, i32 4
233  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
234  store i32 %val, i32* %out2
235  ret void
236}
237
; atomicrmw sub directly on %out with the result unused; one shared GCN pattern
; expects the instruction to end right after the data register.
238; GCN-LABEL: {{^}}atomic_sub_i32:
239; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
240define amdgpu_kernel void @atomic_sub_i32(i32* %out, i32 %in) {
241entry:
242  %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst
243  ret void
244}
245
; atomicrmw sub directly on %out with the result stored to %out2; the shared GCN
; checks expect the glc form and a flat_store_dword of the register captured as RET.
246; GCN-LABEL: {{^}}atomic_sub_i32_ret:
247; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
248; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
249define amdgpu_kernel void @atomic_sub_i32_ret(i32* %out, i32* %out2, i32 %in) {
250entry:
251  %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst
252  store i32 %val, i32* %out2
253  ret void
254}
255
; atomicrmw sub at %out indexed by the i64 %index, result unused; one shared GCN
; pattern expects the instruction to end right after the data register.
256; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
257; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
258define amdgpu_kernel void @atomic_sub_i32_addr64(i32* %out, i32 %in, i64 %index) {
259entry:
260  %ptr = getelementptr i32, i32* %out, i64 %index
261  %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst
262  ret void
263}
264
; atomicrmw sub at %out indexed by the i64 %index, with the result stored to %out2;
; the shared GCN checks expect the glc form and a flat_store_dword of the register
; captured as RET.
265; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
266; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
267; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
268define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
269entry:
270  %ptr = getelementptr i32, i32* %out, i64 %index
271  %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst
272  store i32 %val, i32* %out2
273  ret void
274}
275
; atomicrmw max (checked as flat_atomic_smax) on %out advanced by 4 i32 elements
; (16 bytes); the result is unused. CIVI checks expect no offset operand; GFX9 checks
; expect offset:16.
276; GCN-LABEL: {{^}}atomic_max_i32_offset:
277; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
278; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
279define amdgpu_kernel void @atomic_max_i32_offset(i32* %out, i32 %in) {
280entry:
281  %gep = getelementptr i32, i32* %out, i32 4
282  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
283  ret void
284}
285
; atomicrmw max (checked as flat_atomic_smax) at a 4-element (16-byte) GEP from %out,
; with the result stored to %out2. The checks expect the glc form (offset:16 only on
; GFX9) and a flat_store_dword of the register captured as RET.
286; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
287; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
288; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
289; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
290define amdgpu_kernel void @atomic_max_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
291entry:
292  %gep = getelementptr i32, i32* %out, i32 4
293  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
294  store i32 %val, i32* %out2
295  ret void
296}
297
; atomicrmw max (checked as flat_atomic_smax) at %out indexed by the i64 %index plus
; 4 more i32 elements (16 bytes); the result is unused. CIVI checks expect no offset
; operand; GFX9 checks expect offset:16.
298; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
299; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
300; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
301define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
302entry:
303  %ptr = getelementptr i32, i32* %out, i64 %index
304  %gep = getelementptr i32, i32* %ptr, i32 4
305  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
306  ret void
307}
308
; atomicrmw max (checked as flat_atomic_smax) at %out indexed by the i64 %index plus
; 4 more i32 elements (16 bytes), with the result stored to %out2. The checks expect
; the glc form (offset:16 only on GFX9) and a flat_store_dword of the register
; captured as RET.
309; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
310; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
311; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
312; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
313define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
314entry:
315  %ptr = getelementptr i32, i32* %out, i64 %index
316  %gep = getelementptr i32, i32* %ptr, i32 4
317  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
318  store i32 %val, i32* %out2
319  ret void
320}
321
; atomicrmw max (checked as flat_atomic_smax) directly on %out with the result unused;
; one shared GCN pattern expects the instruction to end right after the data register.
322; GCN-LABEL: {{^}}atomic_max_i32:
323; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
324define amdgpu_kernel void @atomic_max_i32(i32* %out, i32 %in) {
325entry:
326  %val = atomicrmw volatile max i32* %out, i32 %in seq_cst
327  ret void
328}
329
; atomicrmw max (checked as flat_atomic_smax) directly on %out with the result stored
; to %out2; the shared GCN checks expect the glc form and a flat_store_dword of the
; register captured as RET.
330; GCN-LABEL: {{^}}atomic_max_i32_ret:
331; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
332; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
333define amdgpu_kernel void @atomic_max_i32_ret(i32* %out, i32* %out2, i32 %in) {
334entry:
335  %val = atomicrmw volatile max i32* %out, i32 %in seq_cst
336  store i32 %val, i32* %out2
337  ret void
338}
339
; atomicrmw max (checked as flat_atomic_smax) at %out indexed by the i64 %index,
; result unused; one shared GCN pattern expects the instruction to end right after
; the data register.
340; GCN-LABEL: {{^}}atomic_max_i32_addr64:
341; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
342define amdgpu_kernel void @atomic_max_i32_addr64(i32* %out, i32 %in, i64 %index) {
343entry:
344  %ptr = getelementptr i32, i32* %out, i64 %index
345  %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst
346  ret void
347}
348
; atomicrmw max (checked as flat_atomic_smax) at %out indexed by the i64 %index, with
; the result stored to %out2; the shared GCN checks expect the glc form and a
; flat_store_dword of the register captured as RET.
349; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
350; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
351; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
352define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
353entry:
354  %ptr = getelementptr i32, i32* %out, i64 %index
355  %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst
356  store i32 %val, i32* %out2
357  ret void
358}
359
; atomicrmw umax on %out advanced by 4 i32 elements (16 bytes); the result is unused.
; CIVI checks expect no offset operand; GFX9 checks expect offset:16.
360; GCN-LABEL: {{^}}atomic_umax_i32_offset:
361; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
362; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
363define amdgpu_kernel void @atomic_umax_i32_offset(i32* %out, i32 %in) {
364entry:
365  %gep = getelementptr i32, i32* %out, i32 4
366  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
367  ret void
368}
369
; atomicrmw umax at a 4-element (16-byte) GEP from %out, with the result stored to %out2.
; The checks expect the glc form (offset:16 only on GFX9) and a flat_store_dword of the
; register captured as RET.
370; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
371; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
372; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
373; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
374define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
375entry:
376  %gep = getelementptr i32, i32* %out, i32 4
377  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
378  store i32 %val, i32* %out2
379  ret void
380}
381
; atomicrmw umax at %out indexed by the i64 %index plus 4 more i32 elements (16 bytes);
; the result is unused. CIVI checks expect no offset operand; GFX9 checks expect offset:16.
382; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
383; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
384; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
385define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
386entry:
387  %ptr = getelementptr i32, i32* %out, i64 %index
388  %gep = getelementptr i32, i32* %ptr, i32 4
389  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
390  ret void
391}
392
; atomicrmw umax at %out indexed by the i64 %index plus 4 more i32 elements (16 bytes),
; with the result stored to %out2. The checks expect the glc form (offset:16 only on
; GFX9) and a flat_store_dword of the register captured as RET.
393; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
394; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
395; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
396; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
397define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
398entry:
399  %ptr = getelementptr i32, i32* %out, i64 %index
400  %gep = getelementptr i32, i32* %ptr, i32 4
401  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
402  store i32 %val, i32* %out2
403  ret void
404}
405
; atomicrmw umax directly on %out with the result unused; one shared GCN pattern
; expects the instruction to end right after the data register.
406; GCN-LABEL: {{^}}atomic_umax_i32:
407; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
408define amdgpu_kernel void @atomic_umax_i32(i32* %out, i32 %in) {
409entry:
410  %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst
411  ret void
412}
413
; atomicrmw umax directly on %out with the result stored to %out2; the shared GCN
; checks expect the glc form and a flat_store_dword of the register captured as RET.
414; GCN-LABEL: {{^}}atomic_umax_i32_ret:
415; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
416; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
417define amdgpu_kernel void @atomic_umax_i32_ret(i32* %out, i32* %out2, i32 %in) {
418entry:
419  %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst
420  store i32 %val, i32* %out2
421  ret void
422}
423
; atomicrmw umax at %out indexed by the i64 %index, result unused; one shared GCN
; pattern expects the instruction to end right after the data register.
424; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
425; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
426define amdgpu_kernel void @atomic_umax_i32_addr64(i32* %out, i32 %in, i64 %index) {
427entry:
428  %ptr = getelementptr i32, i32* %out, i64 %index
429  %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst
430  ret void
431}
432
; atomicrmw umax at %out indexed by the i64 %index, with the result stored to %out2;
; the shared GCN checks expect the glc form and a flat_store_dword of the register
; captured as RET.
433; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
434; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
435; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
436define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
437entry:
438  %ptr = getelementptr i32, i32* %out, i64 %index
439  %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst
440  store i32 %val, i32* %out2
441  ret void
442}
443
; atomicrmw min (checked as flat_atomic_smin) on %out advanced by 4 i32 elements
; (16 bytes); the result is unused. CIVI checks expect no offset operand; GFX9 checks
; expect offset:16.
444; GCN-LABEL: {{^}}atomic_min_i32_offset:
445; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
446; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
447define amdgpu_kernel void @atomic_min_i32_offset(i32* %out, i32 %in) {
448entry:
449  %gep = getelementptr i32, i32* %out, i32 4
450  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
451  ret void
452}
453
; atomicrmw min (checked as flat_atomic_smin) at a 4-element (16-byte) GEP from %out,
; with the result stored to %out2. The checks expect the glc form (offset:16 only on
; GFX9) and a flat_store_dword of the register captured as RET.
454; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
455; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
456; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
457; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
458define amdgpu_kernel void @atomic_min_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
459entry:
460  %gep = getelementptr i32, i32* %out, i32 4
461  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
462  store i32 %val, i32* %out2
463  ret void
464}
465
; atomicrmw min (checked as flat_atomic_smin) at %out indexed by the i64 %index plus
; 4 more i32 elements (16 bytes); the result is unused. CIVI checks expect no offset
; operand; GFX9 checks expect offset:16.
466; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
467; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
468; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
469define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
470entry:
471  %ptr = getelementptr i32, i32* %out, i64 %index
472  %gep = getelementptr i32, i32* %ptr, i32 4
473  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
474  ret void
475}
476
; atomicrmw min (checked as flat_atomic_smin) at %out indexed by the i64 %index plus
; 4 more i32 elements (16 bytes), with the result stored to %out2. The checks expect
; the glc form (offset:16 only on GFX9) and a flat_store_dword of the register
; captured as RET.
477; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
478; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
479; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
480; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
481define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
482entry:
483  %ptr = getelementptr i32, i32* %out, i64 %index
484  %gep = getelementptr i32, i32* %ptr, i32 4
485  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
486  store i32 %val, i32* %out2
487  ret void
488}
489
; atomicrmw min (checked as flat_atomic_smin) directly on %out with the result unused;
; one shared GCN pattern expects the instruction to end right after the data register.
490; GCN-LABEL: {{^}}atomic_min_i32:
491; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
492define amdgpu_kernel void @atomic_min_i32(i32* %out, i32 %in) {
493entry:
494  %val = atomicrmw volatile min i32* %out, i32 %in seq_cst
495  ret void
496}
497
; atomicrmw min (checked as flat_atomic_smin) directly on %out with the result stored
; to %out2; the shared GCN checks expect the glc form and a flat_store_dword of the
; register captured as RET.
498; GCN-LABEL: {{^}}atomic_min_i32_ret:
499; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
500; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
501define amdgpu_kernel void @atomic_min_i32_ret(i32* %out, i32* %out2, i32 %in) {
502entry:
503  %val = atomicrmw volatile min i32* %out, i32 %in seq_cst
504  store i32 %val, i32* %out2
505  ret void
506}
507
; atomicrmw min (checked as flat_atomic_smin) at %out indexed by the i64 %index,
; result unused; one shared GCN pattern expects the instruction to end right after
; the data register.
508; GCN-LABEL: {{^}}atomic_min_i32_addr64:
509; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
510define amdgpu_kernel void @atomic_min_i32_addr64(i32* %out, i32 %in, i64 %index) {
511entry:
512  %ptr = getelementptr i32, i32* %out, i64 %index
513  %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst
514  ret void
515}
516
; atomicrmw min (checked as flat_atomic_smin) at %out indexed by the i64 %index, with
; the result stored to %out2; the shared GCN checks expect the glc form and a
; flat_store_dword of the register captured as RET.
517; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
518; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
519; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
520define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
521entry:
522  %ptr = getelementptr i32, i32* %out, i64 %index
523  %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst
524  store i32 %val, i32* %out2
525  ret void
526}
527
; atomicrmw umin on %out advanced by 4 i32 elements (16 bytes); the result is unused.
; CIVI checks expect no offset operand; GFX9 checks expect offset:16.
528; GCN-LABEL: {{^}}atomic_umin_i32_offset:
529; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
530; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
531define amdgpu_kernel void @atomic_umin_i32_offset(i32* %out, i32 %in) {
532entry:
533  %gep = getelementptr i32, i32* %out, i32 4
534  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
535  ret void
536}
537
; atomicrmw umin at a 4-element (16-byte) GEP from %out, with the result stored to %out2.
; The checks expect the glc form (offset:16 only on GFX9) and a flat_store_dword of the
; register captured as RET.
538; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
539; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
540; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
541; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
542define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
543entry:
544  %gep = getelementptr i32, i32* %out, i32 4
545  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
546  store i32 %val, i32* %out2
547  ret void
548}
549
; atomicrmw umin at %out indexed by the i64 %index plus 4 more i32 elements (16 bytes);
; the result is unused. CIVI checks expect no offset operand; GFX9 checks expect offset:16.
550; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
551; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
552; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
553define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
554entry:
555  %ptr = getelementptr i32, i32* %out, i64 %index
556  %gep = getelementptr i32, i32* %ptr, i32 4
557  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
558  ret void
559}
560
; atomicrmw umin at %out indexed by the i64 %index plus 4 more i32 elements (16 bytes),
; with the result stored to %out2. The checks expect the glc form (offset:16 only on
; GFX9) and a flat_store_dword of the register captured as RET.
561; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
562; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
563; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
564; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
565define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
566entry:
567  %ptr = getelementptr i32, i32* %out, i64 %index
568  %gep = getelementptr i32, i32* %ptr, i32 4
569  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
570  store i32 %val, i32* %out2
571  ret void
572}
573
; atomicrmw umin directly on %out with the result unused; one shared GCN pattern
; expects the instruction to end right after the data register.
574; GCN-LABEL: {{^}}atomic_umin_i32:
575; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
576define amdgpu_kernel void @atomic_umin_i32(i32* %out, i32 %in) {
577entry:
578  %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst
579  ret void
580}
581
; atomicrmw umin directly on %out with the result stored to %out2; the shared GCN
; checks expect the glc form and a flat_store_dword of the register captured as RET.
582; GCN-LABEL: {{^}}atomic_umin_i32_ret:
583; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
584; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
585define amdgpu_kernel void @atomic_umin_i32_ret(i32* %out, i32* %out2, i32 %in) {
586entry:
587  %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst
588  store i32 %val, i32* %out2
589  ret void
590}
591
; atomicrmw umin at %out indexed by the i64 %index, result unused; one shared GCN
; pattern expects the instruction to end right after the data register.
592; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
593; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
594define amdgpu_kernel void @atomic_umin_i32_addr64(i32* %out, i32 %in, i64 %index) {
595entry:
596  %ptr = getelementptr i32, i32* %out, i64 %index
597  %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
598  ret void
599}
600
; atomicrmw umin at %out indexed by the i64 %index, with the result stored to %out2;
; the shared GCN checks expect the glc form and a flat_store_dword of the register
; captured as RET, with nothing following that register on the store line.
601; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
602; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
603; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]{{$}}
604define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
605entry:
606  %ptr = getelementptr i32, i32* %out, i64 %index
607  %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
608  store i32 %val, i32* %out2
609  ret void
610}
611
; atomicrmw or on %out advanced by 4 i32 elements (16 bytes); the result is unused.
; CIVI checks expect no offset operand; GFX9 checks expect offset:16.
612; GCN-LABEL: {{^}}atomic_or_i32_offset:
613; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
614; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
615define amdgpu_kernel void @atomic_or_i32_offset(i32* %out, i32 %in) {
616entry:
617  %gep = getelementptr i32, i32* %out, i32 4
618  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
619  ret void
620}
621
; atomicrmw or at a 4-element (16-byte) GEP from %out, with the result stored to %out2.
; The checks expect the glc form (offset:16 only on GFX9) and a flat_store_dword of the
; register captured as RET.
622; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
623; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
624; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
625; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
626define amdgpu_kernel void @atomic_or_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
627entry:
628  %gep = getelementptr i32, i32* %out, i32 4
629  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
630  store i32 %val, i32* %out2
631  ret void
632}
633
; atomicrmw or at %out indexed by the i64 %index plus 4 more i32 elements (16 bytes);
; the result is unused. CIVI checks expect no offset operand; GFX9 checks expect offset:16.
634; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
635; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
636; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
637define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
638entry:
639  %ptr = getelementptr i32, i32* %out, i64 %index
640  %gep = getelementptr i32, i32* %ptr, i32 4
641  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
642  ret void
643}
644
; atomicrmw or at %out indexed by the i64 %index plus 4 more i32 elements (16 bytes),
; with the result stored to %out2. The checks expect the glc form (offset:16 only on
; GFX9) and a flat_store_dword of the register captured as RET.
645; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
646; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
647; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
648; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
649define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
650entry:
651  %ptr = getelementptr i32, i32* %out, i64 %index
652  %gep = getelementptr i32, i32* %ptr, i32 4
653  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
654  store i32 %val, i32* %out2
655  ret void
656}
657
; atomicrmw or directly on %out with the result unused; one shared GCN pattern
; expects the instruction to end right after the data register.
658; GCN-LABEL: {{^}}atomic_or_i32:
659; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
660define amdgpu_kernel void @atomic_or_i32(i32* %out, i32 %in) {
661entry:
662  %val = atomicrmw volatile or i32* %out, i32 %in seq_cst
663  ret void
664}
665
; atomicrmw or directly on %out with the result stored to %out2; the shared GCN
; checks expect the glc form and a flat_store_dword of the register captured as RET.
666; GCN-LABEL: {{^}}atomic_or_i32_ret:
667; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
668; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
669define amdgpu_kernel void @atomic_or_i32_ret(i32* %out, i32* %out2, i32 %in) {
670entry:
671  %val = atomicrmw volatile or i32* %out, i32 %in seq_cst
672  store i32 %val, i32* %out2
673  ret void
674}
675
; atomicrmw or at %out indexed by the i64 %index, result unused; one shared GCN
; pattern expects the instruction to end right after the data register.
676; GCN-LABEL: {{^}}atomic_or_i32_addr64:
677; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
678define amdgpu_kernel void @atomic_or_i32_addr64(i32* %out, i32 %in, i64 %index) {
679entry:
680  %ptr = getelementptr i32, i32* %out, i64 %index
681  %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
682  ret void
683}
684
; atomicrmw or at %out indexed by the i64 %index, with the result stored to %out2;
; the shared GCN checks expect the glc form and a flat_store_dword of the register
; captured as RET.
685; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
686; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
687; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
688define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
689entry:
690  %ptr = getelementptr i32, i32* %out, i64 %index
691  %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
692  store i32 %val, i32* %out2
693  ret void
694}
695
; atomicrmw xchg (checked as flat_atomic_swap) on %out advanced by 4 i32 elements
; (16 bytes); the result is unused. CIVI checks expect no offset operand; GFX9 checks
; expect offset:16.
696; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
697; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
698; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
699define amdgpu_kernel void @atomic_xchg_i32_offset(i32* %out, i32 %in) {
700entry:
701  %gep = getelementptr i32, i32* %out, i32 4
702  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
703  ret void
704}
705
; atomicrmw xchg (checked as flat_atomic_swap) at a 4-element (16-byte) GEP from %out,
; with the result stored to %out2. The checks expect the glc form (offset:16 only on
; GFX9) and a flat_store_dword of the register captured as RET.
706; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
707; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
708; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
709; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
710define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
711entry:
712  %gep = getelementptr i32, i32* %out, i32 4
713  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
714  store i32 %val, i32* %out2
715  ret void
716}
717
; atomicrmw xchg (checked as flat_atomic_swap) at %out indexed by the i64 %index plus
; 4 more i32 elements (16 bytes); the result is unused. CIVI checks expect no offset
; operand; GFX9 checks expect offset:16.
718; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
719; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
720; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
721define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
722entry:
723  %ptr = getelementptr i32, i32* %out, i64 %index
724  %gep = getelementptr i32, i32* %ptr, i32 4
725  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
726  ret void
727}
728
; atomicrmw xchg (checked as flat_atomic_swap) at %out indexed by the i64 %index plus
; 4 more i32 elements (16 bytes), with the result stored to %out2. The checks expect
; the glc form (offset:16 only on GFX9) and a flat_store_dword of the register
; captured as RET.
729; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
730; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
731; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
732; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
733define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
734entry:
735  %ptr = getelementptr i32, i32* %out, i64 %index
736  %gep = getelementptr i32, i32* %ptr, i32 4
737  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
738  store i32 %val, i32* %out2
739  ret void
740}
741
; atomicrmw xchg (checked as flat_atomic_swap) directly on %out with the result unused;
; one shared GCN pattern expects the instruction to end right after the data register.
742; GCN-LABEL: {{^}}atomic_xchg_i32:
743; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
744define amdgpu_kernel void @atomic_xchg_i32(i32* %out, i32 %in) {
745entry:
746  %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
747  ret void
748}
749
; atomicrmw xchg (checked as flat_atomic_swap) directly on %out with the result stored
; to %out2; the shared GCN checks expect the glc form and a flat_store_dword of the
; register captured as RET.
750; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
751; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
752; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
753define amdgpu_kernel void @atomic_xchg_i32_ret(i32* %out, i32* %out2, i32 %in) {
754entry:
755  %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
756  store i32 %val, i32* %out2
757  ret void
758}
759
; xchg on %out[%index] (variable index only, no constant offset); the result
; is unused and no offset or glc is expected on any target.
; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %elt = getelementptr i32, i32* %out, i64 %index
  %prev = atomicrmw volatile xchg i32* %elt, i32 %in seq_cst
  ret void
}
768
; xchg on %out[%index] whose returned value is stored to %out2. The checks
; expect the glc (returning) form with no offset, and the returned register
; feeding the following flat store.
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}
779
780; CMP_SWAP
781
; cmpxchg (expected %old, replacement %in) on the i32 four elements (16 bytes)
; past %out; the result is unused. Only the GFX9 check expects offset:16.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32* %out, i32 %in, i32 %old) {
entry:
  %addr = getelementptr i32, i32* %out, i32 4
  %pair = cmpxchg volatile i32* %addr, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
791
; cmpxchg at %out + 4 elements; field 0 of the { i32, i1 } result (the i32
; member) is stored to %out2. The checks expect the glc form, with offset:16
; only on GFX9, and the returned register feeding the flat store.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in, i32 %old) {
entry:
  %addr = getelementptr i32, i32* %out, i32 4
  %pair = cmpxchg volatile i32* %addr, i32 %old, i32 %in seq_cst seq_cst
  %orig = extractvalue { i32, i1 } %pair, 0
  store i32 %orig, i32* %out2
  ret void
}
804
; cmpxchg on %out[%index + 4] with the result unused. The CI/VI check expects
; no immediate offset; the GFX9 check expects offset:16.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index, i32 %old) {
entry:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %pair = cmpxchg volatile i32* %addr, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
815
; cmpxchg on %out[%index + 4]; field 0 of the { i32, i1 } result (the i32
; member) is stored to %out2. The checks expect the glc form, offset:16 only
; on GFX9, and the returned register feeding the flat store.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
entry:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %pair = cmpxchg volatile i32* %addr, i32 %old, i32 %in seq_cst seq_cst
  %orig = extractvalue { i32, i1 } %pair, 0
  store i32 %orig, i32* %out2
  ret void
}
829
; cmpxchg directly on %out with the result unused; all targets are expected
; to emit the non-returning form with no offset and no glc.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
; GCN: flat_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32(i32* %out, i32 %in, i32 %old) {
entry:
  %pair = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
837
; cmpxchg directly on %out; field 0 of the { i32, i1 } result (the i32 member)
; is stored to %out2. The instruction check is anchored at end of line so
; that an unexpected trailing operand (e.g. an offset) fails the test, matching
; the other returning cmpxchg tests in this file.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32* %out, i32* %out2, i32 %in, i32 %old) {
entry:
  %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
  ; Index 0 selects the i32 member of the result pair, not the i1 member.
  %flag = extractvalue { i32, i1 } %val, 0
  store i32 %flag, i32* %out2
  ret void
}
848
; cmpxchg on %out[%index] (no constant offset) with the result unused; no
; offset or glc is expected on any target.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
; GCN: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32* %out, i32 %in, i64 %index, i32 %old) {
entry:
  %elt = getelementptr i32, i32* %out, i64 %index
  %pair = cmpxchg volatile i32* %elt, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
857
; cmpxchg on %out[%index]; field 0 of the { i32, i1 } result (the i32 member)
; is stored to %out2. The checks expect the glc form with no offset and the
; returned register feeding the flat store.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
entry:
  %elt = getelementptr i32, i32* %out, i64 %index
  %pair = cmpxchg volatile i32* %elt, i32 %old, i32 %in seq_cst seq_cst
  %orig = extractvalue { i32, i1 } %pair, 0
  store i32 %orig, i32* %out2
  ret void
}
869
; Atomic xor of %in into the i32 four elements (16 bytes) past %out; the
; result is unused. Only the GFX9 check expects offset:16 on the instruction.
; GCN-LABEL: {{^}}atomic_xor_i32_offset:
; CIVI: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xor_i32_offset(i32* %out, i32 %in) {
entry:
  %addr = getelementptr i32, i32* %out, i32 4
  %prev = atomicrmw volatile xor i32* %addr, i32 %in seq_cst
  ret void
}
879
; Atomic xor at %out + 4 elements whose returned value is stored to %out2.
; The checks expect the glc form, offset:16 only on GFX9, and the returned
; register feeding the flat store.
; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %addr = getelementptr i32, i32* %out, i32 4
  %prev = atomicrmw volatile xor i32* %addr, i32 %in seq_cst
  store i32 %prev, i32* %out2
  ret void
}
891
; Atomic xor on %out[%index + 4] with the result unused. The CI/VI check
; expects no immediate offset; the GFX9 check expects offset:16.
; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; CIVI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %prev = atomicrmw volatile xor i32* %addr, i32 %in seq_cst
  ret void
}
902
; Atomic xor on %out[%index + 4] whose returned value is stored to %out2.
; The checks expect the glc form, offset:16 only on GFX9, and the returned
; register feeding the flat store.
; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %prev = atomicrmw volatile xor i32* %addr, i32 %in seq_cst
  store i32 %prev, i32* %out2
  ret void
}
915
; Atomic xor directly on %out with the result unused; all targets are
; expected to emit the non-returning form with no offset and no glc.
; GCN-LABEL: {{^}}atomic_xor_i32:
; GCN: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_xor_i32(i32* %out, i32 %in) {
entry:
  %prev = atomicrmw volatile xor i32* %out, i32 %in seq_cst
  ret void
}
923
; Atomic xor directly on %out; the value returned by the atomic is stored to
; %out2, so the checks expect the glc form followed by a store of that register.
; GCN-LABEL: {{^}}atomic_xor_i32_ret:
; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %prev = atomicrmw volatile xor i32* %out, i32 %in seq_cst
  store i32 %prev, i32* %out2
  ret void
}
933
; Atomic xor on %out[%index] (no constant offset) with the result unused; no
; offset or glc is expected on any target.
; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
; GCN: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_xor_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %elt = getelementptr i32, i32* %out, i64 %index
  %prev = atomicrmw volatile xor i32* %elt, i32 %in seq_cst
  ret void
}
942
; Atomic xor on %out[%index] whose returned value is stored to %out2. The
; checks expect the glc form with no offset and the returned register feeding
; the flat store.
; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %elt = getelementptr i32, i32* %out, i64 %index
  %prev = atomicrmw volatile xor i32* %elt, i32 %in seq_cst
  store i32 %prev, i32* %out2
  ret void
}
953
; seq_cst atomic load of the i32 four elements (16 bytes) past %in, stored to
; %out. The checks expect a glc flat load, with offset:16 only on GFX9, whose
; result register feeds the flat store.
; GCN-LABEL: {{^}}atomic_load_i32_offset:
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32_offset(i32* %in, i32* %out) {
entry:
  %addr = getelementptr i32, i32* %in, i32 4
  %loaded = load atomic i32, i32* %addr seq_cst, align 4
  store i32 %loaded, i32* %out
  ret void
}
965
; seq_cst atomic load directly from %in, stored to %out. The load check is
; anchored at end of line so that an unexpected trailing operand (e.g. an
; offset) fails the test, matching the other atomic load tests in this file.
; GCN-LABEL: {{^}}atomic_load_i32:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32(i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in seq_cst, align 4
  store i32 %val, i32* %out
  ret void
}
975
; seq_cst atomic load from %in[%index + 4], stored to %out. The checks expect
; a glc flat load, with offset:16 only on GFX9, whose result register feeds
; the flat store.
; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32* %in, i32* %out, i64 %index) {
entry:
  %elt = getelementptr i32, i32* %in, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %loaded = load atomic i32, i32* %addr seq_cst, align 4
  store i32 %loaded, i32* %out
  ret void
}
988
; seq_cst atomic load from %in[%index] (no constant offset), stored to %out.
; All targets are expected to emit a glc flat load with no offset.
; GCN-LABEL: {{^}}atomic_load_i32_addr64:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32_addr64(i32* %in, i32* %out, i64 %index) {
entry:
  %elt = getelementptr i32, i32* %in, i64 %index
  %loaded = load atomic i32, i32* %elt seq_cst, align 4
  store i32 %loaded, i32* %out
  ret void
}
999
; seq_cst atomic store of %in to the i32 four elements (16 bytes) past %out.
; The checks expect a flat store with no trailing modifiers on CI/VI and with
; offset:16 on GFX9.
; GCN-LABEL: {{^}}atomic_store_i32_offset:
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32* %out) {
entry:
  %addr = getelementptr i32, i32* %out, i32 4
  store atomic i32 %in, i32* %addr seq_cst, align 4
  ret void
}
1009
; seq_cst atomic store of %in directly to %out; all targets are expected to
; emit a flat store with no trailing modifiers.
; GCN-LABEL: {{^}}atomic_store_i32:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_store_i32(i32 %in, i32* %out) {
entry:
  ; No address arithmetic here: the kernel argument is the store address.
  store atomic i32 %in, i32* %out seq_cst, align 4
  ret void
}
1017
; seq_cst atomic store of %in to %out[%index + 4]. The checks expect a flat
; store with no trailing modifiers on CI/VI and with offset:16 on GFX9.
; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32* %out, i64 %index) {
entry:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  store atomic i32 %in, i32* %addr seq_cst, align 4
  ret void
}
1028
; seq_cst atomic store of %in to %out[%index] (no constant offset); all
; targets are expected to emit a flat store with no trailing modifiers.
; GCN-LABEL: {{^}}atomic_store_i32_addr64:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32* %out, i64 %index) {
entry:
  %elt = getelementptr i32, i32* %out, i64 %index
  store atomic i32 %in, i32* %elt seq_cst, align 4
  ret void
}
1037