1; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
3; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
4
5; GCN-LABEL: {{^}}atomic_add_i32_offset:
6; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
7; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic add into the i32 at %out + 4 elements (16 bytes); the old value is discarded.
define amdgpu_kernel void @atomic_add_i32_offset(i32* %out, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile add i32* %addr, i32 %in seq_cst
  ret void
}
14
15; GCN-LABEL: {{^}}atomic_add_i32_max_offset:
16; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
17; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}}
; Atomic add at element index 1023 of %out (byte offset 4092); the old value is discarded.
define amdgpu_kernel void @atomic_add_i32_max_offset(i32* %out, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 1023
  %old = atomicrmw volatile add i32* %addr, i32 %in seq_cst
  ret void
}
24
25; GCN-LABEL: {{^}}atomic_add_i32_max_offset_p1:
26; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; Atomic add at element index 1024 of %out (byte offset 4096), one element past
; the max_offset case; the old value is discarded.
define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32* %out, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 1024
  %old = atomicrmw volatile add i32* %addr, i32 %in seq_cst
  ret void
}
33
34; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
35; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
36; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
37; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic add into the i32 at %out + 4 elements (16 bytes); the old value is written to %out2.
define amdgpu_kernel void @atomic_add_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile add i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
45
46; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
47; CIVI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
48; GFX9: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic add into the i32 at %out[%index + 4]; the old value is discarded.
define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile add i32* %addr, i32 %in seq_cst
  ret void
}
56
57; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
58; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
59; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
60; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic add into the i32 at %out[%index + 4]; the old value is written to %out2.
define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile add i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
69
70; GCN-LABEL: {{^}}atomic_add_i32:
71; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic add directly into the i32 at %out; the old value is discarded.
define amdgpu_kernel void @atomic_add_i32(i32* %out, i32 %in) {
bb:
  %old = atomicrmw volatile add i32* %out, i32 %in seq_cst
  ret void
}
77
78; GCN-LABEL: {{^}}atomic_add_i32_ret:
79; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
80; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic add directly into the i32 at %out; the old value is written to %out2.
define amdgpu_kernel void @atomic_add_i32_ret(i32* %out, i32* %out2, i32 %in) {
bb:
  %old = atomicrmw volatile add i32* %out, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
87
88; GCN-LABEL: {{^}}atomic_add_i32_addr64:
89; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic add into the i32 at %out[%index]; the old value is discarded.
define amdgpu_kernel void @atomic_add_i32_addr64(i32* %out, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile add i32* %addr, i32 %in seq_cst
  ret void
}
96
97; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
98; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
99; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic add into the i32 at %out[%index]; the old value is written to %out2.
define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile add i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
107
108; GCN-LABEL: {{^}}atomic_and_i32_offset:
109; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
110; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic and into the i32 at %out + 4 elements (16 bytes); the old value is discarded.
define amdgpu_kernel void @atomic_and_i32_offset(i32* %out, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile and i32* %addr, i32 %in seq_cst
  ret void
}
117
; Atomic and into the i32 at %out + 4 elements (16 bytes); the old value is
; written to %out2. The RET capture uses [0-9]+ so that it also matches
; multi-digit VGPR numbers, like every other returning test in this file.
; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}
129
130; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
131; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
132; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic and into the i32 at %out[%index + 4]; the old value is discarded.
define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile and i32* %addr, i32 %in seq_cst
  ret void
}
140
; Atomic and into the i32 at %out[%index + 4]; the old value is written to
; %out2. The RET capture uses [0-9]+ so that it also matches multi-digit VGPR
; numbers, like every other returning test in this file.
; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}
153
154; GCN-LABEL: {{^}}atomic_and_i32:
155; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic and directly into the i32 at %out; the old value is discarded.
define amdgpu_kernel void @atomic_and_i32(i32* %out, i32 %in) {
bb:
  %old = atomicrmw volatile and i32* %out, i32 %in seq_cst
  ret void
}
161
162; GCN-LABEL: {{^}}atomic_and_i32_ret:
163; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
164; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic and directly into the i32 at %out; the old value is written to %out2.
define amdgpu_kernel void @atomic_and_i32_ret(i32* %out, i32* %out2, i32 %in) {
bb:
  %old = atomicrmw volatile and i32* %out, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
171
172; GCN-LABEL: {{^}}atomic_and_i32_addr64:
173; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic and into the i32 at %out[%index]; the old value is discarded.
define amdgpu_kernel void @atomic_and_i32_addr64(i32* %out, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile and i32* %addr, i32 %in seq_cst
  ret void
}
180
181; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
182; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
183; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic and into the i32 at %out[%index]; the old value is written to %out2.
define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile and i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
191
192; GCN-LABEL: {{^}}atomic_sub_i32_offset:
193; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
194; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic sub from the i32 at %out + 4 elements (16 bytes); the old value is discarded.
define amdgpu_kernel void @atomic_sub_i32_offset(i32* %out, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile sub i32* %addr, i32 %in seq_cst
  ret void
}
201
202; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
203; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
204; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
205; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic sub from the i32 at %out + 4 elements (16 bytes); the old value is written to %out2.
define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile sub i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
213
214; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
215; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
216; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic sub from the i32 at %out[%index + 4]; the old value is discarded.
define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile sub i32* %addr, i32 %in seq_cst
  ret void
}
224
225; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
226; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
227; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
228; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic sub from the i32 at %out[%index + 4]; the old value is written to %out2.
define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile sub i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
237
238; GCN-LABEL: {{^}}atomic_sub_i32:
239; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic sub directly from the i32 at %out; the old value is discarded.
define amdgpu_kernel void @atomic_sub_i32(i32* %out, i32 %in) {
bb:
  %old = atomicrmw volatile sub i32* %out, i32 %in seq_cst
  ret void
}
245
246; GCN-LABEL: {{^}}atomic_sub_i32_ret:
247; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
248; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic sub directly from the i32 at %out; the old value is written to %out2.
define amdgpu_kernel void @atomic_sub_i32_ret(i32* %out, i32* %out2, i32 %in) {
bb:
  %old = atomicrmw volatile sub i32* %out, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
255
256; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
257; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic sub from the i32 at %out[%index]; the old value is discarded.
define amdgpu_kernel void @atomic_sub_i32_addr64(i32* %out, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile sub i32* %addr, i32 %in seq_cst
  ret void
}
264
265; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
266; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
267; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic sub from the i32 at %out[%index]; the old value is written to %out2.
define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile sub i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
275
276; GCN-LABEL: {{^}}atomic_max_i32_offset:
277; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
278; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic signed max on the i32 at %out + 4 elements (16 bytes); the old value is discarded.
define amdgpu_kernel void @atomic_max_i32_offset(i32* %out, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile max i32* %addr, i32 %in seq_cst
  ret void
}
285
286; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
287; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
288; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
289; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic signed max on the i32 at %out + 4 elements (16 bytes); the old value is written to %out2.
define amdgpu_kernel void @atomic_max_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile max i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
297
298; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
299; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
300; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic signed max on the i32 at %out[%index + 4]; the old value is discarded.
define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile max i32* %addr, i32 %in seq_cst
  ret void
}
308
309; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
310; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
311; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
312; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic signed max on the i32 at %out[%index + 4]; the old value is written to %out2.
define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile max i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
321
322; GCN-LABEL: {{^}}atomic_max_i32:
323; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic signed max directly on the i32 at %out; the old value is discarded.
define amdgpu_kernel void @atomic_max_i32(i32* %out, i32 %in) {
bb:
  %old = atomicrmw volatile max i32* %out, i32 %in seq_cst
  ret void
}
329
330; GCN-LABEL: {{^}}atomic_max_i32_ret:
331; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
332; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic signed max directly on the i32 at %out; the old value is written to %out2.
define amdgpu_kernel void @atomic_max_i32_ret(i32* %out, i32* %out2, i32 %in) {
bb:
  %old = atomicrmw volatile max i32* %out, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
339
340; GCN-LABEL: {{^}}atomic_max_i32_addr64:
341; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic signed max on the i32 at %out[%index]; the old value is discarded.
define amdgpu_kernel void @atomic_max_i32_addr64(i32* %out, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile max i32* %addr, i32 %in seq_cst
  ret void
}
348
349; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
350; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
351; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic signed max on the i32 at %out[%index]; the old value is written to %out2.
define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile max i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
359
360; GCN-LABEL: {{^}}atomic_umax_i32_offset:
361; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
362; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic unsigned max on the i32 at %out + 4 elements (16 bytes); the old value is discarded.
define amdgpu_kernel void @atomic_umax_i32_offset(i32* %out, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile umax i32* %addr, i32 %in seq_cst
  ret void
}
369
370; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
371; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
372; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
373; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic unsigned max on the i32 at %out + 4 elements (16 bytes); the old value is written to %out2.
define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile umax i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
381
382; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
383; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
384; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic unsigned max on the i32 at %out[%index + 4]; the old value is discarded.
define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile umax i32* %addr, i32 %in seq_cst
  ret void
}
392
393; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
394; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
395; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
396; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic unsigned max on the i32 at %out[%index + 4]; the old value is written to %out2.
define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile umax i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
405
406; GCN-LABEL: {{^}}atomic_umax_i32:
407; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic unsigned max directly on the i32 at %out; the old value is discarded.
define amdgpu_kernel void @atomic_umax_i32(i32* %out, i32 %in) {
bb:
  %old = atomicrmw volatile umax i32* %out, i32 %in seq_cst
  ret void
}
413
414; GCN-LABEL: {{^}}atomic_umax_i32_ret:
415; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
416; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic unsigned max directly on the i32 at %out; the old value is written to %out2.
define amdgpu_kernel void @atomic_umax_i32_ret(i32* %out, i32* %out2, i32 %in) {
bb:
  %old = atomicrmw volatile umax i32* %out, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
423
424; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
425; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic unsigned max on the i32 at %out[%index]; the old value is discarded.
define amdgpu_kernel void @atomic_umax_i32_addr64(i32* %out, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile umax i32* %addr, i32 %in seq_cst
  ret void
}
432
433; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
434; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
435; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic unsigned max on the i32 at %out[%index]; the old value is written to %out2.
define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile umax i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
443
444; GCN-LABEL: {{^}}atomic_min_i32_offset:
445; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
446; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic signed min on the i32 at %out + 4 elements (16 bytes); the old value is discarded.
define amdgpu_kernel void @atomic_min_i32_offset(i32* %out, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile min i32* %addr, i32 %in seq_cst
  ret void
}
453
454; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
455; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
456; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
457; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic signed min on the i32 at %out + 4 elements (16 bytes); the old value is written to %out2.
define amdgpu_kernel void @atomic_min_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile min i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
465
466; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
467; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
468; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic signed min on the i32 at %out[%index + 4]; the old value is discarded.
define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile min i32* %addr, i32 %in seq_cst
  ret void
}
476
477; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
478; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
479; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
480; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic signed min on the i32 at %out[%index + 4]; the old value is written to %out2.
define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile min i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
489
490; GCN-LABEL: {{^}}atomic_min_i32:
491; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic signed min directly on the i32 at %out; the old value is discarded.
define amdgpu_kernel void @atomic_min_i32(i32* %out, i32 %in) {
bb:
  %old = atomicrmw volatile min i32* %out, i32 %in seq_cst
  ret void
}
497
498; GCN-LABEL: {{^}}atomic_min_i32_ret:
499; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
500; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic signed min directly on the i32 at %out; the old value is written to %out2.
define amdgpu_kernel void @atomic_min_i32_ret(i32* %out, i32* %out2, i32 %in) {
bb:
  %old = atomicrmw volatile min i32* %out, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
507
508; GCN-LABEL: {{^}}atomic_min_i32_addr64:
509; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic signed min on the i32 at %out[%index]; the old value is discarded.
define amdgpu_kernel void @atomic_min_i32_addr64(i32* %out, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile min i32* %addr, i32 %in seq_cst
  ret void
}
516
517; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
518; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
519; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic signed min on the i32 at %out[%index]; the old value is written to %out2.
define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile min i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
527
528; GCN-LABEL: {{^}}atomic_umin_i32_offset:
529; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
530; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic unsigned min on the i32 at %out + 4 elements (16 bytes); the old value is discarded.
define amdgpu_kernel void @atomic_umin_i32_offset(i32* %out, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile umin i32* %addr, i32 %in seq_cst
  ret void
}
537
538; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
539; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
540; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
541; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic unsigned min on the i32 at %out + 4 elements (16 bytes); the old value is written to %out2.
define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile umin i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
549
550; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
551; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
552; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic unsigned min on the i32 at %out[%index + 4]; the old value is discarded.
define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile umin i32* %addr, i32 %in seq_cst
  ret void
}
560
561; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
562; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
563; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
564; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic unsigned min on the i32 at %out[%index + 4]; the old value is written to %out2.
define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile umin i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
573
574; GCN-LABEL: {{^}}atomic_umin_i32:
575; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic unsigned min directly on the i32 at %out; the old value is discarded.
define amdgpu_kernel void @atomic_umin_i32(i32* %out, i32 %in) {
bb:
  %old = atomicrmw volatile umin i32* %out, i32 %in seq_cst
  ret void
}
581
582; GCN-LABEL: {{^}}atomic_umin_i32_ret:
583; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
584; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic unsigned min directly on the i32 at %out; the old value is written to %out2.
define amdgpu_kernel void @atomic_umin_i32_ret(i32* %out, i32* %out2, i32 %in) {
bb:
  %old = atomicrmw volatile umin i32* %out, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
591
592; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
593; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic unsigned min on the i32 at %out[%index]; the old value is discarded.
define amdgpu_kernel void @atomic_umin_i32_addr64(i32* %out, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile umin i32* %addr, i32 %in seq_cst
  ret void
}
600
; Atomic unsigned min on the i32 at %out[%index]; the old value is written to
; %out2. The store check is anchored at end of line, so no trailing modifier
; is accepted on the store.
; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}
611
612; GCN-LABEL: {{^}}atomic_or_i32_offset:
613; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
614; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic or into the i32 at %out + 4 elements (16 bytes); the old value is discarded.
define amdgpu_kernel void @atomic_or_i32_offset(i32* %out, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile or i32* %addr, i32 %in seq_cst
  ret void
}
621
622; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
623; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
624; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
625; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic or into the i32 at %out + 4 elements (16 bytes); the old value is written to %out2.
define amdgpu_kernel void @atomic_or_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile or i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
633
634; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
635; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
636; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic or into the i32 at %out[%index + 4]; the old value is discarded.
define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile or i32* %addr, i32 %in seq_cst
  ret void
}
644
645; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
646; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
647; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
648; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic or into the i32 at %out[%index + 4]; the old value is written to %out2.
define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile or i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
657
658; GCN-LABEL: {{^}}atomic_or_i32:
659; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic or directly into the i32 at %out; the old value is discarded.
define amdgpu_kernel void @atomic_or_i32(i32* %out, i32 %in) {
bb:
  %old = atomicrmw volatile or i32* %out, i32 %in seq_cst
  ret void
}
665
666; GCN-LABEL: {{^}}atomic_or_i32_ret:
667; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
668; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic or directly into the i32 at %out; the old value is written to %out2.
define amdgpu_kernel void @atomic_or_i32_ret(i32* %out, i32* %out2, i32 %in) {
bb:
  %old = atomicrmw volatile or i32* %out, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
675
676; GCN-LABEL: {{^}}atomic_or_i32_addr64:
677; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; Atomic or into the i32 at %out[%index]; the old value is discarded.
define amdgpu_kernel void @atomic_or_i32_addr64(i32* %out, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile or i32* %addr, i32 %in seq_cst
  ret void
}
684
685; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
686; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
687; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic or into the i32 at %out[%index]; the old value is written to %out2.
define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %addr = getelementptr i32, i32* %out, i64 %index
  %old = atomicrmw volatile or i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
695
696; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
697; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
698; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic exchange with the i32 at %out + 4 elements (16 bytes); the old value is discarded.
define amdgpu_kernel void @atomic_xchg_i32_offset(i32* %out, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile xchg i32* %addr, i32 %in seq_cst
  ret void
}
705
706; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
707; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
708; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic exchange with the float at %out + 4 elements (16 bytes); the old value is discarded.
define amdgpu_kernel void @atomic_xchg_f32_offset(float* %out, float %in) {
bb:
  %addr = getelementptr float, float* %out, i32 4
  %old = atomicrmw volatile xchg float* %addr, float %in seq_cst
  ret void
}
715
716; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
717; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
718; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
719; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic exchange with the i32 at %out + 4 elements (16 bytes); the old value is written to %out2.
define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %old = atomicrmw volatile xchg i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
727
728; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
729; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
730; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
; Atomic exchange with the i32 at %out[%index + 4]; the old value is discarded.
define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile xchg i32* %addr, i32 %in seq_cst
  ret void
}
738
739; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
740; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
741; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
742; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Atomic exchange with the i32 at %out[%index + 4]; the old value is written to %out2.
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %old = atomicrmw volatile xchg i32* %addr, i32 %in seq_cst
  store i32 %old, i32* %out2
  ret void
}
751
752; GCN-LABEL: {{^}}atomic_xchg_i32:
753; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; Atomic exchange directly with the i32 at %out; the old value is discarded.
define amdgpu_kernel void @atomic_xchg_i32(i32* %out, i32 %in) {
bb:
  %old = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
  ret void
}
759
; Returning i32 swap directly on %out; the previous value is stored to %out2,
; so the glc form of the instruction is expected.
; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret(i32* %out, i32* %out2, i32 %in) {
bb:
  %prev = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
  store i32 %prev, i32* %out2
  ret void
}
769
; Non-returning i32 swap at %out[%index] with a runtime i64 index and no
; constant displacement.
; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64(i32* %out, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %prev = atomicrmw volatile xchg i32* %elt, i32 %in seq_cst
  ret void
}
778
; Returning i32 swap at %out[%index]; the previous value is stored to %out2.
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
; GCN: flat_atomic_swap [[RET:v[0-9]+]],  v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %prev = atomicrmw volatile xchg i32* %elt, i32 %in seq_cst
  store i32 %prev, i32* %out2
  ret void
}
789
790; CMP_SWAP
791
; Non-returning i32 compare-and-swap at element index 4 of %out. The data
; operand is a register pair in the check patterns; only gfx900 is expected to
; show the 16-byte displacement as an instruction offset.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32* %out, i32 %in, i32 %old) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %pair = cmpxchg volatile i32* %addr, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
801
; Returning i32 compare-and-swap at element index 4 of %out. Field 0 of the
; cmpxchg result (the value loaded from memory) is stored to %out2. Only
; gfx900 is expected to show the 16-byte displacement as an instruction offset.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in, i32 %old) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %pair = cmpxchg volatile i32* %addr, i32 %old, i32 %in seq_cst seq_cst
  %loaded = extractvalue { i32, i1 } %pair, 0
  store i32 %loaded, i32* %out2
  ret void
}
814
; Non-returning i32 compare-and-swap at %out[%index + 4]. Only gfx900 is
; expected to show the 16-byte displacement as an instruction offset.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index, i32 %old) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %pair = cmpxchg volatile i32* %addr, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
825
; Returning i32 compare-and-swap at %out[%index + 4]. Field 0 of the cmpxchg
; result (the value loaded from memory) is stored to %out2. Only gfx900 is
; expected to show the 16-byte displacement as an instruction offset.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %pair = cmpxchg volatile i32* %addr, i32 %old, i32 %in seq_cst seq_cst
  %loaded = extractvalue { i32, i1 } %pair, 0
  store i32 %loaded, i32* %out2
  ret void
}
839
; Non-returning i32 compare-and-swap directly on %out; no offset operand is
; expected on any of the tested targets.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
; GCN: flat_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32(i32* %out, i32 %in, i32 %old) {
bb:
  %pair = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
847
; Returning i32 compare-and-swap directly on %out. Field 0 of the cmpxchg
; result (the value loaded from memory, not the success bit) is stored to
; %out2. The instruction pattern is anchored at end of line, like the sibling
; tests, so an unexpected trailing operand cannot slip through.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32* %out, i32* %out2, i32 %in, i32 %old) {
entry:
  %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
  %loaded = extractvalue { i32, i1 } %val, 0
  store i32 %loaded, i32* %out2
  ret void
}
858
; Non-returning i32 compare-and-swap at %out[%index] with a runtime i64 index
; and no constant displacement.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
; GCN: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32* %out, i32 %in, i64 %index, i32 %old) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %pair = cmpxchg volatile i32* %elt, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
867
; Returning i32 compare-and-swap at %out[%index]. Field 0 of the cmpxchg
; result (the value loaded from memory) is stored to %out2.
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %pair = cmpxchg volatile i32* %elt, i32 %old, i32 %in seq_cst seq_cst
  %loaded = extractvalue { i32, i1 } %pair, 0
  store i32 %loaded, i32* %out2
  ret void
}
879
; Non-returning i32 atomic xor at element index 4 of %out. Only gfx900 is
; expected to show the 16-byte displacement as an instruction offset.
; GCN-LABEL: {{^}}atomic_xor_i32_offset:
; CIVI: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xor_i32_offset(i32* %out, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %prev = atomicrmw volatile xor i32* %addr, i32 %in seq_cst
  ret void
}
889
; Returning i32 atomic xor at element index 4 of %out; the previous value is
; stored to %out2. Only gfx900 is expected to show the 16-byte displacement as
; an instruction offset.
; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  %prev = atomicrmw volatile xor i32* %addr, i32 %in seq_cst
  store i32 %prev, i32* %out2
  ret void
}
901
; Non-returning i32 atomic xor at %out[%index + 4]. Only gfx900 is expected to
; show the 16-byte displacement as an instruction offset.
; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; CIVI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %prev = atomicrmw volatile xor i32* %addr, i32 %in seq_cst
  ret void
}
912
; Returning i32 atomic xor at %out[%index + 4]; the previous value is stored
; to %out2. Only gfx900 is expected to show the 16-byte displacement as an
; instruction offset.
; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %prev = atomicrmw volatile xor i32* %addr, i32 %in seq_cst
  store i32 %prev, i32* %out2
  ret void
}
925
; Non-returning i32 atomic xor directly on %out; no offset operand is expected
; on any of the tested targets.
; GCN-LABEL: {{^}}atomic_xor_i32:
; GCN: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_xor_i32(i32* %out, i32 %in) {
bb:
  %prev = atomicrmw volatile xor i32* %out, i32 %in seq_cst
  ret void
}
933
; Returning i32 atomic xor directly on %out; the previous value is stored to
; %out2, so the glc form of the instruction is expected.
; GCN-LABEL: {{^}}atomic_xor_i32_ret:
; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret(i32* %out, i32* %out2, i32 %in) {
bb:
  %prev = atomicrmw volatile xor i32* %out, i32 %in seq_cst
  store i32 %prev, i32* %out2
  ret void
}
943
; Non-returning i32 atomic xor at %out[%index] with a runtime i64 index and no
; constant displacement.
; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
; GCN: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_xor_i32_addr64(i32* %out, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %prev = atomicrmw volatile xor i32* %elt, i32 %in seq_cst
  ret void
}
952
; Returning i32 atomic xor at %out[%index]; the previous value is stored to
; %out2.
; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %prev = atomicrmw volatile xor i32* %elt, i32 %in seq_cst
  store i32 %prev, i32* %out2
  ret void
}
963
; seq_cst atomic i32 load from element index 4 of %in, copied to %out. A glc
; flat load is expected; only gfx900 is expected to show the 16-byte
; displacement as an instruction offset.
; GCN-LABEL: {{^}}atomic_load_i32_offset:
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32_offset(i32* %in, i32* %out) {
bb:
  %addr = getelementptr i32, i32* %in, i32 4
  %loaded = load atomic i32, i32* %addr seq_cst, align 4
  store i32 %loaded, i32* %out
  ret void
}
975
; seq_cst atomic i32 load directly from %in, copied to %out. A glc flat load
; with no offset operand is expected on every tested target. The pattern is
; anchored at end of line, like the sibling load tests, so an unexpected
; trailing operand cannot slip through.
; GCN-LABEL: {{^}}atomic_load_i32:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32(i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in seq_cst, align 4
  store i32 %val, i32* %out
  ret void
}
985
; seq_cst atomic i32 load from %in[%index + 4], copied to %out. Only gfx900 is
; expected to show the 16-byte displacement as an instruction offset.
; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32* %in, i32* %out, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %in, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  %loaded = load atomic i32, i32* %addr seq_cst, align 4
  store i32 %loaded, i32* %out
  ret void
}
998
; seq_cst atomic i32 load from %in[%index] (runtime i64 index, no constant
; displacement), copied to %out.
; GCN-LABEL: {{^}}atomic_load_i32_addr64:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32_addr64(i32* %in, i32* %out, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %in, i64 %index
  %loaded = load atomic i32, i32* %elt seq_cst, align 4
  store i32 %loaded, i32* %out
  ret void
}
1009
; seq_cst atomic i32 store of %in to element index 4 of %out. Only gfx900 is
; expected to show the 16-byte displacement as an instruction offset.
; GCN-LABEL: {{^}}atomic_store_i32_offset:
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32* %out) {
bb:
  %addr = getelementptr i32, i32* %out, i32 4
  store atomic i32 %in, i32* %addr seq_cst, align 4
  ret void
}
1019
; seq_cst atomic i32 store of %in directly to %out; no offset operand is
; expected on any of the tested targets.
; GCN-LABEL: {{^}}atomic_store_i32:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_store_i32(i32 %in, i32* %out) {
bb:
  store atomic i32 %in, i32* %out seq_cst, align 4
  ret void
}
1027
; seq_cst atomic i32 store of %in to %out[%index + 4]. Only gfx900 is expected
; to show the 16-byte displacement as an instruction offset.
; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32* %out, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  %addr = getelementptr i32, i32* %elt, i32 4
  store atomic i32 %in, i32* %addr seq_cst, align 4
  ret void
}
1038
; seq_cst atomic i32 store of %in to %out[%index] (runtime i64 index, no
; constant displacement).
; GCN-LABEL: {{^}}atomic_store_i32_addr64:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32* %out, i64 %index) {
bb:
  %elt = getelementptr i32, i32* %out, i64 %index
  store atomic i32 %in, i32* %elt seq_cst, align 4
  ret void
}
1047
; seq_cst atomic float load from element index 4 of %in, copied to %out. The
; same dword load pattern as the i32 variant is expected; only gfx900 is
; expected to show the 16-byte displacement as an instruction offset.
; GCN-LABEL: {{^}}atomic_load_f32_offset:
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f32_offset(float* %in, float* %out) {
bb:
  %addr = getelementptr float, float* %in, i32 4
  %loaded = load atomic float, float* %addr seq_cst, align 4
  store float %loaded, float* %out
  ret void
}
1059
; seq_cst atomic float load directly from %in, copied to %out. A glc flat
; dword load with no offset operand is expected on every tested target. The
; pattern is anchored at end of line, like the sibling load tests, so an
; unexpected trailing operand cannot slip through.
; GCN-LABEL: {{^}}atomic_load_f32:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f32(float* %in, float* %out) {
entry:
  %val = load atomic float, float* %in seq_cst, align 4
  store float %val, float* %out
  ret void
}
1069
; seq_cst atomic float load from %in[%index + 4], copied to %out. Only gfx900
; is expected to show the 16-byte displacement as an instruction offset.
; GCN-LABEL: {{^}}atomic_load_f32_addr64_offset:
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f32_addr64_offset(float* %in, float* %out, i64 %index) {
bb:
  %elt = getelementptr float, float* %in, i64 %index
  %addr = getelementptr float, float* %elt, i32 4
  %loaded = load atomic float, float* %addr seq_cst, align 4
  store float %loaded, float* %out
  ret void
}
1082
; seq_cst atomic float load from %in[%index] (runtime i64 index, no constant
; displacement), copied to %out.
; GCN-LABEL: {{^}}atomic_load_f32_addr64:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f32_addr64(float* %in, float* %out, i64 %index) {
bb:
  %elt = getelementptr float, float* %in, i64 %index
  %loaded = load atomic float, float* %elt seq_cst, align 4
  store float %loaded, float* %out
  ret void
}
1093
; seq_cst atomic float store of %in to element index 4 of %out. Only gfx900 is
; expected to show the 16-byte displacement as an instruction offset.
; GCN-LABEL: {{^}}atomic_store_f32_offset:
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_f32_offset(float %in, float* %out) {
bb:
  %addr = getelementptr float, float* %out, i32 4
  store atomic float %in, float* %addr seq_cst, align 4
  ret void
}
1103
; seq_cst atomic float store of %in directly to %out; no offset operand is
; expected on any of the tested targets.
; GCN-LABEL: {{^}}atomic_store_f32:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_store_f32(float %in, float* %out) {
bb:
  store atomic float %in, float* %out seq_cst, align 4
  ret void
}
1111
; seq_cst atomic float store of %in to %out[%index + 4]. Only gfx900 is
; expected to show the 16-byte displacement as an instruction offset.
; GCN-LABEL: {{^}}atomic_store_f32_addr64_offset:
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, float* %out, i64 %index) {
bb:
  %elt = getelementptr float, float* %out, i64 %index
  %addr = getelementptr float, float* %elt, i32 4
  store atomic float %in, float* %addr seq_cst, align 4
  ret void
}
1122
; seq_cst atomic float store of %in to %out[%index] (runtime i64 index, no
; constant displacement).
; GCN-LABEL: {{^}}atomic_store_f32_addr64:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_store_f32_addr64(float %in, float* %out, i64 %index) {
bb:
  %elt = getelementptr float, float* %out, i64 %index
  store atomic float %in, float* %elt seq_cst, align 4
  ret void
}
1131