1; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
2; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
3; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
4; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -stop-before=machine-scheduler < %s | FileCheck -enable-var-scope -check-prefixes=MIR %s
5
; Declarations of the intrinsics exercised below. Each atomic.inc variant takes
; a pointer, a data operand of the result type, and trailing (i32, i32, i1)
; operands; every call visible in this file passes 0, 0, false for those.
; The p1 / p3 / p0 name suffix matches the pointer in the signature
; (addrspace(1), addrspace(3), or no addrspace qualifier respectively).
6declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
7declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
8declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
9
10declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
11declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
12declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
13
14declare i32 @llvm.amdgcn.workitem.id.x() #1
15
; Returning i32 atomic inc on an addrspace(3) pointer with data 42; the result
; is stored to %out. Expects ds_inc_rtn_u32 with the constant in a VGPR. The
; bonaire and tonga runs also expect an m0 write, the gfx900 run expects no m0
; reference. The call carries !noalias metadata, and the
; -stop-before=machine-scheduler run checks that it appears on the memory
; operand of DS_INC_RTN_U32.
16; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32:
17; CIVI-DAG: s_mov_b32 m0
18; GFX9-NOT: m0
19
20; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
21; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
22; MIR-LABEL: @lds_atomic_inc_ret_i32
23; MIR: DS_INC_RTN_U32 {{.*}} :: (load store 4 on %{{.*}}, !noalias !{{[0-9]+}}, addrspace 3)
24define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
25  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false), !noalias !0
26  store i32 %result, i32 addrspace(1)* %out
27  ret void
28}
29
; Self-referential metadata node used as the !noalias operand of the call above.
30!0 = distinct !{!0, !"noalias-scope"}
31
; Same as the plain returning addrspace(3) i32 case, but the pointer is first
; advanced by 4 i32 elements; the check expects that to appear as offset:16 on
; the ds_inc_rtn_u32 instruction.
32; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
33; CIVI-DAG: s_mov_b32 m0
34; GFX9-NOT: m0
35
36; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
37; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
38define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
39  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
40  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
41  store i32 %result, i32 addrspace(1)* %out
42  ret void
43}
44
; i32 atomic inc on an addrspace(3) pointer whose result is never used, so the
; non-returning ds_inc_u32 form is expected. The pointer is loaded into an SGPR
; and copied to a VGPR before use. The data check spells out the full constant
; 42 that the call passes; a bare 4 would only match it as a substring and
; would equally accept any other constant beginning with 4.
45; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32:
46; CIVI-DAG: s_mov_b32 m0
47; GFX9-NOT: m0
48
49; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
50; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
51; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
52; GCN: ds_inc_u32 [[VPTR]], [[DATA]]
53define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
54  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
55  ret void
56}
57
; Unused-result i32 atomic inc on an addrspace(3) pointer advanced by 4 i32
; elements; expects the non-returning ds_inc_u32 with offset:16.
58; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
59; CIVI-DAG: s_mov_b32 m0
60; GFX9-NOT: m0
61
62; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
63; GCN: ds_inc_u32 v{{[0-9]+}}, [[K]] offset:16
64define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
65  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
66  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
67  ret void
68}
69
; Returning i32 atomic inc on an addrspace(1) pointer. The bonaire and tonga
; runs expect buffer_atomic_inc with glc; the gfx900 run expects
; global_atomic_inc with glc. Both patterns end the line right after glc.
70; GCN-LABEL: {{^}}global_atomic_inc_ret_i32:
71; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
72; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
73; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
74define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
75  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
76  store i32 %result, i32 addrspace(1)* %out
77  ret void
78}
79
; Returning i32 atomic inc on an addrspace(1) pointer advanced by 4 i32
; elements; every checked instruction form is expected to carry offset:16
; followed by glc.
80; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset:
81; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
82; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
83; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
84define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
85  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
86  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
87  store i32 %result, i32 addrspace(1)* %out
88  ret void
89}
90
; Unused-result i32 atomic inc on an addrspace(1) pointer. The patterns are
; anchored at end of line, so neither glc nor an offset may follow the
; operands.
91; GCN-LABEL: {{^}}global_atomic_inc_noret_i32:
92; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
93; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
94; GFX9: global_atomic_inc v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]$}}
95define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind {
96  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
97  ret void
98}
99
; Unused-result i32 atomic inc on an addrspace(1) pointer advanced by 4 i32
; elements; expects offset:16 as the last token on the instruction (no glc).
100; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset:
101; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
102; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
103; GFX9: global_atomic_inc v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
104define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
105  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
106  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
107  ret void
108}
109
; Returning i32 atomic inc on an addrspace(1) pointer indexed by the workitem
; id and then advanced by 5 more i32 elements. The bonaire run expects the
; addr64 buffer form with offset:20; the tonga run expects flat_atomic_inc with
; no offset. No gfx900-specific instruction pattern is present for this case.
110; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset_addr64:
111; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
112; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
113; VI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
114define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
115  %id = call i32 @llvm.amdgcn.workitem.id.x()
116  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
117  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
118  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
119  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
120  store i32 %result, i32 addrspace(1)* %out.gep
121  ret void
122}
123
; Unused-result variant of the workitem-indexed addrspace(1) i32 case. The
; bonaire run expects the addr64 buffer form with offset:20 and no glc; the
; tonga run expects flat_atomic_inc with neither offset nor glc. No
; gfx900-specific instruction pattern is present for this case.
124; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset_addr64:
125; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
126; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
127; VI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
128define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
129  %id = call i32 @llvm.amdgcn.workitem.id.x()
130  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
131  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
132  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
133  ret void
134}
135
; addrspace(3) array of 512 i32 used as the base object by the test below.
136@lds0 = addrspace(3) global [512 x i32] undef, align 4
137
; Atomic inc on @lds0 at element index (workitem id + 2). The checks expect the
; workitem id to be scaled by 4 (shift by 2) and added to the lds0 address, and
; the constant +2 elements to show up as offset:8 on ds_inc_rtn_u32. %idx.0 is
; also stored to %add_use, giving the add a second use besides the GEP.
138; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i32:
139; CIVI-DAG: v_lshlrev_b32_e32 [[OFS:v[0-9]+]], 2, {{v[0-9]+}}
140; CIVI-DAG: v_add_{{[ui]}}32_e32 [[PTR:v[0-9]+]], vcc, lds0@abs32@lo, [[OFS]]
141; GFX9-DAG: s_mov_b32 [[BASE:s[0-9]+]], lds0@abs32@lo
142; GFX9-DAG: v_lshl_add_u32 [[PTR:v[0-9]+]], {{v[0-9]+}}, 2, [[BASE]]
143; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
144define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
145  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
146  %idx.0 = add nsw i32 %tid.x, 2
147  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
148  %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false)
149  store i32 %idx.0, i32 addrspace(1)* %add_use
150  store i32 %val0, i32 addrspace(1)* %out
151  ret void
152}
153
; Returning i64 atomic inc on an addrspace(3) pointer with data 42. The 64-bit
; constant is expected as a VGPR pair (low = 42, high = 0) feeding
; ds_inc_rtn_u64, with nothing after the operands.
154; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
155; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
156; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
157; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
158define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
159  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
160  store i64 %result, i64 addrspace(1)* %out
161  ret void
162}
163
; Returning i64 atomic inc on an addrspace(3) pointer advanced by 4 i64
; elements; expects offset:32 on ds_inc_rtn_u64.
164; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
165; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
166; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
167; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
168define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
169  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
170  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
171  store i64 %result, i64 addrspace(1)* %out
172  ret void
173}
174
; Unused-result i64 atomic inc on an addrspace(3) pointer; expects the
; non-returning ds_inc_u64 with the (42, 0) VGPR pair and no trailing offset.
175; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64:
176; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
177; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
178; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
179define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
180  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
181  ret void
182}
183
; Unused-result i64 atomic inc on an addrspace(3) pointer advanced by 4 i64
; elements; expects ds_inc_u64 ending in offset:32.
184; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
185; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
186; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
187; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
188define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
189  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
190  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
191  ret void
192}
193
; Returning i64 atomic inc on an addrspace(1) pointer. The data is expected as
; a (42, 0) VGPR pair. The gfx900 run additionally captures a separate VGPR
; holding 0 that is used as an operand of global_atomic_inc_x2. Note the
; gfx900 zero check here is an ordered check, unlike the -DAG form used by the
; sibling i64 global tests.
194; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
195; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
196; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
197; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
198; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
199; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
200define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
201  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
202  store i64 %result, i64 addrspace(1)* %out
203  ret void
204}
205
; Returning i64 atomic inc on an addrspace(1) pointer advanced by 4 i64
; elements; every checked instruction form is expected to carry offset:32
; followed by glc.
206; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
207; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
208; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
209; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
210; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
211; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
212define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
213  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
214  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
215  store i64 %result, i64 addrspace(1)* %out
216  ret void
217}
218
; Unused-result i64 atomic inc on an addrspace(1) pointer. The instruction
; patterns are anchored at end of line, so neither glc nor an offset may
; follow the operands.
219; GCN-LABEL: {{^}}global_atomic_inc_noret_i64:
220; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
221; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
222; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
223; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
224
225; GFX9: global_atomic_inc_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]$}}
226define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind {
227  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
228  ret void
229}
230
; Unused-result i64 atomic inc on an addrspace(1) pointer advanced by 4 i64
; elements; expects offset:32 as the last token on the instruction (no glc).
231; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
232; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
233; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
234; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
235; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
236; GFX9: global_atomic_inc_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
237define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
238  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
239  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
240  ret void
241}
242
; Returning i64 atomic inc on an addrspace(1) pointer indexed by the workitem
; id and then advanced by 5 more i64 elements. The bonaire run expects the
; addr64 buffer form with offset:40 and an extra zero move before the high
; half of the constant; the tonga run expects flat_atomic_inc_x2 with no
; offset. No gfx900-specific instruction pattern is present for this case.
243; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64:
244; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
245; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
246; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
247; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
248; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
249define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
250  %id = call i32 @llvm.amdgcn.workitem.id.x()
251  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
252  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
253  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
254  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
255  store i64 %result, i64 addrspace(1)* %out.gep
256  ret void
257}
258
; Unused-result variant of the workitem-indexed addrspace(1) i64 case. The
; bonaire run expects the addr64 buffer form with offset:40 and no glc; the
; tonga run expects flat_atomic_inc_x2 with neither offset nor glc. No
; gfx900-specific instruction pattern is present for this case.
259; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64:
260; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
261; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
262; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
263; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
264; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
265define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
266  %id = call i32 @llvm.amdgcn.workitem.id.x()
267  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
268  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
269  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
270  ret void
271}
272
; Returning i32 atomic inc through a pointer with no addrspace qualifier. All
; three assembly runs share one expectation, flat_atomic_inc with glc and
; nothing after it.
273; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32:
274; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
275; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
276define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 {
277  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
278  store i32 %result, i32* %out
279  ret void
280}
281
; Returning i32 flat atomic inc on a pointer advanced by 4 i32 elements. The
; bonaire and tonga runs expect no offset on the instruction; the gfx900 run
; expects offset:16 before glc.
282; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset:
283; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
284; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
285; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
286define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 {
287  %gep = getelementptr i32, i32* %ptr, i32 4
288  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
289  store i32 %result, i32* %out
290  ret void
291}
292
; Unused-result i32 flat atomic inc; expects flat_atomic_inc with only the
; address pair and data operand, and nothing after them.
293; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32:
294; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
295; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
296define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind {
297  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
298  ret void
299}
300
; Unused-result i32 flat atomic inc on a pointer advanced by 4 i32 elements.
; The bonaire and tonga runs expect no offset on the instruction; the gfx900
; run expects a trailing offset:16.
301; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset:
302; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
303; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
304; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
305define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind {
306  %gep = getelementptr i32, i32* %ptr, i32 4
307  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
308  ret void
309}
310
; Returning i32 flat atomic inc on a pointer indexed by the workitem id and
; then advanced by 5 more i32 elements. The bonaire and tonga runs expect no
; offset on the instruction; the gfx900 run expects offset:20 before glc.
311; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset_addr64:
312; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
313; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
314; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
315define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
316  %id = call i32 @llvm.amdgcn.workitem.id.x()
317  %gep.tid = getelementptr i32, i32* %ptr, i32 %id
318  %out.gep = getelementptr i32, i32* %out, i32 %id
319  %gep = getelementptr i32, i32* %gep.tid, i32 5
320  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
321  store i32 %result, i32* %out.gep
322  ret void
323}
324
; Unused-result variant of the workitem-indexed i32 flat case. The bonaire and
; tonga runs expect no offset on the instruction; the gfx900 run expects a
; trailing offset:20.
325; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset_addr64:
326; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
327; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
328; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
329define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 {
330  %id = call i32 @llvm.amdgcn.workitem.id.x()
331  %gep.tid = getelementptr i32, i32* %ptr, i32 %id
332  %gep = getelementptr i32, i32* %gep.tid, i32 5
333  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
334  ret void
335}
336
; addrspace(3) array of 512 i64 used as the base object by the test below.
337@lds1 = addrspace(3) global [512 x i64] undef, align 8
338
; Atomic inc on @lds1 at element index (workitem id + 2). The checks expect the
; workitem id to be scaled by 8 (shift by 3) and added to the lds1 address, and
; the constant +2 elements to show up as offset:16 on ds_inc_rtn_u64. %idx.0 is
; also stored to %add_use, giving the add a second use besides the GEP.
339; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i64:
340; CIVI-DAG: v_lshlrev_b32_e32 [[OFS:v[0-9]+]], 3, {{v[0-9]+}}
341; CIVI-DAG: v_add_{{[ui]}}32_e32 [[PTR:v[0-9]+]], vcc, lds1@abs32@lo, [[OFS]]
342; GFX9-DAG: v_mov_b32_e32 [[BASE:v[0-9]+]], lds1@abs32@lo
343; GFX9-DAG: v_lshl_add_u32 [[PTR:v[0-9]+]], {{v[0-9]+}}, 3, [[BASE]]
344; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
345define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
346  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
347  %idx.0 = add nsw i32 %tid.x, 2
348  %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
349  %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false)
350  store i32 %idx.0, i32 addrspace(1)* %add_use
351  store i64 %val0, i64 addrspace(1)* %out
352  ret void
353}
354
; Returning i64 flat atomic inc with data 42; expects the (42, 0) VGPR pair as
; the data operand of flat_atomic_inc_x2, followed by glc and nothing else.
355; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64:
356; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
357; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
358; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
359define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 {
360  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
361  store i64 %result, i64* %out
362  ret void
363}
364
; Returning i64 flat atomic inc on a pointer advanced by 4 i64 elements. The
; bonaire and tonga runs expect no offset on the instruction; the gfx900 run
; expects offset:32 before glc.
365; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset:
366; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
367; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
368; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
369; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
370define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 {
371  %gep = getelementptr i64, i64* %ptr, i32 4
372  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
373  store i64 %result, i64* %out
374  ret void
375}
376
; Unused-result i64 flat atomic inc; expects flat_atomic_inc_x2 with only the
; address pair and the (42, 0) data pair, and nothing after them.
377; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64:
378; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
379; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
380; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
381define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind {
382  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
383  ret void
384}
385
; Unused-result i64 flat atomic inc on a pointer advanced by 4 i64 elements.
; The bonaire and tonga runs expect no offset on the instruction; the gfx900
; run expects a trailing offset:32.
386; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset:
387; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
388; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
389; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
390; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
391define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind {
392  %gep = getelementptr i64, i64* %ptr, i32 4
393  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
394  ret void
395}
396
; Returning i64 flat atomic inc on a pointer indexed by the workitem id and
; then advanced by 5 more i64 elements. The bonaire and tonga runs expect no
; offset on the instruction; the gfx900 run expects offset:40 before glc.
397; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset_addr64:
398; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
399; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
400; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
401; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
402define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
403  %id = call i32 @llvm.amdgcn.workitem.id.x()
404  %gep.tid = getelementptr i64, i64* %ptr, i32 %id
405  %out.gep = getelementptr i64, i64* %out, i32 %id
406  %gep = getelementptr i64, i64* %gep.tid, i32 5
407  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
408  store i64 %result, i64* %out.gep
409  ret void
410}
411
; Unused-result variant of the workitem-indexed i64 flat case. The bonaire and
; tonga runs expect no offset on the instruction; the gfx900 run expects a
; trailing offset:40.
412; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset_addr64:
413; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
414; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
415; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
416; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
417define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 {
418  %id = call i32 @llvm.amdgcn.workitem.id.x()
419  %gep.tid = getelementptr i64, i64* %ptr, i32 %id
420  %gep = getelementptr i64, i64* %gep.tid, i32 5
421  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
422  ret void
423}
424
; Two identical returning atomic inc calls on the same addrspace(3) pointer,
; with each result stored to a different output. The checks require two
; separate ds_inc_rtn_u32 instructions sharing the same data register, i.e. the
; second call must not be folded into the first.
425; GCN-LABEL: {{^}}nocse_lds_atomic_inc_ret_i32:
426; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
427; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
428; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
429define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 {
430  %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
431  %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
432
433  store i32 %result0, i32 addrspace(1)* %out0
434  store i32 %result1, i32 addrspace(1)* %out1
435  ret void
436}
437
; Attribute groups referenced above. #0 is applied to kernel definitions (some
; kernels spell out nounwind inline instead), #1 to the workitem.id.x
; declaration and its tail-call sites, and #2 to the atomic.inc declarations.
438attributes #0 = { nounwind }
439attributes #1 = { nounwind readnone }
440attributes #2 = { nounwind argmemonly }
441