1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI,FUNC %s
2; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s
3; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s
4; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s
5; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s
; The SICIVI prefix carries the s_mov_b32 m0 checks; it is enabled for the default (SI), bonaire and tonga runs, while the gfx900 run checks that m0 is absent.
6
7; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
8; EG: LDS_WRXCHG_RET *
9
10; SICIVI-DAG: s_mov_b32 m0
11; GFX9-NOT: m0
12
13; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
14; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
15; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
16; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
17; GCN: buffer_store_dword [[RESULT]],
18; GCN: s_endpgm
19define amdgpu_kernel void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
20  %prev = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst ; swap 4 into *%ptr; %prev is the value replaced
21  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the replaced value to %out
22  ret void
23}
24
25; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
26; SICIVI: s_mov_b32 m0
27; GFX9-NOT: m0
28
29; EG: LDS_WRXCHG_RET *
30; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
31; GCN: s_endpgm
32define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
33  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
34  %prev = atomicrmw xchg i32 addrspace(3)* %slot, i32 4 seq_cst ; swap 4 into *%slot
35  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the replaced value to %out
36  ret void
37}
38
39; XXX - Is it really necessary to load 4 into VGPR?
40; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
41; EG: LDS_ADD_RET *
42
43; SICIVI-DAG: s_mov_b32 m0
44; GFX9-NOT: m0
45
46; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
47; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
48; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
49; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
50; GCN: buffer_store_dword [[RESULT]],
51; GCN: s_endpgm
52define amdgpu_kernel void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
53  %prev = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst ; add 4 to *%ptr; %prev is the pre-add value
54  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the pre-add value to %out
55  ret void
56}
57
58; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
59; SICIVI: s_mov_b32 m0
60; GFX9-NOT: m0
61
62; EG: LDS_ADD_RET *
63; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
64; GCN: s_endpgm
65define amdgpu_kernel void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
66  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
67  %prev = atomicrmw add i32 addrspace(3)* %slot, i32 4 seq_cst ; add 4 to *%slot
68  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the pre-add value to %out
69  ret void
70}
71
72; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
73; SICIVI: s_mov_b32 m0
74; GFX9-NOT: m0
75
76; EG: LDS_ADD_RET *
77; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
78; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
79; GCN: s_endpgm
80define amdgpu_kernel void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
81  %diff = sub i32 %a, %b ; run-time part of the index
82  %idx = add i32 %diff, 4 ; plus a constant 4 elements
83  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 %idx ; &%ptr[(%a - %b) + 4]
84  %prev = atomicrmw add i32 addrspace(3)* %slot, i32 4 seq_cst ; add 4 to *%slot
85  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the pre-add value to %out
86  ret void
87}
88
89; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32:
90; EG: LDS_ADD_RET *
91
92; SICIVI-DAG: s_mov_b32 m0
93; GFX9-NOT: m0
94
95; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
96; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
97; GCN: s_endpgm
98define amdgpu_kernel void @lds_atomic_add1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
99  %prev = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst ; add 1 to *%ptr; %prev is the pre-add value
100  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the pre-add value to %out
101  ret void
102}
103
104; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_offset:
105; EG: LDS_ADD_RET *
106
107; SICIVI-DAG: s_mov_b32 m0
108; GFX9-NOT: m0
109
110; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
111; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
112; GCN: s_endpgm
113define amdgpu_kernel void @lds_atomic_add1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
114  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
115  %prev = atomicrmw add i32 addrspace(3)* %slot, i32 1 seq_cst ; add 1 to *%slot
116  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the pre-add value to %out
117  ret void
118}
119
120; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_bad_si_offset:
121; SICIVI: s_mov_b32 m0
122; GFX9-NOT: m0
123
124; EG: LDS_ADD_RET *
125; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
126; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
127; GCN: s_endpgm
128define amdgpu_kernel void @lds_atomic_add1_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
129  %diff = sub i32 %a, %b ; run-time part of the index
130  %idx = add i32 %diff, 4 ; plus a constant 4 elements
131  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 %idx ; &%ptr[(%a - %b) + 4]
132  %prev = atomicrmw add i32 addrspace(3)* %slot, i32 1 seq_cst ; add 1 to *%slot
133  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the pre-add value to %out
134  ret void
135}
136
137; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
138; EG: LDS_SUB_RET *
139
140; SICIVI: s_mov_b32 m0
141; GFX9-NOT: m0
142
143; GCN: ds_sub_rtn_u32
144; GCN: s_endpgm
145define amdgpu_kernel void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
146  %prev = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst ; subtract 4 from *%ptr; %prev is the prior value
147  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
148  ret void
149}
150
151; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
152; EG: LDS_SUB_RET *
153
154; SICIVI: s_mov_b32 m0
155; GFX9-NOT: m0
156
157; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
158; GCN: s_endpgm
159define amdgpu_kernel void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
160  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
161  %prev = atomicrmw sub i32 addrspace(3)* %slot, i32 4 seq_cst ; subtract 4 from *%slot
162  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
163  ret void
164}
165
166; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32:
167; EG: LDS_SUB_RET *
168
169; SICIVI-DAG: s_mov_b32 m0
170; GFX9-NOT: m0
171
172; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
173; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
174; GCN: s_endpgm
175define amdgpu_kernel void @lds_atomic_sub1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
176  %prev = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst ; subtract 1 from *%ptr; %prev is the prior value
177  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
178  ret void
179}
180
181; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32_offset:
182; EG: LDS_SUB_RET *
183
184; SICIVI-DAG: s_mov_b32 m0
185; GFX9-NOT: m0
186
187; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
188; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
189; GCN: s_endpgm
190define amdgpu_kernel void @lds_atomic_sub1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
191  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
192  %prev = atomicrmw sub i32 addrspace(3)* %slot, i32 1 seq_cst ; subtract 1 from *%slot
193  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
194  ret void
195}
196
197; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
198; EG: LDS_AND_RET *
199
200; SICIVI-DAG: s_mov_b32 m0
201; GFX9-NOT: m0
202
203; GCN: ds_and_rtn_b32
204; GCN: s_endpgm
205define amdgpu_kernel void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
206  %prev = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst ; *%ptr &= 4; %prev is the prior value
207  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
208  ret void
209}
210
211; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
212; SICIVI: s_mov_b32 m0
213; GFX9-NOT: m0
214
215; EG: LDS_AND_RET *
216; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
217; GCN: s_endpgm
218define amdgpu_kernel void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
219  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
220  %prev = atomicrmw and i32 addrspace(3)* %slot, i32 4 seq_cst ; *%slot &= 4
221  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
222  ret void
223}
224
225; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
226; SICIVI: s_mov_b32 m0
227; GFX9-NOT: m0
228
229; EG: LDS_OR_RET *
230; GCN: ds_or_rtn_b32
231; GCN: s_endpgm
232define amdgpu_kernel void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
233  %prev = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst ; *%ptr |= 4; %prev is the prior value
234  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
235  ret void
236}
237
238; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
239; SICIVI: s_mov_b32 m0
240; GFX9-NOT: m0
241
242; EG: LDS_OR_RET *
243; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
244; GCN: s_endpgm
245define amdgpu_kernel void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
246  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
247  %prev = atomicrmw or i32 addrspace(3)* %slot, i32 4 seq_cst ; *%slot |= 4
248  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
249  ret void
250}
251
252; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
253; SICIVI: s_mov_b32 m0
254; GFX9-NOT: m0
255
256; EG: LDS_XOR_RET *
257; GCN: ds_xor_rtn_b32
258; GCN: s_endpgm
259define amdgpu_kernel void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
260  %prev = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst ; *%ptr ^= 4; %prev is the prior value
261  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
262  ret void
263}
264
265; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
266; SICIVI: s_mov_b32 m0
267; GFX9-NOT: m0
268
269; EG: LDS_XOR_RET *
270; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
271; GCN: s_endpgm
272define amdgpu_kernel void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
273  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
274  %prev = atomicrmw xor i32 addrspace(3)* %slot, i32 4 seq_cst ; *%slot ^= 4
275  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
276  ret void
277}
278
279; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
280; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i32:
281; define amdgpu_kernel void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
282;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
283;   store i32 %result, i32 addrspace(1)* %out, align 4
284;   ret void
285; }
286
287; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
288; SICIVI: s_mov_b32 m0
289; GFX9-NOT: m0
290
291; EG: LDS_MIN_INT_RET *
292; GCN: ds_min_rtn_i32
293; GCN: s_endpgm
294define amdgpu_kernel void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
295  %prev = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst ; signed min of *%ptr and 4; %prev is the prior value
296  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
297  ret void
298}
299
300; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
301; SICIVI: s_mov_b32 m0
302; GFX9-NOT: m0
303
304; EG: LDS_MIN_INT_RET *
305; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
306; GCN: s_endpgm
307define amdgpu_kernel void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
308  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
309  %prev = atomicrmw min i32 addrspace(3)* %slot, i32 4 seq_cst ; signed min of *%slot and 4
310  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
311  ret void
312}
313
314; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
315; SICIVI: s_mov_b32 m0
316; GFX9-NOT: m0
317
318; EG: LDS_MAX_INT_RET *
319; GCN: ds_max_rtn_i32
320; GCN: s_endpgm
321define amdgpu_kernel void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
322  %prev = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst ; signed max of *%ptr and 4; %prev is the prior value
323  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
324  ret void
325}
326
327; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
328; SICIVI: s_mov_b32 m0
329; GFX9-NOT: m0
330
331; EG: LDS_MAX_INT_RET *
332; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
333; GCN: s_endpgm
334define amdgpu_kernel void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
335  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
336  %prev = atomicrmw max i32 addrspace(3)* %slot, i32 4 seq_cst ; signed max of *%slot and 4
337  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
338  ret void
339}
340
341; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
342; SICIVI: s_mov_b32 m0
343; GFX9-NOT: m0
344
345; EG: LDS_MIN_UINT_RET *
346; GCN: ds_min_rtn_u32
347; GCN: s_endpgm
348define amdgpu_kernel void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
349  %prev = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst ; unsigned min of *%ptr and 4; %prev is the prior value
350  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
351  ret void
352}
353
354; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
355; SICIVI: s_mov_b32 m0
356; GFX9-NOT: m0
357
358; EG: LDS_MIN_UINT_RET *
359; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
360; GCN: s_endpgm
361define amdgpu_kernel void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
362  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
363  %prev = atomicrmw umin i32 addrspace(3)* %slot, i32 4 seq_cst ; unsigned min of *%slot and 4
364  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
365  ret void
366}
367
368; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
369; SICIVI: s_mov_b32 m0
370; GFX9-NOT: m0
371
372; EG: LDS_MAX_UINT_RET *
373; GCN: ds_max_rtn_u32
374; GCN: s_endpgm
375define amdgpu_kernel void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
376  %prev = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst ; unsigned max of *%ptr and 4; %prev is the prior value
377  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
378  ret void
379}
380
381; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
382; SICIVI: s_mov_b32 m0
383; GFX9-NOT: m0
384
385; EG: LDS_MAX_UINT_RET *
386; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
387; GCN: s_endpgm
388define amdgpu_kernel void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
389  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
390  %prev = atomicrmw umax i32 addrspace(3)* %slot, i32 4 seq_cst ; unsigned max of *%slot and 4
391  store i32 %prev, i32 addrspace(1)* %out, align 4 ; write the prior value to %out
392  ret void
393}
394
395; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
396; SICIVI-DAG: s_mov_b32 m0
397; GFX9-NOT: m0
398
399; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
400; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
401; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
402; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
403; GCN: s_endpgm
404define amdgpu_kernel void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
405  %unused = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst ; swap 4 into *%ptr; the replaced value is never read
406  ret void
407}
408
409; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
410; SICIVI: s_mov_b32 m0
411; GFX9-NOT: m0
412
413; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
414; GCN: s_endpgm
415define amdgpu_kernel void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
416  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
417  %unused = atomicrmw xchg i32 addrspace(3)* %slot, i32 4 seq_cst ; swap 4 into *%slot; the replaced value is never read
418  ret void
419}
420
421; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
422; SICIVI-DAG: s_mov_b32 m0
423; GFX9-NOT: m0
424
425; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
426; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
427; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
428; GCN: ds_add_u32 [[VPTR]], [[DATA]]
429; GCN: s_endpgm
430define amdgpu_kernel void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
431  %unused = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst ; add 4 to *%ptr; the prior value is never read
432  ret void
433}
434
435; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
436; SICIVI: s_mov_b32 m0
437; GFX9-NOT: m0
438
439; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
440; GCN: s_endpgm
441define amdgpu_kernel void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
442  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
443  %unused = atomicrmw add i32 addrspace(3)* %slot, i32 4 seq_cst ; add 4 to *%slot; the prior value is never read
444  ret void
445}
446
447; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset:
448; SICIVI: s_mov_b32 m0
449; GFX9-NOT: m0
450
451; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
452; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
453; GCN: s_endpgm
454define amdgpu_kernel void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
455  %diff = sub i32 %a, %b ; run-time part of the index
456  %idx = add i32 %diff, 4 ; plus a constant 4 elements
457  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 %idx ; &%ptr[(%a - %b) + 4]
458  %unused = atomicrmw add i32 addrspace(3)* %slot, i32 4 seq_cst ; add 4 to *%slot; the prior value is never read
459  ret void
460}
461
462; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32:
463; SICIVI-DAG: s_mov_b32 m0
464; GFX9-NOT: m0
465
466; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
467; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]]
468; GCN: s_endpgm
469define amdgpu_kernel void @lds_atomic_add1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
470  %unused = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst ; add 1 to *%ptr; the prior value is never read
471  ret void
472}
473
474; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_offset:
475; SICIVI-DAG: s_mov_b32 m0
476; GFX9-NOT: m0
477
478; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
479; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]] offset:16
480; GCN: s_endpgm
481define amdgpu_kernel void @lds_atomic_add1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
482  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
483  %unused = atomicrmw add i32 addrspace(3)* %slot, i32 1 seq_cst ; add 1 to *%slot; the prior value is never read
484  ret void
485}
486
487; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_bad_si_offset:
488; SICIVI: s_mov_b32 m0
489; GFX9-NOT: m0
490
491; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
492; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
493; GCN: s_endpgm
494define amdgpu_kernel void @lds_atomic_add1_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
495  %diff = sub i32 %a, %b ; run-time part of the index
496  %idx = add i32 %diff, 4 ; plus a constant 4 elements
497  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 %idx ; &%ptr[(%a - %b) + 4]
498  %unused = atomicrmw add i32 addrspace(3)* %slot, i32 1 seq_cst ; add 1 to *%slot; the prior value is never read
499  ret void
500}
501
502; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
503; SICIVI: s_mov_b32 m0
504; GFX9-NOT: m0
505
506; GCN: ds_sub_u32
507; GCN: s_endpgm
508define amdgpu_kernel void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
509  %unused = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst ; subtract 4 from *%ptr; the prior value is never read
510  ret void
511}
512
513; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
514; SICIVI: s_mov_b32 m0
515; GFX9-NOT: m0
516
517; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
518; GCN: s_endpgm
519define amdgpu_kernel void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
520  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
521  %unused = atomicrmw sub i32 addrspace(3)* %slot, i32 4 seq_cst ; subtract 4 from *%slot; the prior value is never read
522  ret void
523}
524
525; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32:
526; SICIVI-DAG: s_mov_b32 m0
527; GFX9-NOT: m0
528
529; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
530; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]]
531; GCN: s_endpgm
532define amdgpu_kernel void @lds_atomic_sub1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
533  %unused = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst ; subtract 1 from *%ptr; the prior value is never read
534  ret void
535}
536
537; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32_offset:
538; SICIVI-DAG: s_mov_b32 m0
539; GFX9-NOT: m0
540
541; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
542; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]] offset:16
543; GCN: s_endpgm
544define amdgpu_kernel void @lds_atomic_sub1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
545  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
546  %unused = atomicrmw sub i32 addrspace(3)* %slot, i32 1 seq_cst ; subtract 1 from *%slot; the prior value is never read
547  ret void
548}
549
550; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
551; SICIVI: s_mov_b32 m0
552; GFX9-NOT: m0
553
554; GCN: ds_and_b32
555; GCN: s_endpgm
556define amdgpu_kernel void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
557  %unused = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst ; *%ptr &= 4; the prior value is never read
558  ret void
559}
560
561; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
562; SICIVI: s_mov_b32 m0
563; GFX9-NOT: m0
564
565; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
566; GCN: s_endpgm
567define amdgpu_kernel void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
568  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
569  %unused = atomicrmw and i32 addrspace(3)* %slot, i32 4 seq_cst ; *%slot &= 4; the prior value is never read
570  ret void
571}
572
573; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
574; SICIVI: s_mov_b32 m0
575; GFX9-NOT: m0
576
577; GCN: ds_or_b32
578; GCN: s_endpgm
579define amdgpu_kernel void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
580  %unused = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst ; *%ptr |= 4; the prior value is never read
581  ret void
582}
583
584; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
585; SICIVI: s_mov_b32 m0
586; GFX9-NOT: m0
587
588; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
589; GCN: s_endpgm
590define amdgpu_kernel void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
591  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
592  %unused = atomicrmw or i32 addrspace(3)* %slot, i32 4 seq_cst ; *%slot |= 4; the prior value is never read
593  ret void
594}
595
596; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
597; SICIVI: s_mov_b32 m0
598; GFX9-NOT: m0
599
600; GCN: ds_xor_b32
601; GCN: s_endpgm
602define amdgpu_kernel void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
603  %unused = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst ; *%ptr ^= 4; the prior value is never read
604  ret void
605}
606
607; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
608; SICIVI: s_mov_b32 m0
609; GFX9-NOT: m0
610
611; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
612; GCN: s_endpgm
613define amdgpu_kernel void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
614  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
615  %unused = atomicrmw xor i32 addrspace(3)* %slot, i32 4 seq_cst ; *%slot ^= 4; the prior value is never read
616  ret void
617}
618
619; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
620; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i32:
621; define amdgpu_kernel void @lds_atomic_nand_noret_i32(i32 addrspace(3)* %ptr) nounwind {
622;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
623;   ret void
624; }
625
626; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
627; SICIVI: s_mov_b32 m0
628; GFX9-NOT: m0
629
630; GCN: ds_min_i32
631; GCN: s_endpgm
632define amdgpu_kernel void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
633  %unused = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst ; signed min of *%ptr and 4; the prior value is never read
634  ret void
635}
636
637; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
638; SICIVI: s_mov_b32 m0
639; GFX9-NOT: m0
640
641; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
642; GCN: s_endpgm
643define amdgpu_kernel void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
644  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
645  %unused = atomicrmw min i32 addrspace(3)* %slot, i32 4 seq_cst ; signed min of *%slot and 4; the prior value is never read
646  ret void
647}
648
649; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
650; SICIVI: s_mov_b32 m0
651; GFX9-NOT: m0
652
653; GCN: ds_max_i32
654; GCN: s_endpgm
655define amdgpu_kernel void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
656  %unused = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst ; signed max of *%ptr and 4; the prior value is never read
657  ret void
658}
659
660; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
661; SICIVI: s_mov_b32 m0
662; GFX9-NOT: m0
663
664; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
665; GCN: s_endpgm
666define amdgpu_kernel void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
667  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
668  %unused = atomicrmw max i32 addrspace(3)* %slot, i32 4 seq_cst ; signed max of *%slot and 4; the prior value is never read
669  ret void
670}
671
672; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
673; SICIVI: s_mov_b32 m0
674; GFX9-NOT: m0
675
676; GCN: ds_min_u32
677; GCN: s_endpgm
678define amdgpu_kernel void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
679  %unused = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst ; unsigned min of *%ptr and 4; the prior value is never read
680  ret void
681}
682
683; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
684; SICIVI: s_mov_b32 m0
685; GFX9-NOT: m0
686
687; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
688; GCN: s_endpgm
689define amdgpu_kernel void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
690  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
691  %unused = atomicrmw umin i32 addrspace(3)* %slot, i32 4 seq_cst ; unsigned min of *%slot and 4; the prior value is never read
692  ret void
693}
694
695; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
696; SICIVI: s_mov_b32 m0
697; GFX9-NOT: m0
698
699; GCN: ds_max_u32
700; GCN: s_endpgm
701define amdgpu_kernel void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
702  %unused = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst ; unsigned max of *%ptr and 4; the prior value is never read
703  ret void
704}
705
706; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
707; SICIVI: s_mov_b32 m0
708; GFX9-NOT: m0
709
710; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
711; GCN: s_endpgm
712define amdgpu_kernel void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
713  %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; &%ptr[4], i.e. 16 bytes past %ptr
714  %unused = atomicrmw umax i32 addrspace(3)* %slot, i32 4 seq_cst ; unsigned max of *%slot and 4; the prior value is never read
715  ret void
716}
717