1; RUN: llc -march=amdgcn -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI,FUNC %s
2; RUN: llc -march=amdgcn -mcpu=bonaire -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s
3; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s
4; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s
5; RUN: llc -march=r600 -mcpu=redwood -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s
6
7; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
8; EG: LDS_WRXCHG_RET *
9
10; SICIVI-DAG: s_mov_b32 m0
11; GFX9-NOT: m0
12
13; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
14; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
15; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
16; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
17; GCN: buffer_store_dword [[RESULT]],
18; GCN: s_endpgm
19define amdgpu_kernel void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic xchg on an addrspace(3) word; old value is written to %out
20  %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst ; swap in constant 4; %result is the previous contents
21  store i32 %result, i32 addrspace(1)* %out, align 4 ; the store is the only use of %result
22  ret void
23}
24
25; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
26; SICIVI: s_mov_b32 m0
27; GFX9-NOT: m0
28
29; EG: LDS_WRXCHG_RET *
30; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
31; GCN: s_endpgm
32define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic xchg at a constant offset from %ptr; old value is written to %out
33  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
34  %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst ; swap in constant 4; %result is the previous contents
35  store i32 %result, i32 addrspace(1)* %out, align 4
36  ret void
37}
38
39; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_f32_offset:
40; SICIVI: s_mov_b32 m0
41; GFX9-NOT: m0
42
43; EG: LDS_WRXCHG_RET *
44; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
45; GCN: s_endpgm
46define amdgpu_kernel void @lds_atomic_xchg_ret_f32_offset(float addrspace(1)* %out, float addrspace(3)* %ptr) nounwind { ; float variant of the offset xchg test; the checks expect the same ds_wrxchg_rtn_b32
47  %gep = getelementptr float, float addrspace(3)* %ptr, i32 4 ; float element 4 = byte offset 16
48  %result = atomicrmw xchg float addrspace(3)* %gep, float 4.0 seq_cst ; swap in 4.0; %result is the previous contents
49  store float %result, float addrspace(1)* %out, align 4
50  ret void
51}
52
53; XXX - Is it really necessary to load 4 into VGPR?
54; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
55; EG: LDS_ADD_RET *
56
57; SICIVI-DAG: s_mov_b32 m0
58; GFX9-NOT: m0
59
60; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
61; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
62; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
63; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
64; GCN: buffer_store_dword [[RESULT]],
65; GCN: s_endpgm
66define amdgpu_kernel void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic add on an addrspace(3) word; old value is written to %out
67  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst ; add constant 4; %result is the value before the add
68  store i32 %result, i32 addrspace(1)* %out, align 4 ; the store is the only use of %result
69  ret void
70}
71
72; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
73; SICIVI: s_mov_b32 m0
74; GFX9-NOT: m0
75
76; EG: LDS_ADD_RET *
77; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
78; GCN: s_endpgm
79define amdgpu_kernel void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic add at a constant offset from %ptr; old value is written to %out
80  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
81  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst ; add constant 4; %result is the value before the add
82  store i32 %result, i32 addrspace(1)* %out, align 4
83  ret void
84}
85
86; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
87; SICIVI: s_mov_b32 m0
88; GFX9-NOT: m0
89
90; EG: LDS_ADD_RET *
91; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
92; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
93; GCN: s_endpgm
94define amdgpu_kernel void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind { ; atomic add where the address is a variable index plus a constant; SI and CIVI checks differ on the offset
95  %sub = sub i32 %a, %b ; variable part of the index, taken from kernel arguments
96  %add = add i32 %sub, 4 ; plus constant 4 elements (16 bytes)
97  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
98  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst ; add constant 4; %result is the value before the add
99  store i32 %result, i32 addrspace(1)* %out, align 4
100  ret void
101}
102
103; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32:
104; EG: LDS_ADD_RET *
105
106; SICIVI-DAG: s_mov_b32 m0
107; GFX9-NOT: m0
108
109; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
110; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
111; GCN: s_endpgm
112define amdgpu_kernel void @lds_atomic_add1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic add of 1 (increment-like); old value is written to %out
113  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst ; operand is the constant 1; %result is the value before the add
114  store i32 %result, i32 addrspace(1)* %out, align 4
115  ret void
116}
117
118; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_offset:
119; EG: LDS_ADD_RET *
120
121; SICIVI-DAG: s_mov_b32 m0
122; GFX9-NOT: m0
123
124; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
125; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
126; GCN: s_endpgm
127define amdgpu_kernel void @lds_atomic_add1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic add of 1 at a constant offset; old value is written to %out
128  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
129  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst ; operand is the constant 1; %result is the value before the add
130  store i32 %result, i32 addrspace(1)* %out, align 4
131  ret void
132}
133
134; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_bad_si_offset:
135; SICIVI: s_mov_b32 m0
136; GFX9-NOT: m0
137
138; EG: LDS_ADD_RET *
139; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
140; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
141; GCN: s_endpgm
142define amdgpu_kernel void @lds_atomic_add1_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind { ; atomic add of 1 where the address is a variable index plus a constant; SI and CIVI checks differ on the offset
143  %sub = sub i32 %a, %b ; variable part of the index, taken from kernel arguments
144  %add = add i32 %sub, 4 ; plus constant 4 elements (16 bytes)
145  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
146  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst ; operand is the constant 1; %result is the value before the add
147  store i32 %result, i32 addrspace(1)* %out, align 4
148  ret void
149}
150
151; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
152; EG: LDS_SUB_RET *
153
154; SICIVI: s_mov_b32 m0
155; GFX9-NOT: m0
156
157; GCN: ds_sub_rtn_u32
158; GCN: s_endpgm
159define amdgpu_kernel void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic sub on an addrspace(3) word; old value is written to %out
160  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst ; subtract constant 4; %result is the value before the sub
161  store i32 %result, i32 addrspace(1)* %out, align 4
162  ret void
163}
164
165; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
166; EG: LDS_SUB_RET *
167
168; SICIVI: s_mov_b32 m0
169; GFX9-NOT: m0
170
171; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
172; GCN: s_endpgm
173define amdgpu_kernel void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic sub at a constant offset from %ptr; old value is written to %out
174  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
175  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst ; subtract constant 4; %result is the value before the sub
176  store i32 %result, i32 addrspace(1)* %out, align 4
177  ret void
178}
179
180; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32:
181; EG: LDS_SUB_RET *
182
183; SICIVI-DAG: s_mov_b32 m0
184; GFX9-NOT: m0
185
186; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
187; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
188; GCN: s_endpgm
189define amdgpu_kernel void @lds_atomic_sub1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic sub of 1 (decrement-like); old value is written to %out
190  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst ; operand is the constant 1; %result is the value before the sub
191  store i32 %result, i32 addrspace(1)* %out, align 4
192  ret void
193}
194
195; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32_offset:
196; EG: LDS_SUB_RET *
197
198; SICIVI-DAG: s_mov_b32 m0
199; GFX9-NOT: m0
200
201; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
202; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
203; GCN: s_endpgm
204define amdgpu_kernel void @lds_atomic_sub1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic sub of 1 at a constant offset; old value is written to %out
205  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
206  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst ; operand is the constant 1; %result is the value before the sub
207  store i32 %result, i32 addrspace(1)* %out, align 4
208  ret void
209}
210
211; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
212; EG: LDS_AND_RET *
213
214; SICIVI-DAG: s_mov_b32 m0
215; GFX9-NOT: m0
216
217; GCN: ds_and_rtn_b32
218; GCN: s_endpgm
219define amdgpu_kernel void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic bitwise and on an addrspace(3) word; old value is written to %out
220  %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst ; and with constant 4; %result is the previous contents
221  store i32 %result, i32 addrspace(1)* %out, align 4
222  ret void
223}
224
225; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
226; SICIVI: s_mov_b32 m0
227; GFX9-NOT: m0
228
229; EG: LDS_AND_RET *
230; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
231; GCN: s_endpgm
232define amdgpu_kernel void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic bitwise and at a constant offset; old value is written to %out
233  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
234  %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst ; and with constant 4; %result is the previous contents
235  store i32 %result, i32 addrspace(1)* %out, align 4
236  ret void
237}
238
239; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
240; SICIVI: s_mov_b32 m0
241; GFX9-NOT: m0
242
243; EG: LDS_OR_RET *
244; GCN: ds_or_rtn_b32
245; GCN: s_endpgm
246define amdgpu_kernel void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic bitwise or on an addrspace(3) word; old value is written to %out
247  %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst ; or with constant 4; %result is the previous contents
248  store i32 %result, i32 addrspace(1)* %out, align 4
249  ret void
250}
251
252; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
253; SICIVI: s_mov_b32 m0
254; GFX9-NOT: m0
255
256; EG: LDS_OR_RET *
257; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
258; GCN: s_endpgm
259define amdgpu_kernel void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic bitwise or at a constant offset; old value is written to %out
260  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
261  %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst ; or with constant 4; %result is the previous contents
262  store i32 %result, i32 addrspace(1)* %out, align 4
263  ret void
264}
265
266; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
267; SICIVI: s_mov_b32 m0
268; GFX9-NOT: m0
269
270; EG: LDS_XOR_RET *
271; GCN: ds_xor_rtn_b32
272; GCN: s_endpgm
273define amdgpu_kernel void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic bitwise xor on an addrspace(3) word; old value is written to %out
274  %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst ; xor with constant 4; %result is the previous contents
275  store i32 %result, i32 addrspace(1)* %out, align 4
276  ret void
277}
278
279; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
280; SICIVI: s_mov_b32 m0
281; GFX9-NOT: m0
282
283; EG: LDS_XOR_RET *
284; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
285; GCN: s_endpgm
286define amdgpu_kernel void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic bitwise xor at a constant offset; old value is written to %out
287  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
288  %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst ; xor with constant 4; %result is the previous contents
289  store i32 %result, i32 addrspace(1)* %out, align 4
290  ret void
291}
292
293; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
294; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i32:
295; define amdgpu_kernel void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
296;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
297;   store i32 %result, i32 addrspace(1)* %out, align 4
298;   ret void
299; }
300
301; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
302; SICIVI: s_mov_b32 m0
303; GFX9-NOT: m0
304
305; EG: LDS_MIN_INT_RET *
306; GCN: ds_min_rtn_i32
307; GCN: s_endpgm
308define amdgpu_kernel void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic signed min on an addrspace(3) word; old value is written to %out
309  %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst ; signed minimum against constant 4; %result is the previous contents
310  store i32 %result, i32 addrspace(1)* %out, align 4
311  ret void
312}
313
314; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
315; SICIVI: s_mov_b32 m0
316; GFX9-NOT: m0
317
318; EG: LDS_MIN_INT_RET *
319; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
320; GCN: s_endpgm
321define amdgpu_kernel void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic signed min at a constant offset; old value is written to %out
322  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
323  %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst ; signed minimum against constant 4; %result is the previous contents
324  store i32 %result, i32 addrspace(1)* %out, align 4
325  ret void
326}
327
328; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
329; SICIVI: s_mov_b32 m0
330; GFX9-NOT: m0
331
332; EG: LDS_MAX_INT_RET *
333; GCN: ds_max_rtn_i32
334; GCN: s_endpgm
335define amdgpu_kernel void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic signed max on an addrspace(3) word; old value is written to %out
336  %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst ; signed maximum against constant 4; %result is the previous contents
337  store i32 %result, i32 addrspace(1)* %out, align 4
338  ret void
339}
340
341; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
342; SICIVI: s_mov_b32 m0
343; GFX9-NOT: m0
344
345; EG: LDS_MAX_INT_RET *
346; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
347; GCN: s_endpgm
348define amdgpu_kernel void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic signed max at a constant offset; old value is written to %out
349  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
350  %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst ; signed maximum against constant 4; %result is the previous contents
351  store i32 %result, i32 addrspace(1)* %out, align 4
352  ret void
353}
354
355; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
356; SICIVI: s_mov_b32 m0
357; GFX9-NOT: m0
358
359; EG: LDS_MIN_UINT_RET *
360; GCN: ds_min_rtn_u32
361; GCN: s_endpgm
362define amdgpu_kernel void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic unsigned min on an addrspace(3) word; old value is written to %out
363  %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst ; unsigned minimum against constant 4; %result is the previous contents
364  store i32 %result, i32 addrspace(1)* %out, align 4
365  ret void
366}
367
368; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
369; SICIVI: s_mov_b32 m0
370; GFX9-NOT: m0
371
372; EG: LDS_MIN_UINT_RET *
373; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
374; GCN: s_endpgm
375define amdgpu_kernel void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic unsigned min at a constant offset; old value is written to %out
376  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
377  %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst ; unsigned minimum against constant 4; %result is the previous contents
378  store i32 %result, i32 addrspace(1)* %out, align 4
379  ret void
380}
381
382; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
383; SICIVI: s_mov_b32 m0
384; GFX9-NOT: m0
385
386; EG: LDS_MAX_UINT_RET *
387; GCN: ds_max_rtn_u32
388; GCN: s_endpgm
389define amdgpu_kernel void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic unsigned max on an addrspace(3) word; old value is written to %out
390  %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst ; unsigned maximum against constant 4; %result is the previous contents
391  store i32 %result, i32 addrspace(1)* %out, align 4
392  ret void
393}
394
395; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
396; SICIVI: s_mov_b32 m0
397; GFX9-NOT: m0
398
399; EG: LDS_MAX_UINT_RET *
400; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
401; GCN: s_endpgm
402define amdgpu_kernel void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { ; atomic unsigned max at a constant offset; old value is written to %out
403  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
404  %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst ; unsigned maximum against constant 4; %result is the previous contents
405  store i32 %result, i32 addrspace(1)* %out, align 4
406  ret void
407}
408
409; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
410; SICIVI-DAG: s_mov_b32 m0
411; GFX9-NOT: m0
412
413; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
414; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
415; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
416; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
417; GCN: s_endpgm
418define amdgpu_kernel void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind { ; atomic xchg whose result is discarded; the GCN check still expects ds_wrxchg_rtn_b32
419  %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst ; swap in constant 4; %result has no users
420  ret void
421}
422
423; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
424; SICIVI: s_mov_b32 m0
425; GFX9-NOT: m0
426
427; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
428; GCN: s_endpgm
429define amdgpu_kernel void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; atomic xchg at a constant offset, result discarded; the GCN check still expects ds_wrxchg_rtn_b32
430  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
431  %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst ; swap in constant 4; %result has no users
432  ret void
433}
434
435; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
436; SICIVI-DAG: s_mov_b32 m0
437; GFX9-NOT: m0
438
439; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
440; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
441; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
442; GCN: ds_add_u32 [[VPTR]], [[DATA]]
443; GCN: s_endpgm
444define amdgpu_kernel void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind { ; atomic add whose result is discarded; checks expect the non-returning ds_add_u32
445  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst ; add constant 4; %result has no users
446  ret void
447}
448
449; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
450; SICIVI: s_mov_b32 m0
451; GFX9-NOT: m0
452
453; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
454; GCN: s_endpgm
455define amdgpu_kernel void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; atomic add at a constant offset, result discarded
456  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
457  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst ; add constant 4; %result has no users
458  ret void
459}
460
461; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset:
462; SICIVI: s_mov_b32 m0
463; GFX9-NOT: m0
464
465; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
466; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
467; GCN: s_endpgm
468define amdgpu_kernel void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind { ; result-discarded atomic add where the address is a variable index plus a constant; SI and CIVI checks differ on the offset
469  %sub = sub i32 %a, %b ; variable part of the index, taken from kernel arguments
470  %add = add i32 %sub, 4 ; plus constant 4 elements (16 bytes)
471  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
472  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst ; add constant 4; %result has no users
473  ret void
474}
475
476; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32:
477; SICIVI-DAG: s_mov_b32 m0
478; GFX9-NOT: m0
479
480; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
481; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]]
482; GCN: s_endpgm
483define amdgpu_kernel void @lds_atomic_add1_noret_i32(i32 addrspace(3)* %ptr) nounwind { ; atomic add of 1 (increment-like), result discarded
484  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst ; operand is the constant 1; %result has no users
485  ret void
486}
487
488; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_offset:
489; SICIVI-DAG: s_mov_b32 m0
490; GFX9-NOT: m0
491
492; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
493; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]] offset:16
494; GCN: s_endpgm
495define amdgpu_kernel void @lds_atomic_add1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; atomic add of 1 at a constant offset, result discarded
496  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
497  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst ; operand is the constant 1; %result has no users
498  ret void
499}
500
501; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_bad_si_offset:
502; SICIVI: s_mov_b32 m0
503; GFX9-NOT: m0
504
505; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
506; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
507; GCN: s_endpgm
508define amdgpu_kernel void @lds_atomic_add1_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind { ; result-discarded atomic add of 1 where the address is a variable index plus a constant; SI and CIVI checks differ on the offset
509  %sub = sub i32 %a, %b ; variable part of the index, taken from kernel arguments
510  %add = add i32 %sub, 4 ; plus constant 4 elements (16 bytes)
511  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
512  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst ; operand is the constant 1; %result has no users
513  ret void
514}
515
516; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
517; SICIVI: s_mov_b32 m0
518; GFX9-NOT: m0
519
520; GCN: ds_sub_u32
521; GCN: s_endpgm
522define amdgpu_kernel void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind { ; atomic sub whose result is discarded; checks expect the non-returning ds_sub_u32
523  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst ; subtract constant 4; %result has no users
524  ret void
525}
526
527; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
528; SICIVI: s_mov_b32 m0
529; GFX9-NOT: m0
530
531; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
532; GCN: s_endpgm
533define amdgpu_kernel void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; atomic sub at a constant offset, result discarded
534  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
535  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst ; subtract constant 4; %result has no users
536  ret void
537}
538
539; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32:
540; SICIVI-DAG: s_mov_b32 m0
541; GFX9-NOT: m0
542
543; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
544; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]]
545; GCN: s_endpgm
546define amdgpu_kernel void @lds_atomic_sub1_noret_i32(i32 addrspace(3)* %ptr) nounwind { ; atomic sub of 1 (decrement-like), result discarded
547  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst ; operand is the constant 1; %result has no users
548  ret void
549}
550
551; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32_offset:
552; SICIVI-DAG: s_mov_b32 m0
553; GFX9-NOT: m0
554
555; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
556; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]] offset:16
557; GCN: s_endpgm
558define amdgpu_kernel void @lds_atomic_sub1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; atomic sub of 1 at a constant offset, result discarded
559  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
560  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst ; operand is the constant 1; %result has no users
561  ret void
562}
563
564; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
565; SICIVI: s_mov_b32 m0
566; GFX9-NOT: m0
567
568; GCN: ds_and_b32
569; GCN: s_endpgm
570define amdgpu_kernel void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind { ; atomic bitwise and whose result is discarded; checks expect the non-returning ds_and_b32
571  %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst ; and with constant 4; %result has no users
572  ret void
573}
574
575; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
576; SICIVI: s_mov_b32 m0
577; GFX9-NOT: m0
578
579; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
580; GCN: s_endpgm
581define amdgpu_kernel void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; atomic bitwise and at a constant offset, result discarded
582  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
583  %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst ; and with constant 4; %result has no users
584  ret void
585}
586
587; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
588; SICIVI: s_mov_b32 m0
589; GFX9-NOT: m0
590
591; GCN: ds_or_b32
592; GCN: s_endpgm
593define amdgpu_kernel void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind { ; atomic bitwise or whose result is discarded; checks expect the non-returning ds_or_b32
594  %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst ; or with constant 4; %result has no users
595  ret void
596}
597
598; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
599; SICIVI: s_mov_b32 m0
600; GFX9-NOT: m0
601
602; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
603; GCN: s_endpgm
604define amdgpu_kernel void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; atomic bitwise or at a constant offset, result discarded
605  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
606  %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst ; or with constant 4; %result has no users
607  ret void
608}
609
610; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
611; SICIVI: s_mov_b32 m0
612; GFX9-NOT: m0
613
614; GCN: ds_xor_b32
615; GCN: s_endpgm
616define amdgpu_kernel void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind { ; atomic bitwise xor whose result is discarded; checks expect the non-returning ds_xor_b32
617  %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst ; xor with constant 4; %result has no users
618  ret void
619}
620
621; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
622; SICIVI: s_mov_b32 m0
623; GFX9-NOT: m0
624
625; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
626; GCN: s_endpgm
627define amdgpu_kernel void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; atomic bitwise xor at a constant offset, result discarded
628  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
629  %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst ; xor with constant 4; %result has no users
630  ret void
631}
632
633; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
634; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i32:
635; define amdgpu_kernel void @lds_atomic_nand_noret_i32(i32 addrspace(3)* %ptr) nounwind {
636;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
637;   ret void
638; }
639
640; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
641; SICIVI: s_mov_b32 m0
642; GFX9-NOT: m0
643
644; GCN: ds_min_i32
645; GCN: s_endpgm
646define amdgpu_kernel void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind { ; atomic signed min whose result is discarded; checks expect the non-returning ds_min_i32
647  %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst ; signed minimum against constant 4; %result has no users
648  ret void
649}
650
651; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
652; SICIVI: s_mov_b32 m0
653; GFX9-NOT: m0
654
655; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
656; GCN: s_endpgm
657define amdgpu_kernel void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; atomic signed min at a constant offset, result discarded
658  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
659  %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst ; signed minimum against constant 4; %result has no users
660  ret void
661}
662
663; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
664; SICIVI: s_mov_b32 m0
665; GFX9-NOT: m0
666
667; GCN: ds_max_i32
668; GCN: s_endpgm
669define amdgpu_kernel void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind { ; atomic signed max whose result is discarded; checks expect the non-returning ds_max_i32
670  %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst ; signed maximum against constant 4; %result has no users
671  ret void
672}
673
674; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
675; SICIVI: s_mov_b32 m0
676; GFX9-NOT: m0
677
678; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
679; GCN: s_endpgm
680define amdgpu_kernel void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; atomic signed max at a constant offset, result discarded
681  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
682  %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst ; signed maximum against constant 4; %result has no users
683  ret void
684}
685
686; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
687; SICIVI: s_mov_b32 m0
688; GFX9-NOT: m0
689
690; GCN: ds_min_u32
691; GCN: s_endpgm
692define amdgpu_kernel void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind { ; atomic unsigned min whose result is discarded; checks expect the non-returning ds_min_u32
693  %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst ; unsigned minimum against constant 4; %result has no users
694  ret void
695}
696
697; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
698; SICIVI: s_mov_b32 m0
699; GFX9-NOT: m0
700
701; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
702; GCN: s_endpgm
703define amdgpu_kernel void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; atomic unsigned min at a constant offset, result discarded
704  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
705  %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst ; unsigned minimum against constant 4; %result has no users
706  ret void
707}
708
709; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
710; SICIVI: s_mov_b32 m0
711; GFX9-NOT: m0
712
713; GCN: ds_max_u32
714; GCN: s_endpgm
715define amdgpu_kernel void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind { ; atomic unsigned max whose result is discarded; checks expect the non-returning ds_max_u32
716  %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst ; unsigned maximum against constant 4; %result has no users
717  ret void
718}
719
720; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
721; SICIVI: s_mov_b32 m0
722; GFX9-NOT: m0
723
724; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
725; GCN: s_endpgm
726define amdgpu_kernel void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; atomic unsigned max at a constant offset, result discarded
727  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; i32 element 4 = byte offset 16
728  %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst ; unsigned maximum against constant 4; %result has no users
729  ret void
730}
731