1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SI,SICIVI %s
2; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,VI,SICIVI,GFX89 %s
3; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9,GFX89 %s
4
5; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64:
6; SICIVI: s_mov_b32 m0
7; GFX9-NOT: m0
8
9; GCN: ds_wrxchg_rtn_b64
10; GCN: s_endpgm
; Exchanges 4 into the i64 at %ptr (seq_cst) and writes the atomicrmw result to %out.
11define amdgpu_kernel void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
12  %old = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
13  store i64 %old, i64 addrspace(1)* %out, align 8
14  ret void
15}
16
17; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
18; SICIVI: s_mov_b32 m0
19; GFX9-NOT: m0
20
21; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
22; GCN: s_endpgm
; Same exchange, but on element 4 of %ptr (4 x i64 = 32 bytes); the result goes to %out.
23define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
24  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
25  %old = atomicrmw xchg i64 addrspace(3)* %elt, i64 4 seq_cst
26  store i64 %old, i64 addrspace(1)* %out, align 8
27  ret void
28}
29
30; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
31; SICIVI: s_mov_b32 m0
32; GFX9-NOT: m0
33
34; GCN: ds_add_rtn_u64
35; GCN: s_endpgm
; Atomically adds 4 to the i64 at %ptr (seq_cst) and writes the atomicrmw result to %out.
36define amdgpu_kernel void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
37  %old = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
38  store i64 %old, i64 addrspace(1)* %out, align 8
39  ret void
40}
41
42; GCN-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
43; SICIVI-DAG: s_mov_b32 m0
44; GFX9-NOT: m0
45
46; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
47; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
48; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
49; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
50; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
51; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
52; GCN: buffer_store_dwordx2 [[RESULT]],
53; GCN: s_endpgm
; Atomically adds 9 to element 4 of %ptr (indexed with an i64, 32 bytes in) and stores the result to %out.
54define amdgpu_kernel void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
55  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
56  %old = atomicrmw add i64 addrspace(3)* %elt, i64 9 seq_cst
57  store i64 %old, i64 addrspace(1)* %out, align 8
58  ret void
59}
60
61; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64:
62; SICIVI-DAG: s_mov_b32 m0
63; GFX9-NOT: m0
64
65; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
66; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
67; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
68; GCN: buffer_store_dwordx2 [[RESULT]],
69; GCN: s_endpgm
; Atomically adds 1 to the i64 at %ptr (seq_cst) and writes the atomicrmw result to %out.
70define amdgpu_kernel void @lds_atomic_add1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
71  %old = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
72  store i64 %old, i64 addrspace(1)* %out, align 8
73  ret void
74}
75
76; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64_offset:
77; SICIVI: s_mov_b32 m0
78; GFX9-NOT: m0
79
80; GCN: ds_add_rtn_u64 {{.*}} offset:32
81; GCN: s_endpgm
; Atomically adds 1 to element 4 of %ptr (32 bytes in) and writes the atomicrmw result to %out.
82define amdgpu_kernel void @lds_atomic_add1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
83  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
84  %old = atomicrmw add i64 addrspace(3)* %elt, i64 1 seq_cst
85  store i64 %old, i64 addrspace(1)* %out, align 8
86  ret void
87}
88
89; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64:
90; SICIVI: s_mov_b32 m0
91; GFX9-NOT: m0
92
93; GCN: ds_sub_rtn_u64
94; GCN: s_endpgm
; Atomically subtracts 4 from the i64 at %ptr (seq_cst) and writes the atomicrmw result to %out.
95define amdgpu_kernel void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
96  %old = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
97  store i64 %old, i64 addrspace(1)* %out, align 8
98  ret void
99}
100
101; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
102; SICIVI: s_mov_b32 m0
103; GFX9-NOT: m0
104
105; GCN: ds_sub_rtn_u64 {{.*}} offset:32
106; GCN: s_endpgm
; Atomically subtracts 4 from element 4 of %ptr (32 bytes in) and writes the result to %out.
107define amdgpu_kernel void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
108  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
109  %old = atomicrmw sub i64 addrspace(3)* %elt, i64 4 seq_cst
110  store i64 %old, i64 addrspace(1)* %out, align 8
111  ret void
112}
113
114; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64:
115; SICIVI-DAG: s_mov_b32 m0
116; GFX9-NOT: m0
117
118; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
119; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
120; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
121; GCN: buffer_store_dwordx2 [[RESULT]],
122; GCN: s_endpgm
; Atomically subtracts 1 from the i64 at %ptr (seq_cst) and writes the atomicrmw result to %out.
123define amdgpu_kernel void @lds_atomic_sub1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
124  %old = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
125  store i64 %old, i64 addrspace(1)* %out, align 8
126  ret void
127}
128
129; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64_offset:
130; SICIVI: s_mov_b32 m0
131; GFX9-NOT: m0
132
133; GCN: ds_sub_rtn_u64 {{.*}} offset:32
134; GCN: s_endpgm
; Atomically subtracts 1 from element 4 of %ptr (32 bytes in) and writes the result to %out.
135define amdgpu_kernel void @lds_atomic_sub1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
136  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
137  %old = atomicrmw sub i64 addrspace(3)* %elt, i64 1 seq_cst
138  store i64 %old, i64 addrspace(1)* %out, align 8
139  ret void
140}
141
142; GCN-LABEL: {{^}}lds_atomic_and_ret_i64:
143; SICIVI: s_mov_b32 m0
144; GFX9-NOT: m0
145
146; GCN: ds_and_rtn_b64
147; GCN: s_endpgm
; Atomically ANDs 4 into the i64 at %ptr (seq_cst) and writes the atomicrmw result to %out.
148define amdgpu_kernel void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
149  %old = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
150  store i64 %old, i64 addrspace(1)* %out, align 8
151  ret void
152}
153
154; GCN-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
155; SICIVI: s_mov_b32 m0
156; GFX9-NOT: m0
157
158; GCN: ds_and_rtn_b64 {{.*}} offset:32
159; GCN: s_endpgm
; Atomically ANDs 4 into element 4 of %ptr (32 bytes in) and writes the result to %out.
160define amdgpu_kernel void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
161  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
162  %old = atomicrmw and i64 addrspace(3)* %elt, i64 4 seq_cst
163  store i64 %old, i64 addrspace(1)* %out, align 8
164  ret void
165}
166
167; GCN-LABEL: {{^}}lds_atomic_or_ret_i64:
168; SICIVI: s_mov_b32 m0
169; GFX9-NOT: m0
170
171; GCN: ds_or_rtn_b64
172; GCN: s_endpgm
; Atomically ORs 4 into the i64 at %ptr (seq_cst) and writes the atomicrmw result to %out.
173define amdgpu_kernel void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
174  %old = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
175  store i64 %old, i64 addrspace(1)* %out, align 8
176  ret void
177}
178
179; GCN-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
180; SICIVI: s_mov_b32 m0
181; GFX9-NOT: m0
182
183; GCN: ds_or_rtn_b64 {{.*}} offset:32
184; GCN: s_endpgm
; Atomically ORs 4 into element 4 of %ptr (32 bytes in) and writes the result to %out.
185define amdgpu_kernel void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
186  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
187  %old = atomicrmw or i64 addrspace(3)* %elt, i64 4 seq_cst
188  store i64 %old, i64 addrspace(1)* %out, align 8
189  ret void
190}
191
192; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64:
193; SICIVI: s_mov_b32 m0
194; GFX9-NOT: m0
195
196; GCN: ds_xor_rtn_b64
197; GCN: s_endpgm
; Atomically XORs 4 into the i64 at %ptr (seq_cst) and writes the atomicrmw result to %out.
198define amdgpu_kernel void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
199  %old = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
200  store i64 %old, i64 addrspace(1)* %out, align 8
201  ret void
202}
203
204; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
205; SICIVI: s_mov_b32 m0
206; GFX9-NOT: m0
207
208; GCN: ds_xor_rtn_b64 {{.*}} offset:32
209; GCN: s_endpgm
; Atomically XORs 4 into element 4 of %ptr (32 bytes in) and writes the result to %out.
210define amdgpu_kernel void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
211  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
212  %old = atomicrmw xor i64 addrspace(3)* %elt, i64 4 seq_cst
213  store i64 %old, i64 addrspace(1)* %out, align 8
214  ret void
215}
216
217; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
218; XGCN-LABEL: {{^}}lds_atomic_nand_ret_i64:
219; define amdgpu_kernel void @lds_atomic_nand_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
220;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
221;   store i64 %result, i64 addrspace(1)* %out, align 8
222;   ret void
223; }
224
225; GCN-LABEL: {{^}}lds_atomic_min_ret_i64:
226; SICIVI: s_mov_b32 m0
227; GFX9-NOT: m0
228
229; GCN: ds_min_rtn_i64
230; GCN: s_endpgm
; Atomic signed min of 4 with the i64 at %ptr (seq_cst); the atomicrmw result is stored to %out.
231define amdgpu_kernel void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
232  %old = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
233  store i64 %old, i64 addrspace(1)* %out, align 8
234  ret void
235}
236
237; GCN-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
238; SICIVI: s_mov_b32 m0
239; GFX9-NOT: m0
240
241; GCN: ds_min_rtn_i64 {{.*}} offset:32
242; GCN: s_endpgm
; Atomic signed min of 4 with element 4 of %ptr (32 bytes in); the result is stored to %out.
243define amdgpu_kernel void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
244  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
245  %old = atomicrmw min i64 addrspace(3)* %elt, i64 4 seq_cst
246  store i64 %old, i64 addrspace(1)* %out, align 8
247  ret void
248}
249
250; GCN-LABEL: {{^}}lds_atomic_max_ret_i64:
251; SICIVI: s_mov_b32 m0
252; GFX9-NOT: m0
253
254; GCN: ds_max_rtn_i64
255; GCN: s_endpgm
; Atomic signed max of 4 with the i64 at %ptr (seq_cst); the atomicrmw result is stored to %out.
256define amdgpu_kernel void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
257  %old = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
258  store i64 %old, i64 addrspace(1)* %out, align 8
259  ret void
260}
261
262; GCN-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
263; SICIVI: s_mov_b32 m0
264; GFX9-NOT: m0
265
266; GCN: ds_max_rtn_i64 {{.*}} offset:32
267; GCN: s_endpgm
; Atomic signed max of 4 with element 4 of %ptr (32 bytes in); the result is stored to %out.
268define amdgpu_kernel void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
269  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
270  %old = atomicrmw max i64 addrspace(3)* %elt, i64 4 seq_cst
271  store i64 %old, i64 addrspace(1)* %out, align 8
272  ret void
273}
274
275; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64:
276; SICIVI: s_mov_b32 m0
277; GFX9-NOT: m0
278
279; GCN: ds_min_rtn_u64
280; GCN: s_endpgm
; Atomic unsigned min of 4 with the i64 at %ptr (seq_cst); the atomicrmw result is stored to %out.
281define amdgpu_kernel void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
282  %old = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
283  store i64 %old, i64 addrspace(1)* %out, align 8
284  ret void
285}
286
287; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
288; SICIVI: s_mov_b32 m0
289; GFX9-NOT: m0
290
291; GCN: ds_min_rtn_u64 {{.*}} offset:32
292; GCN: s_endpgm
; Atomic unsigned min of 4 with element 4 of %ptr (32 bytes in); the result is stored to %out.
293define amdgpu_kernel void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
294  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
295  %old = atomicrmw umin i64 addrspace(3)* %elt, i64 4 seq_cst
296  store i64 %old, i64 addrspace(1)* %out, align 8
297  ret void
298}
299
300; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64:
301; SICIVI: s_mov_b32 m0
302; GFX9-NOT: m0
303
304; GCN: ds_max_rtn_u64
305; GCN: s_endpgm
; Atomic unsigned max of 4 with the i64 at %ptr (seq_cst); the atomicrmw result is stored to %out.
306define amdgpu_kernel void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
307  %old = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
308  store i64 %old, i64 addrspace(1)* %out, align 8
309  ret void
310}
311
312; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
313; SICIVI: s_mov_b32 m0
314; GFX9-NOT: m0
315
316; GCN: ds_max_rtn_u64 {{.*}} offset:32
317; GCN: s_endpgm
; Atomic unsigned max of 4 with element 4 of %ptr (32 bytes in); the result is stored to %out.
318define amdgpu_kernel void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
319  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
320  %old = atomicrmw umax i64 addrspace(3)* %elt, i64 4 seq_cst
321  store i64 %old, i64 addrspace(1)* %out, align 8
322  ret void
323}
324
325; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64:
326; SICIVI: s_mov_b32 m0
327; GFX9-NOT: m0
328
329; GCN: ds_wrxchg_rtn_b64
330; GCN: s_endpgm
; Exchanges 4 into the i64 at %ptr (seq_cst); the atomicrmw result is never used.
; The checks above still expect the rtn form of the exchange.
331define amdgpu_kernel void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
332  %unused = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
333  ret void
334}
335
336; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
337; SICIVI: s_mov_b32 m0
338; GFX9-NOT: m0
339
340; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
341; GCN: s_endpgm
; Exchanges 4 into element 4 of %ptr (32 bytes in); the atomicrmw result is never used.
342define amdgpu_kernel void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
343  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
344  %unused = atomicrmw xchg i64 addrspace(3)* %elt, i64 4 seq_cst
345  ret void
346}
347
348; GCN-LABEL: {{^}}lds_atomic_add_noret_i64:
349; SICIVI: s_mov_b32 m0
350; GFX9-NOT: m0
351
352; GCN: ds_add_u64
353; GCN: s_endpgm
; Atomically adds 4 to the i64 at %ptr (seq_cst); the atomicrmw result is never used.
354define amdgpu_kernel void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
355  %unused = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
356  ret void
357}
358
359; GCN-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
360; SICIVI-DAG: s_mov_b32 m0
361; GFX9-NOT: m0
362
363; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
364; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
365; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
366; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
367; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
368; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
369; GCN: s_endpgm
; Atomically adds 9 to element 4 of %ptr (indexed with an i64, 32 bytes in);
; the atomicrmw result is never used. The checks above tie the address operand
; of the atomic to the VGPR copy of the loaded pointer, mirroring the returning
; variant of this test, and do not constrain the order of the three v_mov setups.
370define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
371  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
372  %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
373  ret void
374}
375
376; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64:
377; SICIVI-DAG: s_mov_b32 m0
378; GFX9-NOT: m0
379
380; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
381; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
382; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
383; GCN: s_endpgm
; Atomically adds 1 to the i64 at %ptr (seq_cst); the atomicrmw result is never used.
384define amdgpu_kernel void @lds_atomic_add1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
385  %unused = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
386  ret void
387}
388
389; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64_offset:
390; SICIVI: s_mov_b32 m0
391; GFX9-NOT: m0
392
393; GCN: ds_add_u64 {{.*}} offset:32
394; GCN: s_endpgm
; Atomically adds 1 to element 4 of %ptr (32 bytes in); the atomicrmw result is never used.
395define amdgpu_kernel void @lds_atomic_add1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
396  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
397  %unused = atomicrmw add i64 addrspace(3)* %elt, i64 1 seq_cst
398  ret void
399}
400
401; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64:
402; SICIVI: s_mov_b32 m0
403; GFX9-NOT: m0
404
405; GCN: ds_sub_u64
406; GCN: s_endpgm
; Atomically subtracts 4 from the i64 at %ptr (seq_cst); the atomicrmw result is never used.
407define amdgpu_kernel void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
408  %unused = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
409  ret void
410}
411
412; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
413; SICIVI: s_mov_b32 m0
414; GFX9-NOT: m0
415
416; GCN: ds_sub_u64 {{.*}} offset:32
417; GCN: s_endpgm
; Atomically subtracts 4 from element 4 of %ptr (32 bytes in); the result is never used.
418define amdgpu_kernel void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
419  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
420  %unused = atomicrmw sub i64 addrspace(3)* %elt, i64 4 seq_cst
421  ret void
422}
423
424; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64:
425; SICIVI-DAG: s_mov_b32 m0
426; GFX9-NOT: m0
427
428; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
429; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
430; GCN: ds_sub_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
431; GCN: s_endpgm
; Atomically subtracts 1 from the i64 at %ptr (seq_cst); the atomicrmw result is never used.
432define amdgpu_kernel void @lds_atomic_sub1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
433  %unused = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
434  ret void
435}
436
437; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64_offset:
438; SICIVI: s_mov_b32 m0
439; GFX9-NOT: m0
440
441; GCN: ds_sub_u64 {{.*}} offset:32
442; GCN: s_endpgm
; Atomically subtracts 1 from element 4 of %ptr (32 bytes in); the result is never used.
443define amdgpu_kernel void @lds_atomic_sub1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
444  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
445  %unused = atomicrmw sub i64 addrspace(3)* %elt, i64 1 seq_cst
446  ret void
447}
448
449; GCN-LABEL: {{^}}lds_atomic_and_noret_i64:
450; SICIVI: s_mov_b32 m0
451; GFX9-NOT: m0
452
453; GCN: ds_and_b64
454; GCN: s_endpgm
; Atomically ANDs 4 into the i64 at %ptr (seq_cst); the atomicrmw result is never used.
455define amdgpu_kernel void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
456  %unused = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
457  ret void
458}
459
460; GCN-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
461; SICIVI: s_mov_b32 m0
462; GFX9-NOT: m0
463
464; GCN: ds_and_b64 {{.*}} offset:32
465; GCN: s_endpgm
; Atomically ANDs 4 into element 4 of %ptr (32 bytes in); the result is never used.
466define amdgpu_kernel void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
467  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
468  %unused = atomicrmw and i64 addrspace(3)* %elt, i64 4 seq_cst
469  ret void
470}
471
472; GCN-LABEL: {{^}}lds_atomic_or_noret_i64:
473; SICIVI: s_mov_b32 m0
474; GFX9-NOT: m0
475
476; GCN: ds_or_b64
477; GCN: s_endpgm
; Atomically ORs 4 into the i64 at %ptr (seq_cst); the atomicrmw result is never used.
478define amdgpu_kernel void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
479  %unused = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
480  ret void
481}
482
483; GCN-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
484; SICIVI: s_mov_b32 m0
485; GFX9-NOT: m0
486
487; GCN: ds_or_b64 {{.*}} offset:32
488; GCN: s_endpgm
; Atomically ORs 4 into element 4 of %ptr (32 bytes in); the result is never used.
489define amdgpu_kernel void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
490  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
491  %unused = atomicrmw or i64 addrspace(3)* %elt, i64 4 seq_cst
492  ret void
493}
494
495; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64:
496; SICIVI: s_mov_b32 m0
497; GFX9-NOT: m0
498
499; GCN: ds_xor_b64
500; GCN: s_endpgm
; Atomically XORs 4 into the i64 at %ptr (seq_cst); the atomicrmw result is never used.
501define amdgpu_kernel void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
502  %unused = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
503  ret void
504}
505
506; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
507; SICIVI: s_mov_b32 m0
508; GFX9-NOT: m0
509
510; GCN: ds_xor_b64 {{.*}} offset:32
511; GCN: s_endpgm
; Atomically XORs 4 into element 4 of %ptr (32 bytes in); the result is never used.
512define amdgpu_kernel void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
513  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
514  %unused = atomicrmw xor i64 addrspace(3)* %elt, i64 4 seq_cst
515  ret void
516}
517
518; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
519; XGCN-LABEL: {{^}}lds_atomic_nand_noret_i64:
520; define amdgpu_kernel void @lds_atomic_nand_noret_i64(i64 addrspace(3)* %ptr) nounwind {
521;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
522;   ret void
523; }
524
525; GCN-LABEL: {{^}}lds_atomic_min_noret_i64:
526; SICIVI: s_mov_b32 m0
527; GFX9-NOT: m0
528
529; GCN: ds_min_i64
530; GCN: s_endpgm
; Atomic signed min of 4 with the i64 at %ptr (seq_cst); the atomicrmw result is never used.
531define amdgpu_kernel void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
532  %unused = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
533  ret void
534}
535
536; GCN-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
537; SICIVI: s_mov_b32 m0
538; GFX9-NOT: m0
539
540; GCN: ds_min_i64 {{.*}} offset:32
541; GCN: s_endpgm
; Atomic signed min of 4 with element 4 of %ptr (32 bytes in); the result is never used.
542define amdgpu_kernel void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
543  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
544  %unused = atomicrmw min i64 addrspace(3)* %elt, i64 4 seq_cst
545  ret void
546}
547
548; GCN-LABEL: {{^}}lds_atomic_max_noret_i64:
549; SICIVI: s_mov_b32 m0
550; GFX9-NOT: m0
551
552; GCN: ds_max_i64
553; GCN: s_endpgm
; Atomic signed max of 4 with the i64 at %ptr (seq_cst); the atomicrmw result is never used.
554define amdgpu_kernel void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
555  %unused = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
556  ret void
557}
558
559; GCN-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
560; SICIVI: s_mov_b32 m0
561; GFX9-NOT: m0
562
563; GCN: ds_max_i64 {{.*}} offset:32
564; GCN: s_endpgm
; Atomic signed max of 4 with element 4 of %ptr (32 bytes in); the result is never used.
565define amdgpu_kernel void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
566  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
567  %unused = atomicrmw max i64 addrspace(3)* %elt, i64 4 seq_cst
568  ret void
569}
570
571; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64:
572; SICIVI: s_mov_b32 m0
573; GFX9-NOT: m0
574
575; GCN: ds_min_u64
576; GCN: s_endpgm
; Atomic unsigned min of 4 with the i64 at %ptr (seq_cst); the atomicrmw result is never used.
577define amdgpu_kernel void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
578  %unused = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
579  ret void
580}
581
582; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
583; SICIVI: s_mov_b32 m0
584; GFX9-NOT: m0
585
586; GCN: ds_min_u64 {{.*}} offset:32
587; GCN: s_endpgm
; Atomic unsigned min of 4 with element 4 of %ptr (32 bytes in); the result is never used.
588define amdgpu_kernel void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
589  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
590  %unused = atomicrmw umin i64 addrspace(3)* %elt, i64 4 seq_cst
591  ret void
592}
593
594; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64:
595; SICIVI: s_mov_b32 m0
596; GFX9-NOT: m0
597
598; GCN: ds_max_u64
599; GCN: s_endpgm
; Atomic unsigned max of 4 with the i64 at %ptr (seq_cst); the atomicrmw result is never used.
600define amdgpu_kernel void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
601  %unused = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
602  ret void
603}
604
605; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
606; SICIVI: s_mov_b32 m0
607; GFX9-NOT: m0
608
609; GCN: ds_max_u64 {{.*}} offset:32
610; GCN: s_endpgm
; Atomic unsigned max of 4 with element 4 of %ptr (32 bytes in); the result is never used.
611define amdgpu_kernel void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
612  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
613  %unused = atomicrmw umax i64 addrspace(3)* %elt, i64 4 seq_cst
614  ret void
615}
616