; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX678,HAS-ATOMICS %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,HAS-ATOMICS %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX678,NO-ATOMICS %s
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX678,NO-ATOMICS %s

; atomicrmw fadd of the constant 4.0 through a float addrspace(3) pointer, with
; the old value returned to the caller.
;  - tonga and gfx900 runs: a single ds_add_rtn_f32 fed by a v_mov of 4.0.
;  - tahiti and hawaii runs: ds_read_b32, v_add_f32, ds_cmpst_rtn_b32, then an
;    s_cbranch_execnz.
; An s_mov_b32 to m0 is expected on tonga/tahiti/hawaii; gfx900 must not
; mention m0 at all.
; GCN-LABEL: {{^}}lds_atomic_fadd_ret_f32:
; GFX678-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 4.0
; HAS-ATOMICS: ds_add_rtn_f32 v0, v0, [[K]]

; NO-ATOMICS: ds_read_b32
; NO-ATOMICS: v_add_f32
; NO-ATOMICS: ds_cmpst_rtn_b32
; NO-ATOMICS: s_cbranch_execnz
define float @lds_atomic_fadd_ret_f32(float addrspace(3)* %ptr) nounwind {
  %result = atomicrmw fadd float addrspace(3)* %ptr, float 4.0 seq_cst
  ret float %result
}

; Same atomicrmw fadd of 4.0 as the returning variant, but %result is left
; unused and the function returns void. The tonga and gfx900 runs therefore
; expect the non-returning ds_add_f32 form. For tahiti and hawaii only the
; m0 write is checked here.
; GCN-LABEL: {{^}}lds_atomic_fadd_noret_f32:
; GFX678-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 4.0
; HAS-ATOMICS: ds_add_f32 v0, [[K]]
define void @lds_atomic_fadd_noret_f32(float addrspace(3)* %ptr) nounwind {
  %result = atomicrmw fadd float addrspace(3)* %ptr, float 4.0 seq_cst
  ret void
}

; Kernel with three dependent/independent fadd atomics on addrspace(3):
;  - %a1: result is used later, so ds_add_rtn_f32 is expected.
;  - %a2: result is unused, so ds_add_f32 is expected.
;  - %a3: adds %a1 (captured as V2) to %ptrf and its result is stored to %out.
; %idx.add is idx + 4, so the shl by 3 yields idx*8 + 32 and the shl by 4
; yields idx*16 + 64; the checks expect those constants to show up as
; offset:32 and offset:64 on the DS instructions.
; 0x42280000 is the IEEE-754 single-precision encoding of 42.0 (4.2e+1).
; Between the second and third atomic the checks require a full
; s_waitcnt vmcnt(0) lgkmcnt(0).
; Only the tonga and gfx900 runs have instruction checks for this function.
; GCN-LABEL: {{^}}lds_ds_fadd:
; VI-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
; HAS-ATOMICS: s_waitcnt vmcnt(0) lgkmcnt(0)
; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
  %idx.add = add nuw i32 %idx, 4
  %shl0 = shl i32 %idx.add, 3
  %shl1 = shl i32 %idx.add, 4
  %ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
  %ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
  %a1 = atomicrmw fadd float addrspace(3)* %ptr0, float 4.2e+1 seq_cst
  %a2 = atomicrmw fadd float addrspace(3)* %ptr1, float 4.2e+1 seq_cst
  %a3 = atomicrmw fadd float addrspace(3)* %ptrf, float %a1 seq_cst
  store float %a3, float addrspace(1)* %out
  ret void
}

; Same IR as @lds_ds_fadd except that every atomicrmw carries
; syncscope("one-as"). The expected instruction selection is unchanged
; (ds_add_rtn_f32 at offset:32, ds_add_f32 at offset:64, then a ds_add_rtn_f32
; consuming the first result), but the wait expected before the third atomic
; is s_waitcnt lgkmcnt(1) instead of vmcnt(0) lgkmcnt(0).
; 0x42280000 is the IEEE-754 single-precision encoding of 42.0 (4.2e+1).
; GCN-LABEL: {{^}}lds_ds_fadd_one_as:
; VI-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
; HAS-ATOMICS: s_waitcnt lgkmcnt(1)
; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fadd_one_as(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
  %idx.add = add nuw i32 %idx, 4
  %shl0 = shl i32 %idx.add, 3
  %shl1 = shl i32 %idx.add, 4
  %ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
  %ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
  %a1 = atomicrmw fadd float addrspace(3)* %ptr0, float 4.2e+1 syncscope("one-as") seq_cst
  %a2 = atomicrmw fadd float addrspace(3)* %ptr1, float 4.2e+1 syncscope("one-as") seq_cst
  %a3 = atomicrmw fadd float addrspace(3)* %ptrf, float %a1 syncscope("one-as") seq_cst
  store float %a3, float addrspace(1)* %out
  ret void
}

; atomicrmw fadd of 4.0 on a double addrspace(3) pointer, old value returned.
; The checks use the prefix shared by all four run lines: every target is
; expected to emit ds_read_b64, v_add_f64, ds_cmpst_rtn_b64 and then an
; s_cbranch_execnz rather than a single DS add instruction.
; GCN-LABEL: {{^}}lds_atomic_fadd_ret_f64:
; GCN: ds_read_b64
; GCN: v_add_f64
; GCN: ds_cmpst_rtn_b64
; GCN: s_cbranch_execnz
define double @lds_atomic_fadd_ret_f64(double addrspace(3)* %ptr) nounwind {
  %result = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst
  ret double %result
}

; f64 fadd on addrspace(3) with the result left unused (function returns void).
; The expected sequence is the same as for the returning f64 variant on every
; run line: ds_read_b64, v_add_f64, ds_cmpst_rtn_b64, s_cbranch_execnz.
; GCN-LABEL: {{^}}lds_atomic_fadd_noret_f64:
; GCN: ds_read_b64
; GCN: v_add_f64
; GCN: ds_cmpst_rtn_b64
; GCN: s_cbranch_execnz
define void @lds_atomic_fadd_noret_f64(double addrspace(3)* %ptr) nounwind {
  %result = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst
  ret void
}

; atomicrmw fsub of a runtime float %val on addrspace(3), old value returned.
; All four run lines expect ds_read_b32, v_sub_f32, ds_cmpst_rtn_b32 and an
; s_cbranch_execnz; no single DS instruction is expected for fsub.
; GCN-LABEL: {{^}}lds_atomic_fsub_ret_f32:
; GCN: ds_read_b32
; GCN: v_sub_f32
; GCN: ds_cmpst_rtn_b32
; GCN: s_cbranch_execnz
define float @lds_atomic_fsub_ret_f32(float addrspace(3)* %ptr, float %val) nounwind {
  %result = atomicrmw fsub float addrspace(3)* %ptr, float %val seq_cst
  ret float %result
}

; f32 fsub on addrspace(3) with the result left unused (function returns void).
; All four run lines expect ds_read_b32, v_sub_f32 and ds_cmpst_rtn_b32.
; The trailing s_cbranch_execnz check mirrors the other read/op/cmpst tests in
; this file, which all verify the branch that follows the compare-store.
; GCN-LABEL: {{^}}lds_atomic_fsub_noret_f32:
; GCN: ds_read_b32
; GCN: v_sub_f32
; GCN: ds_cmpst_rtn_b32
; GCN: s_cbranch_execnz
define void @lds_atomic_fsub_noret_f32(float addrspace(3)* %ptr, float %val) nounwind {
  %result = atomicrmw fsub float addrspace(3)* %ptr, float %val seq_cst
  ret void
}

; atomicrmw fsub of a runtime double %val on addrspace(3), old value returned.
; The subtraction is expected to be emitted as v_add_f64 with a negated
; third operand (the "-v[lo:hi]" in the pattern), bracketed by ds_read_b64 and
; ds_cmpst_rtn_b64. The s_cbranch_execnz check matches the one in the
; non-returning f64 fsub test.
; GCN-LABEL: {{^}}lds_atomic_fsub_ret_f64:
; GCN: ds_read_b64
; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}
; GCN: ds_cmpst_rtn_b64
; GCN: s_cbranch_execnz
define double @lds_atomic_fsub_ret_f64(double addrspace(3)* %ptr, double %val) nounwind {
  %result = atomicrmw fsub double addrspace(3)* %ptr, double %val seq_cst
  ret double %result
}

; f64 fsub on addrspace(3) with the result left unused (function returns void).
; As in the returning variant, the subtraction is expected as v_add_f64 with a
; negated third operand, between ds_read_b64 and ds_cmpst_rtn_b64, followed by
; an s_cbranch_execnz. The checks apply to all four run lines.
; GCN-LABEL: {{^}}lds_atomic_fsub_noret_f64:
; GCN: ds_read_b64
; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}
; GCN: ds_cmpst_rtn_b64
; GCN: s_cbranch_execnz
define void @lds_atomic_fsub_noret_f64(double addrspace(3)* %ptr, double %val) nounwind {
  %result = atomicrmw fsub double addrspace(3)* %ptr, double %val seq_cst
  ret void
}
