; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s

; Check that volatile users of addrspacecast are not replaced.
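; As a hedged illustration (not exercised by FileCheck here): for a
; non-volatile access through such a cast, infer-address-spaces is expected
; to rewrite the flat access to use the source address space directly, e.g.
;   %tmp = addrspacecast i32 addrspace(1)* %input to i32*
;   %val = load i32, i32* %tmp, align 4
; would become
;   %val = load i32, i32 addrspace(1)* %input, align 4
; The volatile cases below must instead keep the flat (addrspace 0) access.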

; CHECK-LABEL: @volatile_load_flat_from_global(
; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(1)*
define amdgpu_kernel void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
  %val = load volatile i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_load_flat_from_constant(
; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(1)*
define amdgpu_kernel void @volatile_load_flat_from_constant(i32 addrspace(4)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(4)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
  %val = load volatile i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_load_flat_from_group(
; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(3)*
define amdgpu_kernel void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
  %val = load volatile i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_load_flat_from_private(
; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(5)*
define amdgpu_kernel void @volatile_load_flat_from_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
  %val = load volatile i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_store_flat_to_global(
; CHECK: load i32, i32 addrspace(1)*
; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store volatile i32 %val, i32* %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_store_flat_to_group(
; CHECK: load i32, i32 addrspace(3)*
; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store volatile i32 %val, i32* %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_store_flat_to_private(
; CHECK: load i32, i32 addrspace(5)*
; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store volatile i32 %val, i32* %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_atomicrmw_add_group_to_flat(
; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32*
; CHECK: atomicrmw volatile add i32*
define i32 @volatile_atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
  %ret = atomicrmw volatile add i32* %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @volatile_atomicrmw_add_global_to_flat(
; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32*
; CHECK: %ret = atomicrmw volatile add i32*
define i32 @volatile_atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
  %ret = atomicrmw volatile add i32* %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @volatile_cmpxchg_global_to_flat(
; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32*
; CHECK: cmpxchg volatile i32*
define { i32, i1 } @volatile_cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
  %ret = cmpxchg volatile i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; CHECK-LABEL: @volatile_cmpxchg_group_to_flat(
; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32*
; CHECK: cmpxchg volatile i32*
define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
  %ret = cmpxchg volatile i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; CHECK-LABEL: @volatile_memset_group_to_flat(
; CHECK: %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
  %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
  call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
  ret void
}

; CHECK-LABEL: @volatile_memset_global_to_flat(
; CHECK: %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
  %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
  call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
  ret void
}

declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1

attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }