; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s

; Check that volatile users of addrspacecast are not replaced.

; CHECK-LABEL: @volatile_load_flat_from_global(
; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(1)*
define amdgpu_kernel void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
  %val = load volatile i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_load_flat_from_constant(
; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(1)*
define amdgpu_kernel void @volatile_load_flat_from_constant(i32 addrspace(4)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(4)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
  %val = load volatile i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_load_flat_from_group(
; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(3)*
define amdgpu_kernel void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
  %val = load volatile i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_load_flat_from_private(
; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(5)*
define amdgpu_kernel void @volatile_load_flat_from_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
  %val = load volatile i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_store_flat_to_global(
; CHECK: load i32, i32 addrspace(1)*
; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store volatile i32 %val, i32* %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_store_flat_to_group(
; CHECK: load i32, i32 addrspace(3)*
; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store volatile i32 %val, i32* %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_store_flat_to_private(
; CHECK: load i32, i32 addrspace(5)*
; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store volatile i32 %val, i32* %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_atomicrmw_add_group_to_flat(
; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32*
; CHECK: atomicrmw volatile add i32*
define i32 @volatile_atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
  %ret = atomicrmw volatile add i32* %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @volatile_atomicrmw_add_global_to_flat(
; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32*
; CHECK: %ret = atomicrmw volatile add i32*
define i32 @volatile_atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
  %ret = atomicrmw volatile add i32* %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @volatile_cmpxchg_global_to_flat(
; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32*
; CHECK: cmpxchg volatile i32*
define { i32, i1 } @volatile_cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
  %ret = cmpxchg volatile i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; CHECK-LABEL: @volatile_cmpxchg_group_to_flat(
; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32*
; CHECK: cmpxchg volatile i32*
define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
  %ret = cmpxchg volatile i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; CHECK-LABEL: @volatile_memset_group_to_flat(
; CHECK: %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
  %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
  call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
  ret void
}

; CHECK-LABEL: @volatile_memset_global_to_flat(
; CHECK: %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
  %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
  call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
  ret void
}

declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1

attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }