; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
; Ports of most of test/CodeGen/NVPTX/access-non-generic.ll

@scalar = internal addrspace(3) global float 0.0, align 4
@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4

; CHECK-LABEL: @load_store_lds_f32(
; CHECK: %tmp = load float, float addrspace(3)* @scalar, align 4
; CHECK: call void @use(float %tmp)
; CHECK: store float %v, float addrspace(3)* @scalar, align 4
; CHECK: call void @llvm.amdgcn.s.barrier()
; CHECK: %tmp2 = load float, float addrspace(3)* @scalar, align 4
; CHECK: call void @use(float %tmp2)
; CHECK: store float %v, float addrspace(3)* @scalar, align 4
; CHECK: call void @llvm.amdgcn.s.barrier()
; CHECK: %tmp3 = load float, float addrspace(3)* getelementptr inbounds ([10 x float], [10 x float] addrspace(3)* @array, i32 0, i32 5), align 4
; CHECK: call void @use(float %tmp3)
; CHECK: store float %v, float addrspace(3)* getelementptr inbounds ([10 x float], [10 x float] addrspace(3)* @array, i32 0, i32 5), align 4
; CHECK: call void @llvm.amdgcn.s.barrier()
; CHECK: %tmp4 = getelementptr inbounds [10 x float], [10 x float] addrspace(3)* @array, i32 0, i32 5
; CHECK: %tmp5 = load float, float addrspace(3)* %tmp4, align 4
; CHECK: call void @use(float %tmp5)
; CHECK: store float %v, float addrspace(3)* %tmp4, align 4
; CHECK: call void @llvm.amdgcn.s.barrier()
; CHECK: %tmp7 = getelementptr inbounds [10 x float], [10 x float] addrspace(3)* @array, i32 0, i32 %i
; CHECK: %tmp8 = load float, float addrspace(3)* %tmp7, align 4
; CHECK: call void @use(float %tmp8)
; CHECK: store float %v, float addrspace(3)* %tmp7, align 4
; CHECK: call void @llvm.amdgcn.s.barrier()
; CHECK: ret void
define amdgpu_kernel void @load_store_lds_f32(i32 %i, float %v) #0 {
bb:
  %tmp = load float, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
  call void @use(float %tmp)
  store float %v, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp1 = addrspacecast float addrspace(3)* @scalar to float*
  %tmp2 = load float, float* %tmp1, align 4
  call void @use(float %tmp2)
  store float %v, float* %tmp1, align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp3 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
  call void @use(float %tmp3)
  store float %v, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp4 = getelementptr inbounds [10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
  %tmp5 = load float, float* %tmp4, align 4
  call void @use(float %tmp5)
  store float %v, float* %tmp4, align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp6 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float]*
  %tmp7 = getelementptr inbounds [10 x float], [10 x float]* %tmp6, i32 0, i32 %i
  %tmp8 = load float, float* %tmp7, align 4
  call void @use(float %tmp8)
  store float %v, float* %tmp7, align 4
  call void @llvm.amdgcn.s.barrier()
  ret void
}
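
; The pass should also look through a constant-expression addrspacecast: the
; generic load below collapses to a direct addrspace(3) access of @scalar,
; with the float-to-i32 cast folded into a bitcast constant expression.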
; CHECK-LABEL: @constexpr_load_int_from_float_lds(
; CHECK: %tmp = load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*), align 4
define i32 @constexpr_load_int_from_float_lds() #0 {
bb:
  %tmp = load i32, i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*) to i32*), align 4
  ret i32 %tmp
}

; CHECK-LABEL: @load_int_from_global_float(
; CHECK: %tmp1 = getelementptr float, float addrspace(1)* %input, i32 %i
; CHECK: %tmp2 = getelementptr float, float addrspace(1)* %tmp1, i32 %j
; CHECK: %tmp3 = bitcast float addrspace(1)* %tmp2 to i32 addrspace(1)*
; CHECK: %tmp4 = load i32, i32 addrspace(1)* %tmp3
; CHECK: ret i32 %tmp4
define i32 @load_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) #0 {
bb:
  %tmp = addrspacecast float addrspace(1)* %input to float*
  %tmp1 = getelementptr float, float* %tmp, i32 %i
  %tmp2 = getelementptr float, float* %tmp1, i32 %j
  %tmp3 = bitcast float* %tmp2 to i32*
  %tmp4 = load i32, i32* %tmp3
  ret i32 %tmp4
}

; CHECK-LABEL: @nested_const_expr(
; CHECK: store i32 1, i32 addrspace(3)* bitcast (float addrspace(3)* getelementptr inbounds ([10 x float], [10 x float] addrspace(3)* @array, i64 0, i64 1) to i32 addrspace(3)*), align 4
define amdgpu_kernel void @nested_const_expr() #0 {
  store i32 1, i32* bitcast (float* getelementptr ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i64 0, i64 1) to i32*), align 4
  ret void
}

; CHECK-LABEL: @rauw(
; CHECK: %addr = getelementptr float, float addrspace(1)* %input, i64 10
; CHECK-NEXT: %v = load float, float addrspace(1)* %addr
; CHECK-NEXT: store float %v, float addrspace(1)* %addr
; CHECK-NEXT: ret void
define amdgpu_kernel void @rauw(float addrspace(1)* %input) #0 {
bb:
  %generic_input = addrspacecast float addrspace(1)* %input to float*
  %addr = getelementptr float, float* %generic_input, i64 10
  %v = load float, float* %addr
  store float %v, float* %addr
  ret void
}

; FIXME: Should be able to eliminate the cast inside the loop
; CHECK-LABEL: @loop(

; CHECK: %p = bitcast [10 x float] addrspace(3)* @array to float addrspace(3)*
; CHECK: %end = getelementptr float, float addrspace(3)* %p, i64 10
; CHECK: br label %loop

; CHECK: loop:                                             ; preds = %loop, %entry
; CHECK: %i = phi float addrspace(3)* [ %p, %entry ], [ %i2, %loop ]
; CHECK: %v = load float, float addrspace(3)* %i
; CHECK: call void @use(float %v)
; CHECK: %i2 = getelementptr float, float addrspace(3)* %i, i64 1
; CHECK: %exit_cond = icmp eq float addrspace(3)* %i2, %end

; CHECK: br i1 %exit_cond, label %exit, label %loop
define amdgpu_kernel void @loop() #0 {
entry:
  %p = addrspacecast [10 x float] addrspace(3)* @array to float*
  %end = getelementptr float, float* %p, i64 10
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi float* [ %p, %entry ], [ %i2, %loop ]
  %v = load float, float* %i
  call void @use(float %v)
  %i2 = getelementptr float, float* %i, i64 1
  %exit_cond = icmp eq float* %i2, %end
  br i1 %exit_cond, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}

@generic_end = external addrspace(1) global float*
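
; Same loop as above, but the exit bound is a generic pointer loaded from
; memory, so the induction pointer is still rewritten to addrspace(3) and is
; cast back to the generic address space only for the exit comparison.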
; CHECK-LABEL: @loop_with_generic_bound(
; CHECK: %p = bitcast [10 x float] addrspace(3)* @array to float addrspace(3)*
; CHECK: %end = load float*, float* addrspace(1)* @generic_end
; CHECK: br label %loop

; CHECK: loop:
; CHECK: %i = phi float addrspace(3)* [ %p, %entry ], [ %i2, %loop ]
; CHECK: %v = load float, float addrspace(3)* %i
; CHECK: call void @use(float %v)
; CHECK: %i2 = getelementptr float, float addrspace(3)* %i, i64 1
; CHECK: %0 = addrspacecast float addrspace(3)* %i2 to float*
; CHECK: %exit_cond = icmp eq float* %0, %end
; CHECK: br i1 %exit_cond, label %exit, label %loop
define amdgpu_kernel void @loop_with_generic_bound() #0 {
entry:
  %p = addrspacecast [10 x float] addrspace(3)* @array to float*
  %end = load float*, float* addrspace(1)* @generic_end
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi float* [ %p, %entry ], [ %i2, %loop ]
  %v = load float, float* %i
  call void @use(float %v)
  %i2 = getelementptr float, float* %i, i64 1
  %exit_cond = icmp eq float* %i2, %end
  br i1 %exit_cond, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}

; CHECK-LABEL: @select_bug(
; CHECK: %add.ptr157 = getelementptr inbounds i64, i64* undef, i64 select (i1 icmp ne (i32* inttoptr (i64 4873 to i32*), i32* null), i64 73, i64 93)
; CHECK: %cmp169 = icmp uge i64* undef, %add.ptr157
define void @select_bug() #0 {
  %add.ptr157 = getelementptr inbounds i64, i64* undef, i64 select (i1 icmp ne (i32* inttoptr (i64 4873 to i32*), i32* null), i64 73, i64 93)
  %cmp169 = icmp uge i64* undef, %add.ptr157
  unreachable
}

declare void @llvm.amdgcn.s.barrier() #1
declare void @use(float) #0

attributes #0 = { nounwind }
attributes #1 = { convergent nounwind }