; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s

; InstCombine pulls the addrspacecast out of the select, so the IR below is
; non-canonical. Make sure -infer-address-spaces still handles it sensibly.
;
; Naming used throughout this file: "group" pointers are addrspace(3),
; "global" pointers are addrspace(1), "private" pointers are addrspace(5),
; and "flat" pointers are in the default address space.

; The flat select result is returned directly, so the checks expect the casts
; and the select to be left untouched.
; CHECK-LABEL: @return_select_group_flat(
; CHECK-NEXT: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
; CHECK-NEXT: %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
; CHECK-NEXT: %select = select i1 %c, i32* %cast0, i32* %cast1
; CHECK-NEXT: ret i32* %select
define i32* @return_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
  %select = select i1 %c, i32* %cast0, i32* %cast1
  ret i32* %select
}

; Both select operands are casts from addrspace(3); the checks expect the
; select and the store to be rewritten to operate on addrspace(3) directly.
; CHECK-LABEL: @store_select_group_flat(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
  %select = select i1 %c, i32* %cast0, i32* %cast1
  store i32 -1, i32* %select
  ret void
}

; Make sure metadata is preserved: the rewritten select must still carry
; its !prof attachment.
; CHECK-LABEL: @load_select_group_flat_md(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1, !prof !0
; CHECK: %load = load i32, i32 addrspace(3)* %select
define i32 @load_select_group_flat_md(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
  %select = select i1 %c, i32* %cast0, i32* %cast1, !prof !0
  %load = load i32, i32* %select
  ret i32 %load
}

; The operands come from different address spaces (3 and 5), so the checks
; expect the flat select and store to be left as they are.
; CHECK-LABEL: @store_select_mismatch_group_private_flat(
; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
; CHECK: %cast1 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32*
; CHECK: %select = select i1 %c, i32* %cast0, i32* %cast1
; CHECK: store i32 -1, i32* %select
define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(5)* %private.ptr.1) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %cast1 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32*
  %select = select i1 %c, i32* %cast0, i32* %cast1
  store i32 -1, i32* %select
  ret void
}

@lds0 = internal addrspace(3) global i32 123, align 4
@lds1 = internal addrspace(3) global i32 456, align 4

; Constant-expression select whose operands are both casts of addrspace(3)
; globals; the checks expect the whole expression to be rewritten.
; CHECK-LABEL: @constexpr_select_group_flat(
; CHECK: %tmp = load i32, i32 addrspace(3)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(3)* @lds0, i32 addrspace(3)* @lds1)
define i32 @constexpr_select_group_flat() #0 {
bb:
  %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(3)* @lds1 to i32*))
  ret i32 %tmp
}

; Constant-expression select mixing addrspace(3) and addrspace(1) globals;
; the checks expect the load to be unchanged.
; CHECK-LABEL: @constexpr_select_group_global_flat_mismatch(
; CHECK: %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*))
define i32 @constexpr_select_group_global_flat_mismatch() #0 {
bb:
  %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*))
  ret i32 %tmp
}

; One operand is a flat null constant; the checks expect it to be expressed
; as an addrspacecast of the flat null into addrspace(3).
; CHECK-LABEL: @store_select_group_flat_null(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %select = select i1 %c, i32* %cast0, i32* null
  store i32 -1, i32* %select
  ret void
}

; Same as above with the select operands swapped.
; CHECK-LABEL: @store_select_group_flat_null_swap(
; CHECK: %select = select i1 %c, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*), i32 addrspace(3)* %group.ptr.0
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %select = select i1 %c, i32* null, i32* %cast0
  store i32 -1, i32* %select
  ret void
}

; One operand is a flat undef; the checks expect it to become an
; addrspace(3) undef.
; CHECK-LABEL: @store_select_group_flat_undef(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* undef
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %select = select i1 %c, i32* %cast0, i32* undef
  store i32 -1, i32* %select
  ret void
}

; Same as above with the select operands swapped.
; CHECK-LABEL: @store_select_group_flat_undef_swap(
; CHECK: %select = select i1 %c, i32 addrspace(3)* undef, i32 addrspace(3)* %group.ptr.0
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %select = select i1 %c, i32* undef, i32* %cast0
  store i32 -1, i32* %select
  ret void
}

; The select feeds a getelementptr before the store; the checks expect the
; select, the GEP and the store to all be rewritten to addrspace(3).
; CHECK-LABEL: @store_select_gep_group_flat_null(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
; CHECK: %gep = getelementptr i32, i32 addrspace(3)* %select, i64 16
; CHECK: store i32 -1, i32 addrspace(3)* %gep
define amdgpu_kernel void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %select = select i1 %c, i32* %cast0, i32* null
  %gep = getelementptr i32, i32* %select, i64 16
  store i32 -1, i32* %gep
  ret void
}

@global0 = internal addrspace(1) global i32 123, align 4

; One operand is a constant-expression cast of an addrspace(3) global; the
; checks expect the cast to be stripped so the global is used directly.
; CHECK-LABEL: @store_select_group_flat_constexpr(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* @lds1
; CHECK: store i32 7, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(3)* @lds1 to i32*)
  store i32 7, i32* %select
  ret void
}

; One operand is a flat inttoptr constant; the checks expect it to be wrapped
; in an addrspacecast to addrspace(3).
; CHECK-LABEL: @store_select_group_flat_inttoptr_flat(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* inttoptr (i64 12345 to i32*) to i32 addrspace(3)*)
; CHECK: store i32 7, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %select = select i1 %c, i32* %cast0, i32* inttoptr (i64 12345 to i32*)
  store i32 7, i32* %select
  ret void
}

; One operand is a cast of an addrspace(3) inttoptr constant; the checks
; expect the cast to be stripped, leaving the addrspace(3) inttoptr.
; CHECK-LABEL: @store_select_group_flat_inttoptr_group(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*)
; CHECK-NEXT: store i32 7, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) to i32*)
  store i32 7, i32* %select
  ret void
}

; Operands come from addrspace(3) and addrspace(1); the checks expect the
; flat select and store to be left as they are.
; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr(
; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
; CHECK: %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)
; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)
  store i32 7, i32* %select
  ret void
}

; Same as above with the select operands swapped.
; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr_swap(
; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %cast0
; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %cast0
  store i32 7, i32* %select
  ret void
}

; Both operands are casts of null, but from different address spaces
; (3 and 1); the checks expect no rewrite.
; CHECK-LABEL: @store_select_group_global_mismatch_null_null(
; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)
; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_global_mismatch_null_null(i1 %c) #0 {
  %select = select i1 %c, i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)
  store i32 7, i32* %select
  ret void
}

; The remaining *_constexpr tests use a constant-expression select as the
; store pointer, with operands drawn from mismatched address spaces.

; CHECK-LABEL: @store_select_group_global_mismatch_null_null_constexpr(
; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_null_null_constexpr() #0 {
  store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
  ret void
}

; CHECK-LABEL: @store_select_group_global_mismatch_gv_null_constexpr(
; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_gv_null_constexpr() #0 {
  store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
  ret void
}

; CHECK-LABEL: @store_select_group_global_mismatch_null_gv_constexpr(
; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_null_gv_constexpr() #0 {
  store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)), align 4
  ret void
}

; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_null_constexpr(
; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_null_constexpr() #0 {
  store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
  ret void
}

; Here the inttoptr operand is flat and only the null operand has a specific
; address space (1); the checks expect the select to be rewritten to
; addrspace(1).
; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_flat_null_constexpr(
; CHECK: store i32 7, i32 addrspace(1)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(1)* addrspacecast (i32* inttoptr (i64 123 to i32*) to i32 addrspace(1)*), i32 addrspace(1)* null), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_flat_null_constexpr() #0 {
  store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* inttoptr (i64 123 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
  ret void
}

; The checks expect the cast of the addrspace(1) undef to appear as a plain
; flat undef while the select itself stays flat.
; NOTE(review): despite the "undef_undef" name, the first select operand is a
; cast of an addrspace(3) null, not undef -- confirm this is intended.
; CHECK-LABEL: @store_select_group_global_mismatch_undef_undef_constexpr(
; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* undef), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_undef_undef_constexpr() #0 {
  store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* undef to i32*)), align 4
  ret void
}

@lds2 = external addrspace(3) global [1024 x i32], align 4

; The constant operand is an addrspace(1) pointer built via ptrtoint/inttoptr
; from an addrspace(3) global; the checks expect no rewrite.
; CHECK-LABEL: @store_select_group_constexpr_ptrtoint(
; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
; CHECK: %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*)
; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
  %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
  %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*)
  store i32 7, i32* %select
  ret void
}

; Vector-of-pointers select; the checks expect the casts, select, extracts
; and stores to be left unchanged.
; CHECK-LABEL: @store_select_group_flat_vector(
; CHECK: %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
; CHECK: %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
; CHECK: %select = select i1 %c, <2 x i32*> %cast0, <2 x i32*> %cast1
; CHECK: %extract0 = extractelement <2 x i32*> %select, i32 0
; CHECK: %extract1 = extractelement <2 x i32*> %select, i32 1
; CHECK: store i32 -1, i32* %extract0
; CHECK: store i32 -2, i32* %extract1
define amdgpu_kernel void @store_select_group_flat_vector(i1 %c, <2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 {
  %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
  %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
  %select = select i1 %c, <2 x i32*> %cast0, <2 x i32*> %cast1
  %extract0 = extractelement <2 x i32*> %select, i32 0
  %extract1 = extractelement <2 x i32*> %select, i32 1
  store i32 -1, i32* %extract0
  store i32 -2, i32* %extract1
  ret void
}

attributes #0 = { nounwind }

; Branch weights attached to the select in @load_select_group_flat_md.
!0 = !{!"branch_weights", i32 2, i32 10}