; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Every function below computes an address in %entry and dereferences it in
; the conditionally executed %if block (the scratch tests also reuse it in
; %endif). The opt checks inspect whether the getelementptr is still present
; ahead of the "br i1"; the llc checks inspect the addressing form of the
; memory instruction that is emitted.
;
; Note for maintainers: do not spell a check prefix followed by a colon inside
; explanatory comments, FileCheck would treat it as a directive.

declare i32 @llvm.r600.read.tidig.x() #0

; Global (addrspace 1) i32 load at element index 7.
; Expected on bonaire - no getelementptr on %in ahead of the branch and a
; ptrtoint after it. Expected on tonga - the getelementptr on %in is kept.
; OPT-LABEL: @test_sink_global_small_offset_i32(
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
; OPT: br i1
; OPT-CI: ptrtoint

; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
; GCN: {{^}}BB0_2:
define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(1)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global i8 load at byte offset 65535.
; The getelementptr is expected to stay ahead of the branch for all three
; targets, and llc is expected to pass the offset in a scalar register operand
; (s<N>) at the end of the buffer_load_sbyte.
; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB1_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global i8 load at byte offset 4095.
; llc is expected to encode the offset directly in the instruction
; (offset:4095) inside the exec-masked region.
; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
; GCN: {{^}}BB2_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global i8 load at byte offset 4096, one past the previous test.
; llc is expected to fall back to a scalar register operand (s<N>) instead of
; an immediate offset field.
; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB3_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Volatile store/load through an alloca at element index 1023 (byte offset
; 4092). Expected - no getelementptr on the [512 x i32] type ahead of the
; branch, a ptrtoint after it, and "offen offset:4092" on both the store and
; the load in the llc output.
; NOTE(review): index 1023 is outside the declared 512-element array;
; presumably deliberate to reach the largest foldable offset - confirm.
; OPT-LABEL: @test_sink_scratch_small_offset_i32(
; OPT-NOT:  getelementptr [512 x i32]
; OPT: br i1
; OPT: ptrtoint

; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: {{^}}BB4_2:
define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; Same shape as the previous test but at element index 1024 (byte offset
; 4096). Expected - the getelementptr stays ahead of the branch, no ptrtoint
; follows it, and the llc output uses "offen" with no immediate offset.
; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
; OPT: br i1
; OPT-NOT: ptrtoint

; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: {{^}}BB5_2:
define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; Global i32 load whose element index is a zero-extended i32 argument.
; Expected on bonaire - an addr64 buffer_load_dword. Expected on tonga - a
; flat_load_dword plus initialisation of flat_scratch_lo/hi. There is no
; tahiti-specific check for the load.
; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
; VI-DAG: s_movk_i32 flat_scratch_lo, 0x0
; VI-DAG: s_movk_i32 flat_scratch_hi, 0x0
; GCN: s_and_saveexec_b64
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: {{^}}BB6_2:
define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) {
entry:
  %offset.ext = zext i32 %offset to i64
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(1)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }


; The functions below load an i32 through an addrspace(2) pointer inside the
; conditionally executed %if block, at increasingly large constant element
; indices. Comments name targets by their -mcpu value; do not spell a check
; prefix followed by a colon inside explanatory comments.

; Element index 7.
; Expected - no getelementptr on the addrspace(2) pointer ahead of the branch;
; tahiti llc output uses the immediate 0x7 on s_load_dword.
; OPT-LABEL: @test_sink_constant_small_offset_i32(
; OPT-NOT:  getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Element index 255.
; Expected - no getelementptr on the addrspace(2) pointer ahead of the branch;
; tahiti llc output uses the immediate 0xff on s_load_dword.
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32(
; OPT-NOT:  getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Element index 256 (byte offset 0x400).
; Expected - tahiti keeps the getelementptr ahead of the branch and llc
; materialises 0x400 in a scalar register; bonaire and tonga have no
; getelementptr ahead of the branch.
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32(
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400

; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Element index 4294967295 (byte offset 0x3FFFFFFFC).
; Expected - tahiti keeps the getelementptr ahead of the branch, bonaire does
; not. llc is expected to add the offset to the base pointer explicitly
; (low word -4, carry-in high word 3); tahiti then loads with offset 0x0.
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32(
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Element index 17179869181.
; Expected - the getelementptr stays ahead of the branch on every target, and
; llc adds the offset with an s_add_u32 / s_addc_u32 pair; tahiti then loads
; with offset 0x0.
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32(
; OPT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32
; GCN: s_addc_u32
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Element index 262143 (byte offset 0xffffc). Only llc output is checked.
; Expected - tahiti materialises 0xffffc in a scalar register; bonaire uses
; the immediate 0x3ffff; tonga uses the immediate 0xffffc.
; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}

; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Element index 262144 (byte offset 0x100000).
; Expected - tahiti and tonga keep the getelementptr ahead of the branch and
; llc materialises 0x100000 in a scalar register; bonaire has no
; getelementptr ahead of the branch and uses the immediate 0x40000.
; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32(
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}

; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}