; RUN: llc -mtriple=i686-linux -pre-RA-sched=source < %s | FileCheck %s
; RUN: opt -disable-output -debugify < %s

declare void @error(i32 %i, i32 %a, i32 %b)

define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) {
; Test a chain of ifs, where the block guarded by the if is error handling code
; that is not expected to run.
; CHECK-LABEL: test_ifchains:
; CHECK: %entry
; CHECK-NOT: .p2align
; CHECK: %else1
; CHECK-NOT: .p2align
; CHECK: %else2
; CHECK-NOT: .p2align
; CHECK: %else3
; CHECK-NOT: .p2align
; CHECK: %else4
; CHECK-NOT: .p2align
; CHECK: %exit
; CHECK: %then1
; CHECK: %then2
; CHECK: %then3
; CHECK: %then4
; CHECK: %then5

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then1, label %else1, !prof !0

then1:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else1

else1:
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %then2, label %else2, !prof !0

then2:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else2

else2:
  %gep3 = getelementptr i32, i32* %a, i32 3
  %val3 = load i32, i32* %gep3
  %cond3 = icmp ugt i32 %val3, 3
  br i1 %cond3, label %then3, label %else3, !prof !0

then3:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else3

else3:
  %gep4 = getelementptr i32, i32* %a, i32 4
  %val4 = load i32, i32* %gep4
  %cond4 = icmp ugt i32 %val4, 4
  br i1 %cond4, label %then4, label %else4, !prof !0

then4:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else4

else4:
  %gep5 = getelementptr i32, i32* %a, i32 3
  %val5 = load i32, i32* %gep5
  %cond5 = icmp ugt i32 %val5, 3
  br i1 %cond5, label %then5, label %exit, !prof !0

then5:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %exit

exit:
  ret i32 %b
}

define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
; Check that we sink cold loop blocks after the hot loop body.
; CHECK-LABEL: test_loop_cold_blocks:
; CHECK: %entry
; CHECK: .p2align
; CHECK: %body1
; CHECK: %body2
; CHECK: %body3
; CHECK-NOT: .p2align
; CHECK: %unlikely1
; CHECK-NOT: .p2align
; CHECK: %unlikely2
; CHECK: %exit

entry:
  br label %body1

body1:
  %iv = phi i32 [ 0, %entry ], [ %next, %body3 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body3 ]
  %unlikelycond1 = icmp slt i32 %base, 42
  br i1 %unlikelycond1, label %unlikely1, label %body2, !prof !0

unlikely1:
  call void @error(i32 %i, i32 1, i32 %base)
  br label %body2

body2:
  %unlikelycond2 = icmp sgt i32 %base, 21
  br i1 %unlikelycond2, label %unlikely2, label %body3, !prof !0

unlikely2:
  call void @error(i32 %i, i32 2, i32 %base)
  br label %body3

body3:
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body1

exit:
  ret i32 %sum
}

!0 = !{!"branch_weights", i32 1, i32 64}

define i32 @test_loop_early_exits(i32 %i, i32* %a) {
; Check that we sink early exit blocks out of loop bodies.
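; (Each bail block returns directly, so sinking them below %exit keeps the hot
; loop body contiguous; the CHECK lines below pin all four body blocks ahead
; of the bail blocks.)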
; CHECK-LABEL: test_loop_early_exits:
; CHECK: %entry
; CHECK: %body1
; CHECK: %body2
; CHECK: %body3
; CHECK: %body4
; CHECK: %exit
; CHECK: %bail1
; CHECK: %bail2
; CHECK: %bail3

entry:
  br label %body1

body1:
  %iv = phi i32 [ 0, %entry ], [ %next, %body4 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body4 ]
  %bailcond1 = icmp eq i32 %base, 42
  br i1 %bailcond1, label %bail1, label %body2

bail1:
  ret i32 -1

body2:
  %bailcond2 = icmp eq i32 %base, 43
  br i1 %bailcond2, label %bail2, label %body3

bail2:
  ret i32 -2

body3:
  %bailcond3 = icmp eq i32 %base, 44
  br i1 %bailcond3, label %bail3, label %body4

bail3:
  ret i32 -3

body4:
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body1

exit:
  ret i32 %sum
}

; Tail duplication during layout can entirely remove body0 by duplicating it
; into the entry block and into body1. This is a good thing, but it isn't what
; this test is looking for. So, to make the blocks long enough that they don't
; get duplicated, we add some calls to @dummy.
declare void @dummy()

define i32 @test_loop_rotate(i32 %i, i32* %a) {
; Check that we rotate conditional exits from the loop to the bottom of the
; loop, eliminating unconditional branches to the top.
; CHECK-LABEL: test_loop_rotate:
; CHECK: %entry
; CHECK: %body0
; CHECK: %body1
; CHECK: %exit

entry:
  br label %body0

body0:
  %iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  call void @dummy()
  call void @dummy()
  br i1 %exitcond, label %exit, label %body1

body1:
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %bailcond1 = icmp eq i32 %sum, 42
  br label %body0

exit:
  ret i32 %base
}

define i32 @test_no_loop_rotate(i32 %i, i32* %a) {
; Check that we don't try to rotate a loop which is already laid out with
; fallthrough opportunities into the top and out of the bottom.
; CHECK-LABEL: test_no_loop_rotate:
; CHECK: %entry
; CHECK: %body0
; CHECK: %body1
; CHECK: %exit

entry:
  br label %body0

body0:
  %iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %bailcond1 = icmp eq i32 %sum, 42
  br i1 %bailcond1, label %exit, label %body1

body1:
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body0

exit:
  ret i32 %base
}

define i32 @test_loop_align(i32 %i, i32* %a) {
; Check that we provide basic loop body alignment with the block placement
; pass.
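; The loop here is a single block, %body; the .p2align emitted in front of it
; is captured as [[ALIGN]] below and reused by the nested-loop alignment test
; that follows.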
; CHECK-LABEL: test_loop_align:
; CHECK: %entry
; CHECK: .p2align [[ALIGN:[0-9]+]],
; CHECK-NEXT: %body
; CHECK: %exit

entry:
  br label %body

body:
  %iv = phi i32 [ 0, %entry ], [ %next, %body ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body

exit:
  ret i32 %sum
}

define i32 @test_nested_loop_align(i32 %i, i32* %a, i32* %b) {
; Check that we provide nested loop body alignment.
; CHECK-LABEL: test_nested_loop_align:
; CHECK: %entry
; CHECK: .p2align [[ALIGN]],
; CHECK-NEXT: %loop.body.1
; CHECK: .p2align [[ALIGN]],
; CHECK-NEXT: %inner.loop.body
; CHECK-NOT: .p2align
; CHECK: %exit

entry:
  br label %loop.body.1

loop.body.1:
  %iv = phi i32 [ 0, %entry ], [ %next, %loop.body.2 ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %bidx = load i32, i32* %arrayidx
  br label %inner.loop.body

inner.loop.body:
  %inner.iv = phi i32 [ 0, %loop.body.1 ], [ %inner.next, %inner.loop.body ]
  %base = phi i32 [ 0, %loop.body.1 ], [ %sum, %inner.loop.body ]
  %scaled_idx = mul i32 %bidx, %iv
  %inner.arrayidx = getelementptr inbounds i32, i32* %b, i32 %scaled_idx
  %0 = load i32, i32* %inner.arrayidx
  %sum = add nsw i32 %0, %base
  %inner.next = add i32 %iv, 1
  %inner.exitcond = icmp eq i32 %inner.next, %i
  br i1 %inner.exitcond, label %loop.body.2, label %inner.loop.body

loop.body.2:
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %loop.body.1

exit:
  ret i32 %sum
}

define void @unnatural_cfg1() {
; Test that we can handle a loop with an inner unnatural loop at the end of
; a function. This is a gross CFG reduced out of single-source GCC.
; CHECK-LABEL: unnatural_cfg1
; CHECK: %entry
; CHECK: %loop.header
; CHECK: %loop.body2
; CHECK: %loop.body3

entry:
  br label %loop.header

loop.header:
  br label %loop.body1

loop.body1:
  br i1 undef, label %loop.body3, label %loop.body2

loop.body2:
  %ptr = load i32*, i32** undef, align 4
  br label %loop.body3

loop.body3:
  %myptr = phi i32* [ %ptr2, %loop.body5 ], [ %ptr, %loop.body2 ], [ undef, %loop.body1 ]
  %bcmyptr = bitcast i32* %myptr to i32*
  %val = load i32, i32* %bcmyptr, align 4
  %comp = icmp eq i32 %val, 48
  br i1 %comp, label %loop.body4, label %loop.body5

loop.body4:
  br i1 undef, label %loop.header, label %loop.body5

loop.body5:
  %ptr2 = load i32*, i32** undef, align 4
  br label %loop.body3
}

define void @unnatural_cfg2(i32* %p0, i32 %a0) {
; Test that we can handle a loop with a nested natural loop *and* an unnatural
; loop. This was reduced from a crash on block placement when run over
; single-source GCC.
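; As with unnatural_cfg1, the CHECK lines below only pin down a relative block
; order; the primary point of the test is that placement completes without
; crashing.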
; CHECK-LABEL: unnatural_cfg2
; CHECK: %entry
; CHECK: %loop.header
; CHECK: %loop.body1
; CHECK: %loop.body2
; CHECK: %loop.body3
; CHECK: %loop.inner1.begin
; CHECK: %loop.body4
; CHECK: %loop.inner2.begin
; CHECK: %loop.inner2.begin
; CHECK: %bail

entry:
  br label %loop.header

loop.header:
  %comp0 = icmp eq i32* %p0, null
  br i1 %comp0, label %bail, label %loop.body1

loop.body1:
  %val0 = load i32*, i32** undef, align 4
  br i1 undef, label %loop.body2, label %loop.inner1.begin

loop.body2:
  br i1 undef, label %loop.body4, label %loop.body3

loop.body3:
  %ptr1 = getelementptr inbounds i32, i32* %val0, i32 0
  %castptr1 = bitcast i32* %ptr1 to i32**
  %val1 = load i32*, i32** %castptr1, align 4
  br label %loop.inner1.begin

loop.inner1.begin:
  %valphi = phi i32* [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ]
  %castval = bitcast i32* %valphi to i32*
  %comp1 = icmp eq i32 %a0, 48
  br i1 %comp1, label %loop.inner1.end, label %loop.body4

loop.inner1.end:
  %ptr2 = getelementptr inbounds i32, i32* %valphi, i32 0
  %castptr2 = bitcast i32* %ptr2 to i32**
  %val2 = load i32*, i32** %castptr2, align 4
  br label %loop.inner1.begin

loop.body4.dead:
  br label %loop.body4

loop.body4:
  %comp2 = icmp ult i32 %a0, 3
  br i1 %comp2, label %loop.inner2.begin, label %loop.end

loop.inner2.begin:
  br i1 false, label %loop.end, label %loop.inner2.end

loop.inner2.end:
  %comp3 = icmp eq i32 %a0, 1769472
  br i1 %comp3, label %loop.end, label %loop.inner2.begin

loop.end:
  br label %loop.header

bail:
  unreachable
}

define i32 @problematic_switch() {
; This function's CFG caused overflow in the machine branch probability
; calculation, triggering asserts. Make sure we don't crash on it.
; CHECK: problematic_switch

entry:
  switch i32 undef, label %exit [
    i32 879, label %bogus
    i32 877, label %step
    i32 876, label %step
    i32 875, label %step
    i32 874, label %step
    i32 873, label %step
    i32 872, label %step
    i32 868, label %step
    i32 867, label %step
    i32 866, label %step
    i32 861, label %step
    i32 860, label %step
    i32 856, label %step
    i32 855, label %step
    i32 854, label %step
    i32 831, label %step
    i32 830, label %step
    i32 829, label %step
    i32 828, label %step
    i32 815, label %step
    i32 814, label %step
    i32 811, label %step
    i32 806, label %step
    i32 805, label %step
    i32 804, label %step
    i32 803, label %step
    i32 802, label %step
    i32 801, label %step
    i32 800, label %step
    i32 799, label %step
    i32 798, label %step
    i32 797, label %step
    i32 796, label %step
    i32 795, label %step
  ]
bogus:
  unreachable
step:
  br label %exit
exit:
  %merge = phi i32 [ 3, %step ], [ 6, %entry ]
  ret i32 %merge
}

define void @fpcmp_unanalyzable_branch(i1 %cond, double %a0) {
; This function's CFG contains a branch that was once unanalyzable (une on
; floating point). Now that it is analyzable, we should get the best layout,
; in which each edge in 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' ->
; 'if.end' is a fall-through.
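; The weights referenced below are defined after this function: !1 is 1000:1
; toward the critical edge, and !3 is 1:1000 away from %if.then.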
; CHECK-LABEL: fpcmp_unanalyzable_branch:
; CHECK: # %bb.0: # %entry
; CHECK: # %bb.1: # %entry.if.then_crit_edge
; CHECK: .LBB10_5: # %if.then
; CHECK: .LBB10_6: # %if.end
; CHECK: # %bb.3: # %exit
; CHECK: jne .LBB10_4
; CHECK-NEXT: jnp .LBB10_6
; CHECK: jmp .LBB10_5

entry:
; Note that this branch must be strongly biased toward
; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for
; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end'.
  br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1

entry.if.then_crit_edge:
  %.pre14 = load i8, i8* undef, align 1
  br label %if.then

lor.lhs.false:
  br i1 undef, label %if.end, label %exit

exit:
  %cmp.i = fcmp une double 0.000000e+00, %a0
  br i1 %cmp.i, label %if.then, label %if.end, !prof !3

if.then:
  %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ]
  %1 = and i8 %0, 1
  store i8 %1, i8* undef, align 4
  br label %if.end

if.end:
  ret void
}

!1 = !{!"branch_weights", i32 1000, i32 1}
!3 = !{!"branch_weights", i32 1, i32 1000}

declare i32 @f()
declare i32 @g()
declare i32 @h(i32 %x)

define i32 @test_global_cfg_break_profitability() {
; Check that our metrics for the profitability of a CFG break are global rather
; than local. A successor may be very hot, but if the current block isn't, it
; doesn't matter. Within this test the 'then' block is slightly warmer than the
; 'else' block, but not nearly enough to merit merging it with the exit block
; even though the probability of 'then' branching to the 'exit' block is very
; high.
; CHECK: test_global_cfg_break_profitability
; CHECK: calll {{_?}}f
; CHECK: calll {{_?}}g
; CHECK: calll {{_?}}h
; CHECK: ret

entry:
  br i1 undef, label %then, label %else, !prof !2

then:
  %then.result = call i32 @f()
  br label %exit

else:
  %else.result = call i32 @g()
  br label %exit

exit:
  %result = phi i32 [ %then.result, %then ], [ %else.result, %else ]
  %result2 = call i32 @h(i32 %result)
  ret i32 %result
}

!2 = !{!"branch_weights", i32 3, i32 1}

declare i32 @__gxx_personality_v0(...)

define void @test_eh_lpad_successor() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; Sometimes the landing pad ends up as the first successor of an invoke block.
; When this happens, a strange result used to fall out of updateTerminators: we
; didn't correctly locate the fallthrough successor, assuming blindly that the
; first one was the fallthrough successor. As a result, we would add an
; erroneous jump to the landing pad, thinking *that* was the default successor.
; CHECK-LABEL: test_eh_lpad_successor
; CHECK: %entry
; CHECK-NOT: jmp
; CHECK: %loop

entry:
  invoke i32 @f() to label %preheader unwind label %lpad

preheader:
  br label %loop

lpad:
  %lpad.val = landingpad { i8*, i32 }
          cleanup
  resume { i8*, i32 } %lpad.val

loop:
  br label %loop
}

declare void @fake_throw() noreturn

define void @test_eh_throw() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; For blocks containing a 'throw' (or similar functionality), we have
; a no-return invoke. In this case, only EH successors will exist, and
; fallthrough simply won't occur.
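; (The invoke's normal destination, %continue, is immediately unreachable, so
; the landing pad is the only successor that matters for layout.)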
; Make sure we don't crash trying to update terminators for such constructs.
;
; CHECK-LABEL: test_eh_throw
; CHECK: %entry
; CHECK: %cleanup

entry:
  invoke void @fake_throw() to label %continue unwind label %cleanup

continue:
  unreachable

cleanup:
  %0 = landingpad { i8*, i32 }
          cleanup
  unreachable
}

define void @test_unnatural_cfg_backwards_inner_loop() {
; Test that when we encounter an unnatural CFG structure after having formed
; a chain for an inner loop which happened to be laid out backwards we don't
; attempt to merge onto the wrong end of the inner loop just because we find it
; first. This was reduced from a crash in single-source GCC.
;
; CHECK-LABEL: test_unnatural_cfg_backwards_inner_loop
; CHECK: %entry
; CHECK: %loop2b
; CHECK: %loop3

entry:
  br i1 undef, label %loop2a, label %body

body:
  br label %loop2a

loop1:
  %next.load = load i32*, i32** undef
  br i1 %comp.a, label %loop2a, label %loop2b

loop2a:
  %var = phi i32* [ null, %entry ], [ null, %body ], [ %next.phi, %loop1 ]
  %next.var = phi i32* [ null, %entry ], [ undef, %body ], [ %next.load, %loop1 ]
  %comp.a = icmp eq i32* %var, null
  br label %loop3

loop2b:
  %gep = getelementptr inbounds i32, i32* %var.phi, i32 0
  %next.ptr = bitcast i32* %gep to i32**
  store i32* %next.phi, i32** %next.ptr
  br label %loop3

loop3:
  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
  br label %loop1
}

define void @unanalyzable_branch_to_loop_header(double %a0) {
; Ensure that we can handle unanalyzable branches into loop headers. We
; pre-form chains for unanalyzable branches, and will find the tail end of that
; chain at the start of the loop. This function uses floating point comparison
; fallthrough because that happens to always produce unanalyzable branches on
; x86.
;
; CHECK-LABEL: unanalyzable_branch_to_loop_header
; CHECK: %entry
; CHECK: %loop
; CHECK: %exit

entry:
  %cmp = fcmp une double 0.000000e+00, %a0
  br i1 %cmp, label %loop, label %exit

loop:
  %cond = icmp eq i8 undef, 42
  br i1 %cond, label %exit, label %loop

exit:
  ret void
}

define void @unanalyzable_branch_to_best_succ(i1 %cond, double %a0) {
; Ensure that we can handle unanalyzable branches where the destination block
; gets selected as the optimal successor to merge.
;
; This branch is now analyzable, and hence the destination block becomes the
; hotter one. The right order is entry->bar->exit->foo.
;
; CHECK-LABEL: unanalyzable_branch_to_best_succ
; CHECK: %entry
; CHECK: %bar
; CHECK: %exit
; CHECK: %foo

entry:
  ; Bias this branch toward bar to ensure we form that chain.
  br i1 %cond, label %bar, label %foo, !prof !1

foo:
  %cmp = fcmp une double 0.000000e+00, %a0
  br i1 %cmp, label %bar, label %exit

bar:
  call i32 @f()
  br label %exit

exit:
  ret void
}

define void @unanalyzable_branch_to_free_block(float %x) {
; Ensure that we can handle unanalyzable branches where the destination block
; gets selected as the best free block in the CFG.
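; Here %b ends in a floating-point une comparison, the same construct the
; surrounding tests use to obtain a branch that was historically unanalyzable
; on x86.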
;
; CHECK-LABEL: unanalyzable_branch_to_free_block
; CHECK: %entry
; CHECK: %a
; CHECK: %b
; CHECK: %c
; CHECK: %exit

entry:
  br i1 undef, label %a, label %b

a:
  call i32 @f()
  br label %c

b:
  %cmp = fcmp une float %x, 0.0
  br i1 %cmp, label %c, label %exit

c:
  call i32 @g()
  br label %exit

exit:
  ret void
}

define void @many_unanalyzable_branches() {
; Ensure that we don't crash as we're building up many unanalyzable branches,
; blocks, and loops.
;
; CHECK-LABEL: many_unanalyzable_branches
; CHECK: %entry
; CHECK: %exit

entry:
  br label %0

  %val0 = load volatile float, float* undef
  %cmp0 = fcmp une float %val0, 0.0
  br i1 %cmp0, label %1, label %0
  %val1 = load volatile float, float* undef
  %cmp1 = fcmp une float %val1, 0.0
  br i1 %cmp1, label %2, label %1
  %val2 = load volatile float, float* undef
  %cmp2 = fcmp une float %val2, 0.0
  br i1 %cmp2, label %3, label %2
  %val3 = load volatile float, float* undef
  %cmp3 = fcmp une float %val3, 0.0
  br i1 %cmp3, label %4, label %3
  %val4 = load volatile float, float* undef
  %cmp4 = fcmp une float %val4, 0.0
  br i1 %cmp4, label %5, label %4
  %val5 = load volatile float, float* undef
  %cmp5 = fcmp une float %val5, 0.0
  br i1 %cmp5, label %6, label %5
  %val6 = load volatile float, float* undef
  %cmp6 = fcmp une float %val6, 0.0
  br i1 %cmp6, label %7, label %6
  %val7 = load volatile float, float* undef
  %cmp7 = fcmp une float %val7, 0.0
  br i1 %cmp7, label %8, label %7
  %val8 = load volatile float, float* undef
  %cmp8 = fcmp une float %val8, 0.0
  br i1 %cmp8, label %9, label %8
  %val9 = load volatile float, float* undef
  %cmp9 = fcmp une float %val9, 0.0
  br i1 %cmp9, label %10, label %9
  %val10 = load volatile float, float* undef
  %cmp10 = fcmp une float %val10, 0.0
  br i1 %cmp10, label %11, label %10
  %val11 = load volatile float, float* undef
  %cmp11 = fcmp une float %val11, 0.0
  br i1 %cmp11, label %12, label %11
  %val12 = load volatile float, float* undef
  %cmp12 = fcmp une float %val12, 0.0
  br i1 %cmp12, label %13, label %12
  %val13 = load volatile float, float* undef
  %cmp13 = fcmp une float %val13, 0.0
  br i1 %cmp13, label %14, label %13
  %val14 = load volatile float, float* undef
  %cmp14 = fcmp une float %val14, 0.0
  br i1 %cmp14, label %15, label %14
  %val15 = load volatile float, float* undef
  %cmp15 = fcmp une float %val15, 0.0
  br i1 %cmp15, label %16, label %15
  %val16 = load volatile float, float* undef
  %cmp16 = fcmp une float %val16, 0.0
  br i1 %cmp16, label %17, label %16
  %val17 = load volatile float, float* undef
  %cmp17 = fcmp une float %val17, 0.0
  br i1 %cmp17, label %18, label %17
  %val18 = load volatile float, float* undef
  %cmp18 = fcmp une float %val18, 0.0
  br i1 %cmp18, label %19, label %18
  %val19 = load volatile float, float* undef
  %cmp19 = fcmp une float %val19, 0.0
  br i1 %cmp19, label %20, label %19
  %val20 = load volatile float, float* undef
  %cmp20 = fcmp une float %val20, 0.0
  br i1 %cmp20, label %21, label %20
  %val21 = load volatile float, float* undef
  %cmp21 = fcmp une float %val21, 0.0
  br i1 %cmp21, label %22, label %21
  %val22 = load volatile float, float* undef
  %cmp22 = fcmp une float %val22, 0.0
  br i1 %cmp22, label %23, label %22
  %val23 = load volatile float, float* undef
  %cmp23 = fcmp une float %val23, 0.0
  br i1 %cmp23, label %24, label %23
  %val24 = load volatile float, float* undef
  %cmp24 = fcmp une float %val24, 0.0
  br i1 %cmp24, label %25, label %24
  %val25 = load volatile float, float* undef
  %cmp25 = fcmp une float %val25, 0.0
  br i1 %cmp25, label %26, label %25
  %val26 = load volatile float, float* undef
  %cmp26 = fcmp une float %val26, 0.0
  br i1 %cmp26, label %27, label %26
  %val27 = load volatile float, float* undef
  %cmp27 = fcmp une float %val27, 0.0
  br i1 %cmp27, label %28, label %27
  %val28 = load volatile float, float* undef
  %cmp28 = fcmp une float %val28, 0.0
  br i1 %cmp28, label %29, label %28
  %val29 = load volatile float, float* undef
  %cmp29 = fcmp une float %val29, 0.0
  br i1 %cmp29, label %30, label %29
  %val30 = load volatile float, float* undef
  %cmp30 = fcmp une float %val30, 0.0
  br i1 %cmp30, label %31, label %30
  %val31 = load volatile float, float* undef
  %cmp31 = fcmp une float %val31, 0.0
  br i1 %cmp31, label %32, label %31
  %val32 = load volatile float, float* undef
  %cmp32 = fcmp une float %val32, 0.0
  br i1 %cmp32, label %33, label %32
  %val33 = load volatile float, float* undef
  %cmp33 = fcmp une float %val33, 0.0
  br i1 %cmp33, label %34, label %33
  %val34 = load volatile float, float* undef
  %cmp34 = fcmp une float %val34, 0.0
  br i1 %cmp34, label %35, label %34
  %val35 = load volatile float, float* undef
  %cmp35 = fcmp une float %val35, 0.0
  br i1 %cmp35, label %36, label %35
  %val36 = load volatile float, float* undef
  %cmp36 = fcmp une float %val36, 0.0
  br i1 %cmp36, label %37, label %36
  %val37 = load volatile float, float* undef
  %cmp37 = fcmp une float %val37, 0.0
  br i1 %cmp37, label %38, label %37
  %val38 = load volatile float, float* undef
  %cmp38 = fcmp une float %val38, 0.0
  br i1 %cmp38, label %39, label %38
  %val39 = load volatile float, float* undef
  %cmp39 = fcmp une float %val39, 0.0
  br i1 %cmp39, label %40, label %39
  %val40 = load volatile float, float* undef
  %cmp40 = fcmp une float %val40, 0.0
  br i1 %cmp40, label %41, label %40
  %val41 = load volatile float, float* undef
  %cmp41 = fcmp une float %val41, undef
  br i1 %cmp41, label %42, label %41
  %val42 = load volatile float, float* undef
  %cmp42 = fcmp une float %val42, 0.0
  br i1 %cmp42, label %43, label %42
  %val43 = load volatile float, float* undef
  %cmp43 = fcmp une float %val43, 0.0
  br i1 %cmp43, label %44, label %43
  %val44 = load volatile float, float* undef
  %cmp44 = fcmp une float %val44, 0.0
  br i1 %cmp44, label %45, label %44
  %val45 = load volatile float, float* undef
  %cmp45 = fcmp une float %val45, 0.0
  br i1 %cmp45, label %46, label %45
  %val46 = load volatile float, float* undef
  %cmp46 = fcmp une float %val46, 0.0
  br i1 %cmp46, label %47, label %46
  %val47 = load volatile float, float* undef
  %cmp47 = fcmp une float %val47, 0.0
  br i1 %cmp47, label %48, label %47
  %val48 = load volatile float, float* undef
  %cmp48 = fcmp une float %val48, 0.0
  br i1 %cmp48, label %49, label %48
  %val49 = load volatile float, float* undef
  %cmp49 = fcmp une float %val49, 0.0
  br i1 %cmp49, label %50, label %49
  %val50 = load volatile float, float* undef
  %cmp50 = fcmp une float %val50, 0.0
  br i1 %cmp50, label %51, label %50
  %val51 = load volatile float, float* undef
  %cmp51 = fcmp une float %val51, 0.0
  br i1 %cmp51, label %52, label %51
  %val52 = load volatile float, float* undef
  %cmp52 = fcmp une float %val52, 0.0
  br i1 %cmp52, label %53, label %52
  %val53 = load volatile float, float* undef
  %cmp53 = fcmp une float %val53, 0.0
  br i1 %cmp53, label %54, label %53
  %val54 = load volatile float, float* undef
  %cmp54 = fcmp une float %val54, 0.0
  br i1 %cmp54, label %55, label %54
  %val55 = load volatile float, float* undef
  %cmp55 = fcmp une float %val55, 0.0
  br i1 %cmp55, label %56, label %55
  %val56 = load volatile float, float* undef
  %cmp56 = fcmp une float %val56, 0.0
  br i1 %cmp56, label %57, label %56
  %val57 = load volatile float, float* undef
  %cmp57 = fcmp une float %val57, 0.0
  br i1 %cmp57, label %58, label %57
  %val58 = load volatile float, float* undef
  %cmp58 = fcmp une float %val58, 0.0
  br i1 %cmp58, label %59, label %58
  %val59 = load volatile float, float* undef
  %cmp59 = fcmp une float %val59, 0.0
  br i1 %cmp59, label %60, label %59
  %val60 = load volatile float, float* undef
  %cmp60 = fcmp une float %val60, 0.0
  br i1 %cmp60, label %61, label %60
  %val61 = load volatile float, float* undef
  %cmp61 = fcmp une float %val61, 0.0
  br i1 %cmp61, label %62, label %61
  %val62 = load volatile float, float* undef
  %cmp62 = fcmp une float %val62, 0.0
  br i1 %cmp62, label %63, label %62
  %val63 = load volatile float, float* undef
  %cmp63 = fcmp une float %val63, 0.0
  br i1 %cmp63, label %64, label %63
  %val64 = load volatile float, float* undef
  %cmp64 = fcmp une float %val64, 0.0
  br i1 %cmp64, label %65, label %64

  br label %exit
exit:
  ret void
}

define void @benchmark_heapsort(i32 %n, double* nocapture %ra) {
; This test case comes from the heapsort benchmark, and exemplifies several
; important aspects of block placement in the presence of loops:
; 1) Loop rotation needs to *ensure* that the desired exiting edge can be
;    a fallthrough.
; 2) The exiting edge from the loop which is rotated to be laid out at the
;    bottom of the loop needs to exit into the nearest enclosing loop (to
;    which there is an exit). Otherwise, we force that enclosing loop into
;    strange layouts that are significantly less efficient, often making
;    it discontiguous.
;
; CHECK-LABEL: @benchmark_heapsort
; CHECK: %entry
; First rotated loop top.
; CHECK: .p2align
; CHECK: %while.end
; %for.cond gets completely tail-duplicated away.
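; (presumably its compare and branch are duplicated into its two predecessors,
; %entry and %while.end)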
; CHECK: %if.then
; CHECK: %if.else
; CHECK: %if.end10
; Second rotated loop top.
; CHECK: %while.cond.outer
; Third rotated loop top.
; CHECK: .p2align
; CHECK: %if.end20
; CHECK: %while.cond
; CHECK: %while.body
; CHECK: %land.lhs.true
; CHECK: %if.then19
; CHECK: %if.then24
; CHECK: %if.then8
; CHECK: ret

entry:
  %shr = ashr i32 %n, 1
  %add = add nsw i32 %shr, 1
  %arrayidx3 = getelementptr inbounds double, double* %ra, i64 1
  br label %for.cond

for.cond:
  %ir.0 = phi i32 [ %n, %entry ], [ %ir.1, %while.end ]
  %l.0 = phi i32 [ %add, %entry ], [ %l.1, %while.end ]
  %cmp = icmp sgt i32 %l.0, 1
  br i1 %cmp, label %if.then, label %if.else

if.then:
  %dec = add nsw i32 %l.0, -1
  %idxprom = sext i32 %dec to i64
  %arrayidx = getelementptr inbounds double, double* %ra, i64 %idxprom
  %0 = load double, double* %arrayidx, align 8
  br label %if.end10

if.else:
  %idxprom1 = sext i32 %ir.0 to i64
  %arrayidx2 = getelementptr inbounds double, double* %ra, i64 %idxprom1
  %1 = load double, double* %arrayidx2, align 8
  %2 = load double, double* %arrayidx3, align 8
  store double %2, double* %arrayidx2, align 8
  %dec6 = add nsw i32 %ir.0, -1
  %cmp7 = icmp eq i32 %dec6, 1
  br i1 %cmp7, label %if.then8, label %if.end10

if.then8:
  store double %1, double* %arrayidx3, align 8
  ret void

if.end10:
  %ir.1 = phi i32 [ %ir.0, %if.then ], [ %dec6, %if.else ]
  %l.1 = phi i32 [ %dec, %if.then ], [ %l.0, %if.else ]
  %rra.0 = phi double [ %0, %if.then ], [ %1, %if.else ]
  %add31 = add nsw i32 %ir.1, 1
  br label %while.cond.outer

while.cond.outer:
  %j.0.ph.in = phi i32 [ %l.1, %if.end10 ], [ %j.1, %if.then24 ]
  %j.0.ph = shl i32 %j.0.ph.in, 1
  br label %while.cond

while.cond:
  %j.0 = phi i32 [ %add31, %if.end20 ], [ %j.0.ph, %while.cond.outer ]
  %cmp11 = icmp sgt i32 %j.0, %ir.1
  br i1 %cmp11, label %while.end, label %while.body

while.body:
  %cmp12 = icmp slt i32 %j.0, %ir.1
  br i1 %cmp12, label %land.lhs.true, label %if.end20

land.lhs.true:
  %idxprom13 = sext i32 %j.0 to i64
  %arrayidx14 = getelementptr inbounds double, double* %ra, i64 %idxprom13
  %3 = load double, double* %arrayidx14, align 8
  %add15 = add nsw i32 %j.0, 1
  %idxprom16 = sext i32 %add15 to i64
  %arrayidx17 = getelementptr inbounds double, double* %ra, i64 %idxprom16
  %4 = load double, double* %arrayidx17, align 8
  %cmp18 = fcmp olt double %3, %4
  br i1 %cmp18, label %if.then19, label %if.end20

if.then19:
  br label %if.end20

if.end20:
  %j.1 = phi i32 [ %add15, %if.then19 ], [ %j.0, %land.lhs.true ], [ %j.0, %while.body ]
  %idxprom21 = sext i32 %j.1 to i64
  %arrayidx22 = getelementptr inbounds double, double* %ra, i64 %idxprom21
  %5 = load double, double* %arrayidx22, align 8
  %cmp23 = fcmp olt double %rra.0, %5
  br i1 %cmp23, label %if.then24, label %while.cond

if.then24:
  %idxprom27 = sext i32 %j.0.ph.in to i64
  %arrayidx28 = getelementptr inbounds double, double* %ra, i64 %idxprom27
  store double %5, double* %arrayidx28, align 8
  br label %while.cond.outer

while.end:
  %idxprom33 = sext i32 %j.0.ph.in to i64
  %arrayidx34 = getelementptr inbounds double, double* %ra, i64 %idxprom33
  store double %rra.0, double* %arrayidx34, align 8
  br label %for.cond
}

declare void @cold_function() cold

define i32 @test_cold_calls(i32* %a) {
; Test that edges to blocks post-dominated by cold calls are
; marked as not expected to be taken. They should be laid out
; at the bottom.
; CHECK-LABEL: test_cold_calls:
; CHECK: %entry
; CHECK: %else
; CHECK: %exit
; CHECK: %then

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else

then:
  call void @cold_function()
  br label %exit

else:
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  br label %exit

exit:
  %ret = phi i32 [ %val1, %then ], [ %val2, %else ]
  ret i32 %ret
}

; Make sure we put landingpads out of the way.
declare i32 @pers(...)

declare i32 @foo();

declare i32 @bar();

define i32 @test_lp(i32 %a) personality i32 (...)* @pers {
; CHECK-LABEL: test_lp:
; CHECK: %entry
; CHECK: %hot
; CHECK: %then
; CHECK: %cold
; CHECK: %coldlp
; CHECK: %hotlp
; CHECK: %lpret
entry:
  %0 = icmp sgt i32 %a, 1
  br i1 %0, label %hot, label %cold, !prof !4

hot:
  %1 = invoke i32 @foo()
          to label %then unwind label %hotlp

cold:
  %2 = invoke i32 @bar()
          to label %then unwind label %coldlp

then:
  %3 = phi i32 [ %1, %hot ], [ %2, %cold ]
  ret i32 %3

hotlp:
  %4 = landingpad { i8*, i32 }
          cleanup
  br label %lpret

coldlp:
  %5 = landingpad { i8*, i32 }
          cleanup
  br label %lpret

lpret:
  %6 = phi i32 [-1, %hotlp], [-2, %coldlp]
  %7 = add i32 %6, 42
  ret i32 %7
}

!4 = !{!"branch_weights", i32 65536, i32 0}

; Make sure that EH pads are scheduled from the least probable one
; to the most probable one. See selectBestCandidateBlock as to why.
declare void @clean();

define void @test_flow_unwind() personality i32 (...)* @pers {
; CHECK-LABEL: test_flow_unwind:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit
; CHECK: %innerlp
; CHECK: %outerlp
; CHECK: %outercleanup
entry:
  %0 = invoke i32 @foo()
          to label %then unwind label %outerlp

then:
  %1 = invoke i32 @bar()
          to label %exit unwind label %innerlp

exit:
  ret void

innerlp:
  %2 = landingpad { i8*, i32 }
          cleanup
  br label %innercleanup

outerlp:
  %3 = landingpad { i8*, i32 }
          cleanup
  br label %outercleanup

outercleanup:
  %4 = phi { i8*, i32 } [%2, %innercleanup], [%3, %outerlp]
  call void @clean()
  resume { i8*, i32 } %4

innercleanup:
  call void @clean()
  br label %outercleanup
}

declare void @hot_function()

define void @test_hot_branch(i32* %a) {
; Test that a hot branch that has a probability a little larger than 80% will
; break CFG constraints when doing block placement.
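; !5 gives the taken edge a probability of 84/(84+16) = 84%, just above the
; 80% threshold, so %else is laid out after %exit.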
; CHECK-LABEL: test_hot_branch:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit
; CHECK: %else

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else, !prof !5

then:
  call void @hot_function()
  br label %exit

else:
  call void @cold_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_hot_branch_profile(i32* %a) !prof !6 {
; Test that a hot branch that has a probability a little larger than 50% will
; break CFG constraints when doing block placement when a profile is available.
; CHECK-LABEL: test_hot_branch_profile:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit
; CHECK: %else

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else, !prof !7

then:
  call void @hot_function()
  br label %exit

else:
  call void @cold_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_hot_branch_triangle_profile(i32* %a) !prof !6 {
; Test that a hot branch that has a probability a little larger than 80% will
; break triangle-shaped CFG constraints when doing block placement if a
; profile is present.
; CHECK-LABEL: test_hot_branch_triangle_profile:
; CHECK: %entry
; CHECK: %exit
; CHECK: %then

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %exit, label %then, !prof !5

then:
  call void @hot_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_hot_branch_triangle_profile_topology(i32* %a) !prof !6 {
; Test that a hot branch that has a probability between 50% and 66% will not
; break triangle-shaped CFG constraints when doing block placement if a
; profile is present.
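; !7 gives only a 60/(60+40) = 60% probability, inside the (50%, 66%) range
; where breaking the triangle is not profitable, so the topological order
; %then -> %exit is kept.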
; CHECK-LABEL: test_hot_branch_triangle_profile_topology:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %exit, label %then, !prof !7

then:
  call void @hot_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

declare void @a()
declare void @b()

define void @test_forked_hot_diamond(i32* %a) {
; Test that a hot branch with probability > 80% followed by a 50/50 branch
; will not place the cold predecessor if the probability for the fallthrough
; remains above 80%.
; CHECK-LABEL: test_forked_hot_diamond
; CHECK: %entry
; CHECK: %then
; CHECK: %fork1
; CHECK: %else
; CHECK: %fork2
; CHECK: %exit
entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else, !prof !5

then:
  call void @hot_function()
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %fork1, label %fork2, !prof !8

else:
  call void @cold_function()
  %gep3 = getelementptr i32, i32* %a, i32 3
  %val3 = load i32, i32* %gep3
  %cond3 = icmp ugt i32 %val3, 3
  br i1 %cond3, label %fork1, label %fork2, !prof !8

fork1:
  call void @a()
  br label %exit

fork2:
  call void @b()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_forked_hot_diamond_gets_cold(i32* %a) {
; Test that a hot branch with probability > 80% followed by a 50/50 branch
; will place the cold predecessor if the probability for the fallthrough
; falls below 80%.
; The probability for both branches is 85%. For then2 vs else1
; this results in a compounded probability of 83%.
; Neither then2->fork1 nor then2->fork2 has a large enough relative
; probability to break the CFG.
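; (83% is the compounded path probability:
; (0.85 * 0.85) / (0.85 * 0.85 + 0.15) = 0.7225 / 0.8725 ~= 0.83.)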
; Relative probs:
; then2 -> fork1 vs else1 -> fork1 = 71%
; then2 -> fork2 vs else2 -> fork2 = 74%
; CHECK-LABEL: test_forked_hot_diamond_gets_cold
; CHECK: %entry
; CHECK: %then1
; CHECK: %then2
; CHECK: %else1
; CHECK: %fork1
; CHECK: %else2
; CHECK: %fork2
; CHECK: %exit
entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then1, label %else1, !prof !9

then1:
  call void @hot_function()
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %then2, label %else2, !prof !9

else1:
  call void @cold_function()
  br label %fork1

then2:
  call void @hot_function()
  %gep3 = getelementptr i32, i32* %a, i32 3
  %val3 = load i32, i32* %gep2
  %cond3 = icmp ugt i32 %val2, 3
  br i1 %cond3, label %fork1, label %fork2, !prof !8

else2:
  call void @cold_function()
  br label %fork2

fork1:
  call void @a()
  br label %exit

fork2:
  call void @b()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_forked_hot_diamond_stays_hot(i32* %a) {
; Test that a hot branch with probability > 88.88% (1:8) followed by a 50/50
; branch will not place the cold predecessor, as the probability for the
; fallthrough stays above 80%:
; (1:8) followed by (1:1) is still (1:4).
; Here we use a 90% probability because two in a row give an 89% probability
; vs the original branch.
; CHECK-LABEL: test_forked_hot_diamond_stays_hot
; CHECK: %entry
; CHECK: %then1
; CHECK: %then2
; CHECK: %fork1
; CHECK: %else1
; CHECK: %else2
; CHECK: %fork2
; CHECK: %exit
entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then1, label %else1, !prof !10

then1:
  call void @hot_function()
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %then2, label %else2, !prof !10

else1:
  call void @cold_function()
  br label %fork1

then2:
  call void @hot_function()
  %gep3 = getelementptr i32, i32* %a, i32 3
  %val3 = load i32, i32* %gep2
  %cond3 = icmp ugt i32 %val2, 3
  br i1 %cond3, label %fork1, label %fork2, !prof !8

else2:
  call void @cold_function()
  br label %fork2

fork1:
  call void @a()
  br label %exit

fork2:
  call void @b()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

; Because %endif has a higher frequency than %if, the calculations show we
; shouldn't tail-duplicate %endif so that we can place it after %if. We were
; previously undercounting the cost by ignoring execution frequency that didn't
; come from the %if->%endif path.
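; With the even weights in !11, every path out of %if eventually reaches
; %endif, so duplication would be paid at %endif's full frequency, not just
; along the %if->%endif edge.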
; CHECK-LABEL: higher_frequency_succ_tail_dup
; CHECK: %entry
; CHECK: %elseif
; CHECK: %else
; CHECK: %endif
; CHECK: %then
; CHECK: %ret
define void @higher_frequency_succ_tail_dup(i1 %a, i1 %b, i1 %c) {
entry:
  br label %if
if:                                               ; preds = %entry
  call void @effect(i32 0)
  br i1 %a, label %elseif, label %endif, !prof !11 ; even

elseif:                                           ; preds = %if
  call void @effect(i32 1)
  br i1 %b, label %else, label %endif, !prof !11 ; even

else:                                             ; preds = %elseif
  call void @effect(i32 2)
  br label %endif

endif:                                            ; preds = %if, %elseif, %else
  br i1 %c, label %then, label %ret, !prof !12 ; 5 to 3

then:                                             ; preds = %endif
  call void @effect(i32 3)
  br label %ret

ret:                                              ; preds = %endif, %then
  ret void
}

define i32 @not_rotate_if_extra_branch(i32 %count) {
; This test checks that there is no loop rotation if it would introduce an
; extra branch. Specifically, in this case the best exit is .header, but it
; falls through to the .middle block, and the last block in the loop chain,
; .slow, does not have a fallthrough to .header.
; CHECK-LABEL: not_rotate_if_extra_branch
; CHECK: %.entry
; CHECK: %.header
; CHECK: %.middle
; CHECK: %.backedge
; CHECK: %.slow
; CHECK: %.bailout
; CHECK: %.stop
.entry:
  %sum.0 = shl nsw i32 %count, 1
  br label %.header

.header:
  %i = phi i32 [ %i.1, %.backedge ], [ 0, %.entry ]
  %sum = phi i32 [ %sum.1, %.backedge ], [ %sum.0, %.entry ]
  %is_exc = icmp sgt i32 %i, 9000000
  br i1 %is_exc, label %.bailout, label %.middle, !prof !13

.bailout:
  %sum.2 = add nsw i32 %count, 1
  br label %.stop

.middle:
  %pr.1 = and i32 %i, 1023
  %pr.2 = icmp eq i32 %pr.1, 0
  br i1 %pr.2, label %.slow, label %.backedge, !prof !14

.slow:
  tail call void @effect(i32 %sum)
  br label %.backedge

.backedge:
  %sum.1 = add nsw i32 %i, %sum
  %i.1 = add nsw i32 %i, 1
  %end = icmp slt i32 %i.1, %count
  br i1 %end, label %.header, label %.stop, !prof !15

.stop:
  %sum.phi = phi i32 [ %sum.1, %.backedge ], [ %sum.2, %.bailout ]
  ret i32 %sum.phi
}

define i32 @not_rotate_if_extra_branch_regression(i32 %count, i32 %init) {
; This is a regression test for the patch that avoids loop rotation if it
; would introduce an extra branch.
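; Weights: !14 (1:1023) keeps the bailout, early-stop, and slow edges cold,
; while !13 (1:1) splits the loop-continuation branches evenly.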
; CHECK-LABEL: not_rotate_if_extra_branch_regression
; CHECK: %.entry
; CHECK: %.first_backedge
; CHECK: %.second_header
; CHECK: %.slow
.entry:
  %sum.0 = shl nsw i32 %count, 1
  br label %.first_header

.first_header:
  %i = phi i32 [ %i.1, %.first_backedge ], [ 0, %.entry ]
  %is_bo1 = icmp sgt i32 %i, 9000000
  br i1 %is_bo1, label %.bailout, label %.first_backedge, !prof !14

.first_backedge:
  %i.1 = add nsw i32 %i, 1
  %end = icmp slt i32 %i.1, %count
  br i1 %end, label %.first_header, label %.second_header, !prof !13

.second_header:
  %j = phi i32 [ %j.1, %.second_backedge ], [ %init, %.first_backedge ]
  %end.2 = icmp sgt i32 %j, %count
  br i1 %end.2, label %.stop, label %.second_middle, !prof !14

.second_middle:
  %is_slow = icmp sgt i32 %j, 9000000
  br i1 %is_slow, label %.slow, label %.second_backedge, !prof !14

.slow:
  tail call void @effect(i32 %j)
  br label %.second_backedge

.second_backedge:
  %j.1 = add nsw i32 %j, 1
  %end.3 = icmp slt i32 %j, 10000000
  br i1 %end.3, label %.second_header, label %.stop, !prof !13

.stop:
  %res = add nsw i32 %j, %i.1
  ret i32 %res

.bailout:
  ret i32 0
}

declare void @effect(i32)

!5 = !{!"branch_weights", i32 84, i32 16}
!6 = !{!"function_entry_count", i32 10}
!7 = !{!"branch_weights", i32 60, i32 40}
!8 = !{!"branch_weights", i32 5001, i32 4999}
!9 = !{!"branch_weights", i32 85, i32 15}
!10 = !{!"branch_weights", i32 90, i32 10}
!11 = !{!"branch_weights", i32 1, i32 1}
!12 = !{!"branch_weights", i32 5, i32 3}
!13 = !{!"branch_weights", i32 1, i32 1}
!14 = !{!"branch_weights", i32 1, i32 1023}
!15 = !{!"branch_weights", i32 4095, i32 1}