; RUN: llc -O2 -o - %s | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-grtev4-linux-gnu"

; Tests for tail-duplication performed during machine block layout.
;
; tail_dup_break_cfg: %test2 is expected to be tail-duplicated into %body1.
; The CHECK lines below require the following block order:
;   test1
;   test2
;   exit
;   body1 (with copy of test2)
;   body2

; CHECK-LABEL: tail_dup_break_cfg:
; CHECK: mr [[TAGREG:[0-9]+]], 3
; CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
; CHECK-NEXT: bc 12, 1, [[BODY1LABEL:[._0-9A-Za-z]+]]
; CHECK-NEXT: # %test2
; CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2
; CHECK-NEXT: bne 0, [[BODY2LABEL:[._0-9A-Za-z]+]]
; CHECK: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit
; CHECK: blr
; CHECK-NEXT: [[BODY1LABEL]]
; The duplicated copy of %test2 at the end of %body1:
; CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2
; CHECK-NEXT: beq 0, [[EXITLABEL]]
; Reference (do not re-define) BODY2LABEL so that the target of the bne in
; %test2 is verified to be the block that follows the duplicated test.
; CHECK-NEXT: [[BODY2LABEL]]:
; CHECK: b [[EXITLABEL]]
define void @tail_dup_break_cfg(i32 %tag) {
entry:
  br label %test1
test1:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %body1, !prof !1 ; %test2 more likely
body1:
  call void @a()
  call void @a()
  call void @a()
  call void @a()
  br label %test2
test2:
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %exit, label %body2, !prof !1 ; %exit more likely
body2:
  call void @b()
  call void @b()
  call void @b()
  call void @b()
  br label %exit
exit:
  ret void
}

; The branch weights here hint that we shouldn't tail duplicate in this case.
; The CHECK lines below require the original fallthrough order:
;   test1, body1, test2, body2, exit
; CHECK-LABEL: tail_dup_dont_break_cfg:
; CHECK: mr [[TAGREG:[0-9]+]], 3
; CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
; CHECK-NEXT: bc 4, 1, [[TEST2LABEL:[._0-9A-Za-z]+]]
; CHECK-NEXT: # %body1
; CHECK: [[TEST2LABEL]]: # %test2
; CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2
; CHECK-NEXT: beq 0, [[EXITLABEL:[._0-9A-Za-z]+]]
; CHECK-NEXT: # %body2
; Reference (do not re-define) EXITLABEL so that the beq above is verified to
; target the %exit block.
; CHECK: [[EXITLABEL]]: # %exit
; CHECK: blr
define void @tail_dup_dont_break_cfg(i32 %tag) {
entry:
  br label %test1
test1:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %body1, !prof !1 ; %test2 more likely
body1:
  call void @a()
  call void @a()
  call void @a()
  call void @a()
  br label %test2
test2:
  %tagbit2 = and i32 %tag, 2
  ; Note the inverted sense compared with tail_dup_break_cfg: the condition is
  ; "bit set", and the taken (more likely) successor is %body2.
  %tagbit2ne0 = icmp ne i32 %tagbit2, 0
  br i1 %tagbit2ne0, label %body2, label %exit, !prof !3 ; %body2 more likely
body2:
  call void @b()
  call void @b()
  call void @b()
  call void @b()
  br label %exit
exit:
  ret void
}

; External callees used only to give the blocks non-trivial bodies.
declare void @a()
declare void @b()
declare void @c()
declare void @d()

; This function arranges for the successors of %succ to have already been laid
; out. When we consider whether to lay out succ after bb and to tail-duplicate
; it, v and ret have already been placed, so we tail-duplicate as it removes a
; branch and strictly increases fallthrough.
; CHECK-LABEL: tail_dup_no_succ:
; CHECK: # %entry
; CHECK: # %v
; CHECK: # %ret
; CHECK: # %bb
; CHECK: # %succ
; CHECK: # %c
; CHECK: bl c
; The duplicated copy of %succ's test at the end of %c:
; CHECK: andi. {{[0-9]+}}, {{[0-9]+}}, 4
; CHECK: beq
; CHECK: b
define void @tail_dup_no_succ(i32 %tag) {
entry:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %v, label %bb, !prof !2 ; %v very much more likely
bb:
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %succ, label %c, !prof !3 ; %succ more likely
c:
  call void @c()
  call void @c()
  br label %succ
succ:
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %ret, label %v, !prof !1 ; %ret more likely
v:
  call void @d()
  call void @d()
  br label %ret
ret:
  ret void
}


; Branch-weight profiles: the first weight applies to the first (true)
; successor of the conditional branch it is attached to.
!1 = !{!"branch_weights", i32 5, i32 3}
!2 = !{!"branch_weights", i32 95, i32 5}
!3 = !{!"branch_weights", i32 8, i32 3}