; RUN: opt < %s -analyze -block-freq | FileCheck %s
; RUN: opt < %s -passes='print<block-freq>' -disable-output 2>&1 | FileCheck %s

; This code contains three loops. One is triple-nested, the
; second is double-nested and the third is a single loop. At
; runtime, all three loops execute 1,000,000 times each. We used to
; give different frequencies to each of the loops because loop
; scales were limited to no more than 4,096.
;
; This was penalizing the hotness of the second and third loops
; because BFI was reducing the loop scale for for.cond16 and
; for.cond26 to a max of 4,096.
;
; Without this restriction, all loops are now correctly given
; essentially the same frequency values (the three checked values
; below differ only in their low-order digits).
;
; Original C code:
;
;
; int g;
; __attribute__((noinline)) void bar() {
;   g++;
; }
;
; extern int printf(const char*, ...);
;
; int main()
; {
;   int i, j, k;
;
;   g = 0;
;   for (i = 0; i < 100; i++)
;     for (j = 0; j < 100; j++)
;       for (k = 0; k < 100; k++)
;         bar();
;
;   printf ("g = %d\n", g);
;   g = 0;
;
;   for (i = 0; i < 100; i++)
;     for (j = 0; j < 10000; j++)
;       bar();
;
;   printf ("g = %d\n", g);
;   g = 0;
;
;
;   for (i = 0; i < 1000000; i++)
;     bar();
;
;   printf ("g = %d\n", g);
;   g = 0;
; }

@g = common global i32 0, align 4
@.str = private unnamed_addr constant [8 x i8] c"g = %d\0A\00", align 1

declare void @bar()
declare i32 @printf(i8*, ...)

; CHECK: Printing analysis {{.*}} for function 'main':
; CHECK-NEXT: block-frequency-info: main
define i32 @main() {
entry:
  %retval = alloca i32, align 4
  %i = alloca i32, align 4
  %j = alloca i32, align 4
  %k = alloca i32, align 4
  store i32 0, i32* %retval
  store i32 0, i32* @g, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond

; First loop nest: three levels, each bounded by 100.
for.cond:                                         ; preds = %for.inc10, %entry
  %0 = load i32, i32* %i, align 4
  %cmp = icmp slt i32 %0, 100
  br i1 %cmp, label %for.body, label %for.end12, !prof !1

for.body:                                         ; preds = %for.cond
  store i32 0, i32* %j, align 4
  br label %for.cond1

for.cond1:                                        ; preds = %for.inc7, %for.body
  %1 = load i32, i32* %j, align 4
  %cmp2 = icmp slt i32 %1, 100
  br i1 %cmp2, label %for.body3, label %for.end9, !prof !2

for.body3:                                        ; preds = %for.cond1
  store i32 0, i32* %k, align 4
  br label %for.cond4

for.cond4:                                        ; preds = %for.inc, %for.body3
  %2 = load i32, i32* %k, align 4
  %cmp5 = icmp slt i32 %2, 100
  br i1 %cmp5, label %for.body6, label %for.end, !prof !3

; CHECK: - for.body6: float = 500000.5, int = 4000004
for.body6:                                        ; preds = %for.cond4
  call void @bar()
  br label %for.inc

for.inc:                                          ; preds = %for.body6
  %3 = load i32, i32* %k, align 4
  %inc = add nsw i32 %3, 1
  store i32 %inc, i32* %k, align 4
  br label %for.cond4

for.end:                                          ; preds = %for.cond4
  br label %for.inc7

for.inc7:                                         ; preds = %for.end
  %4 = load i32, i32* %j, align 4
  %inc8 = add nsw i32 %4, 1
  store i32 %inc8, i32* %j, align 4
  br label %for.cond1

for.end9:                                         ; preds = %for.cond1
  br label %for.inc10

for.inc10:                                        ; preds = %for.end9
  %5 = load i32, i32* %i, align 4
  %inc11 = add nsw i32 %5, 1
  store i32 %inc11, i32* %i, align 4
  br label %for.cond

for.end12:                                        ; preds = %for.cond
  %6 = load i32, i32* @g, align 4
  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %6)
  store i32 0, i32* @g, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond13

; Second loop nest: two levels, bounded by 100 and 10000.
for.cond13:                                       ; preds = %for.inc22, %for.end12
  %7 = load i32, i32* %i, align 4
  %cmp14 = icmp slt i32 %7, 100
  br i1 %cmp14, label %for.body15, label %for.end24, !prof !1

for.body15:                                       ; preds = %for.cond13
  store i32 0, i32* %j, align 4
  br label %for.cond16

for.cond16:                                       ; preds = %for.inc19, %for.body15
  %8 = load i32, i32* %j, align 4
  %cmp17 = icmp slt i32 %8, 10000
  br i1 %cmp17, label %for.body18, label %for.end21, !prof !4

; CHECK: - for.body18: float = 499999.9, int = 3999998
for.body18:                                       ; preds = %for.cond16
  call void @bar()
  br label %for.inc19

for.inc19:                                        ; preds = %for.body18
  %9 = load i32, i32* %j, align 4
  %inc20 = add nsw i32 %9, 1
  store i32 %inc20, i32* %j, align 4
  br label %for.cond16

for.end21:                                        ; preds = %for.cond16
  br label %for.inc22

for.inc22:                                        ; preds = %for.end21
  %10 = load i32, i32* %i, align 4
  %inc23 = add nsw i32 %10, 1
  store i32 %inc23, i32* %i, align 4
  br label %for.cond13

for.end24:                                        ; preds = %for.cond13
  %11 = load i32, i32* @g, align 4
  %call25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %11)
  store i32 0, i32* @g, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond26

; Third loop: a single level bounded by 1000000.
for.cond26:                                       ; preds = %for.inc29, %for.end24
  %12 = load i32, i32* %i, align 4
  %cmp27 = icmp slt i32 %12, 1000000
  br i1 %cmp27, label %for.body28, label %for.end31, !prof !5

; CHECK: - for.body28: float = 499995.2, int = 3999961
for.body28:                                       ; preds = %for.cond26
  call void @bar()
  br label %for.inc29

for.inc29:                                        ; preds = %for.body28
  %13 = load i32, i32* %i, align 4
  %inc30 = add nsw i32 %13, 1
  store i32 %inc30, i32* %i, align 4
  br label %for.cond26

for.end31:                                        ; preds = %for.cond26
  %14 = load i32, i32* @g, align 4
  %call32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %14)
  store i32 0, i32* @g, align 4
  %15 = load i32, i32* %retval
  ret i32 %15
}

!llvm.ident = !{!0}

!0 = !{!"clang version 3.7.0 (trunk 232635) (llvm/trunk 232636)"}
; Branch weights are (loop-body successor, loop-exit successor).
; !1: outer loops of the first and second nests (for.cond, for.cond13).
!1 = !{!"branch_weights", i32 101, i32 2}
; !2: middle loop of the first nest (for.cond1).
!2 = !{!"branch_weights", i32 10001, i32 101}
; !3: innermost loop of the first nest (for.cond4).
!3 = !{!"branch_weights", i32 1000001, i32 10001}
; !4: inner loop of the second nest (for.cond16).
!4 = !{!"branch_weights", i32 1000001, i32 101}
; !5: the single third loop (for.cond26).
!5 = !{!"branch_weights", i32 1000001, i32 2}