1; RUN: opt < %s -analyze -block-freq | FileCheck %s
2; RUN: opt < %s -passes='print<block-freq>' -disable-output 2>&1 | FileCheck %s
3
4; This code contains three loops. One is triple-nested, the
5; second is double nested and the third is a single loop. At
6; runtime, all three loops execute 1,000,000 times each. We used to
7; give different frequencies to each of the loops because loop
8; scales were limited to no more than 4,096.
9;
10; This was penalizing the hotness of the second and third loops
11; because BFI was reducing the loop scale for for.cond16 and
12; for.cond26 to a max of 4,096.
13;
14; Without this restriction, all three loops are now correctly given nearly
15; the same frequency values (about 500,000 for each innermost loop body).
16;
17; Original C code:
18;
19;
20; int g;
21; __attribute__((noinline)) void bar() {
22;  g++;
23; }
24;
25; extern int printf(const char*, ...);
26;
27; int main()
28; {
29;   int i, j, k;
30;
31;   g = 0;
32;   for (i = 0; i < 100; i++)
33;     for (j = 0; j < 100; j++)
34;        for (k = 0; k < 100; k++)
35;            bar();
36;
37;   printf ("g = %d\n", g);
38;   g = 0;
39;
40;   for (i = 0; i < 100; i++)
41;     for (j = 0; j < 10000; j++)
42;         bar();
43;
44;   printf ("g = %d\n", g);
45;   g = 0;
46;
47;
48;   for (i = 0; i < 1000000; i++)
49;     bar();
50;
51;   printf ("g = %d\n", g);
52;   g = 0;
53; }
54
; Module-level state and external declarations used by @main.
;
; @g is the i32 global that @main zeroes before each loop nest and passes to
; printf after it.
55@g = common global i32 0, align 4
; printf format string "g = %d\n" plus the NUL terminator (8 bytes).
56@.str = private unnamed_addr constant [8 x i8] c"g = %d\0A\00", align 1
57
; @bar is only declared in this module; its body is not part of the test.
58declare void @bar()
; Variadic printf, called once after each loop nest.
59declare i32 @printf(i8*, ...)
60
61; CHECK: Printing analysis {{.*}} for function 'main':
62; CHECK-NEXT: block-frequency-info: main
; main: runs three loop nests in sequence. By the loop bounds below, each nest
; reaches its innermost call to @bar 1,000,000 times
; (100 * 100 * 100, 100 * 10000 and 1000000 respectively). The !prof branch
; weights attached to every loop-header branch feed the block-frequency
; analysis, and the FileCheck expectations placed before for.body6, for.body18
; and for.body28 pin the frequency printed for each innermost body.
63define i32 @main() {
64entry:
; Stack slots for the return value and the three induction variables.
65  %retval = alloca i32, align 4
66  %i = alloca i32, align 4
67  %j = alloca i32, align 4
68  %k = alloca i32, align 4
69  store i32 0, i32* %retval
; g = 0; i = 0; then enter the triple-nested loop.
70  store i32 0, i32* @g, align 4
71  store i32 0, i32* %i, align 4
72  br label %for.cond
73
; ---- Loop nest 1: i < 100, j < 100, k < 100 ----
; Outer header: continue while i < 100 (weights in !1).
74for.cond:                                         ; preds = %for.inc10, %entry
75  %0 = load i32, i32* %i, align 4
76  %cmp = icmp slt i32 %0, 100
77  br i1 %cmp, label %for.body, label %for.end12, !prof !1
78
; j = 0 before entering the middle loop.
79for.body:                                         ; preds = %for.cond
80  store i32 0, i32* %j, align 4
81  br label %for.cond1
82
; Middle header: continue while j < 100 (weights in !2).
83for.cond1:                                        ; preds = %for.inc7, %for.body
84  %1 = load i32, i32* %j, align 4
85  %cmp2 = icmp slt i32 %1, 100
86  br i1 %cmp2, label %for.body3, label %for.end9, !prof !2
87
; k = 0 before entering the innermost loop.
88for.body3:                                        ; preds = %for.cond1
89  store i32 0, i32* %k, align 4
90  br label %for.cond4
91
; Innermost header: continue while k < 100 (weights in !3).
92for.cond4:                                        ; preds = %for.inc, %for.body3
93  %2 = load i32, i32* %k, align 4
94  %cmp5 = icmp slt i32 %2, 100
95  br i1 %cmp5, label %for.body6, label %for.end, !prof !3
96
; Innermost body of nest 1: the expectation below pins its printed frequency.
97; CHECK: - for.body6: float = 500000.5, int = 4000004
98for.body6:                                        ; preds = %for.cond4
99  call void @bar()
100  br label %for.inc
101
; k++
102for.inc:                                          ; preds = %for.body6
103  %3 = load i32, i32* %k, align 4
104  %inc = add nsw i32 %3, 1
105  store i32 %inc, i32* %k, align 4
106  br label %for.cond4
107
108for.end:                                          ; preds = %for.cond4
109  br label %for.inc7
110
; j++
111for.inc7:                                         ; preds = %for.end
112  %4 = load i32, i32* %j, align 4
113  %inc8 = add nsw i32 %4, 1
114  store i32 %inc8, i32* %j, align 4
115  br label %for.cond1
116
117for.end9:                                         ; preds = %for.cond1
118  br label %for.inc10
119
; i++
120for.inc10:                                        ; preds = %for.end9
121  %5 = load i32, i32* %i, align 4
122  %inc11 = add nsw i32 %5, 1
123  store i32 %inc11, i32* %i, align 4
124  br label %for.cond
125
; After nest 1: print g, then reset g and i for the second nest.
126for.end12:                                        ; preds = %for.cond
127  %6 = load i32, i32* @g, align 4
128  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %6)
129  store i32 0, i32* @g, align 4
130  store i32 0, i32* %i, align 4
131  br label %for.cond13
132
; ---- Loop nest 2: i < 100, j < 10000 ----
; Outer header: continue while i < 100 (same weights !1 as nest 1's outer loop).
133for.cond13:                                       ; preds = %for.inc22, %for.end12
134  %7 = load i32, i32* %i, align 4
135  %cmp14 = icmp slt i32 %7, 100
136  br i1 %cmp14, label %for.body15, label %for.end24, !prof !1
137
; j = 0 before entering the inner loop.
138for.body15:                                       ; preds = %for.cond13
139  store i32 0, i32* %j, align 4
140  br label %for.cond16
141
; Inner header: continue while j < 10000 (weights in !4).
142for.cond16:                                       ; preds = %for.inc19, %for.body15
143  %8 = load i32, i32* %j, align 4
144  %cmp17 = icmp slt i32 %8, 10000
145  br i1 %cmp17, label %for.body18, label %for.end21, !prof !4
146
; Innermost body of nest 2: the expectation below pins its printed frequency.
147; CHECK: - for.body18: float = 499999.9, int = 3999998
148for.body18:                                       ; preds = %for.cond16
149  call void @bar()
150  br label %for.inc19
151
; j++
152for.inc19:                                        ; preds = %for.body18
153  %9 = load i32, i32* %j, align 4
154  %inc20 = add nsw i32 %9, 1
155  store i32 %inc20, i32* %j, align 4
156  br label %for.cond16
157
158for.end21:                                        ; preds = %for.cond16
159  br label %for.inc22
160
; i++
161for.inc22:                                        ; preds = %for.end21
162  %10 = load i32, i32* %i, align 4
163  %inc23 = add nsw i32 %10, 1
164  store i32 %inc23, i32* %i, align 4
165  br label %for.cond13
166
; After nest 2: print g, then reset g and i for the single loop.
167for.end24:                                        ; preds = %for.cond13
168  %11 = load i32, i32* @g, align 4
169  %call25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %11)
170  store i32 0, i32* @g, align 4
171  store i32 0, i32* %i, align 4
172  br label %for.cond26
173
; ---- Loop 3: i < 1000000 ----
; Header: continue while i < 1000000 (weights in !5).
174for.cond26:                                       ; preds = %for.inc29, %for.end24
175  %12 = load i32, i32* %i, align 4
176  %cmp27 = icmp slt i32 %12, 1000000
177  br i1 %cmp27, label %for.body28, label %for.end31, !prof !5
178
; Body of loop 3: the expectation below pins its printed frequency.
179; CHECK: - for.body28: float = 499995.2, int = 3999961
180for.body28:                                       ; preds = %for.cond26
181  call void @bar()
182  br label %for.inc29
183
; i++
184for.inc29:                                        ; preds = %for.body28
185  %13 = load i32, i32* %i, align 4
186  %inc30 = add nsw i32 %13, 1
187  store i32 %inc30, i32* %i, align 4
188  br label %for.cond26
189
; After loop 3: print g, reset it, and return the value held in %retval
; (stored as 0 in the entry block and never written again).
190for.end31:                                        ; preds = %for.cond26
191  %14 = load i32, i32* @g, align 4
192  %call32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %14)
193  store i32 0, i32* @g, align 4
194  %15 = load i32, i32* %retval
195  ret i32 %15
196}
197
; Module metadata: the producer identification string and the branch-weight
; profiles attached (via !prof) to the loop-header branches in @main.
; In each branch_weights node the first weight is for the branch's first
; successor (the loop body) and the second for its second successor (the
; loop exit).
; NOTE(review): the weights look like "execution count + 1" for each edge;
; confirm against the tool that generated the profile.
198!llvm.ident = !{!0}
199
200!0 = !{!"clang version 3.7.0 (trunk 232635) (llvm/trunk 232636)"}
; !1: outer loops with bound 100 (for.cond and for.cond13).
201!1 = !{!"branch_weights", i32 101, i32 2}
; !2: middle loop of nest 1 with bound 100 (for.cond1).
202!2 = !{!"branch_weights", i32 10001, i32 101}
; !3: innermost loop of nest 1 with bound 100 (for.cond4).
203!3 = !{!"branch_weights", i32 1000001, i32 10001}
; !4: inner loop of nest 2 with bound 10000 (for.cond16).
204!4 = !{!"branch_weights", i32 1000001, i32 101}
; !5: single loop with bound 1000000 (for.cond26).
205!5 = !{!"branch_weights", i32 1000001, i32 2}
206