1; RUN: opt < %s -analyze -block-freq | FileCheck %s
2
3; This code contains three loops. One is triple-nested, the
4; second is double-nested and the third is a single loop. At
5; runtime, all three loops execute 1,000,000 times each. We used to
6; give different frequencies to each of the loops because loop
7; scales were limited to no more than 4,096.
8;
9; This was penalizing the hotness of the second and third loops
10; because BFI was reducing the loop scale for for.cond16 and
11; for.cond26 to a max of 4,096.
12;
13; Without this restriction, all loops are now correctly given nearly the
14; same frequency values (they differ only by a small rounding error).
15;
16; Original C code:
17;
18;
19; int g;
20; __attribute__((noinline)) void bar() {
21;  g++;
22; }
23;
24; extern int printf(const char*, ...);
25;
26; int main()
27; {
28;   int i, j, k;
29;
30;   g = 0;
31;   for (i = 0; i < 100; i++)
32;     for (j = 0; j < 100; j++)
33;        for (k = 0; k < 100; k++)
34;            bar();
35;
36;   printf ("g = %d\n", g);
37;   g = 0;
38;
39;   for (i = 0; i < 100; i++)
40;     for (j = 0; j < 10000; j++)
41;         bar();
42;
43;   printf ("g = %d\n", g);
44;   g = 0;
45;
46;
47;   for (i = 0; i < 1000000; i++)
48;     bar();
49;
50;   printf ("g = %d\n", g);
51;   g = 0;
52; }
53
54@g = common global i32 0, align 4 ; the counter `g` of the C source quoted above; zero-initialized
55@.str = private unnamed_addr constant [8 x i8] c"g = %d\0A\00", align 1 ; printf format string "g = %d\n" (NUL-terminated)
56
57declare void @bar() ; external here; per the C source quoted above it increments g
58declare i32 @printf(i8*, ...) ; variadic libc printf
59
60; CHECK: Printing analysis {{.*}} for function 'main':
61; CHECK-NEXT: block-frequency-info: main
62define i32 @main() { ; three loop nests (100*100*100, 100*10000, 1000000), each calling @bar in its innermost body
63entry:
64  %retval = alloca i32, align 4 ; slot holding main's return value
65  %i = alloca i32, align 4 ; loop counter i (kept in memory; this IR is unoptimized)
66  %j = alloca i32, align 4 ; loop counter j
67  %k = alloca i32, align 4 ; loop counter k
68  store i32 0, i32* %retval ; return value defaults to 0 and is never overwritten
69  store i32 0, i32* @g, align 4 ; g = 0
70  store i32 0, i32* %i, align 4 ; i = 0
71  br label %for.cond
72
73for.cond:                                         ; preds = %for.inc10, %entry
74  %0 = load i32, i32* %i, align 4
75  %cmp = icmp slt i32 %0, 100 ; first nest, outer loop: i < 100
76  br i1 %cmp, label %for.body, label %for.end12, !prof !1 ; weights 101 (enter body) : 2 (exit)
77
78for.body:                                         ; preds = %for.cond
79  store i32 0, i32* %j, align 4 ; j = 0
80  br label %for.cond1
81
82for.cond1:                                        ; preds = %for.inc7, %for.body
83  %1 = load i32, i32* %j, align 4
84  %cmp2 = icmp slt i32 %1, 100 ; first nest, middle loop: j < 100
85  br i1 %cmp2, label %for.body3, label %for.end9, !prof !2 ; weights 10001 (enter body) : 101 (exit)
86
87for.body3:                                        ; preds = %for.cond1
88  store i32 0, i32* %k, align 4 ; k = 0
89  br label %for.cond4
90
91for.cond4:                                        ; preds = %for.inc, %for.body3
92  %2 = load i32, i32* %k, align 4
93  %cmp5 = icmp slt i32 %2, 100 ; first nest, inner loop: k < 100
94  br i1 %cmp5, label %for.body6, label %for.end, !prof !3 ; weights 1000001 (enter body) : 10001 (exit)
95
96; CHECK: - for.body6: float = 500000.5, int = 4000004
97for.body6:                                        ; preds = %for.cond4
98  call void @bar() ; innermost body of the triple-nested loop
99  br label %for.inc
100
101for.inc:                                          ; preds = %for.body6
102  %3 = load i32, i32* %k, align 4
103  %inc = add nsw i32 %3, 1
104  store i32 %inc, i32* %k, align 4 ; k++
105  br label %for.cond4 ; back edge of the inner loop
106
107for.end:                                          ; preds = %for.cond4
108  br label %for.inc7
109
110for.inc7:                                         ; preds = %for.end
111  %4 = load i32, i32* %j, align 4
112  %inc8 = add nsw i32 %4, 1
113  store i32 %inc8, i32* %j, align 4 ; j++
114  br label %for.cond1 ; back edge of the middle loop
115
116for.end9:                                         ; preds = %for.cond1
117  br label %for.inc10
118
119for.inc10:                                        ; preds = %for.end9
120  %5 = load i32, i32* %i, align 4
121  %inc11 = add nsw i32 %5, 1
122  store i32 %inc11, i32* %i, align 4 ; i++
123  br label %for.cond ; back edge of the outer loop
124
125for.end12:                                        ; preds = %for.cond
126  %6 = load i32, i32* @g, align 4
127  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %6) ; printf("g = %d\n", g)
128  store i32 0, i32* @g, align 4 ; g = 0
129  store i32 0, i32* %i, align 4 ; i = 0, reused by the second nest
130  br label %for.cond13
131
132for.cond13:                                       ; preds = %for.inc22, %for.end12
133  %7 = load i32, i32* %i, align 4
134  %cmp14 = icmp slt i32 %7, 100 ; second nest, outer loop: i < 100
135  br i1 %cmp14, label %for.body15, label %for.end24, !prof !1 ; weights 101 (enter body) : 2 (exit)
136
137for.body15:                                       ; preds = %for.cond13
138  store i32 0, i32* %j, align 4 ; j = 0
139  br label %for.cond16
140
141for.cond16:                                       ; preds = %for.inc19, %for.body15
142  %8 = load i32, i32* %j, align 4
143  %cmp17 = icmp slt i32 %8, 10000 ; second nest, inner loop: j < 10000
144  br i1 %cmp17, label %for.body18, label %for.end21, !prof !4 ; weights 1000001 (enter body) : 101 (exit)
145
146; CHECK: - for.body18: float = 499999.9, int = 3999998
147for.body18:                                       ; preds = %for.cond16
148  call void @bar() ; innermost body of the double-nested loop
149  br label %for.inc19
150
151for.inc19:                                        ; preds = %for.body18
152  %9 = load i32, i32* %j, align 4
153  %inc20 = add nsw i32 %9, 1
154  store i32 %inc20, i32* %j, align 4 ; j++
155  br label %for.cond16 ; back edge of the inner loop
156
157for.end21:                                        ; preds = %for.cond16
158  br label %for.inc22
159
160for.inc22:                                        ; preds = %for.end21
161  %10 = load i32, i32* %i, align 4
162  %inc23 = add nsw i32 %10, 1
163  store i32 %inc23, i32* %i, align 4 ; i++
164  br label %for.cond13 ; back edge of the outer loop
165
166for.end24:                                        ; preds = %for.cond13
167  %11 = load i32, i32* @g, align 4
168  %call25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %11) ; printf("g = %d\n", g)
169  store i32 0, i32* @g, align 4 ; g = 0
170  store i32 0, i32* %i, align 4 ; i = 0, reused by the single loop
171  br label %for.cond26
172
173for.cond26:                                       ; preds = %for.inc29, %for.end24
174  %12 = load i32, i32* %i, align 4
175  %cmp27 = icmp slt i32 %12, 1000000 ; single loop: i < 1000000
176  br i1 %cmp27, label %for.body28, label %for.end31, !prof !5 ; weights 1000001 (enter body) : 2 (exit)
177
178; CHECK: - for.body28: float = 499995.2, int = 3999961
179for.body28:                                       ; preds = %for.cond26
180  call void @bar() ; body of the single loop
181  br label %for.inc29
182
183for.inc29:                                        ; preds = %for.body28
184  %13 = load i32, i32* %i, align 4
185  %inc30 = add nsw i32 %13, 1
186  store i32 %inc30, i32* %i, align 4 ; i++
187  br label %for.cond26 ; back edge of the single loop
188
189for.end31:                                        ; preds = %for.cond26
190  %14 = load i32, i32* @g, align 4
191  %call32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %14) ; printf("g = %d\n", g)
192  store i32 0, i32* @g, align 4 ; g = 0
193  %15 = load i32, i32* %retval ; reload the 0 stored in the entry block
194  ret i32 %15
195}
196
197!llvm.ident = !{!0} ; named metadata pointing at the producer string in !0
198
199!0 = !{!"clang version 3.7.0 (trunk 232635) (llvm/trunk 232636)"}
200!1 = !{!"branch_weights", i32 101, i32 2} ; used by for.cond and for.cond13 (the two 100-iteration outer loops)
201!2 = !{!"branch_weights", i32 10001, i32 101} ; used by for.cond1 (middle loop of the triple nest)
202!3 = !{!"branch_weights", i32 1000001, i32 10001} ; used by for.cond4 (inner loop of the triple nest)
203!4 = !{!"branch_weights", i32 1000001, i32 101} ; used by for.cond16 (inner loop of the double nest)
204!5 = !{!"branch_weights", i32 1000001, i32 2} ; used by for.cond26 (the single loop)
205