; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-machineinstrs | FileCheck %s
;
; Verify that misched resource/latency balancing heuristics are sane.

define void @unrolled_mmult1(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
  i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
  i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
  nounwind uwtable ssp {
entry:
  br label %for.body

; imull folded loads should be in order and interleaved with addl, never
; adjacent. Also check that we have no spilling.
;
; Since the mmult1 IR is already in good order, this effectively ensures
; the scheduler maintains source order.
;
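; For illustration only (hypothetical core2 output, not checked verbatim):
; the interleaving we expect looks roughly like the sequence below, where an
; addl separates consecutive folded-load imulls (registers are made up):
;   imull 4(%rsi), %ecx
;   addl  %ecx, %eax
;   imull 8(%rsi), %edx
;   addl  %edx, %eax
;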
; CHECK-LABEL: %for.body
; CHECK-NOT: %rsp
; CHECK: imull 4
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 8
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 12
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 16
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 20
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 24
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 28
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 32
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 36
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK-NOT: {{imull|rsp}}
; CHECK-LABEL: %end
for.body:
  %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
  %tmp57 = load i32, i32* %tmp56, align 4
  %arrayidx12.us.i61 = getelementptr inbounds i32, i32* %pre, i64 %indvars.iv42.i
  %tmp58 = load i32, i32* %arrayidx12.us.i61, align 4
  %mul.us.i = mul nsw i32 %tmp58, %tmp57
  %arrayidx8.us.i.1 = getelementptr inbounds i32, i32* %tmp56, i64 1
  %tmp59 = load i32, i32* %arrayidx8.us.i.1, align 4
  %arrayidx12.us.i61.1 = getelementptr inbounds i32, i32* %pre94, i64 %indvars.iv42.i
  %tmp60 = load i32, i32* %arrayidx12.us.i61.1, align 4
  %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
  %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
  %arrayidx8.us.i.2 = getelementptr inbounds i32, i32* %tmp56, i64 2
  %tmp61 = load i32, i32* %arrayidx8.us.i.2, align 4
  %arrayidx12.us.i61.2 = getelementptr inbounds i32, i32* %pre95, i64 %indvars.iv42.i
  %tmp62 = load i32, i32* %arrayidx12.us.i61.2, align 4
  %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
  %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
  %arrayidx8.us.i.3 = getelementptr inbounds i32, i32* %tmp56, i64 3
  %tmp63 = load i32, i32* %arrayidx8.us.i.3, align 4
  %arrayidx12.us.i61.3 = getelementptr inbounds i32, i32* %pre96, i64 %indvars.iv42.i
  %tmp64 = load i32, i32* %arrayidx12.us.i61.3, align 4
  %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
  %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
  %arrayidx8.us.i.4 = getelementptr inbounds i32, i32* %tmp56, i64 4
  %tmp65 = load i32, i32* %arrayidx8.us.i.4, align 4
  %arrayidx12.us.i61.4 = getelementptr inbounds i32, i32* %pre97, i64 %indvars.iv42.i
  %tmp66 = load i32, i32* %arrayidx12.us.i61.4, align 4
  %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
  %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
  %arrayidx8.us.i.5 = getelementptr inbounds i32, i32* %tmp56, i64 5
  %tmp67 = load i32, i32* %arrayidx8.us.i.5, align 4
  %arrayidx12.us.i61.5 = getelementptr inbounds i32, i32* %pre98, i64 %indvars.iv42.i
  %tmp68 = load i32, i32* %arrayidx12.us.i61.5, align 4
  %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
  %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
  %arrayidx8.us.i.6 = getelementptr inbounds i32, i32* %tmp56, i64 6
  %tmp69 = load i32, i32* %arrayidx8.us.i.6, align 4
  %arrayidx12.us.i61.6 = getelementptr inbounds i32, i32* %pre99, i64 %indvars.iv42.i
  %tmp70 = load i32, i32* %arrayidx12.us.i61.6, align 4
  %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
  %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
  %arrayidx8.us.i.7 = getelementptr inbounds i32, i32* %tmp56, i64 7
  %tmp71 = load i32, i32* %arrayidx8.us.i.7, align 4
  %arrayidx12.us.i61.7 = getelementptr inbounds i32, i32* %pre100, i64 %indvars.iv42.i
  %tmp72 = load i32, i32* %arrayidx12.us.i61.7, align 4
  %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
  %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
  %arrayidx8.us.i.8 = getelementptr inbounds i32, i32* %tmp56, i64 8
  %tmp73 = load i32, i32* %arrayidx8.us.i.8, align 4
  %arrayidx12.us.i61.8 = getelementptr inbounds i32, i32* %pre101, i64 %indvars.iv42.i
  %tmp74 = load i32, i32* %arrayidx12.us.i61.8, align 4
  %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
  %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
  %arrayidx8.us.i.9 = getelementptr inbounds i32, i32* %tmp56, i64 9
  %tmp75 = load i32, i32* %arrayidx8.us.i.9, align 4
  %arrayidx12.us.i61.9 = getelementptr inbounds i32, i32* %pre102, i64 %indvars.iv42.i
  %tmp76 = load i32, i32* %arrayidx12.us.i61.9, align 4
  %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
  %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
  %arrayidx16.us.i = getelementptr inbounds i32, i32* %tmp55, i64 %indvars.iv42.i
  store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
  %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10
  br i1 %exitcond, label %end, label %for.body

end:
  ret void
}

; Unlike the loop above, this IR starts out in a poor order and must be
; rescheduled.
;
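; Illustrative sketch (not checked directly): source order in this function is
; roughly
;   load, load, ..., load, mul, add, mul, add, ...
; so a pure source-order schedule would emit the imulls back to back; the
; CHECK lines below verify that they are pulled apart and interleaved with the
; addls again, as in @unrolled_mmult1.
;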
; CHECK-LABEL: %for.body
; CHECK-NOT: %rsp
; CHECK: imull 4
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 8
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 12
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 16
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 20
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 24
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 28
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 32
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 36
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK-NOT: {{imull|rsp}}
; CHECK-LABEL: %end
define void @unrolled_mmult2(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
  i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
  i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
  nounwind uwtable ssp {
entry:
  br label %for.body
for.body:
  %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
  %tmp57 = load i32, i32* %tmp56, align 4
  %arrayidx12.us.i61 = getelementptr inbounds i32, i32* %pre, i64 %indvars.iv42.i
  %tmp58 = load i32, i32* %arrayidx12.us.i61, align 4
  %arrayidx8.us.i.1 = getelementptr inbounds i32, i32* %tmp56, i64 1
  %tmp59 = load i32, i32* %arrayidx8.us.i.1, align 4
  %arrayidx12.us.i61.1 = getelementptr inbounds i32, i32* %pre94, i64 %indvars.iv42.i
  %tmp60 = load i32, i32* %arrayidx12.us.i61.1, align 4
  %arrayidx8.us.i.2 = getelementptr inbounds i32, i32* %tmp56, i64 2
  %tmp61 = load i32, i32* %arrayidx8.us.i.2, align 4
  %arrayidx12.us.i61.2 = getelementptr inbounds i32, i32* %pre95, i64 %indvars.iv42.i
  %tmp62 = load i32, i32* %arrayidx12.us.i61.2, align 4
  %arrayidx8.us.i.3 = getelementptr inbounds i32, i32* %tmp56, i64 3
  %tmp63 = load i32, i32* %arrayidx8.us.i.3, align 4
  %arrayidx12.us.i61.3 = getelementptr inbounds i32, i32* %pre96, i64 %indvars.iv42.i
  %tmp64 = load i32, i32* %arrayidx12.us.i61.3, align 4
  %arrayidx8.us.i.4 = getelementptr inbounds i32, i32* %tmp56, i64 4
  %tmp65 = load i32, i32* %arrayidx8.us.i.4, align 4
  %arrayidx12.us.i61.4 = getelementptr inbounds i32, i32* %pre97, i64 %indvars.iv42.i
  %tmp66 = load i32, i32* %arrayidx12.us.i61.4, align 4
  %arrayidx8.us.i.5 = getelementptr inbounds i32, i32* %tmp56, i64 5
  %tmp67 = load i32, i32* %arrayidx8.us.i.5, align 4
  %arrayidx12.us.i61.5 = getelementptr inbounds i32, i32* %pre98, i64 %indvars.iv42.i
  %tmp68 = load i32, i32* %arrayidx12.us.i61.5, align 4
  %arrayidx8.us.i.6 = getelementptr inbounds i32, i32* %tmp56, i64 6
  %tmp69 = load i32, i32* %arrayidx8.us.i.6, align 4
  %arrayidx12.us.i61.6 = getelementptr inbounds i32, i32* %pre99, i64 %indvars.iv42.i
  %tmp70 = load i32, i32* %arrayidx12.us.i61.6, align 4
  %mul.us.i = mul nsw i32 %tmp58, %tmp57
  %arrayidx8.us.i.7 = getelementptr inbounds i32, i32* %tmp56, i64 7
  %tmp71 = load i32, i32* %arrayidx8.us.i.7, align 4
  %arrayidx12.us.i61.7 = getelementptr inbounds i32, i32* %pre100, i64 %indvars.iv42.i
  %tmp72 = load i32, i32* %arrayidx12.us.i61.7, align 4
  %arrayidx8.us.i.8 = getelementptr inbounds i32, i32* %tmp56, i64 8
  %tmp73 = load i32, i32* %arrayidx8.us.i.8, align 4
  %arrayidx12.us.i61.8 = getelementptr inbounds i32, i32* %pre101, i64 %indvars.iv42.i
  %tmp74 = load i32, i32* %arrayidx12.us.i61.8, align 4
  %arrayidx8.us.i.9 = getelementptr inbounds i32, i32* %tmp56, i64 9
  %tmp75 = load i32, i32* %arrayidx8.us.i.9, align 4
  %arrayidx12.us.i61.9 = getelementptr inbounds i32, i32* %pre102, i64 %indvars.iv42.i
  %tmp76 = load i32, i32* %arrayidx12.us.i61.9, align 4
  %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
  %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
  %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
  %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
  %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
  %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
  %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
  %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
  %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
  %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
  %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
  %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
  %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
  %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
  %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
  %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
  %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
  %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
  %arrayidx16.us.i = getelementptr inbounds i32, i32* %tmp55, i64 %indvars.iv42.i
  store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
  %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10
  br i1 %exitcond, label %end, label %for.body

end:
  ret void
}

; A mildly interesting little block extracted from a cipher. The
; balanced heuristics are interesting here because we have resource,
; latency, and register limits all at once. For now, simply check that
; we don't use any callee-saves.
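;
; Illustrative only (hypothetical output, not checked verbatim): if the
; schedule forced extra values to live across the block, the prologue and
; epilogue would typically save and restore a callee-saved register, e.g.
;   pushq %rbx
;   ...
;   popq  %rbx
; which is exactly what the CHECK-NOT lines below reject.
;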
; CHECK-LABEL: @encpc1
; CHECK-LABEL: %entry
; CHECK-NOT: push
; CHECK-NOT: pop
; CHECK: ret
@a = external global i32, align 4
@b = external global i32, align 4
@c = external global i32, align 4
@d = external global i32, align 4
define i32 @encpc1() nounwind {
entry:
  %l1 = load i32, i32* @a, align 16
  %conv = shl i32 %l1, 8
  %s5 = lshr i32 %l1, 8
  %add = or i32 %conv, %s5
  store i32 %add, i32* @b
  %l6 = load i32, i32* @a
  %l7 = load i32, i32* @c
  %add.i = add i32 %l7, %l6
  %idxprom.i = zext i32 %l7 to i64
  %arrayidx.i = getelementptr inbounds i32, i32* @d, i64 %idxprom.i
  %l8 = load i32, i32* %arrayidx.i
  store i32 346, i32* @c
  store i32 20021, i32* @d
  %l9 = load i32, i32* @a
  store i32 %l8, i32* @a
  store i32 %l9, i32* @b
  store i32 %add.i, i32* @c
  store i32 %l9, i32* @d
  %cmp.i = icmp eq i32 %add.i, 0
  %s10 = lshr i32 %l1, 16
  %s12 = lshr i32 %l1, 24
  %s14 = lshr i32 %l1, 30
  br i1 %cmp.i, label %if, label %return
if:
  %sa = add i32 %s5, %s10
  %sb = add i32 %sa, %s12
  %sc = add i32 %sb, %s14
  br label %return
return:
  %result = phi i32 [0, %entry], [%sc, %if]
  ret i32 %result
}