1; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-EVEN
2; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-ODD
3; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN
4; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD
5
6; The following tests use the balance-fp-ops feature, and should be independent of
7; the target cpu.
8
9; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN --check-prefix CHECK-BALFP
10; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops  -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD --check-prefix CHECK-BALFP
11
12; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
13; our test strategy is to:
14;   * Force the pass to always perform register swapping even if the dest register is of the
15;     correct color already (-aarch64-a57-fp-load-balancing-force-all)
16;   * Force the pass to ignore all hints it obtained from regalloc (-aarch64-a57-fp-load-balancing-override),
17;     and run it twice, once where it always hints even (=1), and once where it always hints odd (=2).
18;
19; We then use regex magic to check that in the two cases the register allocation is
20; different; this is what gives us the testing coverage and distinguishes cases where
21; the pass has done some work versus accidental regalloc.
22
; Module-level target settings. The RUN lines that pass only -mcpu (no -mtriple)
; take their triple from here.
23target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
24target triple = "aarch64"
25
26; Non-overlapping groups - shouldn't need any changing at all.
27
28; CHECK-LABEL: f1:
29; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
30; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
31; CHECK: fmadd [[x]]
32; CHECK: fmsub [[x]]
33; CHECK: fmadd [[x]]
34; CHECK: str [[x]]
35
; f1: two double-precision accumulation chains executed one after the other.
;   %p - read-only input array; elements 0-7 are loaded.
;   %q - output array; elements 0 and 1 are stored.
36define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {
37entry:
; Inputs for the first chain: p[0] .. p[4].
38  %0 = load double, double* %p, align 8
39  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
40  %1 = load double, double* %arrayidx1, align 8
41  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
42  %2 = load double, double* %arrayidx2, align 8
43  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
44  %3 = load double, double* %arrayidx3, align 8
45  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
46  %4 = load double, double* %arrayidx4, align 8
; First chain: %add -> %add6 -> %sub -> %add9. Each step adds or subtracts a
; product to/from the previous step's result.
47  %mul = fmul fast double %0, %1
48  %add = fadd fast double %mul, %4
49  %mul5 = fmul fast double %1, %2
50  %add6 = fadd fast double %mul5, %add
51  %mul7 = fmul fast double %1, %3
52  %sub = fsub fast double %add6, %mul7
53  %mul8 = fmul fast double %2, %3
54  %add9 = fadd fast double %mul8, %sub
; The first chain's result is stored to q[0] before the second chain starts.
55  store double %add9, double* %q, align 8
; Inputs for the second chain: p[5] .. p[7] (p[0] is reused via %0).
56  %arrayidx11 = getelementptr inbounds double, double* %p, i64 5
57  %5 = load double, double* %arrayidx11, align 8
58  %arrayidx12 = getelementptr inbounds double, double* %p, i64 6
59  %6 = load double, double* %arrayidx12, align 8
60  %arrayidx13 = getelementptr inbounds double, double* %p, i64 7
61  %7 = load double, double* %arrayidx13, align 8
; Second chain: %mul15 -> %add17 -> %add19, stored to q[1].
62  %mul15 = fmul fast double %6, %7
63  %mul16 = fmul fast double %0, %5
64  %add17 = fadd fast double %mul16, %mul15
65  %mul18 = fmul fast double %5, %6
66  %add19 = fadd fast double %mul18, %add17
67  %arrayidx20 = getelementptr inbounds double, double* %q, i64 1
68  store double %add19, double* %arrayidx20, align 8
69  ret void
70}
71
72; Overlapping groups - coloring needed.
73
74; CHECK-LABEL: f2:
75; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
76; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
77; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
78; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
79; CHECK: fmadd [[x]]
80; CHECK: fmadd [[y]]
81; CHECK: fmsub [[x]]
82; CHECK: fmadd [[y]]
83; CHECK: fmadd [[x]]
84; CHECK-BALFP: stp [[x]], [[y]]
85; CHECK-A53-DAG: str [[x]]
86; CHECK-A53-DAG: str [[y]]
87
; f2: two double-precision accumulation chains whose instructions are interleaved.
;   %p - read-only input array; elements 0-7 are loaded.
;   %q - output array; elements 0 and 1 are stored.
88define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {
89entry:
; All eight inputs are loaded up front.
90  %0 = load double, double* %p, align 8
91  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
92  %1 = load double, double* %arrayidx1, align 8
93  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
94  %2 = load double, double* %arrayidx2, align 8
95  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
96  %3 = load double, double* %arrayidx3, align 8
97  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
98  %4 = load double, double* %arrayidx4, align 8
99  %arrayidx5 = getelementptr inbounds double, double* %p, i64 5
100  %5 = load double, double* %arrayidx5, align 8
101  %arrayidx6 = getelementptr inbounds double, double* %p, i64 6
102  %6 = load double, double* %arrayidx6, align 8
103  %arrayidx7 = getelementptr inbounds double, double* %p, i64 7
104  %7 = load double, double* %arrayidx7, align 8
; Chain A: %add -> %add10 -> %sub -> %add17 (stored to q[0]).
; Chain B: %mul8 -> %add12 -> %add15 (stored to q[1]).
; The steps of the two chains alternate below, so both are in flight together.
105  %mul = fmul fast double %0, %1
106  %add = fadd fast double %mul, %7
107  %mul8 = fmul fast double %5, %6
108  %mul9 = fmul fast double %1, %2
109  %add10 = fadd fast double %mul9, %add
110  %mul11 = fmul fast double %3, %4
111  %add12 = fadd fast double %mul11, %mul8
112  %mul13 = fmul fast double %1, %3
113  %sub = fsub fast double %add10, %mul13
114  %mul14 = fmul fast double %4, %5
115  %add15 = fadd fast double %mul14, %add12
116  %mul16 = fmul fast double %2, %3
117  %add17 = fadd fast double %mul16, %sub
; Chain A's result goes to q[0], chain B's to q[1].
118  store double %add17, double* %q, align 8
119  %arrayidx19 = getelementptr inbounds double, double* %q, i64 1
120  store double %add15, double* %arrayidx19, align 8
121  ret void
122}
123
124; Dest register is live on block exit - fixup needed.
125
126; CHECK-LABEL: f3:
127; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
128; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
129; CHECK: fmadd [[x]]
130; CHECK: fmsub [[x]]
131; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
132; CHECK: str [[y]]
133
; f3: one double-precision accumulation chain whose result is used in a later
; basic block.
;   %p - read-only input array; elements 0-4 are loaded.
;   %q - output array; element 0 is stored in %if.end.
134define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {
135entry:
136  %0 = load double, double* %p, align 8
137  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
138  %1 = load double, double* %arrayidx1, align 8
139  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
140  %2 = load double, double* %arrayidx2, align 8
141  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
142  %3 = load double, double* %arrayidx3, align 8
143  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
144  %4 = load double, double* %arrayidx4, align 8
; Chain: %add -> %add6 -> %sub -> %add9.
145  %mul = fmul fast double %0, %1
146  %add = fadd fast double %mul, %4
147  %mul5 = fmul fast double %1, %2
148  %add6 = fadd fast double %mul5, %add
149  %mul7 = fmul fast double %1, %3
150  %sub = fsub fast double %add6, %mul7
151  %mul8 = fmul fast double %2, %3
152  %add9 = fadd fast double %mul8, %sub
; Branch on p[3] == 0.0. %add9 is not consumed in this block, so it stays live
; past the end of %entry.
153  %cmp = fcmp oeq double %3, 0.000000e+00
154  br i1 %cmp, label %if.then, label %if.end
155
156if.then:                                          ; preds = %entry
; Call to the external @g sits between the chain and the store of its result.
157  tail call void bitcast (void (...)* @g to void ()*)() #2
158  br label %if.end
159
160if.end:                                           ; preds = %if.then, %entry
; The chain result is finally consumed here.
161  store double %add9, double* %q, align 8
162  ret void
163}
164
; External function with a variadic prototype; called (through a bitcast to
; void ()) from the %if.then blocks of f3 and f5. No definition in this file.
165declare void @g(...) #1
166
167; Single precision version of f2.
168
169; CHECK-LABEL: f4:
170; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
171; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
172; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
173; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
174; CHECK: fmadd [[x]]
175; CHECK: fmadd [[y]]
176; CHECK: fmsub [[x]]
177; CHECK: fmadd [[y]]
178; CHECK: fmadd [[x]]
179; CHECK-BALFP: stp [[x]], [[y]]
180; CHECK-A53-DAG: str [[x]]
181; CHECK-A53-DAG: str [[y]]
182
; f4: same instruction pattern as f2, using float instead of double.
;   %p - read-only input array; elements 0-7 are loaded.
;   %q - output array; elements 0 and 1 are stored.
183define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {
184entry:
; All eight inputs are loaded up front.
185  %0 = load float, float* %p, align 4
186  %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
187  %1 = load float, float* %arrayidx1, align 4
188  %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
189  %2 = load float, float* %arrayidx2, align 4
190  %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
191  %3 = load float, float* %arrayidx3, align 4
192  %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
193  %4 = load float, float* %arrayidx4, align 4
194  %arrayidx5 = getelementptr inbounds float, float* %p, i64 5
195  %5 = load float, float* %arrayidx5, align 4
196  %arrayidx6 = getelementptr inbounds float, float* %p, i64 6
197  %6 = load float, float* %arrayidx6, align 4
198  %arrayidx7 = getelementptr inbounds float, float* %p, i64 7
199  %7 = load float, float* %arrayidx7, align 4
; Chain A: %add -> %add10 -> %sub -> %add17 (stored to q[0]).
; Chain B: %mul8 -> %add12 -> %add15 (stored to q[1]).
; The steps of the two chains alternate below.
200  %mul = fmul fast float %0, %1
201  %add = fadd fast float %mul, %7
202  %mul8 = fmul fast float %5, %6
203  %mul9 = fmul fast float %1, %2
204  %add10 = fadd fast float %mul9, %add
205  %mul11 = fmul fast float %3, %4
206  %add12 = fadd fast float %mul11, %mul8
207  %mul13 = fmul fast float %1, %3
208  %sub = fsub fast float %add10, %mul13
209  %mul14 = fmul fast float %4, %5
210  %add15 = fadd fast float %mul14, %add12
211  %mul16 = fmul fast float %2, %3
212  %add17 = fadd fast float %mul16, %sub
; Chain A's result goes to q[0], chain B's to q[1].
213  store float %add17, float* %q, align 4
214  %arrayidx19 = getelementptr inbounds float, float* %q, i64 1
215  store float %add15, float* %arrayidx19, align 4
216  ret void
217}
218
219; Single precision version of f3
220
221; CHECK-LABEL: f5:
222; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
223; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
224; CHECK: fmadd [[x]]
225; CHECK: fmsub [[x]]
226; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
227; CHECK: str [[y]]
228
; f5: same instruction pattern as f3, using float instead of double.
;   %p - read-only input array; elements 0-4 are loaded.
;   %q - output array; element 0 is stored in %if.end.
229define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {
230entry:
231  %0 = load float, float* %p, align 4
232  %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
233  %1 = load float, float* %arrayidx1, align 4
234  %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
235  %2 = load float, float* %arrayidx2, align 4
236  %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
237  %3 = load float, float* %arrayidx3, align 4
238  %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
239  %4 = load float, float* %arrayidx4, align 4
; Chain: %add -> %add6 -> %sub -> %add9.
240  %mul = fmul fast float %0, %1
241  %add = fadd fast float %mul, %4
242  %mul5 = fmul fast float %1, %2
243  %add6 = fadd fast float %mul5, %add
244  %mul7 = fmul fast float %1, %3
245  %sub = fsub fast float %add6, %mul7
246  %mul8 = fmul fast float %2, %3
247  %add9 = fadd fast float %mul8, %sub
; Branch on p[3] == 0.0. %add9 is not consumed in this block, so it stays live
; past the end of %entry.
248  %cmp = fcmp oeq float %3, 0.000000e+00
249  br i1 %cmp, label %if.then, label %if.end
250
251if.then:                                          ; preds = %entry
; Call to the external @g sits between the chain and the store of its result.
252  tail call void bitcast (void (...)* @g to void ()*)() #2
253  br label %if.end
254
255if.end:                                           ; preds = %if.then, %entry
; The chain result is finally consumed here.
256  store float %add9, float* %q, align 4
257  ret void
258}
259
260; Test that regmask clobbering stops a chain sequence.
261
262; CHECK-LABEL: f6:
263; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
264; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
265; CHECK: fmadd [[x]]
266; CHECK: fmsub [[x]]
267; CHECK: fmadd d0, {{.*}}, [[x]]
268; CHECK: bl hh
269; CHECK: str d0
270
; f6: one double-precision accumulation chain whose result is passed as the
; argument of a call; the call's return value is what gets stored.
;   %p - read-only input array; elements 0-4 are loaded.
;   %q - output array; element 0 receives the value returned by @hh.
271define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {
272entry:
273  %0 = load double, double* %p, align 8
274  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
275  %1 = load double, double* %arrayidx1, align 8
276  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
277  %2 = load double, double* %arrayidx2, align 8
278  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
279  %3 = load double, double* %arrayidx3, align 8
280  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
281  %4 = load double, double* %arrayidx4, align 8
; Chain: %add -> %add6 -> %sub -> %add9.
282  %mul = fmul fast double %0, %1
283  %add = fadd fast double %mul, %4
284  %mul5 = fmul fast double %1, %2
285  %add6 = fadd fast double %mul5, %add
286  %mul7 = fmul fast double %1, %3
287  %sub = fsub fast double %add6, %mul7
288  %mul8 = fmul fast double %2, %3
289  %add9 = fadd fast double %mul8, %sub
; The final chain value is the sole argument of @hh, and the call's result is
; stored. The expected output above pins both to d0 on either side of `bl hh`.
290  %call = tail call double @hh(double %add9) #2
291  store double %call, double* %q, align 8
292  ret void
293}
294
; External function taking and returning a double; called from f6. No
; definition in this file.
295declare double @hh(double) #1
296
297; Check that we correctly deal with repeated operands.
298; The following testcase creates:
299;   %d1 = FADDDrr killed %d0, %d0
300; We'll get a crash if we naively look at the first operand, remove it
301; from the substitution list then look at the second operand.
302
; Anchor the two patterns below to f7's own output, as is done for f1-f6.
; Without a label they could match text left over from the previous function.
; CHECK-LABEL: f7:
303; CHECK: fmadd [[x:d[0-9]+]]
304; CHECK: fadd d1, [[x]], [[x]]
305
; f7: one double-precision accumulation chain whose result is used twice by a
; single fadd.
;   %p - read-only input array; elements 0-4 are loaded.
;   %q - unused by the body.
306define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {
307entry:
308  %0 = load double, double* %p, align 8
309  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
310  %1 = load double, double* %arrayidx1, align 8
311  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
312  %2 = load double, double* %arrayidx2, align 8
313  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
314  %3 = load double, double* %arrayidx3, align 8
315  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
316  %4 = load double, double* %arrayidx4, align 8
; Chain: %add -> %add6 -> %sub -> %add9.
317  %mul = fmul fast double %0, %1
318  %add = fadd fast double %mul, %4
319  %mul5 = fmul fast double %1, %2
320  %add6 = fadd fast double %mul5, %add
321  %mul7 = fmul fast double %1, %3
322  %sub = fsub fast double %add6, %mul7
323  %mul8 = fmul fast double %2, %3
324  %add9 = fadd fast double %mul8, %sub
; Both operands of this fadd are the same chain result (the repeated-operand case).
325  %add10 = fadd fast double %add9, %add9
; %add10 is passed as the second double argument, which the expected output
; above places in d1.
326  call void @hhh(double 0.0, double %add10)
327  ret void
328}
329
; External function taking two doubles; called from f7. No definition in this
; file and no attribute group attached.
330declare void @hhh(double, double)
331
; Attribute groups referenced in this file:
;   #0 - attached to every function defined in this file (f1-f7).
;   #1 - attached to the declarations of @g and @hh; same as #0 without nounwind.
;   #2 - attached to the call sites of @g and @hh.
332attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
333attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
334attributes #2 = { nounwind }
335
336