1; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx | FileCheck %s
2; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | FileCheck -check-prefix=CHECK-FISL %s
3; XFAIL: *
4
5; Also run with -schedule-ppc-vsx-fma-mutation-early as a stress test for the
6; live-interval-updating logic.
7; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx -schedule-ppc-vsx-fma-mutation-early
8target datalayout = "E-m:e-i64:64-n32:64"
9target triple = "powerpc64-unknown-linux-gnu"
10
; test1: two scalar double FMAs that share the multiplicand %b and the addend
; %a; the results are stored to %d[0] and %d[1].
; The default-prefix checks expect one xsmaddmdp and one xsmaddadp, while the
; FISL-prefix checks (the -fast-isel -O0 run) expect an fmr copy plus two
; xsmaddadp instructions.
11define void @test1(double %a, double %b, double %c, double %e, double* nocapture %d) #0 {
12entry:
13  %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
14  store double %0, double* %d, align 8
15  %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
16  %arrayidx1 = getelementptr inbounds double, double* %d, i64 1
17  store double %1, double* %arrayidx1, align 8
18  ret void
19
; Default run: C1 captures the register loaded with the byte offset 8 that is
; then used as the index register of the second store.
20; CHECK-LABEL: @test1
21; CHECK-DAG: li [[C1:[0-9]+]], 8
22; CHECK-DAG: xsmaddmdp 3, 2, 1
23; CHECK-DAG: xsmaddadp 1, 2, 4
24; CHECK-DAG: stxsdx 3, 0, 7
25; CHECK-DAG: stxsdx 1, 7, [[C1]]
26; CHECK: blr
27
; Fast-isel run: the addend is first copied into register 0 with fmr.
28; CHECK-FISL-LABEL: @test1
29; CHECK-FISL-DAG: fmr 0, 1
30; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
31; CHECK-FISL-DAG: stxsdx 0, 0, 7
32; CHECK-FISL-DAG: xsmaddadp 1, 2, 4
33; CHECK-FISL-DAG: li [[C1:[0-9]+]], 8
34; CHECK-FISL-DAG: stxsdx 1, 7, [[C1]]
35; CHECK-FISL: blr
36}
37
; test2: three scalar double FMAs that share the multiplicand %b and the
; addend %a; the results are stored to %d[0], %d[1] and %d[2].
; On the default run only the two li instructions and the blr are actively
; checked: the FMA and store expectations are spelled CHECX, which does not
; match the default check prefix, so they are currently disabled (see the
; FIXME below). The FISL-prefix checks are all active.
38define void @test2(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
39entry:
40  %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
41  store double %0, double* %d, align 8
42  %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
43  %arrayidx1 = getelementptr inbounds double, double* %d, i64 1
44  store double %1, double* %arrayidx1, align 8
45  %2 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
46  %arrayidx2 = getelementptr inbounds double, double* %d, i64 2
47  store double %2, double* %arrayidx2, align 8
48  ret void
49
50; CHECK-LABEL: @test2
51; CHECK-DAG: li [[C1:[0-9]+]], 8
52; CHECK-DAG: li [[C2:[0-9]+]], 16
53; FIXME: We no longer get this because of copy ordering at the MI level.
54; CHECX-DAG: xsmaddmdp 3, 2, 1
55; CHECX-DAG: xsmaddmdp 4, 2, 1
56; CHECX-DAG: xsmaddadp 1, 2, 5
57; CHECX-DAG: stxsdx 3, 0, 8
58; CHECX-DAG: stxsdx 4, 8, [[C1]]
59; CHECX-DAG: stxsdx 1, 8, [[C2]]
60; CHECK: blr
61
; Fast-isel run: the first two FMAs each copy the addend into register 0 with
; fmr before accumulating; the third accumulates directly into register 1.
62; CHECK-FISL-LABEL: @test2
63; CHECK-FISL-DAG: fmr 0, 1
64; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
65; CHECK-FISL-DAG: stxsdx 0, 0, 8
66; CHECK-FISL-DAG: fmr 0, 1
67; CHECK-FISL-DAG: xsmaddadp 0, 2, 4
68; CHECK-FISL-DAG: li [[C1:[0-9]+]], 8
69; CHECK-FISL-DAG: stxsdx 0, 8, [[C1]]
70; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
71; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
72; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
73; CHECK-FISL: blr
74}
75
; test3: four scalar double FMAs. %0, %1 and %3 all use multiplicand %b and
; addend %a; %2 uses the result %1 as its addend. %1 is therefore consumed by
; another FMA and is also stored itself, by the last store in the function.
; Store layout: %0 -> %d[0], %2 -> %d[3], %3 -> %d[2], %1 -> %d[1].
; The default-prefix checks expect two xsmaddmdp and two xsmaddadp.
76define void @test3(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
77entry:
78  %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
79  store double %0, double* %d, align 8
80  %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
81  %2 = tail call double @llvm.fma.f64(double %b, double %c, double %1)
82  %arrayidx1 = getelementptr inbounds double, double* %d, i64 3
83  store double %2, double* %arrayidx1, align 8
84  %3 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
85  %arrayidx2 = getelementptr inbounds double, double* %d, i64 2
86  store double %3, double* %arrayidx2, align 8
87  %arrayidx3 = getelementptr inbounds double, double* %d, i64 1
88  store double %1, double* %arrayidx3, align 8
89  ret void
90
; Default run: F1 captures the destination of the fmr copy of register 1;
; C1, C2 and C3 capture the registers holding byte offsets 24, 16 and 8.
91; CHECK-LABEL: @test3
92; CHECK-DAG: fmr [[F1:[0-9]+]], 1
93; CHECK-DAG: li [[C1:[0-9]+]], 24
94; CHECK-DAG: li [[C2:[0-9]+]], 16
95; CHECK-DAG: li [[C3:[0-9]+]], 8
96; CHECK-DAG: xsmaddmdp 4, 2, 1
97; CHECK-DAG: xsmaddadp 1, 2, 5
98
99; Note: We could convert this next FMA to M-type as well, but it would require
100; re-ordering the instructions.
101; CHECK-DAG: xsmaddadp [[F1]], 2, 3
102
103; CHECK-DAG: xsmaddmdp 3, 2, 4
104; CHECK-DAG: stxsdx [[F1]], 0, 8
105; CHECK-DAG: stxsdx 3, 8, [[C1]]
106; CHECK-DAG: stxsdx 1, 8, [[C2]]
107; CHECK-DAG: stxsdx 4, 8, [[C3]]
108; CHECK: blr
109
; Fast-isel run: all four FMAs are expected as xsmaddadp, with fmr copies
; providing the accumulator registers.
110; CHECK-FISL-LABEL: @test3
111; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
112; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 4
113; CHECK-FISL-DAG: fmr 4, [[F1]]
114; CHECK-FISL-DAG: xsmaddadp 4, 2, 3
115; CHECK-FISL-DAG: li [[C1:[0-9]+]], 24
116; CHECK-FISL-DAG: stxsdx 4, 8, [[C1]]
117; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
118; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
119; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
120; CHECK-FISL-DAG: li [[C3:[0-9]+]], 8
121; CHECK-FISL-DAG: stxsdx 0, 8, [[C3]]
122; CHECK-FISL: blr
123}
124
; test4: four scalar double FMAs. %0, %1 and %3 all use multiplicand %b and
; addend %a; %2 uses the result %1 as its addend. Here %1 is stored to %d[1]
; before %2 is computed.
; Store layout: %0 -> %d[0], %1 -> %d[1], %2 -> %d[3], %3 -> %d[2].
; The default-prefix checks expect one xsmaddmdp and three xsmaddadp.
125define void @test4(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
126entry:
127  %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
128  store double %0, double* %d, align 8
129  %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
130  %arrayidx1 = getelementptr inbounds double, double* %d, i64 1
131  store double %1, double* %arrayidx1, align 8
132  %2 = tail call double @llvm.fma.f64(double %b, double %c, double %1)
133  %arrayidx3 = getelementptr inbounds double, double* %d, i64 3
134  store double %2, double* %arrayidx3, align 8
135  %3 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
136  %arrayidx4 = getelementptr inbounds double, double* %d, i64 2
137  store double %3, double* %arrayidx4, align 8
138  ret void
139
; Default run: F1 captures the destination of the fmr copy of register 1;
; C1, C2 and C3 capture the registers holding byte offsets 8, 16 and 24.
140; CHECK-LABEL: @test4
141; CHECK-DAG: fmr [[F1:[0-9]+]], 1
142; CHECK-DAG: li [[C1:[0-9]+]], 8
143; CHECK-DAG: li [[C2:[0-9]+]], 16
144; CHECK-DAG: xsmaddmdp 4, 2, 1
145
146; Note: We could convert this next FMA to M-type as well, but it would require
147; re-ordering the instructions.
148; CHECK-DAG: xsmaddadp 1, 2, 5
149
150; CHECK-DAG: xsmaddadp [[F1]], 2, 3
151; CHECK-DAG: stxsdx [[F1]], 0, 8
152; CHECK-DAG: stxsdx 4, 8, [[C1]]
153; CHECK-DAG: li [[C3:[0-9]+]], 24
154; CHECK-DAG: xsmaddadp 4, 2, 3
155; CHECK-DAG: stxsdx 4, 8, [[C3]]
156; CHECK-DAG: stxsdx 1, 8, [[C2]]
157; CHECK: blr
158
; Fast-isel run: all four FMAs are expected as xsmaddadp. Note that the
; offset captures are named differently here (C3 = 8, C1 = 24, C2 = 16).
159; CHECK-FISL-LABEL: @test4
160; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
161; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 3
162; CHECK-FISL-DAG: stxsdx 0, 0, 8
163; CHECK-FISL-DAG: fmr [[F1]], 1
164; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 4
165; CHECK-FISL-DAG: li [[C3:[0-9]+]], 8
166; CHECK-FISL-DAG: stxsdx 0, 8, [[C3]]
167; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
168; CHECK-FISL-DAG: li [[C1:[0-9]+]], 24
169; CHECK-FISL-DAG: stxsdx 0, 8, [[C1]]
170; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
171; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
172; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
173; CHECK-FISL: blr
174}
175
; Scalar double fused multiply-add intrinsic called by @test1 through @test4.
176declare double @llvm.fma.f64(double, double, double) #0
177
; testv1: two <2 x double> FMAs that share the multiplicand %b and the addend
; %a; the results are stored to %d[0] and %d[1].
; The default-prefix checks expect one xvmaddmdp and one xvmaddadp, while the
; FISL-prefix checks (the -fast-isel -O0 run) expect an xxlor copy plus two
; xvmaddadp instructions.
; In both check groups C1 captures the register loaded with the byte offset
; 16, and the second store must use that same register as its index: the
; store lines reference the capture as [[C1]] rather than re-defining it.
178define void @testv1(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double>* nocapture %d) #0 {
179entry:
180  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
181  store <2 x double> %0, <2 x double>* %d, align 8
182  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
183  %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
184  store <2 x double> %1, <2 x double>* %arrayidx1, align 8
185  ret void
186
187; CHECK-LABEL: @testv1
188; CHECK-DAG: xvmaddmdp 36, 35, 34
189; CHECK-DAG: xvmaddadp 34, 35, 37
190; CHECK-DAG: li [[C1:[0-9]+]], 16
191; CHECK-DAG: stxvd2x 36, 0, 3
192; CHECK-DAG: stxvd2x 34, 3, [[C1]]
193; CHECK: blr
194
195; CHECK-FISL-LABEL: @testv1
196; CHECK-FISL-DAG: xxlor 0, 34, 34
197; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
198; CHECK-FISL-DAG: stxvd2x 0, 0, 3
199; CHECK-FISL-DAG: xvmaddadp 34, 35, 37
200; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
201; CHECK-FISL-DAG: stxvd2x 34, 3, [[C1]]
202; CHECK-FISL: blr
203}
204
; testv2: three <2 x double> FMAs that share the multiplicand %b and the
; addend %a; the results are stored to %d[0], %d[1] and %d[2].
; On the default run only the blr is actively checked: every other
; expectation is spelled CHECX, which does not match the default check
; prefix, so those lines are currently disabled (see the FIXME below). The
; FISL-prefix checks are all active.
; C1 and C2 capture the registers loaded with byte offsets 16 and 32; the
; store lines reference those captures as [[C1]] / [[C2]] so the index
; register of each store is tied to the matching li.
205define void @testv2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
206entry:
207  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
208  store <2 x double> %0, <2 x double>* %d, align 8
209  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
210  %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
211  store <2 x double> %1, <2 x double>* %arrayidx1, align 8
212  %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
213  %arrayidx2 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 2
214  store <2 x double> %2, <2 x double>* %arrayidx2, align 8
215  ret void
216
217; CHECK-LABEL: @testv2
218; FIXME: We currently don't get this because of copy ordering on the MI level.
219; CHECX-DAG: xvmaddmdp 36, 35, 34
220; CHECX-DAG: xvmaddmdp 37, 35, 34
221; CHECX-DAG: li [[C1:[0-9]+]], 16
222; CHECX-DAG: li [[C2:[0-9]+]], 32
223; CHECX-DAG: xvmaddadp 34, 35, 38
224; CHECX-DAG: stxvd2x 36, 0, 3
225; CHECX-DAG: stxvd2x 37, 3, [[C1]]
226; CHECX-DAG: stxvd2x 34, 3, [[C2]]
227; CHECK: blr
228
229; CHECK-FISL-LABEL: @testv2
230; CHECK-FISL-DAG: xxlor 0, 34, 34
231; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
232; CHECK-FISL-DAG: stxvd2x 0, 0, 3
233; CHECK-FISL-DAG: xxlor 0, 34, 34
234; CHECK-FISL-DAG: xvmaddadp 0, 35, 37
235; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
236; CHECK-FISL-DAG: stxvd2x 0, 3, [[C1]]
237; CHECK-FISL-DAG: xvmaddadp 34, 35, 38
238; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
239; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
240; CHECK-FISL: blr
241}
242
; testv3: four <2 x double> FMAs. %0, %1 and %3 all use multiplicand %b and
; addend %a; %2 uses the result %1 as its addend. %1 is therefore consumed by
; another FMA and is also stored itself, by the last store in the function.
; Store layout: %0 -> %d[0], %2 -> %d[3], %3 -> %d[2], %1 -> %d[1].
; The default-prefix checks expect two xvmaddmdp and two xvmaddadp.
243define void @testv3(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
244entry:
245  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
246  store <2 x double> %0, <2 x double>* %d, align 8
247  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
248  %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
249  %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 3
250  store <2 x double> %2, <2 x double>* %arrayidx1, align 8
251  %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
252  %arrayidx2 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 2
253  store <2 x double> %3, <2 x double>* %arrayidx2, align 8
254  %arrayidx3 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
255  store <2 x double> %1, <2 x double>* %arrayidx3, align 8
256  ret void
257
258; Note: There is some unavoidable changeability in this variant.  If the
259; FMAs are reordered differently, the algorithm can pick a different
260; multiplicand to destroy, changing the register assignment.  There isn't
261; a good way to express this possibility, so hopefully this doesn't change
262; too often.
263
; Default run: V1 captures the destination of the xxlor copy of register 34;
; C1, C2 and C3 capture the registers holding byte offsets 48, 32 and 16.
; NOTE(review): the first store below names register 32 literally instead of
; using the V1 capture -- presumably V1 is expected to be 32; confirm.
264; CHECK-LABEL: @testv3
265; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
266; CHECK-DAG: li [[C1:[0-9]+]], 48
267; CHECK-DAG: li [[C2:[0-9]+]], 32
268; CHECK-DAG: xvmaddmdp 37, 35, 34
269; CHECK-DAG: li [[C3:[0-9]+]], 16
270
271; Note: We could convert this next FMA to M-type as well, but it would require
272; re-ordering the instructions.
273; CHECK-DAG: xvmaddadp [[V1]], 35, 36
274
275; CHECK-DAG: xvmaddmdp 36, 35, 37
276; CHECK-DAG: xvmaddadp 34, 35, 38
277; CHECK-DAG: stxvd2x 32, 0, 3
278; CHECK-DAG: stxvd2x 36, 3, [[C1]]
279; CHECK-DAG: stxvd2x 34, 3, [[C2]]
280; CHECK-DAG: stxvd2x 37, 3, [[C3]]
281; CHECK: blr
282
; Fast-isel run: all four FMAs are expected as xvmaddadp, with xxlor copies
; (captured as V1, V2, V3) providing the accumulator registers.
283; CHECK-FISL-LABEL: @testv3
284; CHECK-FISL-DAG: xxlor [[V1:[0-9]+]], 34, 34
285; CHECK-FISL-DAG: xvmaddadp [[V1]], 35, 36
286; CHECK-FISL-DAG: stxvd2x [[V1]], 0, 3
287; CHECK-FISL-DAG: xxlor [[V2:[0-9]+]], 34, 34
288; CHECK-FISL-DAG: xvmaddadp [[V2]], 35, 37
289; CHECK-FISL-DAG: xxlor [[V3:[0-9]+]], 0, 0
290; CHECK-FISL-DAG: xvmaddadp [[V3]], 35, 36
291; CHECK-FISL-DAG: li [[C1:[0-9]+]], 48
292; CHECK-FISL-DAG: stxvd2x [[V3]], 3, [[C1]]
293; CHECK-FISL-DAG: xvmaddadp 34, 35, 38
294; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
295; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
296; CHECK-FISL-DAG: li [[C3:[0-9]+]], 16
297; CHECK-FISL-DAG: stxvd2x 0, 3, [[C3]]
298; CHECK-FISL: blr
299}
300
; testv4: four <2 x double> FMAs. %0, %1 and %3 all use multiplicand %b and
; addend %a; %2 uses the result %1 as its addend. Here %1 is stored to %d[1]
; before %2 is computed.
; Store layout: %0 -> %d[0], %1 -> %d[1], %2 -> %d[3], %3 -> %d[2].
; The default-prefix checks expect one xvmaddmdp and three xvmaddadp.
301define void @testv4(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
302entry:
303  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
304  store <2 x double> %0, <2 x double>* %d, align 8
305  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
306  %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
307  store <2 x double> %1, <2 x double>* %arrayidx1, align 8
308  %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
309  %arrayidx3 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 3
310  store <2 x double> %2, <2 x double>* %arrayidx3, align 8
311  %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
312  %arrayidx4 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 2
313  store <2 x double> %3, <2 x double>* %arrayidx4, align 8
314  ret void
315
; Default run: V1 captures the destination of the xxlor copy of register 34;
; C1, C2 and C3 capture the registers holding byte offsets 16, 32 and 48.
; NOTE(review): the first store below names register 32 literally instead of
; using the V1 capture -- presumably V1 is expected to be 32; confirm.
316; CHECK-LABEL: @testv4
317; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
318; CHECK-DAG: xvmaddmdp 37, 35, 34
319; CHECK-DAG: li [[C1:[0-9]+]], 16
320; CHECK-DAG: li [[C2:[0-9]+]], 32
321; CHECK-DAG: xvmaddadp 34, 35, 38
322
323; Note: We could convert this next FMA to M-type as well, but it would require
324; re-ordering the instructions.
325; CHECK-DAG: xvmaddadp [[V1]], 35, 36
326
327; CHECK-DAG: stxvd2x 32, 0, 3
328; CHECK-DAG: stxvd2x 37, 3, [[C1]]
329; CHECK-DAG: li [[C3:[0-9]+]], 48
330; CHECK-DAG: xvmaddadp 37, 35, 36
331; CHECK-DAG: stxvd2x 37, 3, [[C3]]
332; CHECK-DAG: stxvd2x 34, 3, [[C2]]
333; CHECK: blr
334
; Fast-isel run: the FMAs are expected as xvmaddadp, with xxlor copies
; (captured as V1 and V2) providing accumulator registers.
335; CHECK-FISL-LABEL: @testv4
336; CHECK-FISL-DAG: xxlor [[V1:[0-9]+]], 34, 34
337; CHECK-FISL-DAG: xvmaddadp [[V1]], 35, 36
338; CHECK-FISL-DAG: stxvd2x 0, 0, 3
339; CHECK-FISL-DAG: xxlor [[V2:[0-9]+]], 34, 34
340; CHECK-FISL-DAG: xvmaddadp [[V2]], 35, 37
341; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
342; CHECK-FISL-DAG: stxvd2x 0, 3, [[C1]]
343; CHECK-FISL-DAG: xvmaddadp 0, 35, 37
344; CHECK-FISL-DAG: li [[C3:[0-9]+]], 48
345; CHECK-FISL-DAG: stxvd2x 0, 3, [[C3]]
346; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
347; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
348; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
349; CHECK-FISL: blr
350}
351
; <2 x double> fused multiply-add intrinsic called by @testv1 through @testv4.
352declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
353
; Attribute group #0 is referenced by every define and declare above.
; NOTE(review): the test functions store through %d yet carry readnone via
; this group -- confirm that this is intended for the test.
354attributes #0 = { nounwind readnone }
355
356