1; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
2
; An fmul whose only use is an fadd is expected to be selected as a single
; vector fmla on .2s lanes (the RUN line passes -fp-contract=fast).
define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
;CHECK-LABEL: fmla2xfloat:
;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %prod = fmul <2 x float> %A, %B
  %sum = fadd <2 x float> %C, %prod
  ret <2 x float> %sum
}
9
; An fmul whose only use is an fadd is expected to be selected as a single
; vector fmla on .4s lanes (the RUN line passes -fp-contract=fast).
define <4 x float> @fmla4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
;CHECK-LABEL: fmla4xfloat:
;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %prod = fmul <4 x float> %A, %B
  %sum = fadd <4 x float> %C, %prod
  ret <4 x float> %sum
}
16
; An fmul whose only use is an fadd is expected to be selected as a single
; vector fmla on .2d lanes (the RUN line passes -fp-contract=fast).
define <2 x double> @fmla2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
;CHECK-LABEL: fmla2xdouble:
;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %prod = fmul <2 x double> %A, %B
  %sum = fadd <2 x double> %C, %prod
  ret <2 x double> %sum
}
23
24
; An fmul whose only use is the subtrahend of an fsub (%C - %A * %B) is
; expected to be selected as a single vector fmls on .2s lanes.
define <2 x float> @fmls2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
;CHECK-LABEL: fmls2xfloat:
;CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %prod = fmul <2 x float> %A, %B
  %diff = fsub <2 x float> %C, %prod
  ret <2 x float> %diff
}
31
; An fmul whose only use is the subtrahend of an fsub (%C - %A * %B) is
; expected to be selected as a single vector fmls on .4s lanes.
define <4 x float> @fmls4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
;CHECK-LABEL: fmls4xfloat:
;CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %prod = fmul <4 x float> %A, %B
  %diff = fsub <4 x float> %C, %prod
  ret <4 x float> %diff
}
38
; An fmul whose only use is the subtrahend of an fsub (%C - %A * %B) is
; expected to be selected as a single vector fmls on .2d lanes.
define <2 x double> @fmls2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
;CHECK-LABEL: fmls2xdouble:
;CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %prod = fmul <2 x double> %A, %B
  %diff = fsub <2 x double> %C, %prod
  ret <2 x double> %diff
}
45
46
; Another set of tests for when the llvm.fma intrinsic is called directly.
48
49declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
50declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
51declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
52
; A direct call to llvm.fma.v2f32 is expected to be selected as a vector
; fmla on .2s lanes.
define <2 x float> @fmla2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
;CHECK-LABEL: fmla2xfloat_fused:
;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %fused = call <2 x float> @llvm.fma.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C)
  ret <2 x float> %fused
}
58
; A direct call to llvm.fma.v4f32 is expected to be selected as a vector
; fmla on .4s lanes.
define <4 x float> @fmla4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
;CHECK-LABEL: fmla4xfloat_fused:
;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %fused = call <4 x float> @llvm.fma.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C)
  ret <4 x float> %fused
}
64
; A direct call to llvm.fma.v2f64 is expected to be selected as a vector
; fmla on .2d lanes.
define <2 x double> @fmla2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
;CHECK-LABEL: fmla2xdouble_fused:
;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %fused = call <2 x double> @llvm.fma.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C)
  ret <2 x double> %fused
}
70
; Negating the first multiplicand (written as an fsub from a -0.0 splat)
; before calling llvm.fma.v2f32 is expected to be selected as a vector fmls
; on .2s lanes.
define <2 x float> @fmls2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
;CHECK-LABEL: fmls2xfloat_fused:
;CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %negated = fsub <2 x float> <float -0.0, float -0.0>, %A
  %fused = call <2 x float> @llvm.fma.v2f32(<2 x float> %negated, <2 x float> %B, <2 x float> %C)
  ret <2 x float> %fused
}
77
; Negating the first multiplicand (written as an fsub from a -0.0 splat)
; before calling llvm.fma.v4f32 is expected to be selected as a vector fmls
; on .4s lanes.
define <4 x float> @fmls4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
;CHECK-LABEL: fmls4xfloat_fused:
;CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %negated = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %A
  %fused = call <4 x float> @llvm.fma.v4f32(<4 x float> %negated, <4 x float> %B, <4 x float> %C)
  ret <4 x float> %fused
}
84
; Negating the first multiplicand (written as an fsub from a -0.0 splat)
; before calling llvm.fma.v2f64 is expected to be selected as a vector fmls
; on .2d lanes.
define <2 x double> @fmls2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
;CHECK-LABEL: fmls2xdouble_fused:
;CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %negated = fsub <2 x double> <double -0.0, double -0.0>, %A
  %fused = call <2 x double> @llvm.fma.v2f64(<2 x double> %negated, <2 x double> %B, <2 x double> %C)
  ret <2 x double> %fused
}
91
92declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>)
93declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
94declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
95
; A call to llvm.fmuladd.v2f32 is expected to be selected as a vector fmla
; on .2s lanes.
define <2 x float> @fmuladd2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
;CHECK-LABEL: fmuladd2xfloat:
;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %muladd = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C)
  ret <2 x float> %muladd
}
101
; A call to llvm.fmuladd.v4f32 is expected to be selected as a vector fmla
; on .4s lanes.
define <4 x float> @fmuladd4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
;CHECK-LABEL: fmuladd4xfloat_fused:
;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %muladd = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C)
  ret <4 x float> %muladd
}
107
; A call to llvm.fmuladd.v2f64 is expected to be selected as a vector fmla
; on .2d lanes.
define <2 x double> @fmuladd2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
;CHECK-LABEL: fmuladd2xdouble_fused:
;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %muladd = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C)
  ret <2 x double> %muladd
}
113
114
; Another set of tests checking that a multiply with more than one use is not fused into fmla/fmls.
116
; The product %prod is used by two fadd instructions. The CHECK-NOT requires
; that no .2s vector fmla appears after this function's label.
define <2 x float> @fmla2xfloati_su(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
;CHECK-LABEL: fmla2xfloati_su:
;CHECK-NOT: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %prod = fmul <2 x float> %A, %B
  %sum = fadd <2 x float> %C, %prod
  %total = fadd <2 x float> %sum, %prod
  ret <2 x float> %total
}
124
; The product %prod is used by two fsub instructions. The CHECK-NOT requires
; that no .2d vector fmls appears after this function's label.
define <2 x double> @fmls2xdouble_su(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
;CHECK-LABEL: fmls2xdouble_su:
;CHECK-NOT: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %prod = fmul <2 x double> %A, %B
  %diff = fsub <2 x double> %C, %prod
  %total = fsub <2 x double> %diff, %prod
  ret <2 x double> %total
}
132
133