1; RUN: llc -mtriple=x86_64-unknown-unknown -march=x86-64 < %s | FileCheck %s
2
; Scalar case: a multiply by 2.0 is expected to be emitted as x + x.
; CHECK-LABEL: fmul2_f32:
; CHECK: addss %xmm0, %xmm0
define float @fmul2_f32(float %x) {
  %doubled = fmul float %x, 2.000000e+00
  ret float %doubled
}
9
; Vector case: fmul x, splat(2.0) is expected to become fadd x, x.

; CHECK-LABEL: fmul2_v4f32:
; CHECK: addps %xmm0, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fmul2_v4f32(<4 x float> %x) {
  %doubled = fmul <4 x float> %x, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  ret <4 x float> %doubled
}
19
; Both fmul operands are constant vectors (%x is not used), so the checks
; expect nothing but a movaps followed directly by the return.
; CHECK-LABEL: constant_fold_fmul_v4f32:
; CHECK: movaps
; CHECK-NEXT: ret
define <4 x float> @constant_fold_fmul_v4f32(<4 x float> %x) {
  %folded = fmul <4 x float> <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  ret <4 x float> %folded
}
27
; With the relaxed FP attributes in group #0, x * 0.0 is expected to be
; emitted as a plain register zeroing (xorps) with no multiply.
; CHECK-LABEL: fmul0_v4f32:
; CHECK: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fmul0_v4f32(<4 x float> %x) #0 {
  ; zeroinitializer is the all-(+0.0) <4 x float> constant.
  %zeroed = fmul <4 x float> %x, zeroinitializer
  ret <4 x float> %zeroed
}
35
; (x * 2.0) * 4.0 with splat constants: expect exactly one mulps and no
; addps (the multiply by 2.0 must not be turned into an add first).
; CHECK-LABEL: fmul_c2_c4_v4f32:
; CHECK-NOT: addps
; CHECK: mulps
; CHECK-NOT: mulps
; CHECK-NEXT: ret
define <4 x float> @fmul_c2_c4_v4f32(<4 x float> %x) #0 {
  %times2 = fmul <4 x float> %x, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  %times8 = fmul <4 x float> %times2, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  ret <4 x float> %times8
}
46
; (x * 3.0) * 4.0 with splat constants: expect exactly one mulps and no addps.
; CHECK-LABEL: fmul_c3_c4_v4f32:
; CHECK-NOT: addps
; CHECK: mulps
; CHECK-NOT: mulps
; CHECK-NEXT: ret
define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) #0 {
  %times3 = fmul <4 x float> %x, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
  %times12 = fmul <4 x float> %times3, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  ret <4 x float> %times12
}
57
; The two non-splat constant vectors should be multiplied together up front
; (<1,2,3,4> * <5,6,7,8> = <5,12,21,32>), leaving a single mulps.
; CHECK: float 5.000000e+00
; CHECK: float 1.200000e+01
; CHECK: float 2.100000e+01
; CHECK: float 3.200000e+01
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat:
; CHECK: mulps
; CHECK-NOT: mulps
; CHECK-NEXT: ret
define <4 x float> @fmul_v4f32_two_consts_no_splat(<4 x float> %x) #0 {
  %first = fmul <4 x float> %x, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %second = fmul <4 x float> %first, <float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
  ret <4 x float> %second
}
72
; Variant of the two-constant test with the constant written as the first
; fmul operand, to confirm the non-canonical operand order is handled as well.
; CHECK: float 5.000000e+00
; CHECK: float 1.200000e+01
; CHECK: float 2.100000e+01
; CHECK: float 3.200000e+01
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_non_canonical:
; CHECK: mulps
; CHECK-NOT: mulps
; CHECK-NEXT: ret
define <4 x float> @fmul_v4f32_two_consts_no_splat_non_canonical(<4 x float> %x) #0 {
  %first = fmul <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, %x
  %second = fmul <4 x float> <float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>, %first
  ret <4 x float> %second
}
87
; A second use of the first constant multiply must not block the combine.
; Rather than two chained (dependent) multiplies, the output should contain
; two independent multiplies feeding the add.
; CHECK: float 5.000000e+00
; CHECK: float 1.200000e+01
; CHECK: float 2.100000e+01
; CHECK: float 3.200000e+01
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_multiple_use:
; CHECK: mulps
; CHECK: mulps
; CHECK: addps
; CHECK: ret
define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) #0 {
  %first = fmul <4 x float> %x, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %second = fmul <4 x float> %first, <float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
  ; %first is used here as well as by %second above.
  %sum = fadd <4 x float> %first, %second
  ret <4 x float> %sum
}
105
; PR22698 - http://llvm.org/bugs/show_bug.cgi?id=22698
; Guard against an infinite loop in which constants are swapped back and
; forth. The splat constants 2.0 * 3.0 * 4.0 should end up as a single
; splat of 24.0 feeding one mulps.

; CHECK: float 2.400000e+01
; CHECK: float 2.400000e+01
; CHECK: float 2.400000e+01
; CHECK: float 2.400000e+01
; CHECK-LABEL: PR22698_splats:
; CHECK: mulps
; CHECK: ret
define <4 x float> @PR22698_splats(<4 x float> %a) #0 {
  %c2x3 = fmul fast <4 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
  %c4x6 = fmul fast <4 x float> <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>, %c2x3
  %scaled = fmul fast <4 x float> %a, %c4x6
  ret <4 x float> %scaled
}
123
; Non-splat version of the PR22698 splat test: the three constant vectors
; <1,2,3,4>, <5,6,7,8> and <9,10,11,12> should collapse to <45,120,231,384>
; and feed a single mulps.
; CHECK: float 4.500000e+01
; CHECK: float 1.200000e+02
; CHECK: float 2.310000e+02
; CHECK: float 3.840000e+02
; CHECK-LABEL: PR22698_no_splats:
; CHECK: mulps
; CHECK: ret
define <4 x float> @PR22698_no_splats(<4 x float> %a) #0 {
  %c_inner = fmul fast <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, <float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
  %c_outer = fmul fast <4 x float> <float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01>, %c_inner
  %scaled = fmul fast <4 x float> %a, %c_outer
  ret <4 x float> %scaled
}
139
; Scalar (x * 2.0) * 4.0: expect exactly one mulss and no addss.
; CHECK-LABEL: fmul_c2_c4_f32:
; CHECK-NOT: addss
; CHECK: mulss
; CHECK-NOT: mulss
; CHECK-NEXT: ret
define float @fmul_c2_c4_f32(float %x) #0 {
  %times2 = fmul float %x, 2.000000e+00
  %times8 = fmul float %times2, 4.000000e+00
  ret float %times8
}
150
; Scalar (x * 3.0) * 4.0: expect exactly one mulss, no addss, and the return
; on the line directly after the multiply.
; CHECK-LABEL: fmul_c3_c4_f32:
; CHECK-NOT: addss
; CHECK: mulss
; CHECK-NOT: mulss
; CHECK-NEXT: ret
define float @fmul_c3_c4_f32(float %x) #0 {
  %y = fmul float %x, 3.0
  %z = fmul float %y, 4.0
  ret float %z
}
161
; (-x) * (-y): the checks expect a bare mulss of the two arguments followed
; immediately by the return, i.e. both negations are gone.
; CHECK-LABEL: fmul_fneg_fneg_f32:
; CHECK: mulss %xmm1, %xmm0
; CHECK-NEXT: retq
define float @fmul_fneg_fneg_f32(float %x, float %y) {
  %neg.x = fsub float -0.000000e+00, %x
  %neg.y = fsub float -0.000000e+00, %y
  %prod = fmul float %neg.x, %neg.y
  ret float %prod
}
; Vector (-x) * (-y): the checks expect a bare mulps followed immediately by
; the return. The second operand may be %xmm1 or a memory operand via %rdx.
; CHECK-LABEL: fmul_fneg_fneg_v4f32:
; CHECK: mulps {{%xmm1|\(%rdx\)}}, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fmul_fneg_fneg_v4f32(<4 x float> %x, <4 x float> %y) {
  %neg.x = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %neg.y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %y
  %prod = fmul <4 x float> %neg.x, %neg.y
  ret <4 x float> %prod
}
180
181attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
182