1; RUN: llc -mtriple=x86_64-unknown-unknown -march=x86-64 < %s | FileCheck %s
2
; Scalar x * 2.0 should be emitted as x + x: the check lines expect an addss
; of the argument register with itself, immediately followed by the return
; (same shape as the vector variant of this test).

; CHECK-LABEL: fmul2_f32:
; CHECK: addss %xmm0, %xmm0
; CHECK-NEXT: retq
define float @fmul2_f32(float %x) {
  %y = fmul float %x, 2.0
  ret float %y
}
9
; fmul x, 2.0 -> fadd x, x, vector case: multiplying every lane by 2.0 is
; expected to come out as an addps of the argument with itself, directly
; followed by the return.

; CHECK-LABEL: fmul2_v4f32:
; CHECK: addps %xmm0, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fmul2_v4f32(<4 x float> %x) {
  %doubled = fmul <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
  ret <4 x float> %doubled
}
19
; Both fmul operands are constant splats (4.0 and 2.0) and %x is unused, so
; the product should be folded at compile time. The check lines look for a
; movaps followed directly by the return.

; CHECK-LABEL: constant_fold_fmul_v4f32:
; CHECK: movaps
; CHECK-NEXT: ret
define <4 x float> @constant_fold_fmul_v4f32(<4 x float> %x) {
  %folded = fmul <4 x float> <float 4.0, float 4.0, float 4.0, float 4.0>, <float 2.0, float 2.0, float 2.0, float 2.0>
  ret <4 x float> %folded
}
27
; x * 0.0 with the fast-math function attributes of group #0: the result is
; expected to be produced by zeroing the return register (xorps) and
; returning right after it.

; CHECK-LABEL: fmul0_v4f32:
; CHECK: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fmul0_v4f32(<4 x float> %x) #0 {
  %zeroed = fmul <4 x float> %x, <float 0.0, float 0.0, float 0.0, float 0.0>
  ret <4 x float> %zeroed
}
35
; (x * 2.0) * 4.0 with fast-math attributes (group #0): the two splat
; constants should be merged so that exactly one mulps is emitted. The
; multiply by 2.0 must not be turned into an addps first, hence the negative
; check ahead of the mulps.

; CHECK-LABEL: fmul_c2_c4_v4f32:
; CHECK-NOT: addps
; CHECK: mulps
; CHECK-NOT: mulps
; CHECK-NEXT: ret
define <4 x float> @fmul_c2_c4_v4f32(<4 x float> %x) #0 {
  %times2 = fmul <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
  %times8 = fmul <4 x float> %times2, <float 4.0, float 4.0, float 4.0, float 4.0>
  ret <4 x float> %times8
}
46
; (x * 3.0) * 4.0 with fast-math attributes (group #0): same expectation as
; for any pair of splat constants - one mulps, no addps, then the return.

; CHECK-LABEL: fmul_c3_c4_v4f32:
; CHECK-NOT: addps
; CHECK: mulps
; CHECK-NOT: mulps
; CHECK-NEXT: ret
define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) #0 {
  %times3 = fmul <4 x float> %x, <float 3.0, float 3.0, float 3.0, float 3.0>
  %times12 = fmul <4 x float> %times3, <float 4.0, float 4.0, float 4.0, float 4.0>
  ret <4 x float> %times12
}
57
; Two non-splat constant vectors multiplied in sequence should be
; pre-multiplied into one constant: <1,2,3,4> * <5,6,7,8> = <5,12,21,32>.
; The four value checks are placed ahead of the label check, so they are
; matched against output that precedes the function's label; after the label
; exactly one mulps is expected before the return.

; CHECK: float 5
; CHECK: float 12
; CHECK: float 21
; CHECK: float 32
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat:
; CHECK: mulps
; CHECK-NOT: mulps
; CHECK-NEXT: ret
define <4 x float> @fmul_v4f32_two_consts_no_splat(<4 x float> %x) #0 {
  %first = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
  %second = fmul <4 x float> %first, <float 5.0, float 6.0, float 7.0, float 8.0>
  ret <4 x float> %second
}
72
; Non-canonical operand order: here the constant vector is the FIRST operand
; of each fmul. The two constants <1,2,3,4> and <5,6,7,8> should still be
; pre-multiplied into <5,12,21,32>, leaving a single mulps before the return.

; CHECK: float 5
; CHECK: float 12
; CHECK: float 21
; CHECK: float 32
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_non_canonical:
; CHECK: mulps
; CHECK-NOT: mulps
; CHECK-NEXT: ret
define <4 x float> @fmul_v4f32_two_consts_no_splat_non_canonical(<4 x float> %x) #0 {
  %first = fmul <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
  %second = fmul <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>, %first
  ret <4 x float> %second
}
87
; A constant multiply with more than one use should not inhibit the
; optimization: the first product feeds both the second fmul and the fadd.
; The value checks pin the constant <6,14,24,36>, which per lane equals
; c1 + c1*c2 for c1 = <1,2,3,4> and c2 = <5,6,7,8>
; (i.e. x*c1 + x*c1*c2 rewritten as x * (c1 + c1*c2)).
; After the label, a mulps and then the return are expected.

; CHECK: float 6
; CHECK: float 14
; CHECK: float 24
; CHECK: float 36
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_multiple_use:
; CHECK: mulps
; CHECK: ret
define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) #0 {
  %shared = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
  %scaled = fmul <4 x float> %shared, <float 5.0, float 6.0, float 7.0, float 8.0>
  %sum = fadd <4 x float> %shared, %scaled
  ret <4 x float> %sum
}
103
; PR22698 - http://llvm.org/bugs/show_bug.cgi?id=22698
; Regression test: combining this chain of 'fast' multiplies must terminate
; instead of looping forever while swapping constants back and forth.
; The splat constants 2.0, 3.0 and 4.0 multiply out to 24.0 in every lane,
; which the value checks look for ahead of the function label; a mulps by
; %a and the return are expected after the label.

; CHECK: float 24
; CHECK: float 24
; CHECK: float 24
; CHECK: float 24
; CHECK-LABEL: PR22698_splats:
; CHECK: mulps
; CHECK: ret
define <4 x float> @PR22698_splats(<4 x float> %a) #0 {
  %c2x3 = fmul fast <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, <float 3.0, float 3.0, float 3.0, float 3.0>
  %c4x6 = fmul fast <4 x float> <float 4.0, float 4.0, float 4.0, float 4.0>, %c2x3
  %result = fmul fast <4 x float> %a, %c4x6
  ret <4 x float> %result
}
121
; PR22698 variant with non-splat vectors: the three constant vectors
; <1,2,3,4>, <5,6,7,8> and <9,10,11,12> multiply out lane by lane to
; <45,120,231,384>. The value checks look for that constant ahead of the
; function label; a mulps by %a and the return are expected after the label.

; CHECK: float 45
; CHECK: float 120
; CHECK: float 231
; CHECK: float 384
; CHECK-LABEL: PR22698_no_splats:
; CHECK: mulps
; CHECK: ret
define <4 x float> @PR22698_no_splats(<4 x float> %a) #0 {
  %inner = fmul fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <float 5.0, float 6.0, float 7.0, float 8.0>
  %outer = fmul fast <4 x float> <float 9.0, float 10.0, float 11.0, float 12.0>, %inner
  %result = fmul fast <4 x float> %a, %outer
  ret <4 x float> %result
}
137
; Scalar (x * 2.0) * 4.0 with fast-math attributes (group #0): the constants
; should be merged into one multiply. Exactly one mulss is expected, with no
; addss before it, and the return must follow it directly.

; CHECK-LABEL: fmul_c2_c4_f32:
; CHECK-NOT: addss
; CHECK: mulss
; CHECK-NOT: mulss
; CHECK-NEXT: ret
define float @fmul_c2_c4_f32(float %x) #0 {
  %times2 = fmul float %x, 2.0
  %times8 = fmul float %times2, 4.0
  ret float %times8
}
148
; Scalar (x * 3.0) * 4.0 with fast-math attributes (group #0): the constants
; should be merged into one multiply. Exactly one mulss is expected, with no
; addss before it, and the return must be the very next line after it
; (same check shape as fmul_c2_c4_f32).

; CHECK-LABEL: fmul_c3_c4_f32:
; CHECK-NOT: addss
; CHECK: mulss
; CHECK-NOT: mulss
; CHECK-NEXT: ret
define float @fmul_c3_c4_f32(float %x) #0 {
  %y = fmul float %x, 3.0
  %z = fmul float %y, 4.0
  ret float %z
}
159
; (-x) * (-y) should simplify to x * y: each operand is negated via
; 'fsub -0.0, v', and the check lines expect a plain mulss of the two
; argument registers directly followed by the return.

; CHECK-LABEL: fmul_fneg_fneg_f32:
; CHECK: mulss %xmm1, %xmm0
; CHECK-NEXT: retq
define float @fmul_fneg_fneg_f32(float %x, float %y) {
  %neg.x = fsub float -0.0, %x
  %neg.y = fsub float -0.0, %y
  %product = fmul float %neg.x, %neg.y
  ret float %product
}
; Vector version of (-x) * (-y) -> x * y. Both operands are negated via
; 'fsub <-0.0 splat>, v'; a single mulps into %xmm0 followed by the return is
; expected. The second mulps operand may be either %xmm1 or a load through
; %rdx (presumably to tolerate targets that pass the vector by pointer -
; TODO confirm).

; CHECK-LABEL: fmul_fneg_fneg_v4f32:
; CHECK: mulps {{%xmm1|\(%rdx\)}}, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fmul_fneg_fneg_v4f32(<4 x float> %x, <4 x float> %y) {
  %neg.x = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %x
  %neg.y = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %y
  %product = fmul <4 x float> %neg.x, %neg.y
  ret <4 x float> %product
}
178
; Attribute group #0: function-level fast-math settings shared by the tests
; in this file that rely on relaxed floating-point semantics.
attributes #0 = {
  "less-precise-fpmad"="true"
  "no-infs-fp-math"="true"
  "no-nans-fp-math"="true"
  "unsafe-fp-math"="true"
}
180