; RUN: opt < %s -instcombine -S | FileCheck %s

; (V * C1) * C2 => V * (C1 * C2)
; Verify this doesn't fold when no fast-math-flags are specified: both fmul
; instructions must remain, each with its original constant vector.
define <4 x float> @test_fmul(<4 x float> %V) {
; CHECK-LABEL: @test_fmul(
; CHECK-NEXT:     [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT:     [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
; CHECK-NEXT:     ret <4 x float> [[TMP2]]
  %Y = fmul <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %Z = fmul <4 x float> %Y, <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %Z
}

; (V * C1) * C2 => V * (C1 * C2)
; Verify this folds with 'fast': the two constant vectors are multiplied
; lane by lane, leaving a single fmul by { 1.0, 4.0e+05, -9.0, 16.0 }.
define <4 x float> @test_fmul_fast(<4 x float> %V) {
; CHECK-LABEL: @test_fmul_fast(
; CHECK-NEXT:     [[TMP1:%.*]] = fmul fast <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
; CHECK-NEXT:     ret <4 x float> [[TMP1]]
  %Y = fmul fast <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %Z = fmul fast <4 x float> %Y, <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %Z
}

; (V * C1) * C2 => V * (C1 * C2)
; Verify this folds with 'reassoc' and 'nsz' ('nsz' not technically required).
; The surviving fmul keeps both flags and uses the lane-wise product of the
; two constant vectors.
define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) {
; CHECK-LABEL: @test_fmul_reassoc_nsz(
; CHECK-NEXT:     [[TMP1:%.*]] = fmul reassoc nsz <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
; CHECK-NEXT:     ret <4 x float> [[TMP1]]
  %Y = fmul reassoc nsz <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %Z = fmul reassoc nsz <4 x float> %Y, <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %Z
}

; (V * C1) * C2 => V * (C1 * C2)
; TODO: This doesn't require 'nsz'.  It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 }
; The expectations below record the current, unfolded output with 'reassoc'
; alone.
define <4 x float> @test_fmul_reassoc(<4 x float> %V) {
; CHECK-LABEL: @test_fmul_reassoc(
; CHECK-NEXT:     [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT:     [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
; CHECK-NEXT:     ret <4 x float> [[TMP2]]
  %Y = fmul reassoc <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %Z = fmul reassoc <4 x float> %Y, <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %Z
}

; (V + C1) + C2 => V + (C1 + C2)
; Verify this doesn't fold when no fast-math-flags are specified: both fadd
; instructions must remain, each with its original constant vector.
define <4 x float> @test_fadd(<4 x float> %V) {
; CHECK-LABEL: @test_fadd(
; CHECK-NEXT:     [[TMP1:%.*]] = fadd <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT:     [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
; CHECK-NEXT:     ret <4 x float> [[TMP2]]
  %Y = fadd <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %Z = fadd <4 x float> %Y, <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %Z
}

; (V + C1) + C2 => V + (C1 + C2)
; Verify this folds with 'fast': the two constant vectors are summed lane by
; lane, leaving a single fadd of { 2.0, 4.0, 0.0, 8.0 }.
define <4 x float> @test_fadd_fast(<4 x float> %V) {
; CHECK-LABEL: @test_fadd_fast(
; CHECK-NEXT:     [[TMP1:%.*]] = fadd fast <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
; CHECK-NEXT:     ret <4 x float> [[TMP1]]
  %Y = fadd fast <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %Z = fadd fast <4 x float> %Y, <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %Z
}

; (V + C1) + C2 => V + (C1 + C2)
; Verify this folds with 'reassoc' and 'nsz' ('nsz' not technically required).
; The surviving fadd keeps both flags and uses the lane-wise sum of the two
; constant vectors.
define <4 x float> @test_fadd_reassoc_nsz(<4 x float> %V) {
; CHECK-LABEL: @test_fadd_reassoc_nsz(
; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
; CHECK-NEXT:     ret <4 x float> [[TMP1]]
  %Y = fadd reassoc nsz <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %Z = fadd reassoc nsz <4 x float> %Y, <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %Z
}

; (V + C1) + C2 => V + (C1 + C2)
; TODO: This doesn't require 'nsz'.  It should fold to V + { 2.0, 4.0, 0.0, 8.0 }
; The expectations below record the current, unfolded output with 'reassoc'
; alone.
define <4 x float> @test_fadd_reassoc(<4 x float> %V) {
; CHECK-LABEL: @test_fadd_reassoc(
; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT:     [[TMP2:%.*]] = fadd reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
; CHECK-NEXT:     ret <4 x float> [[TMP2]]
  %Y = fadd reassoc <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %Z = fadd reassoc <4 x float> %Y, <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %Z
}

; ( A + C1 ) + ( B + -C1 )
; Verify this doesn't fold when no fast-math-flags are specified: all three
; fadd instructions must remain, including the +C1 and -C1 constant vectors.
define <4 x float> @test_fadds_cancel_(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: @test_fadds_cancel_(
; CHECK-NEXT:     [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT:     [[TMP2:%.*]] = fadd <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
; CHECK-NEXT:     [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT:     ret <4 x float> [[TMP3]]
  %X = fadd <4 x float> %A, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %Y = fadd <4 x float> %B, <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
  %Z = fadd <4 x float> %X, %Y
  ret <4 x float> %Z
}

; ( A + C1 ) + ( B + -C1 )
; Verify this folds to 'A + B' with 'fast': the +C1 and -C1 constant vectors
; cancel, leaving a single fadd of the two arguments.
define <4 x float> @test_fadds_cancel_fast(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: @test_fadds_cancel_fast(
; CHECK-NEXT:     [[TMP1:%.*]] = fadd fast <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:     ret <4 x float> [[TMP1]]
  %X = fadd fast <4 x float> %A, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %Y = fadd fast <4 x float> %B, <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
  %Z = fadd fast <4 x float> %X, %Y
  ret <4 x float> %Z
}

; ( A + C1 ) + ( B + -C1 )
; Verify this folds to 'A + B' with 'reassoc' and 'nsz' ('nsz' is required).
; The surviving fadd keeps both flags.
define <4 x float> @test_fadds_cancel_reassoc_nsz(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: @test_fadds_cancel_reassoc_nsz(
; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:     ret <4 x float> [[TMP1]]
  %X = fadd reassoc nsz <4 x float> %A, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %Y = fadd reassoc nsz <4 x float> %B, <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
  %Z = fadd reassoc nsz <4 x float> %X, %Y
  ret <4 x float> %Z
}

; ( A + C1 ) + ( B + -C1 )
; Verify the fold is not done with only 'reassoc' ('nsz' is required): all
; three fadd instructions must remain in the output.
define <4 x float> @test_fadds_cancel_reassoc(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: @test_fadds_cancel_reassoc(
; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT:     [[TMP2:%.*]] = fadd reassoc <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
; CHECK-NEXT:     [[TMP3:%.*]] = fadd reassoc <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT:     ret <4 x float> [[TMP3]]
  %X = fadd reassoc <4 x float> %A, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %Y = fadd reassoc <4 x float> %B, <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
  %Z = fadd reassoc <4 x float> %X, %Y
  ret <4 x float> %Z
}