1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -instcombine %s | FileCheck %s
3
4define <4 x i8> @add(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
5; CHECK-LABEL: @add(
6; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], [[C:%.*]]
7; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i8> [[B:%.*]], [[D:%.*]]
8; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9; CHECK-NEXT:    ret <4 x i8> [[R]]
10;
11  %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12  %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13  %r = and <4 x i8> %concat1, %concat2
14  ret <4 x i8> %r
15}
16
17; Flags should propagate.
18
19define <4 x i8> @sub(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
20; CHECK-LABEL: @sub(
21; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <2 x i8> [[A:%.*]], [[C:%.*]]
22; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <2 x i8> [[B:%.*]], [[D:%.*]]
23; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
24; CHECK-NEXT:    ret <4 x i8> [[R]]
25;
26  %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
27  %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
28  %r = sub nsw <4 x i8> %concat1, %concat2
29  ret <4 x i8> %r
30}
31
32; Flags should propagate.
33
34define <4 x i8> @mul(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
35; CHECK-LABEL: @mul(
36; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw <2 x i8> [[A:%.*]], [[C:%.*]]
37; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw <2 x i8> [[B:%.*]], [[D:%.*]]
38; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
39; CHECK-NEXT:    ret <4 x i8> [[R]]
40;
41  %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
42  %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
43  %r = mul nuw <4 x i8> %concat1, %concat2
44  ret <4 x i8> %r
45}
46
47; Undef in shuffle mask does not necessarily propagate.
48
49define <4 x i8> @and(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
50; CHECK-LABEL: @and(
51; CHECK-NEXT:    [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
52; CHECK-NEXT:    [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
53; CHECK-NEXT:    [[R:%.*]] = and <4 x i8> [[CONCAT1]], [[CONCAT2]]
54; CHECK-NEXT:    ret <4 x i8> [[R]]
55;
56  %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
57  %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
58  %r = and <4 x i8> %concat1, %concat2
59  ret <4 x i8> %r
60}
61
62; Undef in shuffle mask does not necessarily propagate.
63
64define <4 x i8> @or(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
65; CHECK-LABEL: @or(
66; CHECK-NEXT:    [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
67; CHECK-NEXT:    [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
68; CHECK-NEXT:    [[R:%.*]] = or <4 x i8> [[CONCAT1]], [[CONCAT2]]
69; CHECK-NEXT:    ret <4 x i8> [[R]]
70;
71  %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
72  %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
73  %r = or <4 x i8> %concat1, %concat2
74  ret <4 x i8> %r
75}
76
77; Undefs in shuffle mask do not necessarily propagate.
78
79define <4 x i8> @xor(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
80; CHECK-LABEL: @xor(
81; CHECK-NEXT:    [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
82; CHECK-NEXT:    [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
83; CHECK-NEXT:    [[R:%.*]] = xor <4 x i8> [[CONCAT1]], [[CONCAT2]]
84; CHECK-NEXT:    ret <4 x i8> [[R]]
85;
86  %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
87  %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
88  %r = xor <4 x i8> %concat1, %concat2
89  ret <4 x i8> %r
90}
91
92define <4 x i8> @shl(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
93; CHECK-LABEL: @shl(
94; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw <2 x i8> [[A:%.*]], [[C:%.*]]
95; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw <2 x i8> [[B:%.*]], [[D:%.*]]
96; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
97; CHECK-NEXT:    ret <4 x i8> [[R]]
98;
99  %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
100  %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
101  %r = shl nuw <4 x i8> %concat1, %concat2
102  ret <4 x i8> %r
103}
104
105define <4 x i8> @lshr(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
106; CHECK-LABEL: @lshr(
107; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact <2 x i8> [[A:%.*]], [[C:%.*]]
108; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact <2 x i8> [[B:%.*]], [[D:%.*]]
109; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 undef, i32 undef, i32 3>
110; CHECK-NEXT:    ret <4 x i8> [[R]]
111;
112  %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 3>
113  %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 undef, i32 undef, i32 3>
114  %r = lshr exact <4 x i8> %concat1, %concat2
115  ret <4 x i8> %r
116}
117
118; Extra-uses prevent the transform.
119declare void @use(<4 x i8>)
120
121define <4 x i8> @ashr(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
122; CHECK-LABEL: @ashr(
123; CHECK-NEXT:    [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
124; CHECK-NEXT:    call void @use(<4 x i8> [[CONCAT1]])
125; CHECK-NEXT:    [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
126; CHECK-NEXT:    [[R:%.*]] = ashr <4 x i8> [[CONCAT1]], [[CONCAT2]]
127; CHECK-NEXT:    ret <4 x i8> [[R]]
128;
129  %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
130  call void @use(<4 x i8> %concat1)
131  %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
132  %r = ashr <4 x i8> %concat1, %concat2
133  ret <4 x i8> %r
134}
135
136; TODO: Div/rem with undef in any element in the divisor is undef, so this should be simplified away?
137
138define <4 x i8> @sdiv(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
139; CHECK-LABEL: @sdiv(
140; CHECK-NEXT:    [[TMP1:%.*]] = sdiv exact <2 x i8> [[A:%.*]], [[C:%.*]]
141; CHECK-NEXT:    [[TMP2:%.*]] = sdiv exact <2 x i8> [[B:%.*]], [[D:%.*]]
142; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
143; CHECK-NEXT:    ret <4 x i8> [[R]]
144;
145  %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
146  %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
147  %r = sdiv exact <4 x i8> %concat1, %concat2
148  ret <4 x i8> %r
149}
150
151define <4 x i8> @srem(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
152; CHECK-LABEL: @srem(
153; CHECK-NEXT:    [[TMP1:%.*]] = srem <2 x i8> [[A:%.*]], [[C:%.*]]
154; CHECK-NEXT:    [[TMP2:%.*]] = srem <2 x i8> [[B:%.*]], [[D:%.*]]
155; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
156; CHECK-NEXT:    ret <4 x i8> [[R]]
157;
158  %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
159  %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
160  %r = srem <4 x i8> %concat1, %concat2
161  ret <4 x i8> %r
162}
163
164define <4 x i8> @udiv(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
165; CHECK-LABEL: @udiv(
166; CHECK-NEXT:    [[TMP1:%.*]] = udiv exact <2 x i8> [[A:%.*]], [[C:%.*]]
167; CHECK-NEXT:    [[TMP2:%.*]] = udiv exact <2 x i8> [[B:%.*]], [[D:%.*]]
168; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
169; CHECK-NEXT:    ret <4 x i8> [[R]]
170;
171  %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
172  %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
173  %r = udiv exact <4 x i8> %concat1, %concat2
174  ret <4 x i8> %r
175}
176
177; TODO: Div/rem with undef in any element in the divisor is undef, so this should be simplified away?
178
179define <4 x i8> @urem(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
180; CHECK-LABEL: @urem(
181; CHECK-NEXT:    [[TMP1:%.*]] = urem <2 x i8> [[A:%.*]], [[C:%.*]]
182; CHECK-NEXT:    [[TMP2:%.*]] = urem <2 x i8> [[B:%.*]], [[D:%.*]]
183; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
184; CHECK-NEXT:    ret <4 x i8> [[R]]
185;
186  %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
187  %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
188  %r = urem <4 x i8> %concat1, %concat2
189  ret <4 x i8> %r
190}
191
192define <4 x float> @fadd(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) {
193; CHECK-LABEL: @fadd(
194; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[A:%.*]], [[C:%.*]]
195; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> [[B:%.*]], [[D:%.*]]
196; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
197; CHECK-NEXT:    ret <4 x float> [[R]]
198;
199  %concat1 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
200  %concat2 = shufflevector <2 x float> %c, <2 x float> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
201  %r = fadd <4 x float> %concat1, %concat2
202  ret <4 x float> %r
203}
204
205; Fast-math-flags propagate.
206
207define <4 x float> @fsub(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) {
208; CHECK-LABEL: @fsub(
209; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast <2 x float> [[A:%.*]], [[C:%.*]]
210; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast <2 x float> [[B:%.*]], [[D:%.*]]
211; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
212; CHECK-NEXT:    ret <4 x float> [[R]]
213;
214  %concat1 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
215  %concat2 = shufflevector <2 x float> %c, <2 x float> %d, <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
216  %r = fsub fast <4 x float> %concat1, %concat2
217  ret <4 x float> %r
218}
219
220; Extra-uses prevent the transform.
221declare void @use2(<4 x float>)
222
223define <4 x float> @fmul(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) {
224; CHECK-LABEL: @fmul(
225; CHECK-NEXT:    [[CONCAT1:%.*]] = shufflevector <2 x float> [[A:%.*]], <2 x float> [[B:%.*]], <4 x i32> <i32 undef, i32 1, i32 undef, i32 3>
226; CHECK-NEXT:    [[CONCAT2:%.*]] = shufflevector <2 x float> [[C:%.*]], <2 x float> [[D:%.*]], <4 x i32> <i32 undef, i32 1, i32 undef, i32 3>
227; CHECK-NEXT:    call void @use2(<4 x float> [[CONCAT2]])
228; CHECK-NEXT:    [[R:%.*]] = fmul nnan <4 x float> [[CONCAT1]], [[CONCAT2]]
229; CHECK-NEXT:    ret <4 x float> [[R]]
230;
231  %concat1 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 undef, i32 1, i32 undef, i32 3>
232  %concat2 = shufflevector <2 x float> %c, <2 x float> %d, <4 x i32> <i32 undef, i32 1, i32 undef, i32 3>
233  call void @use2(<4 x float> %concat2)
234  %r = fmul nnan <4 x float> %concat1, %concat2
235  ret <4 x float> %r
236}
237
238; Fast-math-flags propagate.
239
240define <4 x float> @fdiv(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) {
241; CHECK-LABEL: @fdiv(
242; CHECK-NEXT:    [[TMP1:%.*]] = fdiv ninf arcp <2 x float> [[A:%.*]], [[C:%.*]]
243; CHECK-NEXT:    [[TMP2:%.*]] = fdiv ninf arcp <2 x float> [[B:%.*]], [[D:%.*]]
244; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
245; CHECK-NEXT:    ret <4 x float> [[R]]
246;
247  %concat1 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
248  %concat2 = shufflevector <2 x float> %c, <2 x float> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
249  %r = fdiv ninf arcp <4 x float> %concat1, %concat2
250  ret <4 x float> %r
251}
252
253define <4 x float> @frem(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) {
254; CHECK-LABEL: @frem(
255; CHECK-NEXT:    [[TMP1:%.*]] = frem <2 x float> [[A:%.*]], [[C:%.*]]
256; CHECK-NEXT:    [[TMP2:%.*]] = frem <2 x float> [[B:%.*]], [[D:%.*]]
257; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
258; CHECK-NEXT:    ret <4 x float> [[R]]
259;
260  %concat1 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
261  %concat2 = shufflevector <2 x float> %c, <2 x float> %d, <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
262  %r = frem <4 x float> %concat1, %concat2
263  ret <4 x float> %r
264}
265
266; https://bugs.llvm.org/show_bug.cgi?id=33026 - all of the shuffles can be eliminated.
267
268define <4 x i32> @PR33026(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
269; CHECK-LABEL: @PR33026(
270; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], [[C:%.*]]
271; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[B:%.*]], [[D:%.*]]
272; CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
273; CHECK-NEXT:    ret <4 x i32> [[SUB]]
274;
275  %concat1 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
276  %concat2 = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
277  %and = and <8 x i32> %concat1, %concat2
278  %extract1 = shufflevector <8 x i32> %and, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
279  %extract2 = shufflevector <8 x i32> %and, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
280  %sub = sub <4 x i32> %extract1, %extract2
281  ret <4 x i32> %sub
282}
283