1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -instcombine -S | FileCheck %s
3
4; Try to eliminate binops and shuffles when the shuffle is a select in disguise:
5; PR37806 - https://bugs.llvm.org/show_bug.cgi?id=37806
6
; Mask lanes 0 and 2 select the add result and lanes 1 and 3 select %v; the
; expected output is a single add whose constant is 0 in the %v lanes.
7define <4 x i32> @add(<4 x i32> %v) {
8; CHECK-LABEL: @add(
9; CHECK-NEXT:    [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 13, i32 0>
10; CHECK-NEXT:    ret <4 x i32> [[S]]
11;
12  %b = add <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
13  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
14  ret <4 x i32> %s
15}
16
17; Propagate flags when possible.
18
; No mask lane is undef here, and the expected add keeps both nuw and nsw.
19define <4 x i32> @add_nuw_nsw(<4 x i32> %v) {
20; CHECK-LABEL: @add_nuw_nsw(
21; CHECK-NEXT:    [[S:%.*]] = add nuw nsw <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 13, i32 0>
22; CHECK-NEXT:    ret <4 x i32> [[S]]
23;
24  %b = add nuw nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
25  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
26  ret <4 x i32> %s
27}
28
; Mask lane 2 is undef; the expected add constant is undef in that same lane.
29define <4 x i32> @add_undef_mask_elt(<4 x i32> %v) {
30; CHECK-LABEL: @add_undef_mask_elt(
31; CHECK-NEXT:    [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 undef, i32 0>
32; CHECK-NEXT:    ret <4 x i32> [[S]]
33;
34  %b = add <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
35  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
36  ret <4 x i32> %s
37}
38
39; Poison flags must be dropped or undef must be replaced with safe constant.
40
; Mask lane 1 is undef; the expected add has undef in lane 1 of its constant
; and carries neither nuw nor nsw.
41define <4 x i32> @add_nuw_nsw_undef_mask_elt(<4 x i32> %v) {
42; CHECK-LABEL: @add_nuw_nsw_undef_mask_elt(
43; CHECK-NEXT:    [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 undef, i32 13, i32 0>
44; CHECK-NEXT:    ret <4 x i32> [[S]]
45;
46  %b = add nuw nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
47  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
48  ret <4 x i32> %s
49}
50
51; Constant operand 0 (LHS) could work for some non-commutative binops?
52
; The constant is operand 0 of the sub. The expected output keeps the shuffle;
; only the constant lanes the mask never selects (0-2) are turned into undef.
53define <4 x i32> @sub(<4 x i32> %v) {
54; CHECK-LABEL: @sub(
55; CHECK-NEXT:    [[B:%.*]] = sub <4 x i32> <i32 undef, i32 undef, i32 undef, i32 14>, [[V:%.*]]
56; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
57; CHECK-NEXT:    ret <4 x i32> [[S]]
58;
59  %b = sub <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
60  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
61  ret <4 x i32> %s
62}
63
64; If any element of the shuffle mask operand is undef, that element of the result is undef.
65; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
66; Preserve flags when possible. It's not safe to propagate poison-generating flags with undef constants.
67
; Mask lane 0 is undef and lane 2 selects %v, so the expected multiplier is
; <undef, 12, 1, 14>; the nsw/nuw flags of the input mul are not on the output.
68define <4 x i32> @mul(<4 x i32> %v) {
69; CHECK-LABEL: @mul(
70; CHECK-NEXT:    [[S:%.*]] = mul <4 x i32> [[V:%.*]], <i32 undef, i32 12, i32 1, i32 14>
71; CHECK-NEXT:    ret <4 x i32> [[S]]
72;
73  %b = mul nsw nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
74  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
75  ret <4 x i32> %s
76}
77
; Mask lanes 0 and 3 select %v, so the expected shift amount is 0 in those lanes.
78define <4 x i32> @shl(<4 x i32> %v) {
79; CHECK-LABEL: @shl(
80; CHECK-NEXT:    [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
81; CHECK-NEXT:    ret <4 x i32> [[S]]
82;
83  %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
84  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
85  ret <4 x i32> %s
86}
87
; No mask lane is undef; the expected shl keeps nsw and shifts the %v lanes by 0.
88define <4 x i32> @shl_nsw(<4 x i32> %v) {
89; CHECK-LABEL: @shl_nsw(
90; CHECK-NEXT:    [[S:%.*]] = shl nsw <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
91; CHECK-NEXT:    ret <4 x i32> [[S]]
92;
93  %b = shl nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
94  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
95  ret <4 x i32> %s
96}
97
; Mask lane 0 is undef; the expected shift amount for that lane is 0, not undef.
98define <4 x i32> @shl_undef_mask_elt(<4 x i32> %v) {
99; CHECK-LABEL: @shl_undef_mask_elt(
100; CHECK-NEXT:    [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
101; CHECK-NEXT:    ret <4 x i32> [[S]]
102;
103  %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
104  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
105  ret <4 x i32> %s
106}
107
; Mask lanes 0 and 3 are undef and lane 1 selects %v; all three get shift
; amount 0 in the expected output, and nuw is kept.
108define <4 x i32> @shl_nuw_undef_mask_elt(<4 x i32> %v) {
109; CHECK-LABEL: @shl_nuw_undef_mask_elt(
110; CHECK-NEXT:    [[S:%.*]] = shl nuw <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
111; CHECK-NEXT:    ret <4 x i32> [[S]]
112;
113  %b = shl nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
114  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
115  ret <4 x i32> %s
116}
117
; Only mask lane 2 selects %v, so only that lane's shift amount becomes 0.
; NOTE(review): despite the name, the constant is the second operand of the lshr.
118define <4 x i32> @lshr_constant_op0(<4 x i32> %v) {
119; CHECK-LABEL: @lshr_constant_op0(
120; CHECK-NEXT:    [[S:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
121; CHECK-NEXT:    ret <4 x i32> [[S]]
122;
123  %b = lshr <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
124  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
125  ret <4 x i32> %s
126}
127
; No mask lane is undef; the expected lshr keeps 'exact' and shifts the single
; %v lane (lane 2) by 0.
128define <4 x i32> @lshr_exact_constant_op0(<4 x i32> %v) {
129; CHECK-LABEL: @lshr_exact_constant_op0(
130; CHECK-NEXT:    [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
131; CHECK-NEXT:    ret <4 x i32> [[S]]
132;
133  %b = lshr exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
134  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
135  ret <4 x i32> %s
136}
137
; Plain lshr (no 'exact') with an undef mask lane. Mask lane 0 is undef and
; lane 3 selects %v, so both lanes are expected to shift by 0.
; This test previously used shl in both the body and the assertion, which only
; duplicated @shl_undef_mask_elt and left this lshr case uncovered.
; NOTE(review): re-run utils/update_test_checks.py to confirm the CHECK lines.
138define <4 x i32> @lshr_undef_mask_elt(<4 x i32> %v) {
139; CHECK-LABEL: @lshr_undef_mask_elt(
140; CHECK-NEXT:    [[S:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
141; CHECK-NEXT:    ret <4 x i32> [[S]]
142;
143  %b = lshr <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
144  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
145  ret <4 x i32> %s
146}
147
; Mask lanes 0 and 3 are undef and lane 1 selects %v; the expected shift
; amounts are <0, 0, 13, 0> and 'exact' is kept.
148define <4 x i32> @lshr_exact_undef_mask_elt(<4 x i32> %v) {
149; CHECK-LABEL: @lshr_exact_undef_mask_elt(
150; CHECK-NEXT:    [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
151; CHECK-NEXT:    ret <4 x i32> [[S]]
152;
153  %b = lshr exact  <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
154  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
155  ret <4 x i32> %s
156}
157
; The constant is the value being shifted (operand 0 of the lshr), so there is
; no fold: the expected output is the input unchanged.
; The assertion previously expected the shuffle operands commuted with mask
; <0, 1, 6, 3>; the other unfolded constant-operand-0 tests in this file
; (@sub, @srem, @fsub) keep the shuffle exactly as written, so it now does too.
; NOTE(review): despite the name, the constant is the first operand of the lshr.
158define <4 x i32> @lshr_constant_op1(<4 x i32> %v) {
159; CHECK-LABEL: @lshr_constant_op1(
160; CHECK-NEXT:    [[B:%.*]] = lshr exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
161; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 4, i32 5, i32 2, i32 7>
162; CHECK-NEXT:    ret <4 x i32> [[S]]
163;
164  %b = lshr exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
165  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
166  ret <4 x i32> %s
167}
168
169; Try weird types.
170
; 3-element vector: mask lane 0 selects %v (shift amount 0); lanes 1 and 2
; keep the original amounts 12 and 13.
171define <3 x i32> @ashr(<3 x i32> %v) {
172; CHECK-LABEL: @ashr(
173; CHECK-NEXT:    [[S:%.*]] = ashr <3 x i32> [[V:%.*]], <i32 0, i32 12, i32 13>
174; CHECK-NEXT:    ret <3 x i32> [[S]]
175;
176  %b = ashr <3 x i32> %v, <i32 11, i32 12, i32 13>
177  %s = shufflevector <3 x i32> %b, <3 x i32> %v, <3 x i32> <i32 3, i32 1, i32 2>
178  ret <3 x i32> %s
179}
180
; i42 elements: mask lane 0 selects %v (and-mask -1), lane 1 selects the and
; result (12), and lane 2 is undef in both the mask and the expected constant.
181define <3 x i42> @and(<3 x i42> %v) {
182; CHECK-LABEL: @and(
183; CHECK-NEXT:    [[S:%.*]] = and <3 x i42> [[V:%.*]], <i42 -1, i42 12, i42 undef>
184; CHECK-NEXT:    ret <3 x i42> [[S]]
185;
186  %b = and <3 x i42> %v, <i42 11, i42 12, i42 13>
187  %s = shufflevector <3 x i42> %v, <3 x i42> %b, <3 x i32> <i32 0, i32 4, i32 undef>
188  ret <3 x i42> %s
189}
190
191; It doesn't matter if the intermediate op has extra uses.
192
193declare void @use_v4i32(<4 x i32>)
194
; %b is also passed to @use_v4i32, so the expected output keeps %b and adds a
; second or with <0, 0, 13, 14> in place of the shuffle.
195define <4 x i32> @or(<4 x i32> %v) {
196; CHECK-LABEL: @or(
197; CHECK-NEXT:    [[B:%.*]] = or <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
198; CHECK-NEXT:    [[S:%.*]] = or <4 x i32> [[V]], <i32 0, i32 0, i32 13, i32 14>
199; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[B]])
200; CHECK-NEXT:    ret <4 x i32> [[S]]
201;
202  %b = or <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
203  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
204  call void @use_v4i32(<4 x i32> %b)
205  ret <4 x i32> %s
206}
207
; Only mask lane 1 selects the xor result; the other three lanes select %v and
; get 0 in the expected constant.
208define <4 x i32> @xor(<4 x i32> %v) {
209; CHECK-LABEL: @xor(
210; CHECK-NEXT:    [[S:%.*]] = xor <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 0, i32 0>
211; CHECK-NEXT:    ret <4 x i32> [[S]]
212;
213  %b = xor <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
214  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
215  ret <4 x i32> %s
216}
217
; The constant is the dividend (operand 0); the expected output is the input
; unchanged.
218define <4 x i32> @udiv(<4 x i32> %v) {
219; CHECK-LABEL: @udiv(
220; CHECK-NEXT:    [[B:%.*]] = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
221; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
222; CHECK-NEXT:    ret <4 x i32> [[S]]
223;
224  %b = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
225  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
226  ret <4 x i32> %s
227}
228
; Constant dividend with 'exact'; the expected output is the input unchanged,
; flag included.
229define <4 x i32> @udiv_exact(<4 x i32> %v) {
230; CHECK-LABEL: @udiv_exact(
231; CHECK-NEXT:    [[B:%.*]] = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
232; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
233; CHECK-NEXT:    ret <4 x i32> [[S]]
234;
235  %b = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
236  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
237  ret <4 x i32> %s
238}
239
; Constant dividend and an undef mask lane (lane 1); the expected output is
; still the input unchanged.
240define <4 x i32> @udiv_undef_mask_elt(<4 x i32> %v) {
241; CHECK-LABEL: @udiv_undef_mask_elt(
242; CHECK-NEXT:    [[B:%.*]] = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
243; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
244; CHECK-NEXT:    ret <4 x i32> [[S]]
245;
246  %b = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
247  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
248  ret <4 x i32> %s
249}
250
; Constant dividend, 'exact', and an undef mask lane (lane 1); the expected
; output is the input unchanged.
251define <4 x i32> @udiv_exact_undef_mask_elt(<4 x i32> %v) {
252; CHECK-LABEL: @udiv_exact_undef_mask_elt(
253; CHECK-NEXT:    [[B:%.*]] = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
254; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
255; CHECK-NEXT:    ret <4 x i32> [[S]]
256;
257  %b = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
258  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
259  ret <4 x i32> %s
260}
261
; Constant divisor: mask lanes 1 and 3 select %v, so the expected divisor is 1
; in those lanes.
262define <4 x i32> @sdiv(<4 x i32> %v) {
263; CHECK-LABEL: @sdiv(
264; CHECK-NEXT:    [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
265; CHECK-NEXT:    ret <4 x i32> [[S]]
266;
267  %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
268  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
269  ret <4 x i32> %s
270}
271
; No mask lane is undef; the expected sdiv keeps 'exact' and divides the %v
; lanes by 1.
272define <4 x i32> @sdiv_exact(<4 x i32> %v) {
273; CHECK-LABEL: @sdiv_exact(
274; CHECK-NEXT:    [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
275; CHECK-NEXT:    ret <4 x i32> [[S]]
276;
277  %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
278  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
279  ret <4 x i32> %s
280}
281
282; Div/rem need special handling if the shuffle has undef elements.
283
; Mask lanes 0 and 3 are undef; the expected divisor is 1 in those lanes rather
; than undef (lane 1 selects %v and is also 1).
284define <4 x i32> @sdiv_undef_mask_elt(<4 x i32> %v) {
285; CHECK-LABEL: @sdiv_undef_mask_elt(
286; CHECK-NEXT:    [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
287; CHECK-NEXT:    ret <4 x i32> [[S]]
288;
289  %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
290  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
291  ret <4 x i32> %s
292}
293
; Mask lanes 0 and 3 are undef; the expected divisor uses 1 there and the sdiv
; keeps 'exact'.
294define <4 x i32> @sdiv_exact_undef_mask_elt(<4 x i32> %v) {
295; CHECK-LABEL: @sdiv_exact_undef_mask_elt(
296; CHECK-NEXT:    [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
297; CHECK-NEXT:    ret <4 x i32> [[S]]
298;
299  %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
300  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
301  ret <4 x i32> %s
302}
303
; The constant is operand 0 of the urem; the expected output is the input
; unchanged.
304define <4 x i32> @urem(<4 x i32> %v) {
305; CHECK-LABEL: @urem(
306; CHECK-NEXT:    [[B:%.*]] = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
307; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
308; CHECK-NEXT:    ret <4 x i32> [[S]]
309;
310  %b = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
311  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
312  ret <4 x i32> %s
313}
314
; Constant operand 0 and an undef mask lane (lane 3); the expected output is
; the input unchanged.
315define <4 x i32> @urem_undef_mask_elt(<4 x i32> %v) {
316; CHECK-LABEL: @urem_undef_mask_elt(
317; CHECK-NEXT:    [[B:%.*]] = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
318; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
319; CHECK-NEXT:    ret <4 x i32> [[S]]
320;
321  %b = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
322  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
323  ret <4 x i32> %s
324}
325
; The constant is operand 0 of the srem; the expected output is the input
; unchanged.
326define <4 x i32> @srem(<4 x i32> %v) {
327; CHECK-LABEL: @srem(
328; CHECK-NEXT:    [[B:%.*]] = srem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
329; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
330; CHECK-NEXT:    ret <4 x i32> [[S]]
331;
332  %b = srem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
333  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
334  ret <4 x i32> %s
335}
336
337; Try FP ops/types.
338
; Mask lanes 2 and 3 select %v; the expected fadd constant is -0.0 in those
; lanes.
339define <4 x float> @fadd(<4 x float> %v) {
340; CHECK-LABEL: @fadd(
341; CHECK-NEXT:    [[S:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float -0.000000e+00, float -0.000000e+00>
342; CHECK-NEXT:    ret <4 x float> [[S]]
343;
344  %b = fadd <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
345  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
346  ret <4 x float> %s
347}
348
; The constant is operand 0 of the fsub. The expected output keeps the shuffle;
; the constant lanes the mask never selects (0 and 1) become undef.
349define <4 x double> @fsub(<4 x double> %v) {
350; CHECK-LABEL: @fsub(
351; CHECK-NEXT:    [[B:%.*]] = fsub <4 x double> <double undef, double undef, double 4.300000e+01, double 4.400000e+01>, [[V:%.*]]
352; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
353; CHECK-NEXT:    ret <4 x double> [[S]]
354;
355  %b = fsub <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
356  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
357  ret <4 x double> %s
358}
359
360; Propagate any FMF.
361
; Mask lanes 1-3 select %v (multiplier 1.0 in the expected constant); the
; nnan and ninf flags are kept on the expected fmul.
362define <4 x float> @fmul(<4 x float> %v) {
363; CHECK-LABEL: @fmul(
364; CHECK-NEXT:    [[S:%.*]] = fmul nnan ninf <4 x float> [[V:%.*]], <float 4.100000e+01, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
365; CHECK-NEXT:    ret <4 x float> [[S]]
366;
367  %b = fmul nnan ninf <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
368  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
369  ret <4 x float> %s
370}
371
; Constant dividend: the expected output keeps the shuffle and the 'fast' flag;
; the constant lanes the mask never selects (0 and 1) become undef.
372define <4 x double> @fdiv_constant_op0(<4 x double> %v) {
373; CHECK-LABEL: @fdiv_constant_op0(
374; CHECK-NEXT:    [[B:%.*]] = fdiv fast <4 x double> <double undef, double undef, double 4.300000e+01, double 4.400000e+01>, [[V:%.*]]
375; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
376; CHECK-NEXT:    ret <4 x double> [[S]]
377;
378  %b = fdiv fast <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
379  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
380  ret <4 x double> %s
381}
382
; Constant divisor: mask lane 0 is undef (undef in the expected divisor),
; lane 1 selects %v (divisor 1.0), and 'reassoc' is kept.
383define <4 x double> @fdiv_constant_op1(<4 x double> %v) {
384; CHECK-LABEL: @fdiv_constant_op1(
385; CHECK-NEXT:    [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double undef, double 1.000000e+00, double 4.300000e+01, double 4.400000e+01>
386; CHECK-NEXT:    ret <4 x double> [[S]]
387;
388  %b = fdiv reassoc <4 x double> %v, <double 41.0, double 42.0, double 43.0, double 44.0>
389  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
390  ret <4 x double> %s
391}
392
; The constant is operand 0 of the frem. The expected output keeps the shuffle;
; the constant lanes the mask never selects (2 and 3) become undef.
393define <4 x double> @frem(<4 x double> %v) {
394; CHECK-LABEL: @frem(
395; CHECK-NEXT:    [[B:%.*]] = frem <4 x double> <double 4.100000e+01, double 4.200000e+01, double undef, double undef>, [[V:%.*]]
396; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
397; CHECK-NEXT:    ret <4 x double> [[S]]
398;
399  %b = frem <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
400  %s = shufflevector <4 x double> %b, <4 x double> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
401  ret <4 x double> %s
402}
403
404; Tests where both operands of the shuffle are binops with the same opcode.
405
; Mask lanes 0 and 2 select %t1 and lanes 1 and 3 select %t2; the expected
; output is one add with the blended constant <1, 6, 3, 8>.
406define <4 x i32> @add_add(<4 x i32> %v0) {
407; CHECK-LABEL: @add_add(
408; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 3, i32 8>
409; CHECK-NEXT:    ret <4 x i32> [[T3]]
410;
411  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
412  %t2 = add <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
413  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
414  ret <4 x i32> %t3
415}
416
; Both adds carry nsw and no mask lane is undef; the expected merged add keeps
; nsw.
417define <4 x i32> @add_add_nsw(<4 x i32> %v0) {
418; CHECK-LABEL: @add_add_nsw(
419; CHECK-NEXT:    [[T3:%.*]] = add nsw <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 3, i32 8>
420; CHECK-NEXT:    ret <4 x i32> [[T3]]
421;
422  %t1 = add nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
423  %t2 = add nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
424  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
425  ret <4 x i32> %t3
426}
427
; Mask lane 2 is undef; the expected merged constant is undef in that lane.
428define <4 x i32> @add_add_undef_mask_elt(<4 x i32> %v0) {
429; CHECK-LABEL: @add_add_undef_mask_elt(
430; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 undef, i32 8>
431; CHECK-NEXT:    ret <4 x i32> [[T3]]
432;
433  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
434  %t2 = add <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
435  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
436  ret <4 x i32> %t3
437}
438
439; Poison flags must be dropped or undef must be replaced with safe constant.
440
; Both adds carry nsw and mask lane 2 is undef; the expected merged add has
; undef in lane 2 of its constant and no nsw.
441define <4 x i32> @add_add_nsw_undef_mask_elt(<4 x i32> %v0) {
442; CHECK-LABEL: @add_add_nsw_undef_mask_elt(
443; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 undef, i32 8>
444; CHECK-NEXT:    ret <4 x i32> [[T3]]
445;
446  %t1 = add nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
447  %t2 = add nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
448  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
449  ret <4 x i32> %t3
450}
451
452; Constant operand 0 (LHS) also works.
453
; Both subs have the constant as operand 0; only mask lane 3 selects %t2, so
; the expected merged constant is <1, 2, 3, 8>.
454define <4 x i32> @sub_sub(<4 x i32> %v0) {
455; CHECK-LABEL: @sub_sub(
456; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0:%.*]]
457; CHECK-NEXT:    ret <4 x i32> [[T3]]
458;
459  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
460  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
461  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
462  ret <4 x i32> %t3
463}
464
; Both subs carry nuw and no mask lane is undef; the expected merged sub keeps
; nuw.
465define <4 x i32> @sub_sub_nuw(<4 x i32> %v0) {
466; CHECK-LABEL: @sub_sub_nuw(
467; CHECK-NEXT:    [[T3:%.*]] = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0:%.*]]
468; CHECK-NEXT:    ret <4 x i32> [[T3]]
469;
470  %t1 = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
471  %t2 = sub nuw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
472  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
473  ret <4 x i32> %t3
474}
475
; Mask lane 0 is undef; the expected merged constant is undef in that lane.
476define <4 x i32> @sub_sub_undef_mask_elt(<4 x i32> %v0) {
477; CHECK-LABEL: @sub_sub_undef_mask_elt(
478; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[V0:%.*]]
479; CHECK-NEXT:    ret <4 x i32> [[T3]]
480;
481  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
482  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
483  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
484  ret <4 x i32> %t3
485}
486
487; Poison flags must be dropped or undef must be replaced with safe constant.
488
; Both subs carry nuw and mask lane 0 is undef; the expected merged sub has
; undef in lane 0 of its constant and no nuw.
489define <4 x i32> @sub_sub_nuw_undef_mask_elt(<4 x i32> %v0) {
490; CHECK-LABEL: @sub_sub_nuw_undef_mask_elt(
491; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[V0:%.*]]
492; CHECK-NEXT:    ret <4 x i32> [[T3]]
493;
494  %t1 = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
495  %t2 = sub nuw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
496  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
497  ret <4 x i32> %t3
498}
499
500; If any element of the shuffle mask operand is undef, that element of the result is undef.
501; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
502
; Mask lane 0 is undef, lanes 1 and 3 select %t2 and lane 2 selects %t1; the
; expected merged multiplier is <undef, 6, 3, 8>.
503define <4 x i32> @mul_mul(<4 x i32> %v0) {
504; CHECK-LABEL: @mul_mul(
505; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 undef, i32 6, i32 3, i32 8>
506; CHECK-NEXT:    ret <4 x i32> [[T3]]
507;
508  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
509  %t2 = mul <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
510  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
511  ret <4 x i32> %t3
512}
513
514; Preserve flags when possible.
515
; Mask lanes 0 and 1 select %t2 and lanes 2 and 3 select %t1; the expected
; merged shift amounts are <5, 6, 3, 4>.
516define <4 x i32> @shl_shl(<4 x i32> %v0) {
517; CHECK-LABEL: @shl_shl(
518; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 3, i32 4>
519; CHECK-NEXT:    ret <4 x i32> [[T3]]
520;
521  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
522  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
523  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
524  ret <4 x i32> %t3
525}
526
; Both shifts carry nuw and no mask lane is undef; the expected merged shl
; keeps nuw.
527define <4 x i32> @shl_shl_nuw(<4 x i32> %v0) {
528; CHECK-LABEL: @shl_shl_nuw(
529; CHECK-NEXT:    [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 3, i32 4>
530; CHECK-NEXT:    ret <4 x i32> [[T3]]
531;
532  %t1 = shl nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
533  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
534  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
535  ret <4 x i32> %t3
536}
537
538; Shift by undef is poison. Undef must be replaced by safe constant.
539
; Mask lanes 0 and 3 are undef; the expected merged shift amounts use 0 in
; those lanes: <0, 6, 3, 0>.
540define <4 x i32> @shl_shl_undef_mask_elt(<4 x i32> %v0) {
541; CHECK-LABEL: @shl_shl_undef_mask_elt(
542; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 3, i32 0>
543; CHECK-NEXT:    ret <4 x i32> [[T3]]
544;
545  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
546  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
547  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
548  ret <4 x i32> %t3
549}
550
551; Shift by undef is poison. Undef must be replaced by safe constant.
552
; Both shifts carry nuw and mask lanes 0 and 3 are undef; the expected merged
; shl uses shift amount 0 in those lanes and keeps nuw.
553define <4 x i32> @shl_shl_nuw_undef_mask_elt(<4 x i32> %v0) {
554; CHECK-LABEL: @shl_shl_nuw_undef_mask_elt(
555; CHECK-NEXT:    [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 3, i32 0>
556; CHECK-NEXT:    ret <4 x i32> [[T3]]
557;
558  %t1 = shl nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
559  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
560  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
561  ret <4 x i32> %t3
562}
563
564; Can't propagate the 'exact' flag here: only one of the two shifts has it.
565
; Constants are operand 0 of both shifts. Only %t1 is 'exact', and the
; expected merged lshr (constant <5, 6, 3, 8>) has no 'exact'.
566define <4 x i32> @lshr_lshr(<4 x i32> %v0) {
567; CHECK-LABEL: @lshr_lshr(
568; CHECK-NEXT:    [[T3:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[V0:%.*]]
569; CHECK-NEXT:    ret <4 x i32> [[T3]]
570;
571  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
572  %t2 = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
573  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
574  ret <4 x i32> %t3
575}
576
577; Try weird types.
578
; 3-element vector: mask lane 0 selects %t2 and lanes 1 and 2 select %t1; the
; expected merged shift amounts are <4, 2, 3>.
579define <3 x i32> @ashr_ashr(<3 x i32> %v0) {
580; CHECK-LABEL: @ashr_ashr(
581; CHECK-NEXT:    [[T3:%.*]] = ashr <3 x i32> [[V0:%.*]], <i32 4, i32 2, i32 3>
582; CHECK-NEXT:    ret <3 x i32> [[T3]]
583;
584  %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
585  %t2 = ashr <3 x i32> %v0, <i32 4, i32 5, i32 6>
586  %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 3, i32 1, i32 2>
587  ret <3 x i32> %t3
588}
589
; i42 elements: lane 0 selects %t1 (1), lane 1 selects %t2 (5), and lane 2 is
; undef in both the mask and the expected constant.
590define <3 x i42> @and_and(<3 x i42> %v0) {
591; CHECK-LABEL: @and_and(
592; CHECK-NEXT:    [[T3:%.*]] = and <3 x i42> [[V0:%.*]], <i42 1, i42 5, i42 undef>
593; CHECK-NEXT:    ret <3 x i42> [[T3]]
594;
595  %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
596  %t2 = and <3 x i42> %v0, <i42 4, i42 5, i42 6>
597  %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef>
598  ret <3 x i42> %t3
599}
600
601; It doesn't matter if the intermediate ops have extra uses.
602
; %t1 is also passed to @use_v4i32, so the expected output keeps %t1 next to
; the merged or with <5, 6, 3, 4>; %t2 does not appear in the expected output.
603define <4 x i32> @or_or(<4 x i32> %v0) {
604; CHECK-LABEL: @or_or(
605; CHECK-NEXT:    [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
606; CHECK-NEXT:    [[T3:%.*]] = or <4 x i32> [[V0]], <i32 5, i32 6, i32 3, i32 4>
607; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
608; CHECK-NEXT:    ret <4 x i32> [[T3]]
609;
610  %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
611  %t2 = or <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
612  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
613  call void @use_v4i32(<4 x i32> %t1)
614  ret <4 x i32> %t3
615}
616
; %t2 is also passed to @use_v4i32, so the expected output keeps %t2 next to
; the merged xor with <1, 6, 3, 4>; %t1 does not appear in the expected output.
617define <4 x i32> @xor_xor(<4 x i32> %v0) {
618; CHECK-LABEL: @xor_xor(
619; CHECK-NEXT:    [[T2:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 7, i32 8>
620; CHECK-NEXT:    [[T3:%.*]] = xor <4 x i32> [[V0]], <i32 1, i32 6, i32 3, i32 4>
621; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T2]])
622; CHECK-NEXT:    ret <4 x i32> [[T3]]
623;
624  %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
625  %t2 = xor <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
626  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
627  call void @use_v4i32(<4 x i32> %t2)
628  ret <4 x i32> %t3
629}
630
; Constant dividends, and both udivs have an extra use; the expected output
; keeps both and adds a merged udiv with dividend <1, 2, 3, 8>.
631define <4 x i32> @udiv_udiv(<4 x i32> %v0) {
632; CHECK-LABEL: @udiv_udiv(
633; CHECK-NEXT:    [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
634; CHECK-NEXT:    [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V0]]
635; CHECK-NEXT:    [[T3:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0]]
636; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
637; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T2]])
638; CHECK-NEXT:    ret <4 x i32> [[T3]]
639;
640  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
641  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
642  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
643  call void @use_v4i32(<4 x i32> %t1)
644  call void @use_v4i32(<4 x i32> %t2)
645  ret <4 x i32> %t3
646}
647
648; Div/rem need special handling if the shuffle has undef elements.
649
; Constant divisors with no undef mask lane: lanes 0-1 select %t1 and lanes 2-3
; select %t2, giving the expected merged divisor <1, 2, 7, 8>.
650define <4 x i32> @sdiv_sdiv(<4 x i32> %v0) {
651; CHECK-LABEL: @sdiv_sdiv(
652; CHECK-NEXT:    [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 8>
653; CHECK-NEXT:    ret <4 x i32> [[T3]]
654;
655  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
656  %t2 = sdiv <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
657  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
658  ret <4 x i32> %t3
659}
660
; Both sdivs are 'exact' and no mask lane is undef; the expected merged sdiv
; keeps 'exact'.
661define <4 x i32> @sdiv_sdiv_exact(<4 x i32> %v0) {
662; CHECK-LABEL: @sdiv_sdiv_exact(
663; CHECK-NEXT:    [[T3:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 8>
664; CHECK-NEXT:    ret <4 x i32> [[T3]]
665;
666  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
667  %t2 = sdiv exact <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
668  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
669  ret <4 x i32> %t3
670}
671
; Mask lanes 0 and 3 are undef; the expected merged divisor is 1 in those
; lanes rather than undef: <1, 2, 7, 1>.
672define <4 x i32> @sdiv_sdiv_undef_mask_elt(<4 x i32> %v0) {
673; CHECK-LABEL: @sdiv_sdiv_undef_mask_elt(
674; CHECK-NEXT:    [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 1>
675; CHECK-NEXT:    ret <4 x i32> [[T3]]
676;
677  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
678  %t2 = sdiv <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
679  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
680  ret <4 x i32> %t3
681}
682
; Both sdivs are 'exact' and mask lanes 0 and 3 are undef; the expected merged
; sdiv divides those lanes by 1 and keeps 'exact'.
683define <4 x i32> @sdiv_sdiv_exact_undef_mask_elt(<4 x i32> %v0) {
684; CHECK-LABEL: @sdiv_sdiv_exact_undef_mask_elt(
685; CHECK-NEXT:    [[T3:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 1>
686; CHECK-NEXT:    ret <4 x i32> [[T3]]
687;
688  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
689  %t2 = sdiv exact <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
690  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
691  ret <4 x i32> %t3
692}
693
; Constants are operand 0 of both urems; with no undef mask lane the expected
; merged constant is <1, 2, 7, 8>.
694define <4 x i32> @urem_urem(<4 x i32> %v0) {
695; CHECK-LABEL: @urem_urem(
696; CHECK-NEXT:    [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 8>, [[V0:%.*]]
697; CHECK-NEXT:    ret <4 x i32> [[T3]]
698;
699  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
700  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
701  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
702  ret <4 x i32> %t3
703}
704
705; This is folded by using a safe constant.
706
; Mask lane 3 is undef; the expected merged operand-0 constant has 0 in that
; lane rather than undef.
707define <4 x i32> @urem_urem_undef_mask_elt(<4 x i32> %v0) {
708; CHECK-LABEL: @urem_urem_undef_mask_elt(
709; CHECK-NEXT:    [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 0>, [[V0:%.*]]
710; CHECK-NEXT:    ret <4 x i32> [[T3]]
711;
712  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
713  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
714  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
715  ret <4 x i32> %t3
716}
717
; Constants are operand 0 of both srems; only mask lane 2 selects %t2, so the
; expected merged constant is <1, 2, 7, 4>.
718define <4 x i32> @srem_srem(<4 x i32> %v0) {
719; CHECK-LABEL: @srem_srem(
720; CHECK-NEXT:    [[T3:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 7, i32 4>, [[V0:%.*]]
721; CHECK-NEXT:    ret <4 x i32> [[T3]]
722;
723  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
724  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
725  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
726  ret <4 x i32> %t3
727}
728
729; This is folded by using a safe constant.
730
; Mask lane 1 is undef; the expected merged operand-0 constant has 0 in that
; lane rather than undef.
731define <4 x i32> @srem_srem_undef_mask_elt(<4 x i32> %v0) {
732; CHECK-LABEL: @srem_srem_undef_mask_elt(
733; CHECK-NEXT:    [[T3:%.*]] = srem <4 x i32> <i32 1, i32 0, i32 7, i32 4>, [[V0:%.*]]
734; CHECK-NEXT:    ret <4 x i32> [[T3]]
735;
736  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
737  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
738  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
739  ret <4 x i32> %t3
740}
741
; Try FP ops/types.
; fadd with a shared variable operand: lanes 0-1 use the %t1 constants and
; lanes 2-3 use the %t2 constants in the single expected fadd.

define <4 x float> @fadd_fadd(<4 x float> %v0) {
; CHECK-LABEL: @fadd_fadd(
; CHECK-NEXT:    [[T3:%.*]] = fadd <4 x float> [[V0:%.*]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[T3]]
;
  %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fadd <4 x float> %v0, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %t3
}
754
; The constant is operand 0 of both fsub ops. Lane 0 of the shuffle mask is
; undef, and the expected merged constant keeps undef in that lane.
define <4 x double> @fsub_fsub(<4 x double> %v0) {
; CHECK-LABEL: @fsub_fsub(
; CHECK-NEXT:    [[T3:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x double> [[T3]]
;
  %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v0
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}
765
; Intersect any FMF.
; Both fmul ops here carry the same flags ('nnan ninf'), so the expected fmul
; keeps both of them.

define <4 x float> @fmul_fmul(<4 x float> %v0) {
; CHECK-LABEL: @fmul_fmul(
; CHECK-NEXT:    [[T3:%.*]] = fmul nnan ninf <4 x float> [[V0:%.*]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[T3]]
;
  %t1 = fmul nnan ninf <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fmul nnan ninf <4 x float> %v0, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %t3
}
778
; %t1 is 'fast' while %t2 is only 'nnan arcp'; the expected fdiv carries just
; 'nnan arcp'. Lane 0 of the mask is undef and stays undef in the merged constant.
define <4 x double> @fdiv_fdiv(<4 x double> %v0) {
; CHECK-LABEL: @fdiv_fdiv(
; CHECK-NEXT:    [[T3:%.*]] = fdiv nnan arcp <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x double> [[T3]]
;
  %t1 = fdiv fast <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fdiv nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v0
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}
789
; The variable operand must be either the first operand or second operand in both binops.
; Negative test: %v0 is operand 1 of %t1 but operand 0 of %t2, so both frem ops
; and the shuffle remain. Only the constant lanes that the shuffle does not
; select are expected to become undef.

define <4 x double> @frem_frem(<4 x double> %v0) {
; CHECK-LABEL: @frem_frem(
; CHECK-NEXT:    [[T1:%.*]] = frem <4 x double> <double 1.000000e+00, double 2.000000e+00, double undef, double undef>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = frem <4 x double> [[V0]], <double undef, double undef, double 7.000000e+00, double 8.000000e+00>
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x double> [[T3]]
;
  %t1 = frem <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = frem <4 x double> %v0, <double 5.0, double 6.0, double 7.0, double 8.0>
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}
804
; Two different variable operands (%v0 and %v1). The expected output shuffles
; the variables with the original mask and then applies a single add with the
; lane-wise merged constant.
define <4 x i32> @add_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @add_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}
816
; Constant operand 0 (LHS) also works.
; Only lane 3 comes from %t2, so the expected merged constant is <1, 2, 3, 8>
; and the variables are shuffled with the same mask.

define <4 x i32> @sub_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}
830
; Both sub ops are 'nsw' and the mask has no undef lanes, so the expected sub
; keeps 'nsw'.
define <4 x i32> @sub_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_nsw(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub nsw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}
842
; Lane 0 of the mask is undef: the expected output keeps undef in that lane of
; both the new shuffle mask and the merged constant.
define <4 x i32> @sub_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}
854
; Poison flags must be dropped or undef must be replaced with safe constant.
; Here the expected output takes the first option: 'nsw' is dropped and lane 0
; of the merged constant stays undef.

define <4 x i32> @sub_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_nsw_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub nsw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}
868
; If any element of the shuffle mask operand is undef, that element of the result is undef.
; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
; NOTE(review): the mask in this test has no undef lanes, and the expected output
; still contains a shuffle (of %v0/%v1) ahead of the single mul. The remark above
; appears to describe the *_undef_mask_elt variants of this test - confirm.

define <4 x i32> @mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}
883
; Both mul ops are 'nuw' and the mask has no undef lanes, so the expected mul
; keeps 'nuw'.
define <4 x i32> @mul_2_vars_nuw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_nuw(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}
895
; Lane 1 of the mask is undef: the expected output keeps undef in that lane of
; both the new shuffle mask and the merged constant.
define <4 x i32> @mul_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 undef, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %t3
}
907
; Poison flags must be dropped or undef must be replaced with safe constant.
; Here the expected output takes the first option: 'nuw' is dropped and lane 1
; of the merged constant stays undef.

define <4 x i32> @mul_2_vars_nuw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_nuw_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 undef, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %t3
}
921
; Preserve flags when possible.
; This first shl test has no flags on either op: lane 1 is taken from %t2 and
; the other lanes from %t1, giving the shift-amount constant <1, 6, 3, 4>.

define <4 x i32> @shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}
935
; Both shl ops are 'nsw' and the mask has no undef lanes, so the expected shl
; keeps 'nsw'.
define <4 x i32> @shl_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_nsw(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}
947
; Shift by undef is poison. Undef is replaced by safe constant.
; Lanes 0 and 3 of the mask are undef; the expected shift-amount constant has 0
; in those lanes while the new shuffle mask keeps them undef.

define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[TMP1]], <i32 0, i32 6, i32 3, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}
961
; Shift by undef is poison. Undef is replaced by safe constant.
; Same as the previous test but with 'nsw' on both inputs; the expected shl
; keeps 'nsw' and uses 0 as the shift amount in the undef mask lanes (0 and 3).

define <4 x i32> @shl_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_nsw_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
; CHECK-NEXT:    [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 0, i32 6, i32 3, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}
975
; Can't propagate the flag here.
; Only %t2 is 'exact', so the expected lshr has no 'exact'. The mask takes lanes
; 0, 1 and 3 from %t2; the expected shuffle lists %v1 first with the mask
; indices adjusted to match.

define <4 x i32> @lshr_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}
989
; Both lshr ops are 'exact' and the mask has no undef lanes, so the expected
; lshr keeps 'exact'.
define <4 x i32> @lshr_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_exact(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}
1001
; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.
; The variables are the shift amounts (operand 1) and lane 0 of the mask is
; undef. Currently not folded: the expected output is the same as the input.

define <4 x i32> @lshr_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[T1:%.*]] = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}
1016
; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.
; Same as the previous test but with 'exact' on both lshr ops. Currently not
; folded: the expected output is the same as the input.

define <4 x i32> @lshr_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_exact_undef_mask_elt(
; CHECK-NEXT:    [[T1:%.*]] = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}
1031
; Try weird types.
; 3-element vectors: lane 0 is taken from %t2 and lanes 1-2 from %t1. The
; expected shuffle lists %v1 first with the mask indices adjusted to match.

define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) {
; CHECK-LABEL: @ashr_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[V1:%.*]], <3 x i32> [[V0:%.*]], <3 x i32> <i32 0, i32 4, i32 5>
; CHECK-NEXT:    [[T3:%.*]] = ashr <3 x i32> [[TMP1]], <i32 4, i32 2, i32 3>
; CHECK-NEXT:    ret <3 x i32> [[T3]]
;
  %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
  %t2 = ashr <3 x i32> %v1, <i32 4, i32 5, i32 6>
  %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 3, i32 1, i32 2>
  ret <3 x i32> %t3
}
1045
; 3-element vectors of i42. Lane 2 of the mask is undef and stays undef in both
; the new shuffle mask and the merged constant.
define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) {
; CHECK-LABEL: @and_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i42> [[V0:%.*]], <3 x i42> [[V1:%.*]], <3 x i32> <i32 0, i32 4, i32 undef>
; CHECK-NEXT:    [[T3:%.*]] = and <3 x i42> [[TMP1]], <i42 1, i42 5, i42 undef>
; CHECK-NEXT:    ret <3 x i42> [[T3]]
;
  %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
  %t2 = and <3 x i42> %v1, <i42 4, i42 5, i42 6>
  %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef>
  ret <3 x i42> %t3
}
1057
; It doesn't matter if only one intermediate op has extra uses.
; %t1 is also passed to @use_v4i32, so it is kept in the expected output, but
; the shuffle of %t1/%t2 is still replaced by a shuffle of the variables
; followed by a single new 'or'.

define <4 x i32> @or_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @or_2_vars(
; CHECK-NEXT:    [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = or <4 x i32> [[TMP1]], <i32 5, i32 6, i32 3, i32 4>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = or <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  call void @use_v4i32(<4 x i32> %t1)
  ret <4 x i32> %t3
}
1074
; But we don't transform if both intermediate values have extra uses.
; Negative test: %t1 and %t2 are each passed to @use_v4i32, and the expected
; output is the same as the input.

define <4 x i32> @xor_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @xor_2_vars(
; CHECK-NEXT:    [[T1:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT:    [[T2:%.*]] = xor <4 x i32> [[V1:%.*]], <i32 5, i32 6, i32 7, i32 8>
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T2]])
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = xor <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  call void @use_v4i32(<4 x i32> %t1)
  call void @use_v4i32(<4 x i32> %t2)
  ret <4 x i32> %t3
}
1093
; Div/rem need special handling if the shuffle has undef elements.
; This mask has no undef lanes. The variables are the divisors (operand 1);
; they are shuffled first and a single udiv of the merged constant follows.

define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = udiv <4 x i32> <i32 5, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}
1107
; Both udiv ops are 'exact' and the mask has no undef lanes, so the expected
; udiv keeps 'exact'.
define <4 x i32> @udiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_exact(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = udiv exact <4 x i32> <i32 5, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}
1119
; TODO: This could be transformed using a safe constant.
; The variables are the divisors (operand 1) and lane 0 of the mask is undef.
; Currently not folded: the expected output is the same as the input.

define <4 x i32> @udiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}
1134
; TODO: This could be transformed using a safe constant.
; Same as the previous test but with 'exact' on both udiv ops. Currently not
; folded: the expected output is the same as the input.

define <4 x i32> @udiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_exact_undef_mask_elt(
; CHECK-NEXT:    [[T1:%.*]] = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}
1149
; If the shuffle has no undefs, it's safe to shuffle the variables first.
; Here the variables are the dividends (operand 0); lane 2 is taken from %t2,
; so the expected divisor constant is <1, 2, 7, 4>.

define <4 x i32> @sdiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %t3
}
1163
; Both sdiv ops are 'exact' and the mask has no undef lanes, so the expected
; sdiv keeps 'exact'.
define <4 x i32> @sdiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_exact(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv exact <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %t3
}
1175
; Div by undef is UB. Undef is replaced by safe constant.
; Lane 3 of the mask is undef; the expected divisor constant has 1 in that lane
; while the new shuffle mask keeps it undef.

define <4 x i32> @sdiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT:    [[T3:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}
1189
; Div by undef is UB. Undef is replaced by safe constant.
; Same as the previous test but with 'exact' on both inputs; the expected sdiv
; keeps 'exact' and uses 1 as the divisor in the undef mask lane (lane 3).

define <4 x i32> @sdiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_exact_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT:    [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv exact <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}
1203
; If the shuffle has no undefs, it's safe to shuffle the variables first.
; Here the variables are the divisors (operand 1); lanes 2-3 are taken from
; %t2, so the expected dividend constant is <1, 2, 7, 8>.

define <4 x i32> @urem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @urem_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %t3
}
1217
; The variables are the divisors (operand 1) and lane 1 of the mask is undef.
; Not folded: the expected output is the same as the input.
define <4 x i32> @srem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @srem_2_vars(
; CHECK-NEXT:    [[T1:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
  ret <4 x i32> %t3
}
1230
; Try FP ops/types.
; Two-variable fadd: the variables are shuffled with the original mask and a
; single fadd applies the merged constant <1.0, 2.0, 7.0, 8.0>.

define <4 x float> @fadd_2_vars(<4 x float> %v0, <4 x float> %v1) {
; CHECK-LABEL: @fadd_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[T3]]
;
  %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fadd <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %t3
}
1244
; The constant is operand 0 of both fsub ops. Lane 0 of the mask is undef and
; stays undef in both the new shuffle mask and the merged constant.
define <4 x double> @fsub_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @fsub_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]
; CHECK-NEXT:    ret <4 x double> [[T3]]
;
  %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}
1256
; Intersect any FMF.
; Both fmul ops here carry the same flags ('reassoc nsz'), so the expected fmul
; keeps both of them.

define <4 x float> @fmul_2_vars(<4 x float> %v0, <4 x float> %v1) {
; CHECK-LABEL: @fmul_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = fmul reassoc nsz <4 x float> [[TMP1]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[T3]]
;
  %t1 = fmul reassoc nsz <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fmul reassoc nsz <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %t3
}
1270
; %t1 is 'nnan ninf' and %t2 is 'nnan arcp'; the expected frem carries only the
; common 'nnan'. Lane 0 of the mask is undef and stays undef in the new shuffle
; mask and the merged constant.
define <4 x double> @frem_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @frem_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = frem nnan <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]
; CHECK-NEXT:    ret <4 x double> [[T3]]
;
  %t1 = frem nnan ninf <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = frem nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}
1282
; The variable operand must be either the first operand or second operand in both binops.
; Negative test: the variable is operand 1 of %t1 but operand 0 of %t2, so both
; fdiv ops and the shuffle remain. Only the constant lanes that the shuffle
; does not select are expected to become undef.

define <4 x double> @fdiv_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @fdiv_2_vars(
; CHECK-NEXT:    [[T1:%.*]] = fdiv <4 x double> <double 1.000000e+00, double 2.000000e+00, double undef, double undef>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = fdiv <4 x double> [[V1:%.*]], <double undef, double undef, double 7.000000e+00, double 8.000000e+00>
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x double> [[T3]]
;
  %t1 = fdiv <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fdiv <4 x double> %v1, <double 5.0, double 6.0, double 7.0, double 8.0>
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}
1297
; Shift-left with constant shift amount can be converted to mul to enable the fold.
; Lanes 0-1 come from the shl: shift amounts 5 and 6 appear as multipliers 32
; and 64 in the expected mul. Both inputs are 'nuw' and the flag is kept.

define <4 x i32> @mul_shl(<4 x i32> %v0) {
; CHECK-LABEL: @mul_shl(
; CHECK-NEXT:    [[T3:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 32, i32 64, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
  ret <4 x i32> %t3
}
1310
; Try with shift as operand 0 of the shuffle; 'nsw' is dropped for safety, but that could be improved.
; Lanes 2-3 come from the shl: shift amounts 3 and 4 appear as multipliers 8 and
; 16 in the expected mul. Lane 1 of the mask is undef and stays undef in the constant.

define <4 x i32> @shl_mul(<4 x i32> %v0) {
; CHECK-LABEL: @shl_mul(
; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 5, i32 undef, i32 8, i32 16>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
  ret <4 x i32> %t3
}
1323
; Demanded elements + simplification can remove the mul alone, but that's not the best case.
; Only lane 0 is taken from the mul, where the multiplier is 1; the expected
; output is a single shl with shift amount 0 in that lane.

define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) {
; CHECK-LABEL: @mul_is_nop_shl(
; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 7, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i32> %t3
}
1336
; Negative test: shift amount (operand 1) must be constant.
; %t1 shifts a constant by the variable %v0, so the shl, the mul and the shuffle
; all remain in the expected output (with the shuffle operands commuted and the
; unused mul constant lanes set to undef).

define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) {
; CHECK-LABEL: @shl_mul_not_constant_shift_amount(
; CHECK-NEXT:    [[T1:%.*]] = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = mul <4 x i32> [[V0]], <i32 5, i32 6, i32 undef, i32 undef>
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T2]], <4 x i32> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = mul <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}
1351
1352; Try with 2 variable inputs.
1353
1354define <4 x i32> @mul_shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1355; CHECK-LABEL: @mul_shl_2_vars(
1356; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1357; CHECK-NEXT:    [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 32, i32 64, i32 3, i32 4>
1358; CHECK-NEXT:    ret <4 x i32> [[T3]]
1359;
; The mul and the shl have different variable operands. The expected output
; first shuffles the two variables together and then applies one 'mul nuw':
; the shifts by 5 and 6 become multiplies by 32 and 64 in lanes 0-1, and
; lanes 2-3 keep the mul constants 3 and 4.
1360  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1361  %t2 = shl nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
; Mask: lanes 0-1 from %t2 (indices 4, 5), lanes 2-3 from %t1.
1362  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1363  ret <4 x i32> %t3
1364}
1365
1366define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1367; CHECK-LABEL: @shl_mul_2_vars(
1368; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 undef, i32 6, i32 7>
1369; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 5, i32 undef, i32 8, i32 16>
1370; CHECK-NEXT:    ret <4 x i32> [[T3]]
1371;
; Two-variable form with the shl as operand 0 of the shuffle and an undef mask
; element. The expected output shuffles the variables (keeping the undef lane)
; and applies one mul with constants <5, undef, 8, 16>; the shifts by 3 and 4
; become multiplies by 8 and 16, and the expected mul carries no 'nsw'.
1372  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1373  %t2 = mul nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
; Mask: lane 0 from %t2 (index 4), lane 1 undef, lanes 2-3 from %t1.
1374  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
1375  ret <4 x i32> %t3
1376}
1377
1378; Or with constant can be converted to add to enable the fold.
1379; The 'shl' is here to allow analysis to determine that the 'or' can be transformed to 'add'.
1380; TODO: The 'or' constant is limited to a splat.
1381
1382define <4 x i32> @add_or(<4 x i32> %v) {
1383; CHECK-LABEL: @add_or(
1384; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5>
1385; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0]], <i32 31, i32 31, i32 65536, i32 65537>
1386; CHECK-NEXT:    ret <4 x i32> [[T3]]
1387;
; Select-style shuffle of an add and an or of the same shifted value. The shl
; by 5 leaves the low 5 bits zero, so or-ing in 31 gives the same result as
; adding 31. The expected output is a single add with 31 in lanes 0-1 and the
; original add constants in lanes 2-3.
1388  %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>                   ; clear the bottom bits
1389  %t1 = add <4 x i32> %v0, <i32 65534, i32 65535, i32 65536, i32 65537>  ; this can't be converted to 'or'
1390  %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>               ; set the bottom bits
; Mask: lanes 0-1 from %t2 (indices 4, 5), lanes 2-3 from %t1.
1391  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1392  ret <4 x i32> %t3
1393}
1394
1395; Try with 'or' as operand 0 of the shuffle.
1396
1397define <4 x i8> @or_add(<4 x i8> %v) {
1398; CHECK-LABEL: @or_add(
1399; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
1400; CHECK-NEXT:    [[T3:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 -64, i8 -64>
1401; CHECK-NEXT:    ret <4 x i8> [[T3]]
1402;
; Same idea with the 'or' as operand 0 of the shuffle. The lshr by 3 leaves
; the top 3 bits zero, and 192 (0xC0) only sets the top 2 bits, so the or
; gives the same result as an add. The expected output is a single
; 'add nuw nsw'; the constant 192 is printed there as i8 -64.
1403  %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3>          ; clear the top bits
1404  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192>   ; set some top bits
1405  %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4>  ; this can't be converted to 'or'
; Mask: lanes 0-1 from %t2 (indices 4, 5), lanes 2-3 from %t1.
1406  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1407  ret <4 x i8> %t3
1408}
1409
1410; Negative test: not all 'or' insts can be converted to 'add'.
1411
1412define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
1413; CHECK-LABEL: @or_add_not_enough_masking(
1414; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 1, i8 1, i8 1, i8 1>
1415; CHECK-NEXT:    [[T1:%.*]] = or <4 x i8> [[V0]], <i8 undef, i8 undef, i8 -64, i8 -64>
1416; CHECK-NEXT:    [[T2:%.*]] = add <4 x i8> [[V0]], <i8 1, i8 2, i8 undef, i8 undef>
1417; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1418; CHECK-NEXT:    ret <4 x i8> [[T3]]
1419;
; Negative case: the lshr by 1 only guarantees that the top bit is zero, but
; 192 (0xC0) also sets bit 6, which may already be set in %v0, so the or is
; not equivalent to an add. The expected output keeps both binops and the
; shuffle. It still differs from the input: the shuffle operands are swapped,
; unselected constant lanes become undef, and the add is shown without
; 'nsw'/'nuw'.
1420  %v0 = lshr <4 x i8> %v, <i8 1, i8 1, i8 1, i8 1>          ; clears only the top bit (not enough)
1421  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192>   ; set some top bits
1422  %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4>  ; the add itself can't be converted to 'or'
; Mask: lanes 0-1 from %t2 (indices 4, 5), lanes 2-3 from %t1.
1423  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1424  ret <4 x i8> %t3
1425}
1426
1427; Try with 2 variable inputs.
1428
1429define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
1430; CHECK-LABEL: @add_or_2_vars(
1431; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5>
1432; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1433; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[TMP1]], <i32 31, i32 31, i32 65536, i32 65537>
1434; CHECK-NEXT:    ret <4 x i32> [[T3]]
1435;
; Two-variable form: the add uses %v1 while the or uses the shifted %v0. The
; expected output shuffles %v0 and %v1 together and then applies one add,
; with 31 (from the or) in lanes 0-1 and the add constants in lanes 2-3.
1436  %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>                   ; clear the bottom bits
1437  %t1 = add <4 x i32> %v1, <i32 65534, i32 65535, i32 65536, i32 65537>  ; this can't be converted to 'or'
1438  %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>               ; set the bottom bits
; Mask: lanes 0-1 from %t2 (indices 4, 5), lanes 2-3 from %t1.
1439  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1440  ret <4 x i32> %t3
1441}
1442
1443define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) {
1444; CHECK-LABEL: @or_add_2_vars(
1445; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
1446; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1447; CHECK-NEXT:    [[T3:%.*]] = add nuw nsw <4 x i8> [[TMP1]], <i8 1, i8 2, i8 -64, i8 -64>
1448; CHECK-NEXT:    ret <4 x i8> [[T3]]
1449;
; Two-variable form with the 'or' as operand 0 of the shuffle: the or uses the
; shifted %v0 while the add uses %v1. The expected output shuffles %v1 and
; %v0 together and then applies one 'add nuw nsw', with the add constants in
; lanes 0-1 and 192 (printed as i8 -64) in lanes 2-3.
1450  %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3>          ; clear the top bits
1451  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192>   ; set some top bits
1452  %t2 = add nsw nuw <4 x i8> %v1, <i8 1, i8 2, i8 3, i8 4>  ; this can't be converted to 'or'
; Mask: lanes 0-1 from %t2 (indices 4, 5), lanes 2-3 from %t1.
1453  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1454  ret <4 x i8> %t3
1455}
1456
1457; The undef operand is used to simplify the shuffle mask, but don't assert that too soon.
1458
1459define <4 x i32> @PR41419(<4 x i32> %v) {
1460; CHECK-LABEL: @PR41419(
1461; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 2, i32 undef>
1462; CHECK-NEXT:    ret <4 x i32> [[S]]
1463;
; Shuffle with no binop at all: mask indices 4, 5 and 7 read from the undef
; second operand and only lane 2 reads %v. The expected output keeps the
; shuffle and replaces those three mask elements with undef.
1464  %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
1465  ret <4 x i32> %s
1466}
1467
1468