1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
6
7; Verify that we don't emit packed vector shift instructions if the
8; condition used by the vector select is a vector of constants.
9
; Alternating constant mask <T,F,T,F>: lanes 0 and 2 come from %a, lanes 1 and 3 from %b.
10define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
11; SSE2-LABEL: test1:
12; SSE2:       # %bb.0:
13; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
14; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
15; SSE2-NEXT:    retq
16;
17; SSE41-LABEL: test1:
18; SSE41:       # %bb.0:
19; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
20; SSE41-NEXT:    retq
21;
22; AVX-LABEL: test1:
23; AVX:       # %bb.0:
24; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
25; AVX-NEXT:    retq
26  %1 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %a, <4 x float> %b
27  ret <4 x float> %1
28}
29
; Constant mask <T,T,F,F>: lanes 0-1 come from %a, lanes 2-3 from %b.
30define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
31; SSE2-LABEL: test2:
32; SSE2:       # %bb.0:
33; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
34; SSE2-NEXT:    retq
35;
36; SSE41-LABEL: test2:
37; SSE41:       # %bb.0:
38; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
39; SSE41-NEXT:    retq
40;
41; AVX-LABEL: test2:
42; AVX:       # %bb.0:
43; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
44; AVX-NEXT:    retq
45  %1 = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
46  ret <4 x float> %1
47}
48
; Constant mask <F,F,T,T>: lanes 0-1 come from %b, lanes 2-3 from %a.
49define <4 x float> @test3(<4 x float> %a, <4 x float> %b) {
50; SSE2-LABEL: test3:
51; SSE2:       # %bb.0:
52; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
53; SSE2-NEXT:    retq
54;
55; SSE41-LABEL: test3:
56; SSE41:       # %bb.0:
57; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
58; SSE41-NEXT:    retq
59;
60; AVX-LABEL: test3:
61; AVX:       # %bb.0:
62; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
63; AVX-NEXT:    retq
64  %1 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
65  ret <4 x float> %1
66}
67
; All-false constant mask: every lane comes from %b, so the expected code is a plain register copy.
68define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
69; SSE-LABEL: test4:
70; SSE:       # %bb.0:
71; SSE-NEXT:    movaps %xmm1, %xmm0
72; SSE-NEXT:    retq
73;
74; AVX-LABEL: test4:
75; AVX:       # %bb.0:
76; AVX-NEXT:    vmovaps %xmm1, %xmm0
77; AVX-NEXT:    retq
78  %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
79  ret <4 x float> %1
80}
81
; All-true constant mask: every lane comes from %a, so the expected code is a bare return.
82define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
83; SSE-LABEL: test5:
84; SSE:       # %bb.0:
85; SSE-NEXT:    retq
86;
87; AVX-LABEL: test5:
88; AVX:       # %bb.0:
89; AVX-NEXT:    retq
90  %1 = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
91  ret <4 x float> %1
92}
93
; Both value operands of the select are %a (%b is unused), so the result is %a
; regardless of the alternating mask; the expected code is a bare return.
94define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
95; SSE-LABEL: test6:
96; SSE:       # %bb.0:
97; SSE-NEXT:    retq
98;
99; AVX-LABEL: test6:
100; AVX:       # %bb.0:
101; AVX-NEXT:    retq
102  %1 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i16> %a, <8 x i16> %a
103  ret <8 x i16> %1
104}
105
; v8i16 with constant mask <4 x T, 4 x F>: lanes 0-3 come from %a, lanes 4-7 from %b.
106define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
107; SSE2-LABEL: test7:
108; SSE2:       # %bb.0:
109; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
110; SSE2-NEXT:    retq
111;
112; SSE41-LABEL: test7:
113; SSE41:       # %bb.0:
114; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
115; SSE41-NEXT:    retq
116;
117; AVX-LABEL: test7:
118; AVX:       # %bb.0:
119; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
120; AVX-NEXT:    retq
121  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
122  ret <8 x i16> %1
123}
124
; v8i16 with constant mask <4 x F, 4 x T>: lanes 0-3 come from %b, lanes 4-7 from %a.
125define <8 x i16> @test8(<8 x i16> %a, <8 x i16> %b) {
126; SSE2-LABEL: test8:
127; SSE2:       # %bb.0:
128; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
129; SSE2-NEXT:    retq
130;
131; SSE41-LABEL: test8:
132; SSE41:       # %bb.0:
133; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
134; SSE41-NEXT:    retq
135;
136; AVX-LABEL: test8:
137; AVX:       # %bb.0:
138; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
139; AVX-NEXT:    retq
140  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
141  ret <8 x i16> %1
142}
143
; v8i16 with an all-false constant mask: every lane comes from %b.
144define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) {
145; SSE-LABEL: test9:
146; SSE:       # %bb.0:
147; SSE-NEXT:    movaps %xmm1, %xmm0
148; SSE-NEXT:    retq
149;
150; AVX-LABEL: test9:
151; AVX:       # %bb.0:
152; AVX-NEXT:    vmovaps %xmm1, %xmm0
153; AVX-NEXT:    retq
154  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
155  ret <8 x i16> %1
156}
157
; v8i16 with an all-true constant mask: every lane comes from %a.
158define <8 x i16> @test10(<8 x i16> %a, <8 x i16> %b) {
159; SSE-LABEL: test10:
160; SSE:       # %bb.0:
161; SSE-NEXT:    retq
162;
163; AVX-LABEL: test10:
164; AVX:       # %bb.0:
165; AVX-NEXT:    retq
166  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
167  ret <8 x i16> %1
168}
169
; Mask mixes false, true and undef lanes: <F,T,T,F,undef,T,T,undef>.
; In the expected code the undef lanes (4 and 7) are taken from xmm1 (%b),
; i.e. they are treated like false lanes.
170define <8 x i16> @test11(<8 x i16> %a, <8 x i16> %b) {
171; SSE2-LABEL: test11:
172; SSE2:       # %bb.0:
173; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,0,0,65535,65535,0]
174; SSE2-NEXT:    andps %xmm2, %xmm0
175; SSE2-NEXT:    andnps %xmm1, %xmm2
176; SSE2-NEXT:    orps %xmm2, %xmm0
177; SSE2-NEXT:    retq
178;
179; SSE41-LABEL: test11:
180; SSE41:       # %bb.0:
181; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
182; SSE41-NEXT:    retq
183;
184; AVX-LABEL: test11:
185; AVX:       # %bb.0:
186; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
187; AVX-NEXT:    retq
188  %1 = select <8 x i1> <i1 false, i1 true, i1 true, i1 false, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
189  ret <8 x i16> %1
190}
191
; Mask contains only false and undef lanes; the expected code returns xmm1 (%b) unchanged.
192define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) {
193; SSE-LABEL: test12:
194; SSE:       # %bb.0:
195; SSE-NEXT:    movaps %xmm1, %xmm0
196; SSE-NEXT:    retq
197;
198; AVX-LABEL: test12:
199; AVX:       # %bb.0:
200; AVX-NEXT:    vmovaps %xmm1, %xmm0
201; AVX-NEXT:    retq
202  %1 = select <8 x i1> <i1 false, i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
203  ret <8 x i16> %1
204}
205
; Every mask lane is undef; the expected code returns xmm1 (%b) unchanged.
206define <8 x i16> @test13(<8 x i16> %a, <8 x i16> %b) {
207; SSE-LABEL: test13:
208; SSE:       # %bb.0:
209; SSE-NEXT:    movaps %xmm1, %xmm0
210; SSE-NEXT:    retq
211;
212; AVX-LABEL: test13:
213; AVX:       # %bb.0:
214; AVX-NEXT:    vmovaps %xmm1, %xmm0
215; AVX-NEXT:    retq
216  %1 = select <8 x i1> <i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>, <8 x i16> %a, <8 x i16> %b
217  ret <8 x i16> %1
218}
219
220; Fold (vselect (build_vector AllOnes), N1, N2) -> N1
; v4f32 case: the mask has only true and undef lanes, and the expected code is
; a bare return (xmm0, holding %a, is left untouched).
221define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
222; SSE-LABEL: test14:
223; SSE:       # %bb.0:
224; SSE-NEXT:    retq
225;
226; AVX-LABEL: test14:
227; AVX:       # %bb.0:
228; AVX-NEXT:    retq
229  %1 = select <4 x i1> <i1 true, i1 undef, i1 true, i1 undef>, <4 x float> %a, <4 x float> %b
230  ret <4 x float> %1
231}
232
; v8i16 case with only true and undef mask lanes; the expected code is a bare
; return (xmm0, holding %a, is left untouched).
233define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
234; SSE-LABEL: test15:
235; SSE:       # %bb.0:
236; SSE-NEXT:    retq
237;
238; AVX-LABEL: test15:
239; AVX:       # %bb.0:
240; AVX-NEXT:    retq
241  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
242  ret <8 x i16> %1
243}
244
245; Fold (vselect (build_vector AllZeros), N1, N2) -> N2
; v4f32 case: the mask has only false and undef lanes, and the expected code
; copies xmm1 (%b) into the return register.
246define <4 x float> @test16(<4 x float> %a, <4 x float> %b) {
247; SSE-LABEL: test16:
248; SSE:       # %bb.0:
249; SSE-NEXT:    movaps %xmm1, %xmm0
250; SSE-NEXT:    retq
251;
252; AVX-LABEL: test16:
253; AVX:       # %bb.0:
254; AVX-NEXT:    vmovaps %xmm1, %xmm0
255; AVX-NEXT:    retq
256  %1 = select <4 x i1> <i1 false, i1 undef, i1 false, i1 undef>, <4 x float> %a, <4 x float> %b
257  ret <4 x float> %1
258}
259
; v8i16 case with only false and undef mask lanes; the expected code copies
; xmm1 (%b) into the return register.
260define <8 x i16> @test17(<8 x i16> %a, <8 x i16> %b) {
261; SSE-LABEL: test17:
262; SSE:       # %bb.0:
263; SSE-NEXT:    movaps %xmm1, %xmm0
264; SSE-NEXT:    retq
265;
266; AVX-LABEL: test17:
267; AVX:       # %bb.0:
268; AVX-NEXT:    vmovaps %xmm1, %xmm0
269; AVX-NEXT:    retq
270  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
271  ret <8 x i16> %1
272}
273
; v4f32 with constant mask <F,T,T,T>: only lane 0 comes from %b, the rest from %a.
274define <4 x float> @test18(<4 x float> %a, <4 x float> %b) {
275; SSE2-LABEL: test18:
276; SSE2:       # %bb.0:
277; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
278; SSE2-NEXT:    retq
279;
280; SSE41-LABEL: test18:
281; SSE41:       # %bb.0:
282; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
283; SSE41-NEXT:    retq
284;
285; AVX-LABEL: test18:
286; AVX:       # %bb.0:
287; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
288; AVX-NEXT:    retq
289  %1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
290  ret <4 x float> %1
291}
292
; v4i32 with constant mask <F,T,T,T>: only lane 0 comes from %b, the rest from %a.
293define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
294; SSE2-LABEL: test19:
295; SSE2:       # %bb.0:
296; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
297; SSE2-NEXT:    retq
298;
299; SSE41-LABEL: test19:
300; SSE41:       # %bb.0:
301; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
302; SSE41-NEXT:    retq
303;
304; AVX-LABEL: test19:
305; AVX:       # %bb.0:
306; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
307; AVX-NEXT:    retq
308  %1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> %a, <4 x i32> %b
309  ret <4 x i32> %1
310}
311
; v2f64 with constant mask <F,T>: lane 0 comes from %b, lane 1 from %a.
312define <2 x double> @test20(<2 x double> %a, <2 x double> %b) {
313; SSE2-LABEL: test20:
314; SSE2:       # %bb.0:
315; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
316; SSE2-NEXT:    retq
317;
318; SSE41-LABEL: test20:
319; SSE41:       # %bb.0:
320; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
321; SSE41-NEXT:    retq
322;
323; AVX-LABEL: test20:
324; AVX:       # %bb.0:
325; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
326; AVX-NEXT:    retq
327  %1 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %b
328  ret <2 x double> %1
329}
330
; v2i64 with constant mask <F,T>: lane 0 comes from %b, lane 1 from %a.
331define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
332; SSE2-LABEL: test21:
333; SSE2:       # %bb.0:
334; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
335; SSE2-NEXT:    retq
336;
337; SSE41-LABEL: test21:
338; SSE41:       # %bb.0:
339; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
340; SSE41-NEXT:    retq
341;
342; AVX-LABEL: test21:
343; AVX:       # %bb.0:
344; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
345; AVX-NEXT:    retq
346  %1 = select <2 x i1> <i1 false, i1 true>, <2 x i64> %a, <2 x i64> %b
347  ret <2 x i64> %1
348}
349
; v4f32 with constant mask <T,F,F,F>: only lane 0 comes from %a, the rest from %b.
350define <4 x float> @test22(<4 x float> %a, <4 x float> %b) {
351; SSE2-LABEL: test22:
352; SSE2:       # %bb.0:
353; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
354; SSE2-NEXT:    movaps %xmm1, %xmm0
355; SSE2-NEXT:    retq
356;
357; SSE41-LABEL: test22:
358; SSE41:       # %bb.0:
359; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
360; SSE41-NEXT:    retq
361;
362; AVX-LABEL: test22:
363; AVX:       # %bb.0:
364; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
365; AVX-NEXT:    retq
366  %1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
367  ret <4 x float> %1
368}
369
; v4i32 with constant mask <T,F,F,F>: only lane 0 comes from %a, the rest from %b.
370define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
371; SSE2-LABEL: test23:
372; SSE2:       # %bb.0:
373; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
374; SSE2-NEXT:    movaps %xmm1, %xmm0
375; SSE2-NEXT:    retq
376;
377; SSE41-LABEL: test23:
378; SSE41:       # %bb.0:
379; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
380; SSE41-NEXT:    retq
381;
382; AVX-LABEL: test23:
383; AVX:       # %bb.0:
384; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
385; AVX-NEXT:    retq
386  %1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %a, <4 x i32> %b
387  ret <4 x i32> %1
388}
389
; v2f64 with constant mask <T,F>: lane 0 comes from %a, lane 1 from %b.
390define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
391; SSE2-LABEL: test24:
392; SSE2:       # %bb.0:
393; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
394; SSE2-NEXT:    retq
395;
396; SSE41-LABEL: test24:
397; SSE41:       # %bb.0:
398; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
399; SSE41-NEXT:    retq
400;
401; AVX-LABEL: test24:
402; AVX:       # %bb.0:
403; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
404; AVX-NEXT:    retq
405  %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
406  ret <2 x double> %1
407}
408
; v2i64 with constant mask <T,F>: lane 0 comes from %a, lane 1 from %b.
409define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
410; SSE2-LABEL: test25:
411; SSE2:       # %bb.0:
412; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
413; SSE2-NEXT:    retq
414;
415; SSE41-LABEL: test25:
416; SSE41:       # %bb.0:
417; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
418; SSE41-NEXT:    retq
419;
420; AVX-LABEL: test25:
421; AVX:       # %bb.0:
422; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
423; AVX-NEXT:    retq
424  %1 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
425  ret <2 x i64> %1
426}
427
; Each select merges two <2 x float> inputs that were widened by shufflevector:
; with mask <F,F,T,T>, lanes 0-1 come from the second value operand (%1 / %4,
; holding %a0 / %b0 in lanes 0-1) and lanes 2-3 from the first (%2 / %5,
; holding %a1 / %b1 in lanes 2-3). The two merged vectors are then subtracted.
428define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x float> %a1, <2 x float> %b1) {
429; SSE-LABEL: select_of_shuffles_0:
430; SSE:       # %bb.0:
431; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
432; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
433; SSE-NEXT:    subps %xmm1, %xmm0
434; SSE-NEXT:    retq
435;
436; AVX-LABEL: select_of_shuffles_0:
437; AVX:       # %bb.0:
438; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
439; AVX-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
440; AVX-NEXT:    vsubps %xmm1, %xmm0, %xmm0
441; AVX-NEXT:    retq
442  %1 = shufflevector <2 x float> %a0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
443  %2 = shufflevector <2 x float> %a1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
444  %3 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %2, <4 x float> %1
445  %4 = shufflevector <2 x float> %b0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
446  %5 = shufflevector <2 x float> %b1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
447  %6 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %5, <4 x float> %4
448  %7 = fsub <4 x float> %3, %6
449  ret <4 x float> %7
450}
451
452; PR20677
; Select on <16 x double> with constant mask <8 x T, 8 x F>: lanes 0-7 come
; from %a and lanes 8-15 from %b.
; NOTE(review): "illegal" presumably refers to <16 x double> not being a legal
; vector type for the targets in the RUN lines -- confirm against PR20677.
453define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) {
454; SSE-LABEL: select_illegal:
455; SSE:       # %bb.0:
456; SSE-NEXT:    movq %rdi, %rax
457; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm4
458; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm5
459; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm6
460; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm7
461; SSE-NEXT:    movaps %xmm7, 112(%rdi)
462; SSE-NEXT:    movaps %xmm6, 96(%rdi)
463; SSE-NEXT:    movaps %xmm5, 80(%rdi)
464; SSE-NEXT:    movaps %xmm4, 64(%rdi)
465; SSE-NEXT:    movaps %xmm3, 48(%rdi)
466; SSE-NEXT:    movaps %xmm2, 32(%rdi)
467; SSE-NEXT:    movaps %xmm1, 16(%rdi)
468; SSE-NEXT:    movaps %xmm0, (%rdi)
469; SSE-NEXT:    retq
470;
471; AVX-LABEL: select_illegal:
472; AVX:       # %bb.0:
473; AVX-NEXT:    vmovaps %ymm7, %ymm3
474; AVX-NEXT:    vmovaps %ymm6, %ymm2
475; AVX-NEXT:    retq
476  %sel = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x double> %a, <16 x double> %b
477  ret <16 x double> %sel
478}
479
480; Make sure we can optimize the condition MSB when it is used by 2 selects.
481; The v2i1 here will be passed as v2i64 and we will emit a sign_extend_inreg to fill the upper bits.
482; We should be able to remove the sra from the sign_extend_inreg to leave only shl.
; The non-constant %cond feeds two selects (%x and %y) whose results are added.
; In the SSE4.1 and AVX expectations a single psllq $63 of the condition is
; shared by both blendvpd instructions.
483define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
484; SSE2-LABEL: shrunkblend_2uses:
485; SSE2:       # %bb.0:
486; SSE2-NEXT:    psllq $63, %xmm0
487; SSE2-NEXT:    psrad $31, %xmm0
488; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
489; SSE2-NEXT:    movdqa %xmm0, %xmm5
490; SSE2-NEXT:    pandn %xmm2, %xmm5
491; SSE2-NEXT:    pand %xmm0, %xmm1
492; SSE2-NEXT:    por %xmm1, %xmm5
493; SSE2-NEXT:    pand %xmm0, %xmm3
494; SSE2-NEXT:    pandn %xmm4, %xmm0
495; SSE2-NEXT:    por %xmm3, %xmm0
496; SSE2-NEXT:    paddq %xmm5, %xmm0
497; SSE2-NEXT:    retq
498;
499; SSE41-LABEL: shrunkblend_2uses:
500; SSE41:       # %bb.0:
501; SSE41-NEXT:    psllq $63, %xmm0
502; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
503; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm4
504; SSE41-NEXT:    paddq %xmm2, %xmm4
505; SSE41-NEXT:    movdqa %xmm4, %xmm0
506; SSE41-NEXT:    retq
507;
508; AVX-LABEL: shrunkblend_2uses:
509; AVX:       # %bb.0:
510; AVX-NEXT:    vpsllq $63, %xmm0, %xmm0
511; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
512; AVX-NEXT:    vblendvpd %xmm0, %xmm3, %xmm4, %xmm0
513; AVX-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
514; AVX-NEXT:    retq
515  %x = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
516  %y = select <2 x i1> %cond, <2 x i64> %c, <2 x i64> %d
517  %z = add <2 x i64> %x, %y
518  ret <2 x i64> %z
519}
520
521; Similar to above, but condition has a use that isn't a condition of a vselect so we can't optimize.
; %cond is used once as a select condition (%x) and once as the operand of a
; sext (%y); the select result and the sign-extended mask are then added.
522define <2 x i64> @shrunkblend_nonvselectuse(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
523; SSE2-LABEL: shrunkblend_nonvselectuse:
524; SSE2:       # %bb.0:
525; SSE2-NEXT:    psllq $63, %xmm0
526; SSE2-NEXT:    psrad $31, %xmm0
527; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
528; SSE2-NEXT:    movdqa %xmm3, %xmm0
529; SSE2-NEXT:    pandn %xmm2, %xmm0
530; SSE2-NEXT:    pand %xmm3, %xmm1
531; SSE2-NEXT:    por %xmm1, %xmm0
532; SSE2-NEXT:    paddq %xmm3, %xmm0
533; SSE2-NEXT:    retq
534;
535; SSE41-LABEL: shrunkblend_nonvselectuse:
536; SSE41:       # %bb.0:
537; SSE41-NEXT:    psllq $63, %xmm0
538; SSE41-NEXT:    psrad $31, %xmm0
539; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
540; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
541; SSE41-NEXT:    paddq %xmm2, %xmm0
542; SSE41-NEXT:    retq
543;
544; AVX-LABEL: shrunkblend_nonvselectuse:
545; AVX:       # %bb.0:
546; AVX-NEXT:    vpsllq $63, %xmm0, %xmm0
547; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
548; AVX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
549; AVX-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm0
550; AVX-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
551; AVX-NEXT:    retq
552  %x = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
553  %y = sext <2 x i1> %cond to <2 x i64>
554  %z = add <2 x i64> %x, %y
555  ret <2 x i64> %z
556}
557
558; This turns into a SHRUNKBLEND with SSE4 or later, and via
559; late shuffle magic, both sides of the blend are the same
560; value. If that is not simplified before isel, it can fail
561; to match (crash).
562
; %a is <0, %x> and %b is <%x, 0>, so %y = %a | %b is <%x, %x>. %p18 is built
; from lane 1 of %y (lane 0) and %x (lane 1), which is also <%x, %x>; both
; value operands of the final select therefore carry the same value.
563define <2 x i32> @simplify_select(i32 %x, <2 x i1> %z) {
564; SSE2-LABEL: simplify_select:
565; SSE2:       # %bb.0:
566; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
567; SSE2-NEXT:    pslld $31, %xmm0
568; SSE2-NEXT:    psrad $31, %xmm0
569; SSE2-NEXT:    movd %edi, %xmm1
570; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,1,1]
571; SSE2-NEXT:    por %xmm1, %xmm2
572; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[1,3]
573; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[1,1]
574; SSE2-NEXT:    pand %xmm0, %xmm2
575; SSE2-NEXT:    pandn %xmm1, %xmm0
576; SSE2-NEXT:    por %xmm2, %xmm0
577; SSE2-NEXT:    retq
578;
579; SSE41-LABEL: simplify_select:
580; SSE41:       # %bb.0:
581; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
582; SSE41-NEXT:    pslld $31, %xmm0
583; SSE41-NEXT:    movd %edi, %xmm1
584; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,1,1]
585; SSE41-NEXT:    por %xmm1, %xmm2
586; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1]
587; SSE41-NEXT:    pinsrd $1, %edi, %xmm1
588; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm1
589; SSE41-NEXT:    movaps %xmm1, %xmm0
590; SSE41-NEXT:    retq
591;
592; AVX-LABEL: simplify_select:
593; AVX:       # %bb.0:
594; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
595; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
596; AVX-NEXT:    vmovd %edi, %xmm1
597; AVX-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,0,1,1]
598; AVX-NEXT:    vpor %xmm1, %xmm2, %xmm1
599; AVX-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
600; AVX-NEXT:    vpinsrd $1, %edi, %xmm2, %xmm2
601; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
602; AVX-NEXT:    retq
603  %a = insertelement <2 x i32> <i32 0, i32 undef>, i32 %x, i32 1
604  %b = insertelement <2 x i32> <i32 undef, i32 0>, i32 %x, i32 0
605  %y = or <2 x i32> %a, %b
606  %p16 = extractelement <2 x i32> %y, i32 1
607  %p17 = insertelement <2 x i32> undef, i32 %p16, i32 0
608  %p18 = insertelement <2 x i32> %p17, i32 %x, i32 1
609  %r = select <2 x i1> %z, <2 x i32> %y, <2 x i32> %p18
610  ret <2 x i32> %r
611}
612
613; Test to make sure we don't try to insert a new setcc to swap the operands
614; of select with all zeros LHS if the setcc has additional users.
; The icmp result %cond is shared by two selects: %sel1 yields zero where %cond
; is true and %y elsewhere, %sel2 yields %z where %cond is true and zero
; elsewhere. Both results are stored, so both uses of the compare stay live.
; The expected code has a single pcmpeqd feeding one pandn and one pand.
615define void @vselect_allzeros_LHS_multiple_use_setcc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32>* %p1, <4 x i32>* %p2) {
616; SSE-LABEL: vselect_allzeros_LHS_multiple_use_setcc:
617; SSE:       # %bb.0:
618; SSE-NEXT:    movdqa {{.*#+}} xmm3 = [1,2,4,8]
619; SSE-NEXT:    pand %xmm3, %xmm0
620; SSE-NEXT:    pcmpeqd %xmm3, %xmm0
621; SSE-NEXT:    movdqa %xmm0, %xmm3
622; SSE-NEXT:    pandn %xmm1, %xmm3
623; SSE-NEXT:    pand %xmm2, %xmm0
624; SSE-NEXT:    movdqa %xmm3, (%rdi)
625; SSE-NEXT:    movdqa %xmm0, (%rsi)
626; SSE-NEXT:    retq
627;
628; AVX-LABEL: vselect_allzeros_LHS_multiple_use_setcc:
629; AVX:       # %bb.0:
630; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,2,4,8]
631; AVX-NEXT:    vpand %xmm3, %xmm0, %xmm0
632; AVX-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
633; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm1
634; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
635; AVX-NEXT:    vmovdqa %xmm1, (%rdi)
636; AVX-NEXT:    vmovdqa %xmm0, (%rsi)
637; AVX-NEXT:    retq
638  %and = and <4 x i32> %x, <i32 1, i32 2, i32 4, i32 8>
639  %cond = icmp ne <4 x i32> %and, zeroinitializer
640  %sel1 = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %y
641  %sel2 = select <4 x i1> %cond, <4 x i32> %z, <4 x i32> zeroinitializer
642  store <4 x i32> %sel1, <4 x i32>* %p1
643  store <4 x i32> %sel2, <4 x i32>* %p2
644  ret void
645}
646
647; This test case previously crashed after r363802, r363850, and r363856 due
648; to any_extend_vector_inreg not being handled by the X86 backend.
; Loads 8 bytes, compares each against 49 and selects between a constant
; <8 x i64> vector and zero. Only lane 0 is extracted, so the result is 32768
; when the first loaded byte equals 49 and 0 otherwise; the expected code
; reduces this to a mask of bit 0 shifted left by 15.
649define i64 @vselect_any_extend_vector_inreg_crash(<8 x i8>* %x) {
650; SSE-LABEL: vselect_any_extend_vector_inreg_crash:
651; SSE:       # %bb.0:
652; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
653; SSE-NEXT:    pcmpeqb {{.*}}(%rip), %xmm0
654; SSE-NEXT:    movq %xmm0, %rax
655; SSE-NEXT:    andl $1, %eax
656; SSE-NEXT:    shlq $15, %rax
657; SSE-NEXT:    retq
658;
659; AVX-LABEL: vselect_any_extend_vector_inreg_crash:
660; AVX:       # %bb.0:
661; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
662; AVX-NEXT:    vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
663; AVX-NEXT:    vmovq %xmm0, %rax
664; AVX-NEXT:    andl $1, %eax
665; AVX-NEXT:    shlq $15, %rax
666; AVX-NEXT:    retq
6670:
668  %1 = load <8 x i8>, <8 x i8>* %x
669  %2 = icmp eq <8 x i8> %1, <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
670  %3 = select <8 x i1> %2, <8 x i64> <i64 32768, i64 16384, i64 8192, i64 4096, i64 2048, i64 1024, i64 512, i64 256>, <8 x i64> zeroinitializer
671  %4 = extractelement <8 x i64> %3, i32 0
672  ret i64 %4
673}
674
675