; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=XOPAVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512DQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
;
; 32-bit runs to make sure we do reasonable things for i64 shifts.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=X86-AVX1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X86-AVX2

;
; Variable Shifts
;

define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: var_shift_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shift_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: var_shift_v4i64:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vpshlq %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_shift_v4i64:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: var_shift_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: var_shift_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: var_shift_v4i64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm4
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; X86-AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
; X86-AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm3
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: var_shift_v4i64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: var_shift_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shift_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: var_shift_v8i32:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vpshld %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT:    vpshld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_shift_v8i32:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: var_shift_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: var_shift_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: var_shift_v8i32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
; X86-AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: var_shift_v8i32:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: var_shift_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm2[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpslld $23, %xmm3, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vcvttps2dq %xmm3, %xmm3
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
; AVX1-NEXT:    vpackusdw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm1[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpslld $23, %xmm3, %xmm3
; AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vcvttps2dq %xmm3, %xmm3
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shift_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; AVX2-NEXT:    vpsllvd %ymm3, %ymm4, %ymm3
; AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: var_shift_v16i16:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vpshlw %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_shift_v16i16:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT:    vpshlw %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512DQ-LABEL: var_shift_v16i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v16i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQVL-LABEL: var_shift_v16i16:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQVL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQVL-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQVL-NEXT:    retq
;
; AVX512BWVL-LABEL: var_shift_v16i16:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT:    retq
;
; X86-AVX1-LABEL: var_shift_v16i16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm2[4,4,5,5,6,6,7,7]
; X86-AVX1-NEXT:    vpslld $23, %xmm3, %xmm3
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1065353216,1065353216,1065353216,1065353216]
; X86-AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
; X86-AVX1-NEXT:    vcvttps2dq %xmm3, %xmm3
; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; X86-AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
; X86-AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
; X86-AVX1-NEXT:    vpackusdw %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm1[4,4,5,5,6,6,7,7]
; X86-AVX1-NEXT:    vpslld $23, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
; X86-AVX1-NEXT:    vcvttps2dq %xmm3, %xmm3
; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; X86-AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpaddd %xmm4, %xmm1, %xmm1
; X86-AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; X86-AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: var_shift_v16i16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X86-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X86-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X86-AVX2-NEXT:    vpsllvd %ymm3, %ymm4, %ymm3
; X86-AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
; X86-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X86-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X86-AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: var_shift_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsllw $4, %xmm2, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT:    vpsllw $5, %xmm5, %xmm5
; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsllw $2, %xmm2, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpaddb %xmm2, %xmm2, %xmm3
; AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsllw $4, %xmm0, %xmm3
; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $2, %xmm0, %xmm3
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpaddb %xmm0, %xmm0, %xmm3
; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shift_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
; AVX2-NEXT:    vpsllw $4, %ymm0, %ymm2
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpsllw $2, %ymm0, %ymm2
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: var_shift_v32i8:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vpshlb %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_shift_v32i8:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT:    vpshlb %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512DQ-LABEL: var_shift_v32i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $5, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQVL-LABEL: var_shift_v32i8:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpsllw $5, %ymm1, %ymm1
; AVX512DQVL-NEXT:    vpsllw $4, %ymm0, %ymm2
; AVX512DQVL-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQVL-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT:    vpsllw $2, %ymm0, %ymm2
; AVX512DQVL-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQVL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQVL-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQVL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQVL-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT:    retq
;
; AVX512BWVL-LABEL: var_shift_v32i8:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BWVL-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT:    retq
;
; X86-AVX1-LABEL: var_shift_v32i8:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT:    vpsllw $4, %xmm2, %xmm3
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; X86-AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; X86-AVX1-NEXT:    vpsllw $5, %xmm5, %xmm5
; X86-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpsllw $2, %xmm2, %xmm3
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; X86-AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
; X86-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpaddb %xmm2, %xmm2, %xmm3
; X86-AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
; X86-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpsllw $4, %xmm0, %xmm3
; X86-AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsllw $2, %xmm0, %xmm3
; X86-AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpaddb %xmm0, %xmm0, %xmm3
; X86-AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: var_shift_v32i8:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
; X86-AVX2-NEXT:    vpsllw $4, %ymm0, %ymm2
; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm2, %ymm2
; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpsllw $2, %ymm0, %ymm2
; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm2, %ymm2
; X86-AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; X86-AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <32 x i8> %a, %b
  ret <32 x i8> %shift
}

;
; Uniform Variable Shifts
;

define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsllq %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_shift_v4i64:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT:    vpsllq %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_shift_v4i64:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_shift_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: splatvar_shift_v4i64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT:    vpsllq %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatvar_shift_v4i64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
  %shift = shl <4 x i64> %a, %splat
  ret <4 x i64> %shift
}

define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_shift_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_shift_v8i32:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: splatvar_shift_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512-NEXT:    vpslld %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_shift_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512VL-NEXT:    vpslld %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatvar_shift_v8i32:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i32> %a, %splat
  ret <8 x i32> %shift
}

define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_shift_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_shift_v16i16:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: splatvar_shift_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_shift_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatvar_shift_v16i16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i16> %a, %splat
  ret <16 x i16> %shift
}

; Uniform variable shift of <32 x i8>: every lane of %a is shifted left by the
; amount splatted from lane 0 of %b.  x86 has no per-element i8 shift, so the
; checked lowerings either shift as i16 with vpsllw and mask off the bits that
; crossed byte boundaries (AVX1/AVX2/AVX512DQ variants), widen to i16 in a zmm
; and truncate back with vpmovwb (AVX512BW variants), or use XOP's native
; per-byte vpshlb after splatting the shift amount.
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpsllw %xmm1, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_shift_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_shift_v32i8:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT:    vpshlb %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_shift_v32i8:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
; XOPAVX2-NEXT:    vpshlb %xmm1, %xmm2, %xmm2
; XOPAVX2-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512DQ-LABEL: splatvar_shift_v32i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
; AVX512DQ-NEXT:    vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQVL-LABEL: splatvar_shift_v32i8:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQVL-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQVL-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
; AVX512DQVL-NEXT:    vpbroadcastb %xmm1, %ymm1
; AVX512DQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT:    retq
;
; AVX512BWVL-LABEL: splatvar_shift_v32i8:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BWVL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT:    retq
;
; X86-AVX1-LABEL: splatvar_shift_v32i8:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; X86-AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatvar_shift_v32i8:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-AVX2-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
; X86-AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
; X86-AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i8> %a, %splat
  ret <32 x i8> %shift
}
743
744;
745; Constant Shifts
746;
747
; Non-uniform constant shift of <4 x i64> (amounts 1,7,31,62).  AVX2 and later
; use the variable-shift vpsllvq with a constant-pool operand; AVX1 has no
; per-element i64 shift, so each 128-bit half does two scalar-count vpsllq
; shifts and blends the even/odd lanes together with vpblendw.
define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: constant_shift_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $62, %xmm1, %xmm2
; AVX1-NEXT:    vpsllq $31, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsllq $7, %xmm0, %xmm2
; AVX1-NEXT:    vpsllq $1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_shift_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: constant_shift_v4i64:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpshlq {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT:    vpshlq {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: constant_shift_v4i64:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsllvq {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: constant_shift_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllvq {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: constant_shift_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllvq {{.*}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: constant_shift_v4i64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpsllq $62, %xmm1, %xmm2
; X86-AVX1-NEXT:    vpsllq $31, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X86-AVX1-NEXT:    vpsllq $7, %xmm0, %xmm2
; X86-AVX1-NEXT:    vpsllq $1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: constant_shift_v4i64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsllvq {{\.LCPI.*}}, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
  ret <4 x i64> %shift
}
808
; Non-uniform constant shift of <8 x i32>.  AVX2/XOPAVX2/AVX512 use vpsllvd
; with a constant-pool vector of shift amounts; AVX1 instead turns the
; constant left-shift into a multiply by powers of two (vpmulld) on each
; 128-bit half, and XOPAVX1 uses its per-element vpshld.
define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: constant_shift_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_shift_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: constant_shift_v8i32:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpshld {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT:    vpshld {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: constant_shift_v8i32:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: constant_shift_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: constant_shift_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: constant_shift_v8i32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm1
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-AVX1-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: constant_shift_v8i32:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <8 x i32> %shift
}
861
; Non-uniform constant shift of <16 x i16> (amounts 0..15).  Without a
; per-element i16 shift, most targets lower to a multiply by powers of two
; (vpmullw); only AVX512BW-class targets use the true variable shift vpsllvw
; (widening to zmm when vpsllvw for ymm is unavailable without VL), and
; XOPAVX1 uses its per-element vpshlw.
define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: constant_shift_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_shift_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: constant_shift_v16i16:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpshlw {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT:    vpshlw {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: constant_shift_v16i16:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512DQ-LABEL: constant_shift_v16i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v16i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQVL-LABEL: constant_shift_v16i16:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQVL-NEXT:    retq
;
; AVX512BWVL-LABEL: constant_shift_v16i16:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpsllvw {{.*}}(%rip), %ymm0, %ymm0
; AVX512BWVL-NEXT:    retq
;
; X86-AVX1-LABEL: constant_shift_v16i16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpmullw {{\.LCPI.*}}, %xmm0, %xmm1
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-AVX1-NEXT:    vpmullw {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: constant_shift_v16i16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpmullw {{\.LCPI.*}}, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <16 x i16> %shift
}
927
; Non-uniform constant shift of <32 x i8> (a 0..7/7..0 pattern, repeated per
; half).  Checked strategies: AVX1 unpacks bytes to i16, multiplies by powers
; of two (vpmullw), masks to 0xFF and re-packs with vpackuswb; AVX2/AVX512DQ
; variants build the result with a vpsllw-by-immediate + vpblendvb ladder
; (one blend per shift-amount bit); AVX512BW variants widen to i16 in a zmm,
; vpsllvw, and truncate with vpmovwb; XOP uses per-byte vpshlb directly.
define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: constant_shift_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [128,64,32,16,8,4,2,1]
; AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpmullw %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vpmullw %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_shift_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw $4, %ymm0, %ymm1
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT:    vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX2-NEXT:    # ymm2 = mem[0,1,0,1]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsllw $2, %ymm0, %ymm1
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpaddb %ymm0, %ymm0, %ymm1
; AVX2-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: constant_shift_v32i8:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; XOPAVX1-NEXT:    vpshlb %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: constant_shift_v32i8:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; XOPAVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; XOPAVX2-NEXT:    vpshlb %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT:    vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512DQ-LABEL: constant_shift_v32i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm1
; AVX512DQ-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT:    # ymm2 = mem[0,1,0,1]
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm1
; AVX512DQ-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT:    vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQVL-LABEL: constant_shift_v32i8:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpsllw $4, %ymm0, %ymm1
; AVX512DQVL-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512DQVL-NEXT:    vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQVL-NEXT:    # ymm2 = mem[0,1,0,1]
; AVX512DQVL-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT:    vpsllw $2, %ymm0, %ymm1
; AVX512DQVL-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512DQVL-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQVL-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT:    vpaddb %ymm0, %ymm0, %ymm1
; AVX512DQVL-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQVL-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT:    retq
;
; AVX512BWVL-LABEL: constant_shift_v32i8:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BWVL-NEXT:    vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT:    retq
;
; X86-AVX1-LABEL: constant_shift_v32i8:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [128,64,32,16,8,4,2,1]
; X86-AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; X86-AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128]
; X86-AVX1-NEXT:    vpmullw %xmm5, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-AVX1-NEXT:    vpmullw %xmm5, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: constant_shift_v32i8:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsllw $4, %ymm0, %ymm1
; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm1, %ymm1
; X86-AVX2-NEXT:    vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; X86-AVX2-NEXT:    # ymm2 = mem[0,1,0,1]
; X86-AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpsllw $2, %ymm0, %ymm1
; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm1, %ymm1
; X86-AVX2-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; X86-AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpaddb %ymm0, %ymm0, %ymm1
; X86-AVX2-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; X86-AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <32 x i8> %shift
}
1073
1074;
1075; Uniform Constant Shifts
1076;
1077
; Uniform (splat) constant shift of <4 x i64> by 7.  256-bit-integer targets
; emit a single vpsllq-by-immediate on the ymm; AVX1-class targets split the
; vector into two xmm halves, shift each, and reassemble with vinsertf128.
define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq $7, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsllq $7, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatconstant_shift_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq $7, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatconstant_shift_v4i64:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpsllq $7, %xmm0, %xmm1
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT:    vpsllq $7, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatconstant_shift_v4i64:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsllq $7, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: splatconstant_shift_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllq $7, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_shift_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllq $7, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: splatconstant_shift_v4i64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpsllq $7, %xmm0, %xmm1
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-AVX1-NEXT:    vpsllq $7, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatconstant_shift_v4i64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsllq $7, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
  ret <4 x i64> %shift
}
1130
; Uniform (splat) constant shift of <8 x i32> by 5.  Lowers to a single
; vpslld-by-immediate on 256-bit-integer targets; AVX1-class targets do the
; usual split-shift-reassemble over the two xmm halves.
define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpslld $5, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpslld $5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatconstant_shift_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpslld $5, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatconstant_shift_v8i32:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpslld $5, %xmm0, %xmm1
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT:    vpslld $5, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatconstant_shift_v8i32:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpslld $5, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: splatconstant_shift_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpslld $5, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_shift_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpslld $5, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: splatconstant_shift_v8i32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpslld $5, %xmm0, %xmm1
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-AVX1-NEXT:    vpslld $5, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatconstant_shift_v8i32:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpslld $5, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %shift
}
1183
; Uniform (splat) constant shift of <16 x i16> by 3.  Lowers to a single
; vpsllw-by-immediate on 256-bit-integer targets; AVX1-class targets split
; into xmm halves, shift each, and reassemble.
define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllw $3, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsllw $3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatconstant_shift_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatconstant_shift_v16i16:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpsllw $3, %xmm0, %xmm1
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT:    vpsllw $3, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatconstant_shift_v16i16:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsllw $3, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: splatconstant_shift_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_shift_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: splatconstant_shift_v16i16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpsllw $3, %xmm0, %xmm1
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-AVX1-NEXT:    vpsllw $3, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatconstant_shift_v16i16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsllw $3, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}
1236
; Uniform (splat) constant shift of <32 x i8> by 3.  Since there is no i8
; shift instruction, most targets shift as i16 with vpsllw and then vpand with
; a splat of 248 (0xF8) to clear the bits shifted in from the neighboring
; byte; XOPAVX1 instead uses the native per-byte vpshlb with a splat of 3.
define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllw $3, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpsllw $3, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatconstant_shift_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatconstant_shift_v32i8:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; XOPAVX1-NEXT:    vpshlb %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatconstant_shift_v32i8:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsllw $3, %ymm0, %ymm0
; XOPAVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: splatconstant_shift_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_shift_v32i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: splatconstant_shift_v32i8:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpsllw $3, %xmm1, %xmm1
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; X86-AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpsllw $3, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatconstant_shift_v32i8:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsllw $3, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %shift
}
1301