1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLBW
10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512,AVX512VBMI2
11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLVBMI2
12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
13; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
14
15; Just one 32-bit run to make sure we do reasonable things for i64 cases.
16; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86-SSE2
17
18declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
19declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
20declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
21declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
22
23;
24; Variable Shifts
25;
26
define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; SSE2-LABEL: var_funnnel_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [63,63]
; SSE2-NEXT:    pxor %xmm3, %xmm3
; SSE2-NEXT:    psubq %xmm1, %xmm3
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm4
; SSE2-NEXT:    psrlq %xmm1, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT:    movdqa %xmm0, %xmm5
; SSE2-NEXT:    psrlq %xmm1, %xmm5
; SSE2-NEXT:    movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
; SSE2-NEXT:    pand %xmm2, %xmm3
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psllq %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE2-NEXT:    psllq %xmm2, %xmm0
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    orpd %xmm5, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: var_funnnel_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [63,63]
; SSE41-NEXT:    pxor %xmm3, %xmm3
; SSE41-NEXT:    psubq %xmm1, %xmm3
; SSE41-NEXT:    pand %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    psrlq %xmm1, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT:    movdqa %xmm0, %xmm5
; SSE41-NEXT:    psrlq %xmm1, %xmm5
; SSE41-NEXT:    pblendw {{.*#+}} xmm5 = xmm4[0,1,2,3],xmm5[4,5,6,7]
; SSE41-NEXT:    pand %xmm2, %xmm3
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psllq %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE41-NEXT:    psllq %xmm2, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    por %xmm5, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: var_funnnel_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpsrlq %xmm3, %xmm0, %xmm4
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; AVX1-NEXT:    vpsrlq %xmm3, %xmm0, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpsubq %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_funnnel_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpsrlvq %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT:    vpsubq %xmm1, %xmm4, %xmm1
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: var_funnnel_v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vprorvq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: var_funnnel_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprorvq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: var_funnnel_v2i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vprorvq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: var_funnnel_v2i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprorvq %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: var_funnnel_v2i64:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vprorvq %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v2i64:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprorvq %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: var_funnnel_v2i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT:    vpsubq %xmm1, %xmm2, %xmm1
; XOP-NEXT:    vprotq %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: var_funnnel_v2i64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [63,0,63,0]
; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
; X86-SSE2-NEXT:    psubq %xmm1, %xmm3
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm4
; X86-SSE2-NEXT:    psrlq %xmm1, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm5
; X86-SSE2-NEXT:    psrlq %xmm1, %xmm5
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
; X86-SSE2-NEXT:    pand %xmm2, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psllq %xmm3, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; X86-SSE2-NEXT:    psllq %xmm2, %xmm0
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT:    orpd %xmm5, %xmm0
; X86-SSE2-NEXT:    retl
; Both value operands of fshr are %x, so this funnel shift is a rotate-right of
; %x by %amt (per-element, modulo 64) — hence targets with rotate instructions
; (vprorvq / vprotq) collapse it to a single rotate above.
  %res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> %amt)
  ret <2 x i64> %res
}
172
define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; SSE2-LABEL: var_funnnel_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    psubd %xmm1, %xmm2
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
; SSE2-NEXT:    pslld $23, %xmm2
; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm2
; SSE2-NEXT:    cvttps2dq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: var_funnnel_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    psubd %xmm1, %xmm2
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
; SSE41-NEXT:    pslld $23, %xmm2
; SSE41-NEXT:    paddd {{.*}}(%rip), %xmm2
; SSE41-NEXT:    cvttps2dq %xmm2, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm2, %xmm3
; SSE41-NEXT:    pmuludq %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: var_funnnel_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_funnnel_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: var_funnnel_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: var_funnnel_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: var_funnnel_v4i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: var_funnnel_v4i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: var_funnnel_v4i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v4i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: var_funnnel_v4i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; XOP-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: var_funnnel_v4i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pxor %xmm2, %xmm2
; X86-SSE2-NEXT:    psubd %xmm1, %xmm2
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm2
; X86-SSE2-NEXT:    pslld $23, %xmm2
; X86-SSE2-NEXT:    paddd {{\.LCPI.*}}, %xmm2
; X86-SSE2-NEXT:    cvttps2dq %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    por %xmm3, %xmm0
; X86-SSE2-NEXT:    retl
; Both value operands of fshr are %x, so this funnel shift is a rotate-right of
; %x by %amt (per-element, modulo 32) — targets with rotate instructions
; (vprorvd / vprotd) reduce it to a single rotate above.
  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %amt)
  ret <4 x i32> %res
}
318
define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind {
; SSE2-LABEL: var_funnnel_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    psubw %xmm1, %xmm2
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $23, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
; SSE2-NEXT:    paddd %xmm3, %xmm1
; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pslld $23, %xmm2
; SSE2-NEXT:    paddd %xmm3, %xmm2
; SSE2-NEXT:    cvttps2dq %xmm2, %xmm2
; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pmulhuw %xmm2, %xmm1
; SSE2-NEXT:    pmullw %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: var_funnnel_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    psubw %xmm1, %xmm2
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; SSE41-NEXT:    pslld $23, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
; SSE41-NEXT:    paddd %xmm3, %xmm2
; SSE41-NEXT:    cvttps2dq %xmm2, %xmm2
; SSE41-NEXT:    pslld $23, %xmm1
; SSE41-NEXT:    paddd %xmm3, %xmm1
; SSE41-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE41-NEXT:    packusdw %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pmulhuw %xmm1, %xmm2
; SSE41-NEXT:    pmullw %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: var_funnnel_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm1[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_funnnel_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
; AVX2-NEXT:    vpsubw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpsrlvd %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpackusdw %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: var_funnnel_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512F-NEXT:    vpsllvd %ymm2, %ymm0, %ymm2
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512F-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: var_funnnel_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT:    vpsllvd %ymm2, %ymm0, %ymm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512VL-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpor %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: var_funnnel_v8i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: var_funnnel_v8i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; AVX512VLBW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT:    vpsllvw %xmm1, %xmm0, %xmm2
; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: var_funnnel_v8i16:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vpshrdvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v8i16:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vpshrdvw %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: var_funnnel_v8i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; XOP-NEXT:    vprotw %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: var_funnnel_v8i16:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pxor %xmm2, %xmm2
; X86-SSE2-NEXT:    psubw %xmm1, %xmm2
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT:    pslld $23, %xmm1
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
; X86-SSE2-NEXT:    paddd %xmm3, %xmm1
; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; X86-SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; X86-SSE2-NEXT:    pslld $23, %xmm2
; X86-SSE2-NEXT:    paddd %xmm3, %xmm2
; X86-SSE2-NEXT:    cvttps2dq %xmm2, %xmm2
; X86-SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
; X86-SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pmulhuw %xmm2, %xmm1
; X86-SSE2-NEXT:    pmullw %xmm2, %xmm0
; X86-SSE2-NEXT:    por %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
; Both value operands of fshr are %x, so this funnel shift is a rotate-right of
; %x by %amt (per-element, modulo 16) — targets with suitable instructions
; reduce it to a single op (vpshrdvw / vprotw) above.
  %res = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x, <8 x i16> %x, <8 x i16> %amt)
  ret <8 x i16> %res
}
521
522define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
523; SSE2-LABEL: var_funnnel_v16i8:
524; SSE2:       # %bb.0:
525; SSE2-NEXT:    movdqa %xmm0, %xmm2
526; SSE2-NEXT:    pxor %xmm0, %xmm0
527; SSE2-NEXT:    pxor %xmm3, %xmm3
528; SSE2-NEXT:    psubb %xmm1, %xmm3
529; SSE2-NEXT:    psllw $5, %xmm3
530; SSE2-NEXT:    pxor %xmm1, %xmm1
531; SSE2-NEXT:    pcmpgtb %xmm3, %xmm1
532; SSE2-NEXT:    movdqa %xmm2, %xmm4
533; SSE2-NEXT:    psrlw $4, %xmm4
534; SSE2-NEXT:    pand {{.*}}(%rip), %xmm4
535; SSE2-NEXT:    movdqa %xmm2, %xmm5
536; SSE2-NEXT:    psllw $4, %xmm5
537; SSE2-NEXT:    pand {{.*}}(%rip), %xmm5
538; SSE2-NEXT:    por %xmm4, %xmm5
539; SSE2-NEXT:    pand %xmm1, %xmm5
540; SSE2-NEXT:    pandn %xmm2, %xmm1
541; SSE2-NEXT:    por %xmm5, %xmm1
542; SSE2-NEXT:    movdqa %xmm1, %xmm2
543; SSE2-NEXT:    psrlw $6, %xmm2
544; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
545; SSE2-NEXT:    movdqa %xmm1, %xmm4
546; SSE2-NEXT:    psllw $2, %xmm4
547; SSE2-NEXT:    pand {{.*}}(%rip), %xmm4
548; SSE2-NEXT:    por %xmm2, %xmm4
549; SSE2-NEXT:    paddb %xmm3, %xmm3
550; SSE2-NEXT:    pxor %xmm2, %xmm2
551; SSE2-NEXT:    pcmpgtb %xmm3, %xmm2
552; SSE2-NEXT:    pand %xmm2, %xmm4
553; SSE2-NEXT:    pandn %xmm1, %xmm2
554; SSE2-NEXT:    por %xmm4, %xmm2
555; SSE2-NEXT:    movdqa %xmm2, %xmm1
556; SSE2-NEXT:    paddb %xmm2, %xmm1
557; SSE2-NEXT:    movdqa %xmm2, %xmm4
558; SSE2-NEXT:    psrlw $7, %xmm4
559; SSE2-NEXT:    pand {{.*}}(%rip), %xmm4
560; SSE2-NEXT:    por %xmm1, %xmm4
561; SSE2-NEXT:    paddb %xmm3, %xmm3
562; SSE2-NEXT:    pcmpgtb %xmm3, %xmm0
563; SSE2-NEXT:    pand %xmm0, %xmm4
564; SSE2-NEXT:    pandn %xmm2, %xmm0
565; SSE2-NEXT:    por %xmm4, %xmm0
566; SSE2-NEXT:    retq
567;
568; SSE41-LABEL: var_funnnel_v16i8:
569; SSE41:       # %bb.0:
570; SSE41-NEXT:    movdqa %xmm0, %xmm2
571; SSE41-NEXT:    psrlw $4, %xmm0
572; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
573; SSE41-NEXT:    movdqa %xmm2, %xmm3
574; SSE41-NEXT:    psllw $4, %xmm3
575; SSE41-NEXT:    pand {{.*}}(%rip), %xmm3
576; SSE41-NEXT:    por %xmm0, %xmm3
577; SSE41-NEXT:    pxor %xmm0, %xmm0
578; SSE41-NEXT:    psubb %xmm1, %xmm0
579; SSE41-NEXT:    psllw $5, %xmm0
580; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
581; SSE41-NEXT:    movdqa %xmm2, %xmm1
582; SSE41-NEXT:    psrlw $6, %xmm1
583; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
584; SSE41-NEXT:    movdqa %xmm2, %xmm3
585; SSE41-NEXT:    psllw $2, %xmm3
586; SSE41-NEXT:    pand {{.*}}(%rip), %xmm3
587; SSE41-NEXT:    por %xmm1, %xmm3
588; SSE41-NEXT:    paddb %xmm0, %xmm0
589; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
590; SSE41-NEXT:    movdqa %xmm2, %xmm1
591; SSE41-NEXT:    paddb %xmm2, %xmm1
592; SSE41-NEXT:    movdqa %xmm2, %xmm3
593; SSE41-NEXT:    psrlw $7, %xmm3
594; SSE41-NEXT:    pand {{.*}}(%rip), %xmm3
595; SSE41-NEXT:    por %xmm1, %xmm3
596; SSE41-NEXT:    paddb %xmm0, %xmm0
597; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
598; SSE41-NEXT:    movdqa %xmm2, %xmm0
599; SSE41-NEXT:    retq
600;
601; AVX-LABEL: var_funnnel_v16i8:
602; AVX:       # %bb.0:
603; AVX-NEXT:    vpsrlw $4, %xmm0, %xmm2
604; AVX-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
605; AVX-NEXT:    vpsllw $4, %xmm0, %xmm3
606; AVX-NEXT:    vpand {{.*}}(%rip), %xmm3, %xmm3
607; AVX-NEXT:    vpor %xmm2, %xmm3, %xmm2
608; AVX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
609; AVX-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
610; AVX-NEXT:    vpsllw $5, %xmm1, %xmm1
611; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
612; AVX-NEXT:    vpsrlw $6, %xmm0, %xmm2
613; AVX-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
614; AVX-NEXT:    vpsllw $2, %xmm0, %xmm3
615; AVX-NEXT:    vpand {{.*}}(%rip), %xmm3, %xmm3
616; AVX-NEXT:    vpor %xmm2, %xmm3, %xmm2
617; AVX-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
618; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
619; AVX-NEXT:    vpaddb %xmm0, %xmm0, %xmm2
620; AVX-NEXT:    vpsrlw $7, %xmm0, %xmm3
621; AVX-NEXT:    vpand {{.*}}(%rip), %xmm3, %xmm3
622; AVX-NEXT:    vpor %xmm3, %xmm2, %xmm2
623; AVX-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
624; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
625; AVX-NEXT:    retq
626;
627; AVX512F-LABEL: var_funnnel_v16i8:
628; AVX512F:       # %bb.0:
629; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
630; AVX512F-NEXT:    vpand %xmm2, %xmm1, %xmm3
631; AVX512F-NEXT:    vpmovzxbd {{.*#+}} zmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero
632; AVX512F-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
633; AVX512F-NEXT:    vpsrlvd %zmm3, %zmm0, %zmm3
634; AVX512F-NEXT:    vpxor %xmm4, %xmm4, %xmm4
635; AVX512F-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
636; AVX512F-NEXT:    vpand %xmm2, %xmm1, %xmm1
637; AVX512F-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
638; AVX512F-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
639; AVX512F-NEXT:    vpord %zmm0, %zmm3, %zmm0
640; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
641; AVX512F-NEXT:    vzeroupper
642; AVX512F-NEXT:    retq
643;
644; AVX512VL-LABEL: var_funnnel_v16i8:
645; AVX512VL:       # %bb.0:
646; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
647; AVX512VL-NEXT:    vpand %xmm2, %xmm1, %xmm3
648; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} zmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero
649; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
650; AVX512VL-NEXT:    vpsrlvd %zmm3, %zmm0, %zmm3
651; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
652; AVX512VL-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
653; AVX512VL-NEXT:    vpand %xmm2, %xmm1, %xmm1
654; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
655; AVX512VL-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
656; AVX512VL-NEXT:    vpord %zmm0, %zmm3, %zmm0
657; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
658; AVX512VL-NEXT:    vzeroupper
659; AVX512VL-NEXT:    retq
660;
661; AVX512BW-LABEL: var_funnnel_v16i8:
662; AVX512BW:       # %bb.0:
663; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
664; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
665; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm3
666; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
667; AVX512BW-NEXT:    vpsrlvw %zmm3, %zmm0, %zmm3
668; AVX512BW-NEXT:    vpxor %xmm4, %xmm4, %xmm4
669; AVX512BW-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
670; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm1
671; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
672; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
673; AVX512BW-NEXT:    vpor %ymm0, %ymm3, %ymm0
674; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
675; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
676; AVX512BW-NEXT:    vzeroupper
677; AVX512BW-NEXT:    retq
678;
679; AVX512VLBW-LABEL: var_funnnel_v16i8:
680; AVX512VLBW:       # %bb.0:
681; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
682; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm3
683; AVX512VLBW-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
684; AVX512VLBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
685; AVX512VLBW-NEXT:    vpsrlvw %ymm3, %ymm0, %ymm3
686; AVX512VLBW-NEXT:    vpxor %xmm4, %xmm4, %xmm4
687; AVX512VLBW-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
688; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm1
689; AVX512VLBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
690; AVX512VLBW-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0
691; AVX512VLBW-NEXT:    vpor %ymm0, %ymm3, %ymm0
692; AVX512VLBW-NEXT:    vpmovwb %ymm0, %xmm0
693; AVX512VLBW-NEXT:    vzeroupper
694; AVX512VLBW-NEXT:    retq
695;
696; AVX512VBMI2-LABEL: var_funnnel_v16i8:
697; AVX512VBMI2:       # %bb.0:
698; AVX512VBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
699; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
700; AVX512VBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm3
701; AVX512VBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
702; AVX512VBMI2-NEXT:    vpsrlvw %zmm3, %zmm0, %zmm3
703; AVX512VBMI2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
704; AVX512VBMI2-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
705; AVX512VBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm1
706; AVX512VBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
707; AVX512VBMI2-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
708; AVX512VBMI2-NEXT:    vpor %ymm0, %ymm3, %ymm0
709; AVX512VBMI2-NEXT:    vpmovwb %zmm0, %ymm0
710; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
711; AVX512VBMI2-NEXT:    vzeroupper
712; AVX512VBMI2-NEXT:    retq
713;
714; AVX512VLVBMI2-LABEL: var_funnnel_v16i8:
715; AVX512VLVBMI2:       # %bb.0:
716; AVX512VLVBMI2-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
717; AVX512VLVBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm3
718; AVX512VLVBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
719; AVX512VLVBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
720; AVX512VLVBMI2-NEXT:    vpsrlvw %ymm3, %ymm0, %ymm3
721; AVX512VLVBMI2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
722; AVX512VLVBMI2-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
723; AVX512VLVBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm1
724; AVX512VLVBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
725; AVX512VLVBMI2-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0
726; AVX512VLVBMI2-NEXT:    vpor %ymm0, %ymm3, %ymm0
727; AVX512VLVBMI2-NEXT:    vpmovwb %ymm0, %xmm0
728; AVX512VLVBMI2-NEXT:    vzeroupper
729; AVX512VLVBMI2-NEXT:    retq
730;
731; XOP-LABEL: var_funnnel_v16i8:
732; XOP:       # %bb.0:
733; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
734; XOP-NEXT:    vpsubb %xmm1, %xmm2, %xmm1
735; XOP-NEXT:    vprotb %xmm1, %xmm0, %xmm0
736; XOP-NEXT:    retq
737;
738; X86-SSE2-LABEL: var_funnnel_v16i8:
739; X86-SSE2:       # %bb.0:
740; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
741; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
742; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
743; X86-SSE2-NEXT:    psubb %xmm1, %xmm3
744; X86-SSE2-NEXT:    psllw $5, %xmm3
745; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
746; X86-SSE2-NEXT:    pcmpgtb %xmm3, %xmm1
747; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
748; X86-SSE2-NEXT:    psrlw $4, %xmm4
749; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm4
750; X86-SSE2-NEXT:    movdqa %xmm2, %xmm5
751; X86-SSE2-NEXT:    psllw $4, %xmm5
752; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm5
753; X86-SSE2-NEXT:    por %xmm4, %xmm5
754; X86-SSE2-NEXT:    pand %xmm1, %xmm5
755; X86-SSE2-NEXT:    pandn %xmm2, %xmm1
756; X86-SSE2-NEXT:    por %xmm5, %xmm1
757; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
758; X86-SSE2-NEXT:    psrlw $6, %xmm2
759; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm2
760; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
761; X86-SSE2-NEXT:    psllw $2, %xmm4
762; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm4
763; X86-SSE2-NEXT:    por %xmm2, %xmm4
764; X86-SSE2-NEXT:    paddb %xmm3, %xmm3
765; X86-SSE2-NEXT:    pxor %xmm2, %xmm2
766; X86-SSE2-NEXT:    pcmpgtb %xmm3, %xmm2
767; X86-SSE2-NEXT:    pand %xmm2, %xmm4
768; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
769; X86-SSE2-NEXT:    por %xmm4, %xmm2
770; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
771; X86-SSE2-NEXT:    paddb %xmm2, %xmm1
772; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
773; X86-SSE2-NEXT:    psrlw $7, %xmm4
774; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm4
775; X86-SSE2-NEXT:    por %xmm1, %xmm4
776; X86-SSE2-NEXT:    paddb %xmm3, %xmm3
777; X86-SSE2-NEXT:    pcmpgtb %xmm3, %xmm0
778; X86-SSE2-NEXT:    pand %xmm0, %xmm4
779; X86-SSE2-NEXT:    pandn %xmm2, %xmm0
780; X86-SSE2-NEXT:    por %xmm4, %xmm0
781; X86-SSE2-NEXT:    retl
782  %res = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %x, <16 x i8> %x, <16 x i8> %amt)
783  ret <16 x i8> %res
784}
785
786;
787; Uniform Variable Shifts
788;
789
define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; fshr(x, x, splat(amt)) is a uniform variable rotate-right per i64 lane.
; AVX512* targets broadcast the amount and use vprorvq; XOP negates the
; amount and uses vprotq; plain SSE/AVX mask the amount (and its negation)
; with 63 and combine psrlq/psllq with por.  X86-SSE2 (32-bit) has no
; 64-bit scalar shift count, so it shifts both halves and blends with movsd.
; SSE-LABEL: splatvar_funnnel_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [63,63]
; SSE-NEXT:    pxor %xmm3, %xmm3
; SSE-NEXT:    psubq %xmm1, %xmm3
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    movdqa %xmm0, %xmm4
; SSE-NEXT:    psrlq %xmm1, %xmm4
; SSE-NEXT:    pand %xmm2, %xmm3
; SSE-NEXT:    psllq %xmm3, %xmm0
; SSE-NEXT:    por %xmm4, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splatvar_funnnel_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX-NEXT:    vpsrlq %xmm3, %xmm0, %xmm3
; AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX-NEXT:    vpsubq %xmm1, %xmm4, %xmm1
; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpor %xmm0, %xmm3, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpbroadcastq %xmm1, %xmm1
; AVX512F-NEXT:    vprorvq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_funnnel_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastq %xmm1, %xmm1
; AVX512VL-NEXT:    vprorvq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_funnnel_v2i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vpbroadcastq %xmm1, %xmm1
; AVX512BW-NEXT:    vprorvq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpbroadcastq %xmm1, %xmm1
; AVX512VLBW-NEXT:    vprorvq %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v2i64:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vpbroadcastq %xmm1, %xmm1
; AVX512VBMI2-NEXT:    vprorvq %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v2i64:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vpbroadcastq %xmm1, %xmm1
; AVX512VLVBMI2-NEXT:    vprorvq %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i64:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpsubq %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; XOPAVX1-NEXT:    vprotq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v2i64:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT:    vpsubq %xmm1, %xmm2, %xmm1
; XOPAVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
; XOPAVX2-NEXT:    vprotq %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [63,0,63,0]
; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
; X86-SSE2-NEXT:    psubq %xmm1, %xmm3
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm4
; X86-SSE2-NEXT:    psrlq %xmm1, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm5
; X86-SSE2-NEXT:    psrlq %xmm1, %xmm5
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
; X86-SSE2-NEXT:    pand %xmm2, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psllq %xmm3, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; X86-SSE2-NEXT:    psllq %xmm2, %xmm0
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT:    orpd %xmm5, %xmm0
; X86-SSE2-NEXT:    retl
  %splat = shufflevector <2 x i64> %amt, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> %splat)
  ret <2 x i64> %res
}
902
define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; fshr(x, x, splat(amt)) is a uniform variable rotate-right per i32 lane.
; AVX512* targets broadcast the amount and use vprorvd; XOP negates the
; amount and uses vprotd.  SSE2 extracts lane 0 with movd, negates and
; masks with 31 in scalar code, then uses pslld plus a psrld by (32 - amt);
; SSE4.1/AVX do the same shift-count setup in vector registers via
; pmovzxdq.
; SSE2-LABEL: splatvar_funnnel_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    negl %eax
; SSE2-NEXT:    andl $31, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pslld %xmm1, %xmm2
; SSE2-NEXT:    movl $32, %ecx
; SSE2-NEXT:    subl %eax, %ecx
; SSE2-NEXT:    movd %ecx, %xmm1
; SSE2-NEXT:    psrld %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: splatvar_funnnel_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    psubd %xmm1, %xmm2
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pslld %xmm1, %xmm3
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32,32,32,32]
; SSE41-NEXT:    psubd %xmm2, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT:    psrld %xmm1, %xmm0
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: splatvar_funnnel_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT:    vpslld %xmm2, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32]
; AVX1-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_funnnel_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT:    vpslld %xmm2, %xmm0, %xmm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: splatvar_funnnel_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512F-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_funnnel_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VL-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_funnnel_v4i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512BW-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VLBW-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v4i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VBMI2-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v4i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VLVBMI2-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v4i32:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; XOPAVX1-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v4i32:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; XOPAVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
; XOPAVX2-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v4i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    negl %eax
; X86-SSE2-NEXT:    andl $31, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pslld %xmm1, %xmm2
; X86-SSE2-NEXT:    movl $32, %ecx
; X86-SSE2-NEXT:    subl %eax, %ecx
; X86-SSE2-NEXT:    movd %ecx, %xmm1
; X86-SSE2-NEXT:    psrld %xmm1, %xmm0
; X86-SSE2-NEXT:    por %xmm2, %xmm0
; X86-SSE2-NEXT:    retl
  %splat = shufflevector <4 x i32> %amt, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %splat)
  ret <4 x i32> %res
}
1042
define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind {
; fshr(x, x, splat(amt)) is a uniform variable rotate-right per i16 lane.
; AVX512VBMI2 broadcasts the amount and uses vpshrdvw directly; XOP
; negates and uses vprotw.  Everything else negates the amount, masks it
; (constant loaded from memory), then combines psllw with a psrlw by
; (16 - amt); SSE2 isolates the scalar shift count with pand/pslldq/psrldq
; where SSE4.1+ use pmovzxwq.
; SSE2-LABEL: splatvar_funnnel_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    psubw %xmm1, %xmm2
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,0,0,0]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    psllw %xmm1, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16]
; SSE2-NEXT:    psubw %xmm2, %xmm1
; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT:    psrlw %xmm1, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: splatvar_funnnel_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    psubw %xmm1, %xmm2
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    psllw %xmm1, %xmm3
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16]
; SSE41-NEXT:    psubw %xmm2, %xmm1
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT:    psrlw %xmm1, %xmm0
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: splatvar_funnnel_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; AVX512VLBW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vpbroadcastw %xmm1, %xmm1
; AVX512VBMI2-NEXT:    vpshrdvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i16:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vpbroadcastw %xmm1, %xmm1
; AVX512VLVBMI2-NEXT:    vpshrdvw %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v8i16:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; XOPAVX1-NEXT:    vprotw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v8i16:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; XOPAVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
; XOPAVX2-NEXT:    vprotw %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v8i16:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pxor %xmm2, %xmm2
; X86-SSE2-NEXT:    psubw %xmm1, %xmm2
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm2
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,0,0,0]
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    psllw %xmm1, %xmm3
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16]
; X86-SSE2-NEXT:    psubw %xmm2, %xmm1
; X86-SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; X86-SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE2-NEXT:    psrlw %xmm1, %xmm0
; X86-SSE2-NEXT:    por %xmm3, %xmm0
; X86-SSE2-NEXT:    retl
  %splat = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x, <8 x i16> %x, <8 x i16> %splat)
  ret <8 x i16> %res
}
1198
1199define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
1200; SSE2-LABEL: splatvar_funnnel_v16i8:
1201; SSE2:       # %bb.0:
1202; SSE2-NEXT:    pxor %xmm2, %xmm2
1203; SSE2-NEXT:    psubb %xmm1, %xmm2
1204; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
1205; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1206; SSE2-NEXT:    psubb %xmm2, %xmm3
1207; SSE2-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
1208; SSE2-NEXT:    psrldq {{.*#+}} xmm2 = xmm2[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1209; SSE2-NEXT:    movdqa %xmm0, %xmm1
1210; SSE2-NEXT:    psllw %xmm2, %xmm1
1211; SSE2-NEXT:    pcmpeqd %xmm4, %xmm4
1212; SSE2-NEXT:    pcmpeqd %xmm5, %xmm5
1213; SSE2-NEXT:    psllw %xmm2, %xmm5
1214; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1215; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm5[0,0,0,0,4,5,6,7]
1216; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
1217; SSE2-NEXT:    pand %xmm2, %xmm1
1218; SSE2-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0]
1219; SSE2-NEXT:    psrldq {{.*#+}} xmm3 = xmm3[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1220; SSE2-NEXT:    psrlw %xmm3, %xmm0
1221; SSE2-NEXT:    psrlw %xmm3, %xmm4
1222; SSE2-NEXT:    psrlw $8, %xmm4
1223; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1224; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm4[0,0,0,0,4,5,6,7]
1225; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
1226; SSE2-NEXT:    pand %xmm0, %xmm2
1227; SSE2-NEXT:    por %xmm2, %xmm1
1228; SSE2-NEXT:    movdqa %xmm1, %xmm0
1229; SSE2-NEXT:    retq
1230;
1231; SSE41-LABEL: splatvar_funnnel_v16i8:
1232; SSE41:       # %bb.0:
1233; SSE41-NEXT:    pxor %xmm2, %xmm2
1234; SSE41-NEXT:    pxor %xmm3, %xmm3
1235; SSE41-NEXT:    psubb %xmm1, %xmm3
1236; SSE41-NEXT:    pand {{.*}}(%rip), %xmm3
1237; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm4 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1238; SSE41-NEXT:    movdqa %xmm0, %xmm1
1239; SSE41-NEXT:    psllw %xmm4, %xmm1
1240; SSE41-NEXT:    pcmpeqd %xmm5, %xmm5
1241; SSE41-NEXT:    pcmpeqd %xmm6, %xmm6
1242; SSE41-NEXT:    psllw %xmm4, %xmm6
1243; SSE41-NEXT:    pshufb %xmm2, %xmm6
1244; SSE41-NEXT:    pand %xmm6, %xmm1
1245; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1246; SSE41-NEXT:    psubb %xmm3, %xmm2
1247; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
1248; SSE41-NEXT:    psrlw %xmm2, %xmm0
1249; SSE41-NEXT:    psrlw %xmm2, %xmm5
1250; SSE41-NEXT:    pshufb {{.*#+}} xmm5 = xmm5[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1251; SSE41-NEXT:    pand %xmm0, %xmm5
1252; SSE41-NEXT:    por %xmm5, %xmm1
1253; SSE41-NEXT:    movdqa %xmm1, %xmm0
1254; SSE41-NEXT:    retq
1255;
1256; AVX1-LABEL: splatvar_funnnel_v16i8:
1257; AVX1:       # %bb.0:
1258; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1259; AVX1-NEXT:    vpsubb %xmm1, %xmm2, %xmm1
1260; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
1261; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1262; AVX1-NEXT:    vpsllw %xmm3, %xmm0, %xmm4
1263; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
1264; AVX1-NEXT:    vpsllw %xmm3, %xmm5, %xmm3
1265; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
1266; AVX1-NEXT:    vpand %xmm2, %xmm4, %xmm2
1267; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1268; AVX1-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
1269; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1270; AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
1271; AVX1-NEXT:    vpsrlw %xmm1, %xmm5, %xmm1
1272; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1273; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
1274; AVX1-NEXT:    vpor %xmm0, %xmm2, %xmm0
1275; AVX1-NEXT:    retq
1276;
1277; AVX2-LABEL: splatvar_funnnel_v16i8:
1278; AVX2:       # %bb.0:
1279; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1280; AVX2-NEXT:    vpsubb %xmm1, %xmm2, %xmm1
1281; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
1282; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1283; AVX2-NEXT:    vpsllw %xmm2, %xmm0, %xmm3
1284; AVX2-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
1285; AVX2-NEXT:    vpsllw %xmm2, %xmm4, %xmm2
1286; AVX2-NEXT:    vpbroadcastb %xmm2, %xmm2
1287; AVX2-NEXT:    vpand %xmm2, %xmm3, %xmm2
1288; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1289; AVX2-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
1290; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1291; AVX2-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
1292; AVX2-NEXT:    vpsrlw %xmm1, %xmm4, %xmm1
1293; AVX2-NEXT:    vpsrlw $8, %xmm1, %xmm1
1294; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
1295; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
1296; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
1297; AVX2-NEXT:    retq
1298;
1299; AVX512F-LABEL: splatvar_funnnel_v16i8:
1300; AVX512F:       # %bb.0:
1301; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1302; AVX512F-NEXT:    vpand %xmm2, %xmm1, %xmm3
1303; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1304; AVX512F-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1305; AVX512F-NEXT:    vpsrld %xmm3, %zmm0, %zmm3
1306; AVX512F-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1307; AVX512F-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
1308; AVX512F-NEXT:    vpand %xmm2, %xmm1, %xmm1
1309; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1310; AVX512F-NEXT:    vpslld %xmm1, %zmm0, %zmm0
1311; AVX512F-NEXT:    vpord %zmm0, %zmm3, %zmm0
1312; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
1313; AVX512F-NEXT:    vzeroupper
1314; AVX512F-NEXT:    retq
1315;
1316; AVX512VL-LABEL: splatvar_funnnel_v16i8:
1317; AVX512VL:       # %bb.0:
1318; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1319; AVX512VL-NEXT:    vpand %xmm2, %xmm1, %xmm3
1320; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1321; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1322; AVX512VL-NEXT:    vpsrld %xmm3, %zmm0, %zmm3
1323; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1324; AVX512VL-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
1325; AVX512VL-NEXT:    vpand %xmm2, %xmm1, %xmm1
1326; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1327; AVX512VL-NEXT:    vpslld %xmm1, %zmm0, %zmm0
1328; AVX512VL-NEXT:    vpord %zmm0, %zmm3, %zmm0
1329; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
1330; AVX512VL-NEXT:    vzeroupper
1331; AVX512VL-NEXT:    retq
1332;
1333; AVX512BW-LABEL: splatvar_funnnel_v16i8:
1334; AVX512BW:       # %bb.0:
1335; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1336; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm3
1337; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1338; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1339; AVX512BW-NEXT:    vpsrlw %xmm3, %ymm0, %ymm3
1340; AVX512BW-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1341; AVX512BW-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
1342; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm1
1343; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1344; AVX512BW-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
1345; AVX512BW-NEXT:    vpor %ymm0, %ymm3, %ymm0
1346; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
1347; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1348; AVX512BW-NEXT:    vzeroupper
1349; AVX512BW-NEXT:    retq
1350;
1351; AVX512VLBW-LABEL: splatvar_funnnel_v16i8:
1352; AVX512VLBW:       # %bb.0:
1353; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1354; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm3
1355; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1356; AVX512VLBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1357; AVX512VLBW-NEXT:    vpsrlw %xmm3, %ymm0, %ymm3
1358; AVX512VLBW-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1359; AVX512VLBW-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
1360; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm1
1361; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1362; AVX512VLBW-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
1363; AVX512VLBW-NEXT:    vpor %ymm0, %ymm3, %ymm0
1364; AVX512VLBW-NEXT:    vpmovwb %ymm0, %xmm0
1365; AVX512VLBW-NEXT:    vzeroupper
1366; AVX512VLBW-NEXT:    retq
1367;
1368; AVX512VBMI2-LABEL: splatvar_funnnel_v16i8:
1369; AVX512VBMI2:       # %bb.0:
1370; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1371; AVX512VBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm3
1372; AVX512VBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1373; AVX512VBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1374; AVX512VBMI2-NEXT:    vpsrlw %xmm3, %ymm0, %ymm3
1375; AVX512VBMI2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1376; AVX512VBMI2-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
1377; AVX512VBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm1
1378; AVX512VBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1379; AVX512VBMI2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
1380; AVX512VBMI2-NEXT:    vpor %ymm0, %ymm3, %ymm0
1381; AVX512VBMI2-NEXT:    vpmovwb %zmm0, %ymm0
1382; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1383; AVX512VBMI2-NEXT:    vzeroupper
1384; AVX512VBMI2-NEXT:    retq
1385;
1386; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i8:
1387; AVX512VLVBMI2:       # %bb.0:
1388; AVX512VLVBMI2-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1389; AVX512VLVBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm3
1390; AVX512VLVBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1391; AVX512VLVBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1392; AVX512VLVBMI2-NEXT:    vpsrlw %xmm3, %ymm0, %ymm3
1393; AVX512VLVBMI2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1394; AVX512VLVBMI2-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
1395; AVX512VLVBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm1
1396; AVX512VLVBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1397; AVX512VLVBMI2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
1398; AVX512VLVBMI2-NEXT:    vpor %ymm0, %ymm3, %ymm0
1399; AVX512VLVBMI2-NEXT:    vpmovwb %ymm0, %xmm0
1400; AVX512VLVBMI2-NEXT:    vzeroupper
1401; AVX512VLVBMI2-NEXT:    retq
1402;
1403; XOPAVX1-LABEL: splatvar_funnnel_v16i8:
1404; XOPAVX1:       # %bb.0:
1405; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1406; XOPAVX1-NEXT:    vpsubb %xmm1, %xmm2, %xmm1
1407; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1408; XOPAVX1-NEXT:    vprotb %xmm1, %xmm0, %xmm0
1409; XOPAVX1-NEXT:    retq
1410;
1411; XOPAVX2-LABEL: splatvar_funnnel_v16i8:
1412; XOPAVX2:       # %bb.0:
1413; XOPAVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1414; XOPAVX2-NEXT:    vpsubb %xmm1, %xmm2, %xmm1
1415; XOPAVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
1416; XOPAVX2-NEXT:    vprotb %xmm1, %xmm0, %xmm0
1417; XOPAVX2-NEXT:    retq
1418;
1419; X86-SSE2-LABEL: splatvar_funnnel_v16i8:
1420; X86-SSE2:       # %bb.0:
1421; X86-SSE2-NEXT:    pxor %xmm2, %xmm2
1422; X86-SSE2-NEXT:    psubb %xmm1, %xmm2
1423; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm2
1424; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1425; X86-SSE2-NEXT:    psubb %xmm2, %xmm3
1426; X86-SSE2-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
1427; X86-SSE2-NEXT:    psrldq {{.*#+}} xmm2 = xmm2[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1428; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1429; X86-SSE2-NEXT:    psllw %xmm2, %xmm1
1430; X86-SSE2-NEXT:    pcmpeqd %xmm4, %xmm4
1431; X86-SSE2-NEXT:    pcmpeqd %xmm5, %xmm5
1432; X86-SSE2-NEXT:    psllw %xmm2, %xmm5
1433; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1434; X86-SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm5[0,0,0,0,4,5,6,7]
1435; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
1436; X86-SSE2-NEXT:    pand %xmm2, %xmm1
1437; X86-SSE2-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0]
1438; X86-SSE2-NEXT:    psrldq {{.*#+}} xmm3 = xmm3[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1439; X86-SSE2-NEXT:    psrlw %xmm3, %xmm0
1440; X86-SSE2-NEXT:    psrlw %xmm3, %xmm4
1441; X86-SSE2-NEXT:    psrlw $8, %xmm4
1442; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1443; X86-SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm4[0,0,0,0,4,5,6,7]
1444; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
1445; X86-SSE2-NEXT:    pand %xmm0, %xmm2
1446; X86-SSE2-NEXT:    por %xmm2, %xmm1
1447; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
1448; X86-SSE2-NEXT:    retl
1449  %splat = shufflevector <16 x i8> %amt, <16 x i8> undef, <16 x i32> zeroinitializer
1450  %res = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %x, <16 x i8> %x, <16 x i8> %splat)
1451  ret <16 x i8> %res
1452}
1453
1454;
1455; Constant Shifts
1456;
1457
; fshr(x, x, c) is a rotate-right of x by c (elementwise); this checks codegen
; for per-element constant rotate amounts <4, 14> on v2i64 across targets.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py; regenerate
; rather than hand-editing them.
define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x) nounwind {
; SSE2-LABEL: constant_funnnel_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psllq $60, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psllq $50, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlq $4, %xmm1
; SSE2-NEXT:    psrlq $14, %xmm0
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    orpd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: constant_funnnel_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psllq $50, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psllq $60, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlq $14, %xmm1
; SSE41-NEXT:    psrlq $4, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: constant_funnnel_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq $50, %xmm0, %xmm1
; AVX1-NEXT:    vpsllq $60, %xmm0, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpsrlq $14, %xmm0, %xmm2
; AVX1-NEXT:    vpsrlq $4, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_funnnel_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvq {{.*}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: constant_funnnel_v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,14]
; AVX512F-NEXT:    vprorvq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: constant_funnnel_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprorvq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: constant_funnnel_v2i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,14]
; AVX512BW-NEXT:    vprorvq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: constant_funnnel_v2i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprorvq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: constant_funnnel_v2i64:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,14]
; AVX512VBMI2-NEXT:    vprorvq %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v2i64:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprorvq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: constant_funnnel_v2i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotq {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: constant_funnnel_v2i64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [63,0,63,0]
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = <4,u,14,u>
; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
; X86-SSE2-NEXT:    psubq %xmm2, %xmm3
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm4
; X86-SSE2-NEXT:    psrlq %xmm2, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm5
; X86-SSE2-NEXT:    psrlq %xmm2, %xmm5
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
; X86-SSE2-NEXT:    pand %xmm1, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psllq %xmm3, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; X86-SSE2-NEXT:    psllq %xmm2, %xmm0
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT:    orpd %xmm5, %xmm0
; X86-SSE2-NEXT:    retl
  ; Same value in both data operands -> rotate-right by <4, 14>.
  %res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 4, i64 14>)
  ret <2 x i64> %res
}
1576
; fshr(x, x, c) is a rotate-right of x by c (elementwise); this checks codegen
; for per-element constant rotate amounts <4, 5, 6, 7> on v4i32 across targets.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py; regenerate
; rather than hand-editing them.
define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind {
; SSE2-LABEL: constant_funnnel_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [268435456,134217728,67108864,33554432]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: constant_funnnel_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [268435456,134217728,67108864,33554432]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm2, %xmm3
; SSE41-NEXT:    pmuludq %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: constant_funnnel_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [268435456,134217728,67108864,33554432]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_funnnel_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: constant_funnnel_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,5,6,7]
; AVX512F-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: constant_funnnel_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: constant_funnnel_v4i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,5,6,7]
; AVX512BW-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: constant_funnnel_v4i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: constant_funnnel_v4i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,5,6,7]
; AVX512VBMI2-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v4i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: constant_funnnel_v4i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: constant_funnnel_v4i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [268435456,134217728,67108864,33554432]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    por %xmm3, %xmm0
; X86-SSE2-NEXT:    retl
  ; Same value in both data operands -> rotate-right by <4, 5, 6, 7>.
  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 4, i32 5, i32 6, i32 7>)
  ret <4 x i32> %res
}
1694
; fshr(x, x, c) is a rotate-right of x by c (elementwise); this checks codegen
; for per-element constant rotate amounts <0..7> on v8i16 across targets
; (amount 0 exercises the zero-rotate edge case).
; CHECK lines are autogenerated by utils/update_llc_test_checks.py; regenerate
; rather than hand-editing them.
define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x) nounwind {
; SSE-LABEL: constant_funnnel_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1,32768,16384,8192,4096,2048,1024,512]
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pmulhuw %xmm1, %xmm2
; SSE-NEXT:    pmullw %xmm1, %xmm0
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: constant_funnnel_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,32768,16384,8192,4096,2048,1024,512]
; AVX-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: constant_funnnel_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,32768,16384,8192,4096,2048,1024,512]
; AVX512F-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2
; AVX512F-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: constant_funnnel_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,32768,16384,8192,4096,2048,1024,512]
; AVX512VL-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2
; AVX512VL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: constant_funnnel_v8i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm1 = [16,1,2,3,4,5,6,7]
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm1
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,15,14,13,12,11,10,9]
; AVX512BW-NEXT:    vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: constant_funnnel_v8i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpsrlvw {{.*}}(%rip), %xmm0, %xmm1
; AVX512VLBW-NEXT:    vpsllvw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: constant_funnnel_v8i16:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
; AVX512VBMI2-NEXT:    vpshrdvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v8i16:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vpshrdvw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: constant_funnnel_v8i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotw {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: constant_funnnel_v8i16:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,32768,16384,8192,4096,2048,1024,512]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pmulhuw %xmm1, %xmm2
; X86-SSE2-NEXT:    pmullw %xmm1, %xmm0
; X86-SSE2-NEXT:    por %xmm2, %xmm0
; X86-SSE2-NEXT:    retl
  ; Same value in both data operands -> rotate-right by <0, 1, 2, 3, 4, 5, 6, 7>.
  %res = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x, <8 x i16> %x, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
  ret <8 x i16> %res
}
1777
1778define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind {
1779; SSE2-LABEL: constant_funnnel_v16i8:
1780; SSE2:       # %bb.0:
1781; SSE2-NEXT:    pxor %xmm1, %xmm1
1782; SSE2-NEXT:    movdqa %xmm0, %xmm2
1783; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
1784; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm2
1785; SSE2-NEXT:    psrlw $8, %xmm2
1786; SSE2-NEXT:    movdqa %xmm0, %xmm3
1787; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
1788; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm3
1789; SSE2-NEXT:    psrlw $8, %xmm3
1790; SSE2-NEXT:    packuswb %xmm2, %xmm3
1791; SSE2-NEXT:    movdqa %xmm0, %xmm1
1792; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1793; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm1
1794; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1795; SSE2-NEXT:    pand %xmm2, %xmm1
1796; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1797; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm0
1798; SSE2-NEXT:    pand %xmm2, %xmm0
1799; SSE2-NEXT:    packuswb %xmm1, %xmm0
1800; SSE2-NEXT:    por %xmm3, %xmm0
1801; SSE2-NEXT:    retq
1802;
1803; SSE41-LABEL: constant_funnnel_v16i8:
1804; SSE41:       # %bb.0:
1805; SSE41-NEXT:    movdqa %xmm0, %xmm2
1806; SSE41-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1807; SSE41-NEXT:    pmullw {{.*}}(%rip), %xmm2
1808; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
1809; SSE41-NEXT:    pand %xmm3, %xmm2
1810; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1811; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [1,128,64,32,16,8,4,2]
1812; SSE41-NEXT:    pmullw %xmm1, %xmm4
1813; SSE41-NEXT:    pand %xmm3, %xmm4
1814; SSE41-NEXT:    packuswb %xmm2, %xmm4
1815; SSE41-NEXT:    pxor %xmm2, %xmm2
1816; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
1817; SSE41-NEXT:    pmullw {{.*}}(%rip), %xmm0
1818; SSE41-NEXT:    psrlw $8, %xmm0
1819; SSE41-NEXT:    pmullw {{.*}}(%rip), %xmm1
1820; SSE41-NEXT:    psrlw $8, %xmm1
1821; SSE41-NEXT:    packuswb %xmm0, %xmm1
1822; SSE41-NEXT:    por %xmm4, %xmm1
1823; SSE41-NEXT:    movdqa %xmm1, %xmm0
1824; SSE41-NEXT:    retq
1825;
1826; AVX1-LABEL: constant_funnnel_v16i8:
1827; AVX1:       # %bb.0:
1828; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1829; AVX1-NEXT:    vpmullw {{.*}}(%rip), %xmm1, %xmm1
1830; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1831; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
1832; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1833; AVX1-NEXT:    vpmullw {{.*}}(%rip), %xmm3, %xmm4
1834; AVX1-NEXT:    vpand %xmm2, %xmm4, %xmm2
1835; AVX1-NEXT:    vpackuswb %xmm1, %xmm2, %xmm1
1836; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1837; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
1838; AVX1-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
1839; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
1840; AVX1-NEXT:    vpmullw {{.*}}(%rip), %xmm3, %xmm2
1841; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
1842; AVX1-NEXT:    vpackuswb %xmm0, %xmm2, %xmm0
1843; AVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
1844; AVX1-NEXT:    retq
1845;
1846; AVX2-LABEL: constant_funnnel_v16i8:
1847; AVX2:       # %bb.0:
1848; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1849; AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm1
1850; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
1851; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
1852; AVX2-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
1853; AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
1854; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
1855; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
1856; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
1857; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1858; AVX2-NEXT:    vzeroupper
1859; AVX2-NEXT:    retq
1860;
1861; AVX512F-LABEL: constant_funnnel_v16i8:
1862; AVX512F:       # %bb.0:
1863; AVX512F-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1864; AVX512F-NEXT:    vpsllvd {{.*}}(%rip), %zmm0, %zmm1
1865; AVX512F-NEXT:    vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
1866; AVX512F-NEXT:    vpord %zmm1, %zmm0, %zmm0
1867; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
1868; AVX512F-NEXT:    vzeroupper
1869; AVX512F-NEXT:    retq
1870;
1871; AVX512VL-LABEL: constant_funnnel_v16i8:
1872; AVX512VL:       # %bb.0:
1873; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1874; AVX512VL-NEXT:    vpsllvd {{.*}}(%rip), %zmm0, %zmm1
1875; AVX512VL-NEXT:    vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
1876; AVX512VL-NEXT:    vpord %zmm1, %zmm0, %zmm0
1877; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
1878; AVX512VL-NEXT:    vzeroupper
1879; AVX512VL-NEXT:    retq
1880;
1881; AVX512BW-LABEL: constant_funnnel_v16i8:
1882; AVX512BW:       # %bb.0:
1883; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
1884; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1885; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm1
1886; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,7,6,5,4,3,2,1]
1887; AVX512BW-NEXT:    vpsrlvw %zmm2, %zmm0, %zmm0
1888; AVX512BW-NEXT:    vpor %ymm1, %ymm0, %ymm0
1889; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
1890; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1891; AVX512BW-NEXT:    vzeroupper
1892; AVX512BW-NEXT:    retq
1893;
1894; AVX512VLBW-LABEL: constant_funnnel_v16i8:
1895; AVX512VLBW:       # %bb.0:
1896; AVX512VLBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1897; AVX512VLBW-NEXT:    vpsllvw {{.*}}(%rip), %ymm0, %ymm1
1898; AVX512VLBW-NEXT:    vpsrlvw {{.*}}(%rip), %ymm0, %ymm0
1899; AVX512VLBW-NEXT:    vpor %ymm1, %ymm0, %ymm0
1900; AVX512VLBW-NEXT:    vpmovwb %ymm0, %xmm0
1901; AVX512VLBW-NEXT:    vzeroupper
1902; AVX512VLBW-NEXT:    retq
1903;
1904; AVX512VBMI2-LABEL: constant_funnnel_v16i8:
1905; AVX512VBMI2:       # %bb.0:
1906; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
1907; AVX512VBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1908; AVX512VBMI2-NEXT:    vpsllvw %zmm1, %zmm0, %zmm1
1909; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,7,6,5,4,3,2,1]
1910; AVX512VBMI2-NEXT:    vpsrlvw %zmm2, %zmm0, %zmm0
1911; AVX512VBMI2-NEXT:    vpor %ymm1, %ymm0, %ymm0
1912; AVX512VBMI2-NEXT:    vpmovwb %zmm0, %ymm0
1913; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1914; AVX512VBMI2-NEXT:    vzeroupper
1915; AVX512VBMI2-NEXT:    retq
1916;
1917; AVX512VLVBMI2-LABEL: constant_funnnel_v16i8:
1918; AVX512VLVBMI2:       # %bb.0:
1919; AVX512VLVBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1920; AVX512VLVBMI2-NEXT:    vpsllvw {{.*}}(%rip), %ymm0, %ymm1
1921; AVX512VLVBMI2-NEXT:    vpsrlvw {{.*}}(%rip), %ymm0, %ymm0
1922; AVX512VLVBMI2-NEXT:    vpor %ymm1, %ymm0, %ymm0
1923; AVX512VLVBMI2-NEXT:    vpmovwb %ymm0, %xmm0
1924; AVX512VLVBMI2-NEXT:    vzeroupper
1925; AVX512VLVBMI2-NEXT:    retq
1926;
1927; XOP-LABEL: constant_funnnel_v16i8:
1928; XOP:       # %bb.0:
1929; XOP-NEXT:    vprotb {{.*}}(%rip), %xmm0, %xmm0
1930; XOP-NEXT:    retq
1931;
1932; X86-SSE2-LABEL: constant_funnnel_v16i8:
1933; X86-SSE2:       # %bb.0:
1934; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
1935; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
1936; X86-SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
1937; X86-SSE2-NEXT:    pmullw {{\.LCPI.*}}, %xmm2
1938; X86-SSE2-NEXT:    psrlw $8, %xmm2
1939; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
1940; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
1941; X86-SSE2-NEXT:    pmullw {{\.LCPI.*}}, %xmm3
1942; X86-SSE2-NEXT:    psrlw $8, %xmm3
1943; X86-SSE2-NEXT:    packuswb %xmm2, %xmm3
1944; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1945; X86-SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1946; X86-SSE2-NEXT:    pmullw {{\.LCPI.*}}, %xmm1
1947; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1948; X86-SSE2-NEXT:    pand %xmm2, %xmm1
1949; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1950; X86-SSE2-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
1951; X86-SSE2-NEXT:    pand %xmm2, %xmm0
1952; X86-SSE2-NEXT:    packuswb %xmm1, %xmm0
1953; X86-SSE2-NEXT:    por %xmm3, %xmm0
1954; X86-SSE2-NEXT:    retl
1955  %res = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %x, <16 x i8> %x, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
1956  ret <16 x i8> %res
1957}
1958
1959;
1960; Uniform Constant Shifts
1961;
1962
define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x) nounwind {
; fshr with the same value for both inputs and a uniform amount of 14 is a
; rotate right by 14 (= rotate left by 50). AVX512 targets select a single
; vprorq; XOP selects vprotq; SSE/AVX fall back to shift-left + shift-right
; + or. The non-VL AVX512 variants must widen to zmm (hence the kill
; annotations and vzeroupper).
; SSE-LABEL: splatconstant_funnnel_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psllq $50, %xmm1
; SSE-NEXT:    psrlq $14, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splatconstant_funnnel_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllq $50, %xmm0, %xmm1
; AVX-NEXT:    vpsrlq $14, %xmm0, %xmm0
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: splatconstant_funnnel_v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vprorq $14, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprorq $14, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v2i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vprorq $14, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v2i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprorq $14, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i64:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vprorq $14, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v2i64:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprorq $14, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: splatconstant_funnnel_v2i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotq $50, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: splatconstant_funnnel_v2i64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psllq $50, %xmm1
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm1[0,1]
; X86-SSE2-NEXT:    psrlq $14, %xmm0
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm0[0,1]
; X86-SSE2-NEXT:    orpd %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
  %res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 14, i64 14>)
  ret <2 x i64> %res
}
2035
define <4 x i32> @splatconstant_funnnel_v4i32(<4 x i32> %x) nounwind {
; Uniform fshr(x, x, 4) on v4i32 is a rotate right by 4 (= rotate left by
; 28). AVX512 targets select a single vprord; XOP selects vprotd; SSE/AVX
; fall back to pslld/psrld + por. Non-VL AVX512 widens to zmm to use the
; 512-bit-only rotate.
; SSE-LABEL: splatconstant_funnnel_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $4, %xmm1
; SSE-NEXT:    pslld $28, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splatconstant_funnnel_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $4, %xmm0, %xmm1
; AVX-NEXT:    vpslld $28, %xmm0, %xmm0
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: splatconstant_funnnel_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vprord $4, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprord $4, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v4i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vprord $4, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v4i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprord $4, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vprord $4, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprord $4, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: splatconstant_funnnel_v4i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd $28, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: splatconstant_funnnel_v4i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $4, %xmm1
; X86-SSE2-NEXT:    pslld $28, %xmm0
; X86-SSE2-NEXT:    por %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 4, i32 4, i32 4, i32 4>)
  ret <4 x i32> %res
}
2106
define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x) nounwind {
; Uniform fshr(x, x, 7) on v8i16 is a rotate right by 7 (= rotate left by
; 9). There is no 16-bit rotate in base AVX512, so most targets use
; psllw/psrlw + por; only VBMI2 has a funnel-shift instruction (vpshrdw)
; and XOP has a native rotate (vprotw).
; SSE-LABEL: splatconstant_funnnel_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrlw $7, %xmm1
; SSE-NEXT:    psllw $9, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splatconstant_funnnel_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $7, %xmm0, %xmm1
; AVX-NEXT:    vpsllw $9, %xmm0, %xmm0
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: splatconstant_funnnel_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlw $7, %xmm0, %xmm1
; AVX512F-NEXT:    vpsllw $9, %xmm0, %xmm0
; AVX512F-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlw $7, %xmm0, %xmm1
; AVX512VL-NEXT:    vpsllw $9, %xmm0, %xmm0
; AVX512VL-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v8i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $7, %xmm0, %xmm1
; AVX512BW-NEXT:    vpsllw $9, %xmm0, %xmm0
; AVX512BW-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v8i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpsrlw $7, %xmm0, %xmm1
; AVX512VLBW-NEXT:    vpsllw $9, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vpshrdw $7, %zmm0, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i16:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vpshrdw $7, %xmm0, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: splatconstant_funnnel_v8i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotw $9, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: splatconstant_funnnel_v8i16:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrlw $7, %xmm1
; X86-SSE2-NEXT:    psllw $9, %xmm0
; X86-SSE2-NEXT:    por %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
  %res = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x, <8 x i16> %x, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  ret <8 x i16> %res
}
2179
define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
; Uniform fshr(x, x, 4) on v16i8 is a rotate right by 4 (= rotate left by
; 4). x86 has no 8-bit element shifts, so the word shifts need the spilled
; cross-byte bits masked off with pand; the VL targets fold the two masks
; and the or into one vpternlogq. Only XOP has a byte rotate (vprotb).
; SSE-LABEL: splatconstant_funnnel_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrlw $4, %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE-NEXT:    psllw $4, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splatconstant_funnnel_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $4, %xmm0, %xmm1
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vpsllw $4, %xmm0, %xmm0
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: splatconstant_funnnel_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsllw $4, %xmm0, %xmm1
; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlw $4, %xmm0, %xmm1
; AVX512VL-NEXT:    vpsllw $4, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $4, %xmm0, %xmm1
; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpsrlw $4, %xmm0, %xmm1
; AVX512VLBW-NEXT:    vpsllw $4, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    vpsllw $4, %xmm0, %xmm1
; AVX512VBMI2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512VBMI2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX512VBMI2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512VBMI2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vpsrlw $4, %xmm0, %xmm1
; AVX512VLVBMI2-NEXT:    vpsllw $4, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotb $4, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: splatconstant_funnnel_v16i8:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrlw $4, %xmm1
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT:    psllw $4, %xmm0
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    por %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
  %res = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %x, <16 x i8> %x, <16 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
  ret <16 x i8> %res
}
2265