1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512VL
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW
10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512,AVX512VBMI2
11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLVBMI2
12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
13; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
14
15; Just one 32-bit run to make sure we do reasonable things for i64 cases.
16; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86-SSE2
17
18declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
19
20;
21; Variable Shifts
22;
23
24define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
25; SSE2-LABEL: var_funnnel_v2i32:
26; SSE2:       # %bb.0:
27; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
28; SSE2-NEXT:    pslld $23, %xmm1
29; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm1
30; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
31; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
32; SSE2-NEXT:    pmuludq %xmm1, %xmm0
33; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
34; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
35; SSE2-NEXT:    pmuludq %xmm2, %xmm1
36; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
37; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
38; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
39; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
40; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
41; SSE2-NEXT:    por %xmm3, %xmm0
42; SSE2-NEXT:    retq
43;
44; SSE41-LABEL: var_funnnel_v2i32:
45; SSE41:       # %bb.0:
46; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
47; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
48; SSE41-NEXT:    pslld $23, %xmm1
49; SSE41-NEXT:    paddd {{.*}}(%rip), %xmm1
50; SSE41-NEXT:    cvttps2dq %xmm1, %xmm1
51; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
52; SSE41-NEXT:    pmuludq %xmm2, %xmm3
53; SSE41-NEXT:    pmuludq %xmm1, %xmm0
54; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
55; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
56; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
57; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
58; SSE41-NEXT:    por %xmm1, %xmm0
59; SSE41-NEXT:    retq
60;
61; AVX1-LABEL: var_funnnel_v2i32:
62; AVX1:       # %bb.0:
63; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
64; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
65; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
66; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm1, %xmm1
67; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
68; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
69; AVX1-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
70; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
71; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
72; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
73; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
74; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
75; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
76; AVX1-NEXT:    retq
77;
78; AVX2-LABEL: var_funnnel_v2i32:
79; AVX2:       # %bb.0:
80; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
81; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
82; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2
83; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
84; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
85; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
86; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
87; AVX2-NEXT:    retq
88;
89; AVX512F-LABEL: var_funnnel_v2i32:
90; AVX512F:       # %bb.0:
91; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
92; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
93; AVX512F-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
94; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
95; AVX512F-NEXT:    vzeroupper
96; AVX512F-NEXT:    retq
97;
98; AVX512VL-LABEL: var_funnnel_v2i32:
99; AVX512VL:       # %bb.0:
100; AVX512VL-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
101; AVX512VL-NEXT:    retq
102;
103; AVX512BW-LABEL: var_funnnel_v2i32:
104; AVX512BW:       # %bb.0:
105; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
106; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
107; AVX512BW-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
108; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
109; AVX512BW-NEXT:    vzeroupper
110; AVX512BW-NEXT:    retq
111;
112; AVX512VLBW-LABEL: var_funnnel_v2i32:
113; AVX512VLBW:       # %bb.0:
114; AVX512VLBW-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
115; AVX512VLBW-NEXT:    retq
116;
117; AVX512VBMI2-LABEL: var_funnnel_v2i32:
118; AVX512VBMI2:       # %bb.0:
119; AVX512VBMI2-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
120; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
121; AVX512VBMI2-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
122; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
123; AVX512VBMI2-NEXT:    vzeroupper
124; AVX512VBMI2-NEXT:    retq
125;
126; AVX512VLVBMI2-LABEL: var_funnnel_v2i32:
127; AVX512VLVBMI2:       # %bb.0:
128; AVX512VLVBMI2-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
129; AVX512VLVBMI2-NEXT:    retq
130;
131; XOP-LABEL: var_funnnel_v2i32:
132; XOP:       # %bb.0:
133; XOP-NEXT:    vprotd %xmm1, %xmm0, %xmm0
134; XOP-NEXT:    retq
135;
136; X86-SSE2-LABEL: var_funnnel_v2i32:
137; X86-SSE2:       # %bb.0:
138; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm1
139; X86-SSE2-NEXT:    pslld $23, %xmm1
140; X86-SSE2-NEXT:    paddd {{\.LCPI.*}}, %xmm1
141; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
142; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
143; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm0
144; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
145; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
146; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
147; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
148; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
149; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
150; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
151; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
152; X86-SSE2-NEXT:    por %xmm3, %xmm0
153; X86-SSE2-NEXT:    retl
; fshl with both value operands equal to %x is a per-lane rotate-left of %x by
; %amt (modulo 32). Targets with a native variable rotate (AVX512 vprolvd, XOP
; vprotd) lower it to a single instruction; the others expand it via variable
; shifts or a 2^amt multiply (pslld $23 / paddd / cvttps2dq builds 2^amt).
154  %res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %amt)
155  ret <2 x i32> %res
156}
157
158;
159; Uniform Variable Shifts
160;
161
162define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
163; SSE2-LABEL: splatvar_funnnel_v2i32:
164; SSE2:       # %bb.0:
165; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
166; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
167; SSE2-NEXT:    pslld $23, %xmm1
168; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm1
169; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
170; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
171; SSE2-NEXT:    pmuludq %xmm1, %xmm0
172; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
173; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
174; SSE2-NEXT:    pmuludq %xmm2, %xmm1
175; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
176; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
177; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
178; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
179; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
180; SSE2-NEXT:    por %xmm3, %xmm0
181; SSE2-NEXT:    retq
182;
183; SSE41-LABEL: splatvar_funnnel_v2i32:
184; SSE41:       # %bb.0:
185; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
186; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
187; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
188; SSE41-NEXT:    pslld $23, %xmm1
189; SSE41-NEXT:    paddd {{.*}}(%rip), %xmm1
190; SSE41-NEXT:    cvttps2dq %xmm1, %xmm1
191; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
192; SSE41-NEXT:    pmuludq %xmm2, %xmm3
193; SSE41-NEXT:    pmuludq %xmm1, %xmm0
194; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
195; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
196; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
197; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
198; SSE41-NEXT:    por %xmm1, %xmm0
199; SSE41-NEXT:    retq
200;
201; AVX1-LABEL: splatvar_funnnel_v2i32:
202; AVX1:       # %bb.0:
203; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
204; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
205; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
206; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
207; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm1, %xmm1
208; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
209; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
210; AVX1-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
211; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
212; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
213; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
214; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
215; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
216; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
217; AVX1-NEXT:    retq
218;
219; AVX2-LABEL: splatvar_funnnel_v2i32:
220; AVX2:       # %bb.0:
221; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
222; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
223; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
224; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
225; AVX2-NEXT:    vpslld %xmm2, %xmm0, %xmm2
226; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
227; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
228; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
229; AVX2-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
230; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
231; AVX2-NEXT:    retq
232;
233; AVX512F-LABEL: splatvar_funnnel_v2i32:
234; AVX512F:       # %bb.0:
235; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
236; AVX512F-NEXT:    vpbroadcastd %xmm1, %xmm1
237; AVX512F-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
238; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
239; AVX512F-NEXT:    vzeroupper
240; AVX512F-NEXT:    retq
241;
242; AVX512VL-LABEL: splatvar_funnnel_v2i32:
243; AVX512VL:       # %bb.0:
244; AVX512VL-NEXT:    vpbroadcastd %xmm1, %xmm1
245; AVX512VL-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
246; AVX512VL-NEXT:    retq
247;
248; AVX512BW-LABEL: splatvar_funnnel_v2i32:
249; AVX512BW:       # %bb.0:
250; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
251; AVX512BW-NEXT:    vpbroadcastd %xmm1, %xmm1
252; AVX512BW-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
253; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
254; AVX512BW-NEXT:    vzeroupper
255; AVX512BW-NEXT:    retq
256;
257; AVX512VLBW-LABEL: splatvar_funnnel_v2i32:
258; AVX512VLBW:       # %bb.0:
259; AVX512VLBW-NEXT:    vpbroadcastd %xmm1, %xmm1
260; AVX512VLBW-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
261; AVX512VLBW-NEXT:    retq
262;
263; AVX512VBMI2-LABEL: splatvar_funnnel_v2i32:
264; AVX512VBMI2:       # %bb.0:
265; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
266; AVX512VBMI2-NEXT:    vpbroadcastd %xmm1, %xmm1
267; AVX512VBMI2-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
268; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
269; AVX512VBMI2-NEXT:    vzeroupper
270; AVX512VBMI2-NEXT:    retq
271;
272; AVX512VLVBMI2-LABEL: splatvar_funnnel_v2i32:
273; AVX512VLVBMI2:       # %bb.0:
274; AVX512VLVBMI2-NEXT:    vpbroadcastd %xmm1, %xmm1
275; AVX512VLVBMI2-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
276; AVX512VLVBMI2-NEXT:    retq
277;
278; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
279; XOPAVX1:       # %bb.0:
280; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
281; XOPAVX1-NEXT:    vprotd %xmm1, %xmm0, %xmm0
282; XOPAVX1-NEXT:    retq
283;
284; XOPAVX2-LABEL: splatvar_funnnel_v2i32:
285; XOPAVX2:       # %bb.0:
286; XOPAVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
287; XOPAVX2-NEXT:    vprotd %xmm1, %xmm0, %xmm0
288; XOPAVX2-NEXT:    retq
289;
290; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
291; X86-SSE2:       # %bb.0:
292; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
293; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm1
294; X86-SSE2-NEXT:    pslld $23, %xmm1
295; X86-SSE2-NEXT:    paddd {{\.LCPI.*}}, %xmm1
296; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
297; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
298; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm0
299; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
300; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
301; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
302; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
303; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
304; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
305; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
306; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
307; X86-SSE2-NEXT:    por %xmm3, %xmm0
308; X86-SSE2-NEXT:    retl
; Same rotate-left as var_funnnel_v2i32, but the amount is first splatted from
; lane 0 of %amt, so every lane rotates by the same (runtime-variable) count.
; The splat shows up as pshufd/vpbroadcastd ahead of the rotate lowering above.
309  %splat = shufflevector <2 x i32> %amt, <2 x i32> undef, <2 x i32> zeroinitializer
310  %res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %splat)
311  ret <2 x i32> %res
312}
313
314;
315; Constant Shifts
316;
317
318define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
319; SSE2-LABEL: constant_funnnel_v2i32:
320; SSE2:       # %bb.0:
321; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [16,32,1,1]
322; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
323; SSE2-NEXT:    pmuludq %xmm1, %xmm0
324; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
325; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
326; SSE2-NEXT:    pmuludq %xmm2, %xmm1
327; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
328; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
329; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
330; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
331; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
332; SSE2-NEXT:    por %xmm3, %xmm0
333; SSE2-NEXT:    retq
334;
335; SSE41-LABEL: constant_funnnel_v2i32:
336; SSE41:       # %bb.0:
337; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [16,32,1,1]
338; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
339; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
340; SSE41-NEXT:    pmuludq %xmm2, %xmm3
341; SSE41-NEXT:    pmuludq %xmm1, %xmm0
342; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
343; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
344; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
345; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
346; SSE41-NEXT:    por %xmm1, %xmm0
347; SSE41-NEXT:    retq
348;
349; AVX1-LABEL: constant_funnnel_v2i32:
350; AVX1:       # %bb.0:
351; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [16,32,1,1]
352; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
353; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
354; AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
355; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
356; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
357; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
358; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
359; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
360; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
361; AVX1-NEXT:    retq
362;
363; AVX2-LABEL: constant_funnnel_v2i32:
364; AVX2:       # %bb.0:
365; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
366; AVX2-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
367; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
368; AVX2-NEXT:    retq
369;
370; AVX512F-LABEL: constant_funnnel_v2i32:
371; AVX512F:       # %bb.0:
372; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
373; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
374; AVX512F-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
375; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
376; AVX512F-NEXT:    vzeroupper
377; AVX512F-NEXT:    retq
378;
379; AVX512VL-LABEL: constant_funnnel_v2i32:
380; AVX512VL:       # %bb.0:
381; AVX512VL-NEXT:    vprolvd {{.*}}(%rip), %xmm0, %xmm0
382; AVX512VL-NEXT:    retq
383;
384; AVX512BW-LABEL: constant_funnnel_v2i32:
385; AVX512BW:       # %bb.0:
386; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
387; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
388; AVX512BW-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
389; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
390; AVX512BW-NEXT:    vzeroupper
391; AVX512BW-NEXT:    retq
392;
393; AVX512VLBW-LABEL: constant_funnnel_v2i32:
394; AVX512VLBW:       # %bb.0:
395; AVX512VLBW-NEXT:    vprolvd {{.*}}(%rip), %xmm0, %xmm0
396; AVX512VLBW-NEXT:    retq
397;
398; AVX512VBMI2-LABEL: constant_funnnel_v2i32:
399; AVX512VBMI2:       # %bb.0:
400; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
401; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
402; AVX512VBMI2-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
403; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
404; AVX512VBMI2-NEXT:    vzeroupper
405; AVX512VBMI2-NEXT:    retq
406;
407; AVX512VLVBMI2-LABEL: constant_funnnel_v2i32:
408; AVX512VLVBMI2:       # %bb.0:
409; AVX512VLVBMI2-NEXT:    vprolvd {{.*}}(%rip), %xmm0, %xmm0
410; AVX512VLVBMI2-NEXT:    retq
411;
412; XOP-LABEL: constant_funnnel_v2i32:
413; XOP:       # %bb.0:
414; XOP-NEXT:    vprotd {{.*}}(%rip), %xmm0, %xmm0
415; XOP-NEXT:    retq
416;
417; X86-SSE2-LABEL: constant_funnnel_v2i32:
418; X86-SSE2:       # %bb.0:
419; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [16,32,1,1]
420; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
421; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm0
422; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
423; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
424; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
425; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
426; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
427; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
428; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
429; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
430; X86-SSE2-NEXT:    por %xmm3, %xmm0
431; X86-SSE2-NEXT:    retl
; Rotate-left with distinct compile-time constant amounts per lane (<4,5>).
; On SSE2 the 2^amt multipliers are folded to the constant pool ([16,32,1,1],
; i.e. 1<<4 and 1<<5); AVX512/XOP use a constant rotate-amount vector directly.
432  %res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 5>)
433  ret <2 x i32> %res
434}
435
436;
437; Uniform Constant Shifts
438;
439
440define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
441; SSE2-LABEL: splatconstant_funnnel_v2i32:
442; SSE2:       # %bb.0:
443; SSE2-NEXT:    movdqa %xmm0, %xmm2
444; SSE2-NEXT:    psrld $28, %xmm2
445; SSE2-NEXT:    movdqa %xmm0, %xmm1
446; SSE2-NEXT:    pslld $4, %xmm1
447; SSE2-NEXT:    por %xmm2, %xmm1
448; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
449; SSE2-NEXT:    movaps %xmm1, %xmm0
450; SSE2-NEXT:    retq
451;
452; SSE41-LABEL: splatconstant_funnnel_v2i32:
453; SSE41:       # %bb.0:
454; SSE41-NEXT:    movdqa %xmm0, %xmm2
455; SSE41-NEXT:    psrld $28, %xmm2
456; SSE41-NEXT:    movdqa %xmm0, %xmm1
457; SSE41-NEXT:    pslld $4, %xmm1
458; SSE41-NEXT:    por %xmm2, %xmm1
459; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
460; SSE41-NEXT:    movdqa %xmm1, %xmm0
461; SSE41-NEXT:    retq
462;
463; AVX1-LABEL: splatconstant_funnnel_v2i32:
464; AVX1:       # %bb.0:
465; AVX1-NEXT:    vpsrld $28, %xmm0, %xmm1
466; AVX1-NEXT:    vpslld $4, %xmm0, %xmm2
467; AVX1-NEXT:    vpor %xmm1, %xmm2, %xmm1
468; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
469; AVX1-NEXT:    retq
470;
471; AVX2-LABEL: splatconstant_funnnel_v2i32:
472; AVX2:       # %bb.0:
473; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
474; AVX2-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
475; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
476; AVX2-NEXT:    retq
477;
478; AVX512F-LABEL: splatconstant_funnnel_v2i32:
479; AVX512F:       # %bb.0:
480; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
481; AVX512F-NEXT:    vprold $4, %zmm0, %zmm0
482; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
483; AVX512F-NEXT:    vzeroupper
484; AVX512F-NEXT:    retq
485;
486; AVX512VL-LABEL: splatconstant_funnnel_v2i32:
487; AVX512VL:       # %bb.0:
488; AVX512VL-NEXT:    vprold $4, %xmm0, %xmm0
489; AVX512VL-NEXT:    retq
490;
491; AVX512BW-LABEL: splatconstant_funnnel_v2i32:
492; AVX512BW:       # %bb.0:
493; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
494; AVX512BW-NEXT:    vprold $4, %zmm0, %zmm0
495; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
496; AVX512BW-NEXT:    vzeroupper
497; AVX512BW-NEXT:    retq
498;
499; AVX512VLBW-LABEL: splatconstant_funnnel_v2i32:
500; AVX512VLBW:       # %bb.0:
501; AVX512VLBW-NEXT:    vprold $4, %xmm0, %xmm0
502; AVX512VLBW-NEXT:    retq
503;
504; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i32:
505; AVX512VBMI2:       # %bb.0:
506; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
507; AVX512VBMI2-NEXT:    vprold $4, %zmm0, %zmm0
508; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
509; AVX512VBMI2-NEXT:    vzeroupper
510; AVX512VBMI2-NEXT:    retq
511;
512; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v2i32:
513; AVX512VLVBMI2:       # %bb.0:
514; AVX512VLVBMI2-NEXT:    vprold $4, %xmm0, %xmm0
515; AVX512VLVBMI2-NEXT:    retq
516;
517; XOP-LABEL: splatconstant_funnnel_v2i32:
518; XOP:       # %bb.0:
519; XOP-NEXT:    vprotd $4, %xmm0, %xmm0
520; XOP-NEXT:    retq
521;
522; X86-SSE2-LABEL: splatconstant_funnnel_v2i32:
523; X86-SSE2:       # %bb.0:
524; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
525; X86-SSE2-NEXT:    psrld $28, %xmm2
526; X86-SSE2-NEXT:    pslld $4, %xmm1
527; X86-SSE2-NEXT:    pslld $4, %xmm1
528; X86-SSE2-NEXT:    por %xmm2, %xmm1
529; X86-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
530; X86-SSE2-NEXT:    movaps %xmm1, %xmm0
531; X86-SSE2-NEXT:    retl
; Uniform compile-time rotate: every lane rotates left by 4. AVX512 folds this
; to an immediate-form vprold $4 (XOP: vprotd $4); pre-AVX512 targets use the
; shift pair pslld $4 / psrld $28 (28 = 32 - 4) combined with por.
532  %res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>)
533  ret <2 x i32> %res
534}
535