1; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
6
; Module-level target description. The triple below is the same one passed via
; -mtriple on each llc invocation at the top of this file.
7target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
8target triple = "x86_64-unknown-unknown"
9
; Mask <0,1,0,1,2,3,2,3> reads only %a and repeats the i16 pairs (0,1) and (2,3),
; so it moves whole 32-bit lanes. Every configuration is expected to emit a
; single pshufd/vpshufd with lane order [0,0,1,1].
10define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
11; SSE-LABEL: shuffle_v8i16_01012323:
12; SSE:       # BB#0:
13; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
14; SSE-NEXT:    retq
15;
16; AVX-LABEL: shuffle_v8i16_01012323:
17; AVX:       # BB#0:
18; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
19; AVX-NEXT:    retq
20  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
21  ret <8 x i16> %shuffle
22}
; Mask <6,7,4,5,2,3,0,1> reverses the order of the four i16 pairs of %a while
; keeping each pair intact. Every configuration is expected to emit a single
; pshufd/vpshufd with lane order [3,2,1,0].
23define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
24; SSE-LABEL: shuffle_v8i16_67452301:
25; SSE:       # BB#0:
26; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
27; SSE-NEXT:    retq
28;
29; AVX-LABEL: shuffle_v8i16_67452301:
30; AVX:       # BB#0:
31; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
32; AVX-NEXT:    retq
33  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
34  ret <8 x i16> %shuffle
35}
; Two-input shuffle: result is elements 4-7 of %a followed by elements 0-3 of %b
; (mask indices 8-11 select from the second operand).
; Expected lowering: shufpd for the SSE2-prefixed run; palignr into xmm1 plus a
; movdqa back to xmm0 for the SSSE3 and SSE4.1 runs; a single vpalignr writing
; xmm0 directly for the AVX runs.
36define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
37; SSE2-LABEL: shuffle_v8i16_456789AB:
38; SSE2:       # BB#0:
39; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
40; SSE2-NEXT:    retq
41;
42; SSSE3-LABEL: shuffle_v8i16_456789AB:
43; SSSE3:       # BB#0:
44; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
45; SSSE3-NEXT:    movdqa %xmm1, %xmm0
46; SSSE3-NEXT:    retq
47;
48; SSE41-LABEL: shuffle_v8i16_456789AB:
49; SSE41:       # BB#0:
50; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
51; SSE41-NEXT:    movdqa %xmm1, %xmm0
52; SSE41-NEXT:    retq
53;
54; AVX-LABEL: shuffle_v8i16_456789AB:
55; AVX:       # BB#0:
56; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
57; AVX-NEXT:    retq
58  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
59  ret <8 x i16> %shuffle
60}
61
; Splat: all eight result lanes take element 0 of %a.
; Expected lowering: pshufd + pshuflw + pshufhw for the SSE2-prefixed run; a
; single pshufb/vpshufb for the SSSE3, SSE4.1 and AVX1 runs; vpbroadcastw for
; the AVX2 run.
62define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
63; SSE2-LABEL: shuffle_v8i16_00000000:
64; SSE2:       # BB#0:
65; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
66; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
67; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
68; SSE2-NEXT:    retq
69;
70; SSSE3-LABEL: shuffle_v8i16_00000000:
71; SSSE3:       # BB#0:
72; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
73; SSSE3-NEXT:    retq
74;
75; SSE41-LABEL: shuffle_v8i16_00000000:
76; SSE41:       # BB#0:
77; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
78; SSE41-NEXT:    retq
79;
80; AVX1-LABEL: shuffle_v8i16_00000000:
81; AVX1:       # BB#0:
82; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
83; AVX1-NEXT:    retq
84;
85; AVX2-LABEL: shuffle_v8i16_00000000:
86; AVX2:       # BB#0:
87; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
88; AVX2-NEXT:    retq
89  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
90  ret <8 x i16> %shuffle
91}
; Mask <0,0,0,0,4,4,4,4> splats element 0 across the low four lanes and element
; 4 across the high four lanes of %a. No element crosses between halves, and
; every configuration is expected to emit a pshuflw followed by a pshufhw.
92define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
93; SSE-LABEL: shuffle_v8i16_00004444:
94; SSE:       # BB#0:
95; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
96; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
97; SSE-NEXT:    retq
98;
99; AVX-LABEL: shuffle_v8i16_00004444:
100; AVX:       # BB#0:
101; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
102; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
103; AVX-NEXT:    retq
104  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
105  ret <8 x i16> %shuffle
106}
; Even result lanes are undef; odd lanes take elements 0-3 of %a in order.
; The checks expect the undef lanes to be filled so that the shuffle becomes a
; single punpcklwd/vpunpcklwd of xmm0 with itself ([0,0,1,1,2,2,3,3]).
107define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
108; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
109; SSE:       # BB#0:
110; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
111; SSE-NEXT:    retq
112;
113; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
114; AVX:       # BB#0:
115; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
116; AVX-NEXT:    retq
117  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
118  ret <8 x i16> %shuffle
119}
; Even result lanes are undef; odd lanes take elements 4-7 of %a in order.
; The checks expect the undef lanes to be filled so that the shuffle becomes a
; single punpckhwd/vpunpckhwd of xmm0 with itself ([4,4,5,5,6,6,7,7]).
120define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
121; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
122; SSE:       # BB#0:
123; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
124; SSE-NEXT:    retq
125;
126; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
127; AVX:       # BB#0:
128; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
129; AVX-NEXT:    retq
130  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
131  ret <8 x i16> %shuffle
132}
; Mask <3,1,2,0,6,7,4,5>: the low half of %a has elements 0 and 3 exchanged, and
; the high half has its two i16 pairs swapped. Every configuration is expected
; to emit pshuflw for the low half followed by pshufd [0,1,3,2] for the high.
133define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
134; SSE-LABEL: shuffle_v8i16_31206745:
135; SSE:       # BB#0:
136; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
137; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
138; SSE-NEXT:    retq
139;
140; AVX-LABEL: shuffle_v8i16_31206745:
141; AVX:       # BB#0:
142; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
143; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
144; AVX-NEXT:    retq
145  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
146  ret <8 x i16> %shuffle
147}
; Mask <4,4,4,4,0,0,0,0> splats element 4 of %a into the low four lanes and
; element 0 into the high four lanes (both splats cross halves).
; Expected lowering: pshufd + pshuflw + pshufhw for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration.
148define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
149; SSE2-LABEL: shuffle_v8i16_44440000:
150; SSE2:       # BB#0:
151; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
152; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
153; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
154; SSE2-NEXT:    retq
155;
156; SSSE3-LABEL: shuffle_v8i16_44440000:
157; SSSE3:       # BB#0:
158; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
159; SSSE3-NEXT:    retq
160;
161; SSE41-LABEL: shuffle_v8i16_44440000:
162; SSE41:       # BB#0:
163; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
164; SSE41-NEXT:    retq
165;
166; AVX-LABEL: shuffle_v8i16_44440000:
167; AVX:       # BB#0:
168; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
169; AVX-NEXT:    retq
170  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
171  ret <8 x i16> %shuffle
172}
; Mask <2,3,0,1,6,7,4,5> swaps the two i16 pairs inside each half of %a, which
; is a pure 32-bit-lane permutation. Every configuration is expected to emit a
; single pshufd/vpshufd with lane order [1,0,3,2].
173define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
174; SSE-LABEL: shuffle_v8i16_23016745:
175; SSE:       # BB#0:
176; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
177; SSE-NEXT:    retq
178;
179; AVX-LABEL: shuffle_v8i16_23016745:
180; AVX:       # BB#0:
181; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
182; AVX-NEXT:    retq
183  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
184  ret <8 x i16> %shuffle
185}
; Mask <2,3,0,2,6,7,4,5>: the low half <2,3,0,2> does not move whole i16 pairs
; (lane 3 reads element 2), while the high half swaps its two pairs.
; Every configuration is expected to emit pshuflw [2,3,0,2] for the low half
; followed by pshufd [0,1,3,2] for the high half.
186define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
187; SSE-LABEL: shuffle_v8i16_23026745:
188; SSE:       # BB#0:
189; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
190; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
191; SSE-NEXT:    retq
192;
193; AVX-LABEL: shuffle_v8i16_23026745:
194; AVX:       # BB#0:
195; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
196; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
197; AVX-NEXT:    retq
198  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
199  ret <8 x i16> %shuffle
200}
; Mask <2,3,0,1,6,7,4,7>: the low half swaps its two i16 pairs, while the high
; half <6,7,4,7> does not move whole pairs (lane 7 reads element 7).
; Every configuration is expected to emit pshufd [1,0,2,3] for the low half
; followed by pshufhw [6,7,4,7] for the high half.
201define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
202; SSE-LABEL: shuffle_v8i16_23016747:
203; SSE:       # BB#0:
204; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
205; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
206; SSE-NEXT:    retq
207;
208; AVX-LABEL: shuffle_v8i16_23016747:
209; AVX:       # BB#0:
210; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
211; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
212; AVX-NEXT:    retq
213  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
214  ret <8 x i16> %shuffle
215}
; Mask <7,5,6,4,3,1,2,0>: the low result half is built from the high half of %a
; (<7,5,6,4>) and the high result half from the low half of %a (<3,1,2,0>).
; Expected lowering: pshufd [2,3,0,1] to swap halves, then pshuflw + pshufhw,
; for the SSE2-prefixed run; a single pshufb/vpshufb in every other
; configuration.
216define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
217; SSE2-LABEL: shuffle_v8i16_75643120:
218; SSE2:       # BB#0:
219; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
220; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
221; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
222; SSE2-NEXT:    retq
223;
224; SSSE3-LABEL: shuffle_v8i16_75643120:
225; SSSE3:       # BB#0:
226; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
227; SSSE3-NEXT:    retq
228;
229; SSE41-LABEL: shuffle_v8i16_75643120:
230; SSE41:       # BB#0:
231; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
232; SSE41-NEXT:    retq
233;
234; AVX-LABEL: shuffle_v8i16_75643120:
235; AVX:       # BB#0:
236; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
237; AVX-NEXT:    retq
238  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
239  ret <8 x i16> %shuffle
240}
241
; Mask <1,0,5,4,5,4,1,0> draws only on elements 0, 1, 4 and 5 of %a, and each
; result half mixes elements from both input halves.
; Expected lowering: pshufd + pshuflw + pshufhw for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration.
242define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
243; SSE2-LABEL: shuffle_v8i16_10545410:
244; SSE2:       # BB#0:
245; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
246; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
247; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
248; SSE2-NEXT:    retq
249;
250; SSSE3-LABEL: shuffle_v8i16_10545410:
251; SSSE3:       # BB#0:
252; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
253; SSSE3-NEXT:    retq
254;
255; SSE41-LABEL: shuffle_v8i16_10545410:
256; SSE41:       # BB#0:
257; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
258; SSE41-NEXT:    retq
259;
260; AVX-LABEL: shuffle_v8i16_10545410:
261; AVX:       # BB#0:
262; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
263; AVX-NEXT:    retq
264  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
265  ret <8 x i16> %shuffle
266}
; Mask <5,4,1,0,5,4,1,0> draws only on elements 0, 1, 4 and 5 of %a and produces
; the same four-element pattern in both result halves.
; Expected lowering: pshufd + pshuflw + pshufhw for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration.
267define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
268; SSE2-LABEL: shuffle_v8i16_54105410:
269; SSE2:       # BB#0:
270; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
271; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
272; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
273; SSE2-NEXT:    retq
274;
275; SSSE3-LABEL: shuffle_v8i16_54105410:
276; SSSE3:       # BB#0:
277; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
278; SSSE3-NEXT:    retq
279;
280; SSE41-LABEL: shuffle_v8i16_54105410:
281; SSE41:       # BB#0:
282; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
283; SSE41-NEXT:    retq
284;
285; AVX-LABEL: shuffle_v8i16_54105410:
286; AVX:       # BB#0:
287; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
288; AVX-NEXT:    retq
289  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
290  ret <8 x i16> %shuffle
291}
; Mask <5,4,1,0,1,0,5,4> draws only on elements 0, 1, 4 and 5 of %a; the high
; result half is the low result half with its two pairs exchanged.
; Expected lowering: pshufd + pshuflw + pshufhw for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration.
292define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
293; SSE2-LABEL: shuffle_v8i16_54101054:
294; SSE2:       # BB#0:
295; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
296; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
297; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
298; SSE2-NEXT:    retq
299;
300; SSSE3-LABEL: shuffle_v8i16_54101054:
301; SSSE3:       # BB#0:
302; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
303; SSSE3-NEXT:    retq
304;
305; SSE41-LABEL: shuffle_v8i16_54101054:
306; SSE41:       # BB#0:
307; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
308; SSE41-NEXT:    retq
309;
310; AVX-LABEL: shuffle_v8i16_54101054:
311; AVX:       # BB#0:
312; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
313; AVX-NEXT:    retq
314  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
315  ret <8 x i16> %shuffle
316}
; Mask <0,4,4,0,0,4,4,0> draws only on elements 0 and 4 of %a and repeats the
; pattern <0,4,4,0> in both result halves.
; Expected lowering: pshufd + pshuflw + pshufhw for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration.
317define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
318; SSE2-LABEL: shuffle_v8i16_04400440:
319; SSE2:       # BB#0:
320; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
321; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
322; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
323; SSE2-NEXT:    retq
324;
325; SSSE3-LABEL: shuffle_v8i16_04400440:
326; SSSE3:       # BB#0:
327; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
328; SSSE3-NEXT:    retq
329;
330; SSE41-LABEL: shuffle_v8i16_04400440:
331; SSE41:       # BB#0:
332; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
333; SSE41-NEXT:    retq
334;
335; AVX-LABEL: shuffle_v8i16_04400440:
336; AVX:       # BB#0:
337; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
338; AVX-NEXT:    retq
339  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
340  ret <8 x i16> %shuffle
341}
; Mask <4,0,0,4,4,0,0,4> draws only on elements 0 and 4 of %a and repeats the
; pattern <4,0,0,4> in both result halves.
; Expected lowering: pshufd + pshuflw + pshufhw for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration.
342define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
343; SSE2-LABEL: shuffle_v8i16_40044004:
344; SSE2:       # BB#0:
345; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
346; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
347; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
348; SSE2-NEXT:    retq
349;
350; SSSE3-LABEL: shuffle_v8i16_40044004:
351; SSSE3:       # BB#0:
352; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
353; SSSE3-NEXT:    retq
354;
355; SSE41-LABEL: shuffle_v8i16_40044004:
356; SSE41:       # BB#0:
357; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
358; SSE41-NEXT:    retq
359;
360; AVX-LABEL: shuffle_v8i16_40044004:
361; AVX:       # BB#0:
362; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
363; AVX-NEXT:    retq
364  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
365  ret <8 x i16> %shuffle
366}
367
; Mask <2,6,4,0,5,1,7,3> is a full permutation of %a in which each result half
; takes two elements from each input half.
; Expected lowering: a five-instruction pshuflw/pshufhw/pshufd/pshuflw/pshufhw
; sequence for the SSE2-prefixed run; a single pshufb/vpshufb in every other
; configuration.
368define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
369; SSE2-LABEL: shuffle_v8i16_26405173:
370; SSE2:       # BB#0:
371; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
372; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
373; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
374; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
375; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
376; SSE2-NEXT:    retq
377;
378; SSSE3-LABEL: shuffle_v8i16_26405173:
379; SSSE3:       # BB#0:
380; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
381; SSSE3-NEXT:    retq
382;
383; SSE41-LABEL: shuffle_v8i16_26405173:
384; SSE41:       # BB#0:
385; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
386; SSE41-NEXT:    retq
387;
388; AVX-LABEL: shuffle_v8i16_26405173:
389; AVX:       # BB#0:
390; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
391; AVX-NEXT:    retq
392  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
393  ret <8 x i16> %shuffle
394}
; Mask <2,0,6,4,5,1,7,3> is a full permutation of %a in which each result half
; takes two elements from each input half.
; Expected lowering: a five-instruction pshuflw/pshufhw/pshufd/pshuflw/pshufhw
; sequence for the SSE2-prefixed run; a single pshufb/vpshufb in every other
; configuration.
395define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
396; SSE2-LABEL: shuffle_v8i16_20645173:
397; SSE2:       # BB#0:
398; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
399; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
400; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
401; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
402; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
403; SSE2-NEXT:    retq
404;
405; SSSE3-LABEL: shuffle_v8i16_20645173:
406; SSSE3:       # BB#0:
407; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
408; SSSE3-NEXT:    retq
409;
410; SSE41-LABEL: shuffle_v8i16_20645173:
411; SSE41:       # BB#0:
412; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
413; SSE41-NEXT:    retq
414;
415; AVX-LABEL: shuffle_v8i16_20645173:
416; AVX:       # BB#0:
417; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
418; AVX-NEXT:    retq
419  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
420  ret <8 x i16> %shuffle
421}
; Mask <2,6,4,0,1,3,7,5> is a full permutation of %a in which each result half
; takes two elements from each input half.
; Expected lowering: a four-instruction pshuflw/pshufhw/pshufd/pshuflw sequence
; for the SSE2-prefixed run (no trailing pshufhw is expected here); a single
; pshufb/vpshufb in every other configuration.
422define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
423; SSE2-LABEL: shuffle_v8i16_26401375:
424; SSE2:       # BB#0:
425; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
426; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
427; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
428; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
429; SSE2-NEXT:    retq
430;
431; SSSE3-LABEL: shuffle_v8i16_26401375:
432; SSSE3:       # BB#0:
433; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
434; SSSE3-NEXT:    retq
435;
436; SSE41-LABEL: shuffle_v8i16_26401375:
437; SSE41:       # BB#0:
438; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
439; SSE41-NEXT:    retq
440;
441; AVX-LABEL: shuffle_v8i16_26401375:
442; AVX:       # BB#0:
443; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
444; AVX-NEXT:    retq
445  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
446  ret <8 x i16> %shuffle
447}
448
; Mask <6,6,7,5,1,6,4,3>: the low result half reads only the high half of %a,
; while the high result half mixes elements from both input halves; element 6
; is used three times.
; Expected lowering: a five-instruction pshuflw/pshufhw/pshufd/pshuflw/pshufhw
; sequence for the SSE2-prefixed run; a single pshufb/vpshufb in every other
; configuration.
449define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
450; SSE2-LABEL: shuffle_v8i16_66751643:
451; SSE2:       # BB#0:
452; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
453; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
454; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
455; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
456; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
457; SSE2-NEXT:    retq
458;
459; SSSE3-LABEL: shuffle_v8i16_66751643:
460; SSSE3:       # BB#0:
461; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
462; SSSE3-NEXT:    retq
463;
464; SSE41-LABEL: shuffle_v8i16_66751643:
465; SSE41:       # BB#0:
466; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
467; SSE41-NEXT:    retq
468;
469; AVX-LABEL: shuffle_v8i16_66751643:
470; AVX:       # BB#0:
471; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
472; AVX-NEXT:    retq
473  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
474  ret <8 x i16> %shuffle
475}
476
; Mask <6,0,5,1,4,7,5,4> applied to %a. Unlike the neighbouring tests, the
; second shufflevector operand here is undef rather than %b, so %b is unused.
; Expected lowering: a four-instruction pshufhw/pshufd/pshuflw/pshufhw sequence
; for the SSE2-prefixed run; a single pshufb/vpshufb in every other
; configuration.
477define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
478; SSE2-LABEL: shuffle_v8i16_60514754:
479; SSE2:       # BB#0:
480; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
481; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
482; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
483; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
484; SSE2-NEXT:    retq
485;
486; SSSE3-LABEL: shuffle_v8i16_60514754:
487; SSSE3:       # BB#0:
488; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
489; SSSE3-NEXT:    retq
490;
491; SSE41-LABEL: shuffle_v8i16_60514754:
492; SSE41:       # BB#0:
493; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
494; SSE41-NEXT:    retq
495;
496; AVX-LABEL: shuffle_v8i16_60514754:
497; AVX:       # BB#0:
498; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
499; AVX-NEXT:    retq
500  %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
501  ret <8 x i16> %shuffle
502}
503
; Mask <0,0,4,4,4,4,4,4>: lanes 0-1 take element 0 of %a and lanes 2-7 take
; element 4, so element 4 has to be brought into the low half.
; Expected lowering: pshufd + pshuflw + pshufhw for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration.
504define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
505; SSE2-LABEL: shuffle_v8i16_00444444:
506; SSE2:       # BB#0:
507; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
508; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
509; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
510; SSE2-NEXT:    retq
511;
512; SSSE3-LABEL: shuffle_v8i16_00444444:
513; SSSE3:       # BB#0:
514; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
515; SSSE3-NEXT:    retq
516;
517; SSE41-LABEL: shuffle_v8i16_00444444:
518; SSE41:       # BB#0:
519; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
520; SSE41-NEXT:    retq
521;
522; AVX-LABEL: shuffle_v8i16_00444444:
523; AVX:       # BB#0:
524; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
525; AVX-NEXT:    retq
526  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
527  ret <8 x i16> %shuffle
528}
; Mask <4,4,0,0,4,4,4,4>: lanes 2-3 take element 0 of %a and every other lane
; takes element 4, so element 4 has to be brought into the low half.
; Expected lowering: pshufd + pshuflw + pshufhw for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration.
529define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
530; SSE2-LABEL: shuffle_v8i16_44004444:
531; SSE2:       # BB#0:
532; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
533; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
534; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
535; SSE2-NEXT:    retq
536;
537; SSSE3-LABEL: shuffle_v8i16_44004444:
538; SSSE3:       # BB#0:
539; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
540; SSSE3-NEXT:    retq
541;
542; SSE41-LABEL: shuffle_v8i16_44004444:
543; SSE41:       # BB#0:
544; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
545; SSE41-NEXT:    retq
546;
547; AVX-LABEL: shuffle_v8i16_44004444:
548; AVX:       # BB#0:
549; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
550; AVX-NEXT:    retq
551  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
552  ret <8 x i16> %shuffle
553}
; Mask <0,4,4,0,4,4,4,4>: the low result half interleaves elements 0 and 4 of
; %a as <0,4,4,0>, and the high result half is a splat of element 4.
; Expected lowering: pshufd + pshuflw + pshufhw for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration.
554define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
555; SSE2-LABEL: shuffle_v8i16_04404444:
556; SSE2:       # BB#0:
557; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
558; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
559; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
560; SSE2-NEXT:    retq
561;
562; SSSE3-LABEL: shuffle_v8i16_04404444:
563; SSSE3:       # BB#0:
564; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
565; SSSE3-NEXT:    retq
566;
567; SSE41-LABEL: shuffle_v8i16_04404444:
568; SSE41:       # BB#0:
569; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
570; SSE41-NEXT:    retq
571;
572; AVX-LABEL: shuffle_v8i16_04404444:
573; AVX:       # BB#0:
574; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
575; AVX-NEXT:    retq
576  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
577  ret <8 x i16> %shuffle
578}
; Mask <0,4,4,0,0,0,0,0>: the low result half interleaves elements 0 and 4 of
; %a as <0,4,4,0>, and the high result half is a splat of element 0.
; Expected lowering: pshufd + pshuflw + pshufhw for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration.
579define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
580; SSE2-LABEL: shuffle_v8i16_04400000:
581; SSE2:       # BB#0:
582; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
583; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
584; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
585; SSE2-NEXT:    retq
586;
587; SSSE3-LABEL: shuffle_v8i16_04400000:
588; SSSE3:       # BB#0:
589; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
590; SSSE3-NEXT:    retq
591;
592; SSE41-LABEL: shuffle_v8i16_04400000:
593; SSE41:       # BB#0:
594; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
595; SSE41-NEXT:    retq
596;
597; AVX-LABEL: shuffle_v8i16_04400000:
598; AVX:       # BB#0:
599; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
600; AVX-NEXT:    retq
601  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
602  ret <8 x i16> %shuffle
603}
; Mask <0,4,4,0,4,5,6,7>: the low result half interleaves elements 0 and 4 of
; %a as <0,4,4,0>, and the high result half is left in place (<4,5,6,7>).
; Every configuration, including those with pshufb available, is expected to
; emit pshufd [0,2,2,3] followed by pshuflw [0,2,2,0].
604define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
605; SSE-LABEL: shuffle_v8i16_04404567:
606; SSE:       # BB#0:
607; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
608; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
609; SSE-NEXT:    retq
610;
611; AVX-LABEL: shuffle_v8i16_04404567:
612; AVX:       # BB#0:
613; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
614; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
615; AVX-NEXT:    retq
616  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
617  ret <8 x i16> %shuffle
618}
619
; Mask <0,undef,4,4,4,4,4,4> (the X in the name marks the undef lane): lane 0
; takes element 0 of %a, lane 1 is unconstrained, lanes 2-7 take element 4.
; Expected lowering: pshufd + pshuflw + pshufhw for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration, with the undef lane
; shown as bytes 2,3 in the expected byte mask.
620define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
621; SSE2-LABEL: shuffle_v8i16_0X444444:
622; SSE2:       # BB#0:
623; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
624; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
625; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
626; SSE2-NEXT:    retq
627;
628; SSSE3-LABEL: shuffle_v8i16_0X444444:
629; SSSE3:       # BB#0:
630; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
631; SSSE3-NEXT:    retq
632;
633; SSE41-LABEL: shuffle_v8i16_0X444444:
634; SSE41:       # BB#0:
635; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
636; SSE41-NEXT:    retq
637;
638; AVX-LABEL: shuffle_v8i16_0X444444:
639; AVX:       # BB#0:
640; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
641; AVX-NEXT:    retq
642  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
643  ret <8 x i16> %shuffle
644}
; Mask <4,4,undef,0,4,4,4,4> (the X in the name marks the undef lane): lane 3
; takes element 0 of %a, lane 2 is unconstrained, the remaining lanes take
; element 4.
; Expected lowering: pshufd + pshuflw + pshufhw for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration, with the undef lane
; shown as bytes 8,9 in the expected byte mask.
645define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
646; SSE2-LABEL: shuffle_v8i16_44X04444:
647; SSE2:       # BB#0:
648; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
649; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
650; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
651; SSE2-NEXT:    retq
652;
653; SSSE3-LABEL: shuffle_v8i16_44X04444:
654; SSSE3:       # BB#0:
655; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
656; SSSE3-NEXT:    retq
657;
658; SSE41-LABEL: shuffle_v8i16_44X04444:
659; SSE41:       # BB#0:
660; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
661; SSE41-NEXT:    retq
662;
663; AVX-LABEL: shuffle_v8i16_44X04444:
664; AVX:       # BB#0:
665; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
666; AVX-NEXT:    retq
667  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
668  ret <8 x i16> %shuffle
669}
; Mask <undef,4,4,0,4,4,4,4> (the X in the name marks the undef lane): lane 0
; is unconstrained, lane 3 takes element 0 of %a, the remaining lanes take
; element 4.
; Expected lowering: pshufd + pshuflw + pshufhw for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration, with the undef lane
; shown as bytes 0,1 in the expected byte mask.
670define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
671; SSE2-LABEL: shuffle_v8i16_X4404444:
672; SSE2:       # BB#0:
673; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
674; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
675; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
676; SSE2-NEXT:    retq
677;
678; SSSE3-LABEL: shuffle_v8i16_X4404444:
679; SSSE3:       # BB#0:
680; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
681; SSSE3-NEXT:    retq
682;
683; SSE41-LABEL: shuffle_v8i16_X4404444:
684; SSE41:       # BB#0:
685; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
686; SSE41-NEXT:    retq
687;
688; AVX-LABEL: shuffle_v8i16_X4404444:
689; AVX:       # BB#0:
690; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
691; AVX-NEXT:    retq
692  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
693  ret <8 x i16> %shuffle
694}
695
; Mask <0,1,2,7,undef x 4>: only the low result half is constrained; it keeps
; elements 0-2 of %a and pulls element 7 from the high half into lane 3.
; Expected lowering: pshufd + pshufhw + pshufd for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration.
696define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
697; SSE2-LABEL: shuffle_v8i16_0127XXXX:
698; SSE2:       # BB#0:
699; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
700; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
701; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
702; SSE2-NEXT:    retq
703;
704; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
705; SSSE3:       # BB#0:
706; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
707; SSSE3-NEXT:    retq
708;
709; SSE41-LABEL: shuffle_v8i16_0127XXXX:
710; SSE41:       # BB#0:
711; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
712; SSE41-NEXT:    retq
713;
714; AVX-LABEL: shuffle_v8i16_0127XXXX:
715; AVX:       # BB#0:
716; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
717; AVX-NEXT:    retq
718  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
719  ret <8 x i16> %shuffle
720}
721
; Mask <undef x 4,4,5,6,3>: only the high result half is constrained; it keeps
; elements 4-6 of %a and pulls element 3 from the low half into lane 7.
; Expected lowering: pshufd + pshuflw + pshufd for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration.
722define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
723; SSE2-LABEL: shuffle_v8i16_XXXX4563:
724; SSE2:       # BB#0:
725; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
726; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
727; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
728; SSE2-NEXT:    retq
729;
730; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
731; SSSE3:       # BB#0:
732; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
733; SSSE3-NEXT:    retq
734;
735; SSE41-LABEL: shuffle_v8i16_XXXX4563:
736; SSE41:       # BB#0:
737; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
738; SSE41-NEXT:    retq
739;
740; AVX-LABEL: shuffle_v8i16_XXXX4563:
741; AVX:       # BB#0:
742; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
743; AVX-NEXT:    retq
744  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
745  ret <8 x i16> %shuffle
746}
747
; Mask <4,5,6,3,undef x 4>: only the low result half is constrained; it takes
; elements 4-6 from the high half of %a and element 3 from the low half.
; Expected lowering: pshufd + pshuflw + pshufd for the SSE2-prefixed run; a
; single pshufb/vpshufb in every other configuration.
748define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
749; SSE2-LABEL: shuffle_v8i16_4563XXXX:
750; SSE2:       # BB#0:
751; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
752; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
753; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
754; SSE2-NEXT:    retq
755;
756; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
757; SSSE3:       # BB#0:
758; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
759; SSSE3-NEXT:    retq
760;
761; SSE41-LABEL: shuffle_v8i16_4563XXXX:
762; SSE41:       # BB#0:
763; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
764; SSE41-NEXT:    retq
765;
766; AVX-LABEL: shuffle_v8i16_4563XXXX:
767; AVX:       # BB#0:
768; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
769; AVX-NEXT:    retq
770  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
771  ret <8 x i16> %shuffle
772}
773
; Single-input shuffle of %a with mask <0,1,2,7,4,5,6,3>, i.e. lanes 3 and 7
; exchanged and every other lane left in place. The SSE2 checks expect a
; pshufd/pshufhw/pshufd chain; the SSSE3, SSE41 and AVX checks expect one
; (v)pshufb.
774define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
775; SSE2-LABEL: shuffle_v8i16_01274563:
776; SSE2:       # BB#0:
777; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
778; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
779; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
780; SSE2-NEXT:    retq
781;
782; SSSE3-LABEL: shuffle_v8i16_01274563:
783; SSSE3:       # BB#0:
784; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
785; SSSE3-NEXT:    retq
786;
787; SSE41-LABEL: shuffle_v8i16_01274563:
788; SSE41:       # BB#0:
789; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
790; SSE41-NEXT:    retq
791;
792; AVX-LABEL: shuffle_v8i16_01274563:
793; AVX:       # BB#0:
794; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
795; AVX-NEXT:    retq
796  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
797  ret <8 x i16> %shuffle
798}
799
; Single-input shuffle of %a with mask <4,5,6,3,0,1,2,7>. The SSE2 checks
; expect a pshufd/pshuflw/pshufd chain; the SSSE3, SSE41 and AVX checks expect
; one (v)pshufb.
800define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
801; SSE2-LABEL: shuffle_v8i16_45630127:
802; SSE2:       # BB#0:
803; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
804; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
805; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
806; SSE2-NEXT:    retq
807;
808; SSSE3-LABEL: shuffle_v8i16_45630127:
809; SSSE3:       # BB#0:
810; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
811; SSSE3-NEXT:    retq
812;
813; SSE41-LABEL: shuffle_v8i16_45630127:
814; SSE41:       # BB#0:
815; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
816; SSE41-NEXT:    retq
817;
818; AVX-LABEL: shuffle_v8i16_45630127:
819; AVX:       # BB#0:
820; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
821; AVX-NEXT:    retq
822  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
823  ret <8 x i16> %shuffle
824}
825
; Single-input shuffle of %a with the irregular mask <3,7,1,0,2,7,3,5>, which
; repeats lanes 3 and 7. The SSE2 checks expect a six-instruction sequence of
; pshufhw/pshufd/pshuflw shuffles; the SSSE3, SSE41 and AVX checks expect one
; (v)pshufb.
826define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
827; SSE2-LABEL: shuffle_v8i16_37102735:
828; SSE2:       # BB#0:
829; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
830; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
831; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
832; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
833; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
834; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
835; SSE2-NEXT:    retq
836;
837; SSSE3-LABEL: shuffle_v8i16_37102735:
838; SSSE3:       # BB#0:
839; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
840; SSSE3-NEXT:    retq
841;
842; SSE41-LABEL: shuffle_v8i16_37102735:
843; SSE41:       # BB#0:
844; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
845; SSE41-NEXT:    retq
846;
847; AVX-LABEL: shuffle_v8i16_37102735:
848; AVX:       # BB#0:
849; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
850; AVX-NEXT:    retq
851  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
852  ret <8 x i16> %shuffle
853}
854
; Two-input shuffle that interleaves the low four lanes of %a with the low
; four lanes of %b (mask <0,8,1,9,2,10,3,11>). Every configuration is expected
; to emit a single (v)punpcklwd.
855define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
856; SSE-LABEL: shuffle_v8i16_08192a3b:
857; SSE:       # BB#0:
858; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
859; SSE-NEXT:    retq
860;
861; AVX-LABEL: shuffle_v8i16_08192a3b:
862; AVX:       # BB#0:
863; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
864; AVX-NEXT:    retq
865  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
866  ret <8 x i16> %shuffle
867}
868
; Two-input shuffle that interleaves the low four lanes of %a with the high
; four lanes of %b (mask <0,12,1,13,2,14,3,15>). The checks expect a (v)pshufd
; that swaps the two halves of xmm1, followed by (v)punpcklwd.
869define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
870; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
871; SSE:       # BB#0:
872; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
873; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
874; SSE-NEXT:    retq
875;
876; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
877; AVX:       # BB#0:
878; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
879; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
880; AVX-NEXT:    retq
881  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
882  ret <8 x i16> %shuffle
883}
884
; Two-input shuffle that interleaves the high four lanes of %a with the high
; four lanes of %b (mask <4,12,5,13,6,14,7,15>). Every configuration is
; expected to emit a single (v)punpckhwd.
885define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
886; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
887; SSE:       # BB#0:
888; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
889; SSE-NEXT:    retq
890;
891; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
892; AVX:       # BB#0:
893; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
894; AVX-NEXT:    retq
895  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
896  ret <8 x i16> %shuffle
897}
898
; Two-input shuffle that interleaves the high four lanes of %a with the low
; four lanes of %b (mask <4,8,5,9,6,10,7,11>). The checks expect a (v)pshufd
; that swaps the two halves of xmm0, followed by (v)punpcklwd.
899define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
900; SSE-LABEL: shuffle_v8i16_48596a7b:
901; SSE:       # BB#0:
902; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
903; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
904; SSE-NEXT:    retq
905;
906; AVX-LABEL: shuffle_v8i16_48596a7b:
907; AVX:       # BB#0:
908; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
909; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
910; AVX-NEXT:    retq
911  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
912  ret <8 x i16> %shuffle
913}
914
; Two-input shuffle with mask <0,8,1,9,6,14,7,15>: lanes 0-1 and 6-7 of %a
; interleaved with the same lanes of %b. The checks expect a (v)pshufd on each
; input that brings 32-bit elements 0 and 3 into the low half, followed by
; (v)punpcklwd.
915define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
916; SSE-LABEL: shuffle_v8i16_08196e7f:
917; SSE:       # BB#0:
918; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
919; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
920; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
921; SSE-NEXT:    retq
922;
923; AVX-LABEL: shuffle_v8i16_08196e7f:
924; AVX:       # BB#0:
925; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
926; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
927; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
928; AVX-NEXT:    retq
929  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
930  ret <8 x i16> %shuffle
931}
932
; Two-input shuffle with mask <0,12,1,13,6,8,7,9>: lanes 0-1 and 6-7 of %a
; interleaved with lanes 4-5 and 0-1 of %b. The checks expect one (v)pshufd
; per input to gather the needed 32-bit elements into the low half, followed
; by (v)punpcklwd.
933define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
934; SSE-LABEL: shuffle_v8i16_0c1d6879:
935; SSE:       # BB#0:
936; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
937; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
938; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
939; SSE-NEXT:    retq
940;
941; AVX-LABEL: shuffle_v8i16_0c1d6879:
942; AVX:       # BB#0:
943; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
944; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
945; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
946; AVX-NEXT:    retq
947  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
948  ret <8 x i16> %shuffle
949}
950
; Two-input shuffle with mask <1,0,9,8,3,2,11,10>: only the low four lanes of
; each input are used. The checks expect a (v)punpcklwd to merge both low
; halves, then a (v)pshuflw and a (v)pshufhw to put each half into the
; requested order.
951define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
952; SSE-LABEL: shuffle_v8i16_109832ba:
953; SSE:       # BB#0:
954; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
955; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
956; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
957; SSE-NEXT:    retq
958;
959; AVX-LABEL: shuffle_v8i16_109832ba:
960; AVX:       # BB#0:
961; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
962; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
963; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
964; AVX-NEXT:    retq
965  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
966  ret <8 x i16> %shuffle
967}
968
; Two-input shuffle that interleaves the low four lanes with %b first and %a
; second (mask <8,0,9,1,10,2,11,3>). The SSE checks expect punpcklwd into xmm1
; followed by a movdqa copy to xmm0; the AVX checks expect a single vpunpcklwd
; that writes xmm0 directly.
969define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
970; SSE-LABEL: shuffle_v8i16_8091a2b3:
971; SSE:       # BB#0:
972; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
973; SSE-NEXT:    movdqa %xmm1, %xmm0
974; SSE-NEXT:    retq
975;
976; AVX-LABEL: shuffle_v8i16_8091a2b3:
977; AVX:       # BB#0:
978; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
979; AVX-NEXT:    retq
980  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
981  ret <8 x i16> %shuffle
982}
; Two-input shuffle that interleaves the high four lanes with %b first and %a
; second (mask <12,4,13,5,14,6,15,7>). The SSE checks expect punpckhwd into
; xmm1 followed by a movdqa copy to xmm0; the AVX checks expect a single
; vpunpckhwd that writes xmm0 directly.
983define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
984; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
985; SSE:       # BB#0:
986; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
987; SSE-NEXT:    movdqa %xmm1, %xmm0
988; SSE-NEXT:    retq
989;
990; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
991; AVX:       # BB#0:
992; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
993; AVX-NEXT:    retq
994  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
995  ret <8 x i16> %shuffle
996}
997
; Two-input shuffle with mask <0,2,1,3,12,14,13,15>: the low half is a
; permutation of the low four lanes of %a and the high half is the same
; permutation of the high four lanes of %b. The checks expect a (v)pshuflw on
; xmm0, a (v)pshufd plus (v)pshuflw on xmm1, and a final (v)punpcklqdq that
; joins the two low quadwords.
998define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
999; SSE-LABEL: shuffle_v8i16_0213cedf:
1000; SSE:       # BB#0:
1001; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1002; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1003; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
1004; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1005; SSE-NEXT:    retq
1006;
1007; AVX-LABEL: shuffle_v8i16_0213cedf:
1008; AVX:       # BB#0:
1009; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1010; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1011; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
1012; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1013; AVX-NEXT:    retq
1014  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
1015  ret <8 x i16> %shuffle
1016}
1017
; Two-input shuffle with mask <4,4,3,10,u,u,u,u>: three lanes come from %a and
; lane 3 is %b[2]; the high half is undef. The SSE2 checks expect a
; pand/pandn/por blend through a constant mask followed by pshufd and pshuflw;
; the SSSE3 checks expect two pshufb joined by por; the SSE41 and AVX checks
; expect (v)pblendw followed by (v)pshufd and (v)pshuflw.
1018define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
1019; SSE2-LABEL: shuffle_v8i16_443aXXXX:
1020; SSE2:       # BB#0:
1021; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
1022; SSE2-NEXT:    pand %xmm2, %xmm0
1023; SSE2-NEXT:    pandn %xmm1, %xmm2
1024; SSE2-NEXT:    por %xmm0, %xmm2
1025; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
1026; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1027; SSE2-NEXT:    retq
1028;
1029; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
1030; SSSE3:       # BB#0:
1031; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
1032; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1033; SSSE3-NEXT:    por %xmm1, %xmm0
1034; SSSE3-NEXT:    retq
1035;
1036; SSE41-LABEL: shuffle_v8i16_443aXXXX:
1037; SSE41:       # BB#0:
1038; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1039; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1040; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1041; SSE41-NEXT:    retq
1042;
1043; AVX-LABEL: shuffle_v8i16_443aXXXX:
1044; AVX:       # BB#0:
1045; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1046; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1047; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1048; AVX-NEXT:    retq
1049  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
1050  ret <8 x i16> %shuffle
1051}
1052
; Two-input shuffle with mask <0,3,2,13,u,u,u,u>: three lanes come from %a and
; lane 3 is %b[5]; the high half is undef. The SSE2 checks expect movsd to
; merge the inputs followed by four pshufd/pshufhw/pshuflw shuffles; the SSSE3
; checks expect two pshufb joined by por; the SSE41 and AVX1 checks expect
; (v)pblendw plus (v)pshufb; the AVX2 checks expect vpblendd plus vpshufb.
1053define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
1054; SSE2-LABEL: shuffle_v8i16_032dXXXX:
1055; SSE2:       # BB#0:
1056; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1057; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0]
1058; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
1059; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1060; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1061; SSE2-NEXT:    retq
1062;
1063; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
1064; SSSE3:       # BB#0:
1065; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1066; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1067; SSSE3-NEXT:    por %xmm1, %xmm0
1068; SSSE3-NEXT:    retq
1069;
1070; SSE41-LABEL: shuffle_v8i16_032dXXXX:
1071; SSE41:       # BB#0:
1072; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1073; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1074; SSE41-NEXT:    retq
1075;
1076; AVX1-LABEL: shuffle_v8i16_032dXXXX:
1077; AVX1:       # BB#0:
1078; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1079; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1080; AVX1-NEXT:    retq
1081;
1082; AVX2-LABEL: shuffle_v8i16_032dXXXX:
1083; AVX2:       # BB#0:
1084; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1085; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1086; AVX2-NEXT:    retq
1087  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1088  ret <8 x i16> %shuffle
1089}
; Shuffle in which only result lane 3 is defined (mask index 13, i.e. %b[5]);
; every other lane is undef. Every configuration is expected to emit a single
; (v)pshufd of xmm1 with pattern [2,2,3,3].
1090define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
1091; SSE-LABEL: shuffle_v8i16_XXXdXXXX:
1092; SSE:       # BB#0:
1093; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1094; SSE-NEXT:    retq
1095;
1096; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
1097; AVX:       # BB#0:
1098; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1099; AVX-NEXT:    retq
1100  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1101  ret <8 x i16> %shuffle
1102}
1103
; Two-input shuffle with mask <0,1,2,13,u,u,u,u>: lanes 0-2 stay in place from
; %a and lane 3 is %b[5]; the high half is undef. The SSE2 checks expect a
; pshufd of xmm1 merged through a constant-mask pand/pandn/por; the SSSE3
; checks expect two pshufb joined by por; the SSE41 and AVX checks expect
; (v)pshufd of xmm1 followed by (v)pblendw.
1104define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
1105; SSE2-LABEL: shuffle_v8i16_012dXXXX:
1106; SSE2:       # BB#0:
1107; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1108; SSE2-NEXT:    pand %xmm2, %xmm0
1109; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1110; SSE2-NEXT:    pandn %xmm1, %xmm2
1111; SSE2-NEXT:    por %xmm2, %xmm0
1112; SSE2-NEXT:    retq
1113;
1114; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
1115; SSSE3:       # BB#0:
1116; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1117; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1118; SSSE3-NEXT:    por %xmm1, %xmm0
1119; SSSE3-NEXT:    retq
1120;
1121; SSE41-LABEL: shuffle_v8i16_012dXXXX:
1122; SSE41:       # BB#0:
1123; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1124; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1125; SSE41-NEXT:    retq
1126;
1127; AVX-LABEL: shuffle_v8i16_012dXXXX:
1128; AVX:       # BB#0:
1129; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1130; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1131; AVX-NEXT:    retq
1132  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1133  ret <8 x i16> %shuffle
1134}
1135
; Two-input shuffle with mask <u,u,u,u,12,13,14,3>: the low half is undef,
; lanes 4-6 stay in place from %b and lane 7 is %a[3]. The SSE2 checks expect
; a pshufd of xmm0 merged through a constant-mask pand/pandn/por plus a movdqa
; copy; the SSSE3 checks expect two pshufb joined by por; the SSE41 and AVX1
; checks expect (v)pshufd plus (v)pblendw; the AVX2 checks expect vpbroadcastq
; plus vpblendw.
1136define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
1137; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
1138; SSE2:       # BB#0:
1139; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
1140; SSE2-NEXT:    pand %xmm2, %xmm1
1141; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1142; SSE2-NEXT:    pandn %xmm0, %xmm2
1143; SSE2-NEXT:    por %xmm1, %xmm2
1144; SSE2-NEXT:    movdqa %xmm2, %xmm0
1145; SSE2-NEXT:    retq
1146;
1147; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
1148; SSSE3:       # BB#0:
1149; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
1150; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
1151; SSSE3-NEXT:    por %xmm1, %xmm0
1152; SSSE3-NEXT:    retq
1153;
1154; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
1155; SSE41:       # BB#0:
1156; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1157; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1158; SSE41-NEXT:    retq
1159;
1160; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
1161; AVX1:       # BB#0:
1162; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1163; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1164; AVX1-NEXT:    retq
1165;
1166; AVX2-LABEL: shuffle_v8i16_XXXXcde3:
1167; AVX2:       # BB#0:
1168; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
1169; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1170; AVX2-NEXT:    retq
1171  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
1172  ret <8 x i16> %shuffle
1173}
1174
; Two-input shuffle with mask <12,13,14,3,u,u,u,u>: lanes 0-2 are %b[4..6] and
; lane 3 stays in place from %a; the high half is undef. The SSE2 checks
; expect a pshufd of xmm1 merged through a constant-mask pand/pandn/por plus a
; movdqa copy; the SSSE3 checks expect two pshufb joined by por; the SSE41 and
; AVX checks expect (v)pshufd of xmm1 followed by (v)pblendw.
1175define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
1176; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
1177; SSE2:       # BB#0:
1178; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1179; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1180; SSE2-NEXT:    pand %xmm2, %xmm1
1181; SSE2-NEXT:    pandn %xmm0, %xmm2
1182; SSE2-NEXT:    por %xmm1, %xmm2
1183; SSE2-NEXT:    movdqa %xmm2, %xmm0
1184; SSE2-NEXT:    retq
1185;
1186; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
1187; SSSE3:       # BB#0:
1188; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
1189; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
1190; SSSE3-NEXT:    por %xmm1, %xmm0
1191; SSSE3-NEXT:    retq
1192;
1193; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
1194; SSE41:       # BB#0:
1195; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1196; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1197; SSE41-NEXT:    retq
1198;
1199; AVX-LABEL: shuffle_v8i16_cde3XXXX:
1200; AVX:       # BB#0:
1201; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1202; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1203; AVX-NEXT:    retq
1204  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1205  ret <8 x i16> %shuffle
1206}
1207
; Two-input shuffle with mask <0,1,2,13,12,13,14,3>: lanes 0-2 and 7 come from
; %a, lanes 3-6 from %b. The SSE2 checks expect movsd followed by seven
; pshufd/pshuflw/pshufhw shuffles; the SSSE3 checks expect two pshufb joined
; by por; the SSE41 and AVX1 checks expect (v)pblendw plus (v)pshufb; the AVX2
; checks expect vpblendd plus vpshufb.
1208define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
1209; SSE2-LABEL: shuffle_v8i16_012dcde3:
1210; SSE2:       # BB#0:
1211; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1212; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,3,2,1]
1213; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
1214; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
1215; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1216; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
1217; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
1218; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
1219; SSE2-NEXT:    retq
1220;
1221; SSSE3-LABEL: shuffle_v8i16_012dcde3:
1222; SSSE3:       # BB#0:
1223; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
1224; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
1225; SSSE3-NEXT:    por %xmm1, %xmm0
1226; SSSE3-NEXT:    retq
1227;
1228; SSE41-LABEL: shuffle_v8i16_012dcde3:
1229; SSE41:       # BB#0:
1230; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1231; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1232; SSE41-NEXT:    retq
1233;
1234; AVX1-LABEL: shuffle_v8i16_012dcde3:
1235; AVX1:       # BB#0:
1236; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1237; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1238; AVX1-NEXT:    retq
1239;
1240; AVX2-LABEL: shuffle_v8i16_012dcde3:
1241; AVX2:       # BB#0:
1242; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1243; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1244; AVX2-NEXT:    retq
1245  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
1246  ret <8 x i16> %shuffle
1247}
1248
; Two-input shuffle with mask <0,9,2,3,12,13,14,7>: every result lane i is
; either %a[i] or %b[i], so no lane changes position. The SSE2 and SSSE3
; checks expect a constant mask loaded with movaps and applied with
; andps/andnps/orps; the SSE41 and AVX checks expect a single (v)pblendw.
1249define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) {
1250; SSE2-LABEL: shuffle_v8i16_0923cde7:
1251; SSE2:       # BB#0:
1252; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1253; SSE2-NEXT:    andps %xmm2, %xmm0
1254; SSE2-NEXT:    andnps %xmm1, %xmm2
1255; SSE2-NEXT:    orps %xmm2, %xmm0
1256; SSE2-NEXT:    retq
1257;
1258; SSSE3-LABEL: shuffle_v8i16_0923cde7:
1259; SSSE3:       # BB#0:
1260; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1261; SSSE3-NEXT:    andps %xmm2, %xmm0
1262; SSSE3-NEXT:    andnps %xmm1, %xmm2
1263; SSSE3-NEXT:    orps %xmm2, %xmm0
1264; SSSE3-NEXT:    retq
1265;
1266; SSE41-LABEL: shuffle_v8i16_0923cde7:
1267; SSE41:       # BB#0:
1268; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1269; SSE41-NEXT:    retq
1270;
1271; AVX-LABEL: shuffle_v8i16_0923cde7:
1272; AVX:       # BB#0:
1273; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1274; AVX-NEXT:    retq
1275  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
1276  ret <8 x i16> %shuffle
1277}
1278
; Two-input shuffle with mask <u,u,u,1,u,5,7,9>: lanes 3, 5 and 6 come from %a
; and lane 7 is %b[1]; the remaining lanes are undef. The SSE2 checks expect
; pshufd/pshuflw/pshufhw shuffles merged through a constant-mask
; pand/pandn/por plus a movdqa copy; the SSSE3 checks expect two pshufb joined
; by por; the SSE41 and AVX1 checks expect (v)pshufd, (v)pshuflw, (v)pshufhw
; and (v)pblendw; the AVX2 checks use vpbroadcastd on xmm1 in place of the
; vpshufd.
1279define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
1280; SSE2-LABEL: shuffle_v8i16_XXX1X579:
1281; SSE2:       # BB#0:
1282; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0]
1283; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
1284; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1285; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1286; SSE2-NEXT:    pand %xmm1, %xmm0
1287; SSE2-NEXT:    pandn %xmm2, %xmm1
1288; SSE2-NEXT:    por %xmm0, %xmm1
1289; SSE2-NEXT:    movdqa %xmm1, %xmm0
1290; SSE2-NEXT:    retq
1291;
1292; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
1293; SSSE3:       # BB#0:
1294; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3]
1295; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero
1296; SSSE3-NEXT:    por %xmm1, %xmm0
1297; SSSE3-NEXT:    retq
1298;
1299; SSE41-LABEL: shuffle_v8i16_XXX1X579:
1300; SSE41:       # BB#0:
1301; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1302; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1303; SSE41-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1304; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1305; SSE41-NEXT:    retq
1306;
1307; AVX1-LABEL: shuffle_v8i16_XXX1X579:
1308; AVX1:       # BB#0:
1309; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1310; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1311; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1312; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1313; AVX1-NEXT:    retq
1314;
1315; AVX2-LABEL: shuffle_v8i16_XXX1X579:
1316; AVX2:       # BB#0:
1317; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
1318; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1319; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1320; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1321; AVX2-NEXT:    retq
1322  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
1323  ret <8 x i16> %shuffle
1324}
1325
; Two-input shuffle with mask <u,u,4,u,8,10,12,u>: lane 2 is %a[4] and lanes
; 4-6 are %b[0], %b[2], %b[4]; the remaining lanes are undef. The SSE2 checks
; expect pshufd/pshuflw/pshufhw shuffles joined by movsd; the SSSE3 checks
; expect two pshufb joined by por; the SSE41 and AVX1 checks expect (v)pshufb
; on xmm1, (v)pshufd on xmm0 and (v)pblendw; the AVX2 checks end with vpblendd
; in place of vpblendw.
1326define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
1327; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
1328; SSE2:       # BB#0:
1329; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
1330; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
1331; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
1332; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
1333; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1334; SSE2-NEXT:    retq
1335;
1336; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
1337; SSSE3:       # BB#0:
1338; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
1339; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
1340; SSSE3-NEXT:    por %xmm1, %xmm0
1341; SSSE3-NEXT:    retq
1342;
1343; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
1344; SSE41:       # BB#0:
1345; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1346; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1347; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1348; SSE41-NEXT:    retq
1349;
1350; AVX1-LABEL: shuffle_v8i16_XX4X8acX:
1351; AVX1:       # BB#0:
1352; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1353; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1354; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1355; AVX1-NEXT:    retq
1356;
1357; AVX2-LABEL: shuffle_v8i16_XX4X8acX:
1358; AVX2:       # BB#0:
1359; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1360; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1361; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1362; AVX2-NEXT:    retq
1363  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
1364  ret <8 x i16> %shuffle
1365}
1366
; Places the scalar %i in lane 0 of an otherwise all-zero vector: %i is
; inserted at element 0 of an undef vector, and the shuffle takes that element
; for lane 0 and zeroinitializer lanes for the rest. The checks expect a
; zero-extending movzwl of %di followed by (v)movd into xmm0.
1367define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
1368; SSE-LABEL: shuffle_v8i16_8zzzzzzz:
1369; SSE:       # BB#0:
1370; SSE-NEXT:    movzwl %di, %eax
1371; SSE-NEXT:    movd %eax, %xmm0
1372; SSE-NEXT:    retq
1373;
1374; AVX-LABEL: shuffle_v8i16_8zzzzzzz:
1375; AVX:       # BB#0:
1376; AVX-NEXT:    movzwl %di, %eax
1377; AVX-NEXT:    vmovd %eax, %xmm0
1378; AVX-NEXT:    retq
1379  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1380  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1381  ret <8 x i16> %shuffle
1382}
1383
; Places the scalar %i in lane 1 of an otherwise all-zero vector. The first
; shuffle operand is zeroinitializer, so every mask index below 8 selects a
; zero regardless of its value; only index 8 (element 0 of %a, which holds %i)
; is non-zero. The checks expect (v)pxor to zero xmm0 followed by (v)pinsrw
; with immediate 1.
1384define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
1385; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
1386; SSE:       # BB#0:
1387; SSE-NEXT:    pxor   %xmm0, %xmm0
1388; SSE-NEXT:    pinsrw $1, %edi, %xmm0
1389; SSE-NEXT:    retq
1390;
1391; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
1392; AVX:       # BB#0:
1393; AVX-NEXT:    vpxor   %xmm0, %xmm0
1394; AVX-NEXT:    vpinsrw $1, %edi, %xmm0
1395; AVX-NEXT:    retq
1396  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1397  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
1398  ret <8 x i16> %shuffle
1399}
1400
; Places the scalar %i in lane 5 of an otherwise all-zero vector: every other
; lane uses mask index 0, which selects element 0 of zeroinitializer. The
; checks expect (v)pxor to zero xmm0 followed by (v)pinsrw with immediate 5.
1401define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
1402; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
1403; SSE:       # BB#0:
1404; SSE-NEXT:    pxor   %xmm0, %xmm0
1405; SSE-NEXT:    pinsrw $5, %edi, %xmm0
1406; SSE-NEXT:    retq
1407;
1408; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
1409; AVX:       # BB#0:
1410; AVX-NEXT:    vpxor   %xmm0, %xmm0
1411; AVX-NEXT:    vpinsrw $5, %edi, %xmm0
1412; AVX-NEXT:    retq
1413  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1414  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
1415  ret <8 x i16> %shuffle
1416}
1417
; Places the scalar %i in lane 7; lanes 0, 3 and 6 are zero (taken from
; zeroinitializer) and lanes 1, 2, 4 and 5 are undef. The checks expect
; (v)pxor to zero xmm0 followed by (v)pinsrw with immediate 7.
1418define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
1419; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
1420; SSE:       # BB#0:
1421; SSE-NEXT:    pxor   %xmm0, %xmm0
1422; SSE-NEXT:    pinsrw $7, %edi, %xmm0
1423; SSE-NEXT:    retq
1424;
1425; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
1426; AVX:       # BB#0:
1427; AVX-NEXT:    vpxor   %xmm0, %xmm0
1428; AVX-NEXT:    vpinsrw $7, %edi, %xmm0
1429; AVX-NEXT:    retq
1430  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1431  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
1432  ret <8 x i16> %shuffle
1433}
1434
; Places the scalar %i in lane 2 of an otherwise all-zero vector. Here %i is
; inserted at element 3 of %a, and mask index 11 (element 3 of the second
; operand) moves it into lane 2. The checks expect (v)pxor to zero xmm0
; followed by (v)pinsrw with immediate 2.
1435define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
1436; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
1437; SSE:       # BB#0:
1438; SSE-NEXT:    pxor   %xmm0, %xmm0
1439; SSE-NEXT:    pinsrw $2, %edi, %xmm0
1440; SSE-NEXT:    retq
1441;
1442; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
1443; AVX:       # BB#0:
1444; AVX-NEXT:    vpxor   %xmm0, %xmm0
1445; AVX-NEXT:    vpinsrw $2, %edi, %xmm0
1446; AVX-NEXT:    retq
1447  %a = insertelement <8 x i16> undef, i16 %i, i32 3
1448  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
1449  ret <8 x i16> %shuffle
1450}
1451
; Two-input shuffle with mask <13,14,15,0,1,2,3,4>: the top three lanes of %b
; followed by the low five lanes of %a. The SSE2 checks expect a psrldq on
; xmm1 and a pslldq on xmm0 joined by por; the SSSE3, SSE41 and AVX checks
; expect a single (v)palignr.
1452define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
1453; SSE2-LABEL: shuffle_v8i16_def01234:
1454; SSE2:       # BB#0:
1455; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1456; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1457; SSE2-NEXT:    por %xmm1, %xmm0
1458; SSE2-NEXT:    retq
1459;
1460; SSSE3-LABEL: shuffle_v8i16_def01234:
1461; SSSE3:       # BB#0:
1462; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1463; SSSE3-NEXT:    retq
1464;
1465; SSE41-LABEL: shuffle_v8i16_def01234:
1466; SSE41:       # BB#0:
1467; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1468; SSE41-NEXT:    retq
1469;
1470; AVX-LABEL: shuffle_v8i16_def01234:
1471; AVX:       # BB#0:
1472; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1473; AVX-NEXT:    retq
1474  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
1475  ret <8 x i16> %shuffle
1476}
1477
; Two-input shuffle with mask <u,14,u,u,1,2,3,u>: lane 1 is %b[6] and lanes
; 4-6 are %a[1], %a[2], %a[3]; the remaining lanes are undef. The defined
; lanes fit a byte rotation that puts the top three lanes of %b ahead of the
; low five lanes of %a, and the checks expect exactly that lowering, namely
; psrldq/pslldq/por on SSE2 and a single (v)palignr on SSSE3, SSE41 and AVX.
1478define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {
1479; SSE2-LABEL: shuffle_v8i16_ueuu123u:
1480; SSE2:       # BB#0:
1481; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1482; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1483; SSE2-NEXT:    por %xmm1, %xmm0
1484; SSE2-NEXT:    retq
1485;
1486; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
1487; SSSE3:       # BB#0:
1488; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1489; SSSE3-NEXT:    retq
1490;
1491; SSE41-LABEL: shuffle_v8i16_ueuu123u:
1492; SSE41:       # BB#0:
1493; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1494; SSE41-NEXT:    retq
1495;
1496; AVX-LABEL: shuffle_v8i16_ueuu123u:
1497; AVX:       # BB#0:
1498; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1499; AVX-NEXT:    retq
1500  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1501  ret <8 x i16> %shuffle
1502}
1503
; Mask <5,6,7,0,1,2,3,4> ("56701234"): a rotation of %a alone; %b is passed
; but never selected by the mask.
; The SSE2 run expects a register copy followed by psrldq + pslldq + por;
; the SSSE3, SSE4.1 and AVX runs expect palignr/vpalignr with xmm0 used for
; both sources.
1504define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {
1505; SSE2-LABEL: shuffle_v8i16_56701234:
1506; SSE2:       # BB#0:
1507; SSE2-NEXT:    movdqa %xmm0, %xmm1
1508; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1509; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1510; SSE2-NEXT:    por %xmm1, %xmm0
1511; SSE2-NEXT:    retq
1512;
1513; SSSE3-LABEL: shuffle_v8i16_56701234:
1514; SSSE3:       # BB#0:
1515; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1516; SSSE3-NEXT:    retq
1517;
1518; SSE41-LABEL: shuffle_v8i16_56701234:
1519; SSE41:       # BB#0:
1520; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1521; SSE41-NEXT:    retq
1522;
1523; AVX-LABEL: shuffle_v8i16_56701234:
1524; AVX:       # BB#0:
1525; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1526; AVX-NEXT:    retq
1527  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
1528  ret <8 x i16> %shuffle
1529}
1530
; Mask <u,6,u,u,1,2,3,u> ("u6uu123u", u = undef): only %a is referenced and
; the defined lanes are compatible with the rotation <5,6,7,0,1,2,3,4>.
; Expected code is that single-input rotation: copy + psrldq + pslldq + por
; on the SSE2 run, one palignr/vpalignr of xmm0 with itself elsewhere.
1531define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {
1532; SSE2-LABEL: shuffle_v8i16_u6uu123u:
1533; SSE2:       # BB#0:
1534; SSE2-NEXT:    movdqa %xmm0, %xmm1
1535; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1536; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1537; SSE2-NEXT:    por %xmm1, %xmm0
1538; SSE2-NEXT:    retq
1539;
1540; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
1541; SSSE3:       # BB#0:
1542; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1543; SSSE3-NEXT:    retq
1544;
1545; SSE41-LABEL: shuffle_v8i16_u6uu123u:
1546; SSE41:       # BB#0:
1547; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1548; SSE41-NEXT:    retq
1549;
1550; AVX-LABEL: shuffle_v8i16_u6uu123u:
1551; AVX:       # BB#0:
1552; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1553; AVX-NEXT:    retq
1554  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1555  ret <8 x i16> %shuffle
1556}
1557
; Mask <u,u,u,u,1,2,3,u> ("uuuu123u", u = undef): only words 1-3 of %a are
; required, in slots 4-6. Since every low lane is undef, all runs expect a
; single whole-register byte shift left (pslldq/vpslldq by six bytes)
; rather than a rotation.
1558define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {
1559; SSE-LABEL: shuffle_v8i16_uuuu123u:
1560; SSE:       # BB#0:
1561; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1562; SSE-NEXT:    retq
1563;
1564; AVX-LABEL: shuffle_v8i16_uuuu123u:
1565; AVX:       # BB#0:
1566; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1567; AVX-NEXT:    retq
1568  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1569  ret <8 x i16> %shuffle
1570}
1571
; Mask <11,12,13,14,15,0,1,2> ("bcdef012"): words 3-7 of %b followed by
; words 0-2 of %a, a two-input byte rotation.
; The SSE2 run expects psrldq (6 bytes) + pslldq (10 bytes) + por; the
; SSSE3, SSE4.1 and AVX runs expect a single palignr/vpalignr.
1572define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
1573; SSE2-LABEL: shuffle_v8i16_bcdef012:
1574; SSE2:       # BB#0:
1575; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1576; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1577; SSE2-NEXT:    por %xmm1, %xmm0
1578; SSE2-NEXT:    retq
1579;
1580; SSSE3-LABEL: shuffle_v8i16_bcdef012:
1581; SSSE3:       # BB#0:
1582; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1583; SSSE3-NEXT:    retq
1584;
1585; SSE41-LABEL: shuffle_v8i16_bcdef012:
1586; SSE41:       # BB#0:
1587; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1588; SSE41-NEXT:    retq
1589;
1590; AVX-LABEL: shuffle_v8i16_bcdef012:
1591; AVX:       # BB#0:
1592; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1593; AVX-NEXT:    retq
1594  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
1595  ret <8 x i16> %shuffle
1596}
1597
; Mask <u,12,13,14,u,u,1,u> ("ucdeuu1u", u = undef): the defined lanes are
; compatible with the two-input rotation <11,12,13,14,15,0,1,2>.
; Expected code is that rotation: psrldq + pslldq + por on the SSE2 run and
; a single palignr/vpalignr on the SSSE3, SSE4.1 and AVX runs.
1598define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
1599; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
1600; SSE2:       # BB#0:
1601; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1602; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1603; SSE2-NEXT:    por %xmm1, %xmm0
1604; SSE2-NEXT:    retq
1605;
1606; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
1607; SSSE3:       # BB#0:
1608; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1609; SSSE3-NEXT:    retq
1610;
1611; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
1612; SSE41:       # BB#0:
1613; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1614; SSE41-NEXT:    retq
1615;
1616; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
1617; AVX:       # BB#0:
1618; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1619; AVX-NEXT:    retq
1620  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
1621  ret <8 x i16> %shuffle
1622}
1623
; Mask <3,4,5,6,7,0,1,2> ("34567012"): a rotation of %a alone by three
; words; %b is passed but never selected by the mask.
; The SSE2 run expects a register copy plus psrldq + pslldq + por; the
; SSSE3, SSE4.1 and AVX runs expect palignr/vpalignr of xmm0 with itself.
1624define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
1625; SSE2-LABEL: shuffle_v8i16_34567012:
1626; SSE2:       # BB#0:
1627; SSE2-NEXT:    movdqa %xmm0, %xmm1
1628; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1629; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1630; SSE2-NEXT:    por %xmm1, %xmm0
1631; SSE2-NEXT:    retq
1632;
1633; SSSE3-LABEL: shuffle_v8i16_34567012:
1634; SSSE3:       # BB#0:
1635; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1636; SSSE3-NEXT:    retq
1637;
1638; SSE41-LABEL: shuffle_v8i16_34567012:
1639; SSE41:       # BB#0:
1640; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1641; SSE41-NEXT:    retq
1642;
1643; AVX-LABEL: shuffle_v8i16_34567012:
1644; AVX:       # BB#0:
1645; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1646; AVX-NEXT:    retq
1647  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
1648  ret <8 x i16> %shuffle
1649}
1650
; Mask <u,4,5,6,u,u,1,u> ("u456uu1u", u = undef): only %a is referenced and
; the defined lanes are compatible with the rotation <3,4,5,6,7,0,1,2>.
; Expected code is that single-input rotation: copy + psrldq + pslldq + por
; on the SSE2 run, one palignr/vpalignr of xmm0 with itself elsewhere.
1651define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
1652; SSE2-LABEL: shuffle_v8i16_u456uu1u:
1653; SSE2:       # BB#0:
1654; SSE2-NEXT:    movdqa %xmm0, %xmm1
1655; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1656; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1657; SSE2-NEXT:    por %xmm1, %xmm0
1658; SSE2-NEXT:    retq
1659;
1660; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
1661; SSSE3:       # BB#0:
1662; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1663; SSSE3-NEXT:    retq
1664;
1665; SSE41-LABEL: shuffle_v8i16_u456uu1u:
1666; SSE41:       # BB#0:
1667; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1668; SSE41-NEXT:    retq
1669;
1670; AVX-LABEL: shuffle_v8i16_u456uu1u:
1671; AVX:       # BB#0:
1672; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1673; AVX-NEXT:    retq
1674  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
1675  ret <8 x i16> %shuffle
1676}
1677
; Mask <u,4,5,6,u,u,u,u> ("u456uuuu", u = undef): only words 4-6 of %a are
; required, in slots 1-3. With every high lane undef, all runs expect a
; single whole-register byte shift right (psrldq/vpsrldq by six bytes).
1678define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
1679; SSE-LABEL: shuffle_v8i16_u456uuuu:
1680; SSE:       # BB#0:
1681; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1682; SSE-NEXT:    retq
1683;
1684; AVX-LABEL: shuffle_v8i16_u456uuuu:
1685; AVX:       # BB#0:
1686; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1687; AVX-NEXT:    retq
1688  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
1689  ret <8 x i16> %shuffle
1690}
1691
; Mask <3,4,5,6,7,8,9,10> ("3456789a"): words 3-7 of %a followed by words
; 0-2 of %b, a two-input rotation with %a supplying the low part.
; The SSE2 run expects psrldq + pslldq + por. The SSSE3 and SSE4.1 runs
; expect palignr writing xmm1 and then a movdqa into xmm0; the AVX run
; expects a single vpalignr that writes xmm0 directly.
1692define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
1693; SSE2-LABEL: shuffle_v8i16_3456789a:
1694; SSE2:       # BB#0:
1695; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1696; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1697; SSE2-NEXT:    por %xmm1, %xmm0
1698; SSE2-NEXT:    retq
1699;
1700; SSSE3-LABEL: shuffle_v8i16_3456789a:
1701; SSSE3:       # BB#0:
1702; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1703; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1704; SSSE3-NEXT:    retq
1705;
1706; SSE41-LABEL: shuffle_v8i16_3456789a:
1707; SSE41:       # BB#0:
1708; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1709; SSE41-NEXT:    movdqa %xmm1, %xmm0
1710; SSE41-NEXT:    retq
1711;
1712; AVX-LABEL: shuffle_v8i16_3456789a:
1713; AVX:       # BB#0:
1714; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1715; AVX-NEXT:    retq
1716  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
1717  ret <8 x i16> %shuffle
1718}
1719
; Mask <u,4,5,6,u,u,9,u> ("u456uu9u", u = undef): the defined lanes are
; compatible with the two-input rotation <3,4,5,6,7,8,9,10>.
; Expected code is that rotation: psrldq + pslldq + por on the SSE2 run,
; palignr into xmm1 plus a movdqa on the SSSE3 and SSE4.1 runs, and a single
; vpalignr on the AVX run.
1720define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
1721; SSE2-LABEL: shuffle_v8i16_u456uu9u:
1722; SSE2:       # BB#0:
1723; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1724; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1725; SSE2-NEXT:    por %xmm1, %xmm0
1726; SSE2-NEXT:    retq
1727;
1728; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
1729; SSSE3:       # BB#0:
1730; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1731; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1732; SSSE3-NEXT:    retq
1733;
1734; SSE41-LABEL: shuffle_v8i16_u456uu9u:
1735; SSE41:       # BB#0:
1736; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1737; SSE41-NEXT:    movdqa %xmm1, %xmm0
1738; SSE41-NEXT:    retq
1739;
1740; AVX-LABEL: shuffle_v8i16_u456uu9u:
1741; AVX:       # BB#0:
1742; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1743; AVX-NEXT:    retq
1744  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
1745  ret <8 x i16> %shuffle
1746}
1747
; Mask <5,6,7,8,9,10,11,12> ("56789abc"): words 5-7 of %a followed by words
; 0-4 of %b, a two-input rotation with %a supplying the low part.
; The SSE2 run expects psrldq (10 bytes) + pslldq (6 bytes) + por. The SSSE3
; and SSE4.1 runs expect palignr writing xmm1 and then a movdqa into xmm0;
; the AVX run expects a single vpalignr that writes xmm0 directly.
1748define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
1749; SSE2-LABEL: shuffle_v8i16_56789abc:
1750; SSE2:       # BB#0:
1751; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1752; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1753; SSE2-NEXT:    por %xmm1, %xmm0
1754; SSE2-NEXT:    retq
1755;
1756; SSSE3-LABEL: shuffle_v8i16_56789abc:
1757; SSSE3:       # BB#0:
1758; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1759; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1760; SSSE3-NEXT:    retq
1761;
1762; SSE41-LABEL: shuffle_v8i16_56789abc:
1763; SSE41:       # BB#0:
1764; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1765; SSE41-NEXT:    movdqa %xmm1, %xmm0
1766; SSE41-NEXT:    retq
1767;
1768; AVX-LABEL: shuffle_v8i16_56789abc:
1769; AVX:       # BB#0:
1770; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1771; AVX-NEXT:    retq
1772  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
1773  ret <8 x i16> %shuffle
1774}
1775
; Mask <u,6,u,u,9,10,11,u> ("u6uu9abu", u = undef): the defined lanes are
; compatible with the two-input rotation <5,6,7,8,9,10,11,12>.
; Expected code is that rotation: psrldq + pslldq + por on the SSE2 run,
; palignr into xmm1 plus a movdqa on the SSSE3 and SSE4.1 runs, and a single
; vpalignr on the AVX run.
1776define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
1777; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
1778; SSE2:       # BB#0:
1779; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1780; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1781; SSE2-NEXT:    por %xmm1, %xmm0
1782; SSE2-NEXT:    retq
1783;
1784; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
1785; SSSE3:       # BB#0:
1786; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1787; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1788; SSSE3-NEXT:    retq
1789;
1790; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
1791; SSE41:       # BB#0:
1792; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1793; SSE41-NEXT:    movdqa %xmm1, %xmm0
1794; SSE41-NEXT:    retq
1795;
1796; AVX-LABEL: shuffle_v8i16_u6uu9abu:
1797; AVX:       # BB#0:
1798; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1799; AVX-NEXT:    retq
1800  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
1801  ret <8 x i16> %shuffle
1802}
1803
; Mask <0,u,u,u,1,u,u,u> ("0uuu1uuu", u = undef): word 0 of %a goes to slot 0
; and word 1 to slot 4; the zeroinitializer operand is never selected.
; The SSE2 and SSSE3 runs expect pshufd + pshufhw. The SSE4.1 and AVX runs
; expect pmovzxwq/vpmovzxwq, which is acceptable because the lanes it zeroes
; are all undef in the mask.
1804define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
1805; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
1806; SSE2:       # BB#0:
1807; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
1808; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
1809; SSE2-NEXT:    retq
1810;
1811; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
1812; SSSE3:       # BB#0:
1813; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
1814; SSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
1815; SSSE3-NEXT:    retq
1816;
1817; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
1818; SSE41:       # BB#0:
1819; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1820; SSE41-NEXT:    retq
1821;
1822; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
1823; AVX:       # BB#0:
1824; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1825; AVX-NEXT:    retq
1826  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
1827  ret <8 x i16> %shuffle
1828}
1829
; Mask <0,9,10,11,1,13,14,15> ("0zzz1zzz", z = zero): indices 8-15 select
; from the zeroinitializer operand, so this zero-extends words 0 and 1 of %a
; to 64 bits each.
; The SSE2 and SSSE3 runs expect pxor + punpcklwd + punpckldq against the
; zeroed register; the SSE4.1 and AVX runs expect a single pmovzxwq/vpmovzxwq.
1830define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
1831; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
1832; SSE2:       # BB#0:
1833; SSE2-NEXT:    pxor %xmm1, %xmm1
1834; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1835; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1836; SSE2-NEXT:    retq
1837;
1838; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
1839; SSSE3:       # BB#0:
1840; SSSE3-NEXT:    pxor %xmm1, %xmm1
1841; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1842; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1843; SSSE3-NEXT:    retq
1844;
1845; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
1846; SSE41:       # BB#0:
1847; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1848; SSE41-NEXT:    retq
1849;
1850; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
1851; AVX:       # BB#0:
1852; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1853; AVX-NEXT:    retq
1854  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
1855  ret <8 x i16> %shuffle
1856}
1857
; Mask <0,u,1,u,2,u,3,u> ("0u1u2u3u", u = undef): words 0-3 of %a spread to
; the even slots; the odd slots are undef and the zeroinitializer operand is
; never selected.
; The SSE2 and SSSE3 runs expect punpcklwd of xmm0 with itself; the SSE4.1
; and AVX runs expect pmovzxwd/vpmovzxwd, which fills the undef slots with
; zero.
1858define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
1859; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
1860; SSE2:       # BB#0:
1861; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1862; SSE2-NEXT:    retq
1863;
1864; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
1865; SSSE3:       # BB#0:
1866; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1867; SSSE3-NEXT:    retq
1868;
1869; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
1870; SSE41:       # BB#0:
1871; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1872; SSE41-NEXT:    retq
1873;
1874; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
1875; AVX:       # BB#0:
1876; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1877; AVX-NEXT:    retq
1878  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
1879  ret <8 x i16> %shuffle
1880}
1881
; Mask <0,9,1,11,2,13,3,15> ("0z1z2z3z", z = zero): words 0-3 of %a
; interleaved with lanes of the zeroinitializer operand, i.e. a zero
; extension of the low four words to 32 bits each.
; The SSE2 and SSSE3 runs expect pxor + punpcklwd against the zeroed
; register; the SSE4.1 and AVX runs expect a single pmovzxwd/vpmovzxwd.
1882define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
1883; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
1884; SSE2:       # BB#0:
1885; SSE2-NEXT:    pxor %xmm1, %xmm1
1886; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1887; SSE2-NEXT:    retq
1888;
1889; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
1890; SSSE3:       # BB#0:
1891; SSSE3-NEXT:    pxor %xmm1, %xmm1
1892; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1893; SSSE3-NEXT:    retq
1894;
1895; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
1896; SSE41:       # BB#0:
1897; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1898; SSE41-NEXT:    retq
1899;
1900; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
1901; AVX:       # BB#0:
1902; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1903; AVX-NEXT:    retq
1904  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
1905  ret <8 x i16> %shuffle
1906}
1907
1908;
1909; Shuffle to logical bit shifts
1910;
; Mask <8,0,8,2,8,4,8,6> ("z0z2z4z6"): index 8 selects element 0 of the
; zeroinitializer operand, so each 32-bit lane holds a zero word followed by
; the lane's original low word. All runs expect a single 32-bit logical left
; shift by 16 bits (pslld/vpslld).
1911define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
1912; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
1913; SSE:       # BB#0:
1914; SSE-NEXT:    pslld $16, %xmm0
1915; SSE-NEXT:    retq
1916;
1917; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
1918; AVX:       # BB#0:
1919; AVX-NEXT:    vpslld $16, %xmm0, %xmm0
1920; AVX-NEXT:    retq
1921  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
1922  ret <8 x i16> %shuffle
1923}
1924
; Mask <8,8,8,0,8,8,8,4> ("zzz0zzz4"): index 8 selects a zero word, so each
; 64-bit lane holds three zero words followed by the lane's original low
; word. All runs expect a single 64-bit logical left shift by 48 bits
; (psllq/vpsllq).
1925define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
1926; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
1927; SSE:       # BB#0:
1928; SSE-NEXT:    psllq $48, %xmm0
1929; SSE-NEXT:    retq
1930;
1931; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
1932; AVX:       # BB#0:
1933; AVX-NEXT:    vpsllq $48, %xmm0, %xmm0
1934; AVX-NEXT:    retq
1935  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
1936  ret <8 x i16> %shuffle
1937}
1938
; Mask <8,8,0,1,8,u,4,u> ("zz01zX4X", z = zero, X = undef): the defined lanes
; match a 64-bit left shift by two words, with the undef slots free to take
; whatever the shift produces. All runs expect a single psllq/vpsllq by 32
; bits.
1939define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
1940; SSE-LABEL: shuffle_v8i16_zz01zX4X:
1941; SSE:       # BB#0:
1942; SSE-NEXT:    psllq $32, %xmm0
1943; SSE-NEXT:    retq
1944;
1945; AVX-LABEL: shuffle_v8i16_zz01zX4X:
1946; AVX:       # BB#0:
1947; AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
1948; AVX-NEXT:    retq
1949  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
1950  ret <8 x i16> %shuffle
1951}
1952
; Mask <8,0,u,2,8,4,5,6> ("z0X2z456", z = zero, X = undef): the defined lanes
; match a 64-bit left shift by one word, with the undef slot free to take
; word 1. All runs expect a single psllq/vpsllq by 16 bits.
1953define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
1954; SSE-LABEL: shuffle_v8i16_z0X2z456:
1955; SSE:       # BB#0:
1956; SSE-NEXT:    psllq $16, %xmm0
1957; SSE-NEXT:    retq
1958;
1959; AVX-LABEL: shuffle_v8i16_z0X2z456:
1960; AVX:       # BB#0:
1961; AVX-NEXT:    vpsllq $16, %xmm0, %xmm0
1962; AVX-NEXT:    retq
1963  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
1964  ret <8 x i16> %shuffle
1965}
1966
; Mask <1,8,3,8,u,8,7,8> ("1z3zXz7z", z = zero, X = undef): each 32-bit lane
; holds its original high word followed by a zero word, with one lane's low
; slot undef. All runs expect a single 32-bit logical right shift by 16 bits
; (psrld/vpsrld).
1967define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
1968; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
1969; SSE:       # BB#0:
1970; SSE-NEXT:    psrld $16, %xmm0
1971; SSE-NEXT:    retq
1972;
1973; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
1974; AVX:       # BB#0:
1975; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
1976; AVX-NEXT:    retq
1977  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
1978  ret <8 x i16> %shuffle
1979}
1980
; Mask <1,u,3,8,5,6,7,8> ("1X3z567z", z = zero, X = undef): the defined lanes
; match a 64-bit right shift by one word, with the undef slot free to take
; word 2. All runs expect a single psrlq/vpsrlq by 16 bits.
1981define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
1982; SSE-LABEL: shuffle_v8i16_1X3z567z:
1983; SSE:       # BB#0:
1984; SSE-NEXT:    psrlq $16, %xmm0
1985; SSE-NEXT:    retq
1986;
1987; AVX-LABEL: shuffle_v8i16_1X3z567z:
1988; AVX:       # BB#0:
1989; AVX-NEXT:    vpsrlq $16, %xmm0, %xmm0
1990; AVX-NEXT:    retq
1991  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
1992  ret <8 x i16> %shuffle
1993}
1994
; Mask <2,3,8,8,6,7,8,8> ("23zz67zz", z = zero): each 64-bit lane holds its
; original upper two words followed by two zero words. All runs expect a
; single 64-bit logical right shift by 32 bits (psrlq/vpsrlq).
1995define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
1996; SSE-LABEL: shuffle_v8i16_23zz67zz:
1997; SSE:       # BB#0:
1998; SSE-NEXT:    psrlq $32, %xmm0
1999; SSE-NEXT:    retq
2000;
2001; AVX-LABEL: shuffle_v8i16_23zz67zz:
2002; AVX:       # BB#0:
2003; AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
2004; AVX-NEXT:    retq
2005  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
2006  ret <8 x i16> %shuffle
2007}
2008
; Mask <3,8,u,u,u,8,8,8> ("3zXXXzzz", z = zero, X = undef): word 3 in slot 0,
; zeros in slots 1 and 5-7, the rest undef. The defined lanes are compatible
; with a 64-bit right shift by three words, and all runs expect a single
; psrlq/vpsrlq by 48 bits.
2009define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
2010; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
2011; SSE:       # BB#0:
2012; SSE-NEXT:    psrlq $48, %xmm0
2013; SSE-NEXT:    retq
2014;
2015; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
2016; AVX:       # BB#0:
2017; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm0
2018; AVX-NEXT:    retq
2019  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
2020  ret <8 x i16> %shuffle
2021}
2022
; Mask <0,1,u,3,8,8,u,8> ("01u3zzuz", u = undef, z = zero): the low four
; slots keep words 0, 1 and 3 of %a in place (slot 2 undef) and the defined
; high slots are zero. All runs expect a single movq/vmovq, which keeps the
; low 64 bits and zeroes the upper 64 bits.
2023define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
2024; SSE-LABEL: shuffle_v8i16_01u3zzuz:
2025; SSE:       # BB#0:
2026; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
2027; SSE-NEXT:    retq
2028;
2029; AVX-LABEL: shuffle_v8i16_01u3zzuz:
2030; AVX:       # BB#0:
2031; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2032; AVX-NEXT:    retq
2033  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
2034  ret <8 x i16> %shuffle
2035}
2036
; Mask <0,9,2,3,4,5,6,7> ("0z234567", z = zero): %a with only word 1 replaced
; by a lane of the zeroinitializer operand.
; The SSE2 and SSSE3 runs expect an andps with a RIP-relative memory operand
; (presumably a constant-pool mask; the pattern does not pin its contents).
; The SSE4.1 and AVX runs expect a zeroed register blended in with
; pblendw/vpblendw.
2037define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
2038; SSE2-LABEL: shuffle_v8i16_0z234567:
2039; SSE2:       # BB#0:
2040; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
2041; SSE2-NEXT:    retq
2042;
2043; SSSE3-LABEL: shuffle_v8i16_0z234567:
2044; SSSE3:       # BB#0:
2045; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
2046; SSSE3-NEXT:    retq
2047;
2048; SSE41-LABEL: shuffle_v8i16_0z234567:
2049; SSE41:       # BB#0:
2050; SSE41-NEXT:    pxor %xmm1, %xmm1
2051; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2052; SSE41-NEXT:    retq
2053;
2054; AVX-LABEL: shuffle_v8i16_0z234567:
2055; AVX:       # BB#0:
2056; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2057; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2058; AVX-NEXT:    retq
2059  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2060  ret <8 x i16> %shuffle
2061}
2062
; Mask <0,8,8,8,8,5,8,7> ("0zzzz5z7", z = zero): words 0, 5 and 7 of %a stay
; in place and every other slot takes a zero lane.
; The SSE2 and SSSE3 runs expect an andps with a RIP-relative memory operand
; (presumably a constant-pool mask; the pattern does not pin its contents).
; The SSE4.1 and AVX runs expect a zeroed register blended in with
; pblendw/vpblendw.
2063define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
2064; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
2065; SSE2:       # BB#0:
2066; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
2067; SSE2-NEXT:    retq
2068;
2069; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
2070; SSSE3:       # BB#0:
2071; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
2072; SSSE3-NEXT:    retq
2073;
2074; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
2075; SSE41:       # BB#0:
2076; SSE41-NEXT:    pxor %xmm1, %xmm1
2077; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2078; SSE41-NEXT:    retq
2079;
2080; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
2081; AVX:       # BB#0:
2082; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2083; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2084; AVX-NEXT:    retq
2085  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
2086  ret <8 x i16> %shuffle
2087}
2088
; Mask <0,1,2,3,4,5,6,15> ("0123456z", z = zero): %a with only its top word
; replaced by a lane of the zeroinitializer operand. Slot 1 must select
; word 1 of %a (index 1, not 9) so that the mask agrees with the test name
; and this case stays distinct from the other blend-with-zero tests.
; The SSE2 and SSSE3 runs expect an andps with a RIP-relative memory operand
; (presumably a constant-pool mask; the pattern does not pin its contents).
; The SSE4.1 and AVX runs expect a zeroed register blended into lane 7 only
; with pblendw/vpblendw.
2089define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
2090; SSE2-LABEL: shuffle_v8i16_0123456z:
2091; SSE2:       # BB#0:
2092; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
2093; SSE2-NEXT:    retq
2094;
2095; SSSE3-LABEL: shuffle_v8i16_0123456z:
2096; SSSE3:       # BB#0:
2097; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
2098; SSSE3-NEXT:    retq
2099;
2100; SSE41-LABEL: shuffle_v8i16_0123456z:
2101; SSE41:       # BB#0:
2102; SSE41-NEXT:    pxor %xmm1, %xmm1
2103; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2104; SSE41-NEXT:    retq
2105;
2106; AVX-LABEL: shuffle_v8i16_0123456z:
2107; AVX:       # BB#0:
2108; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2109; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2110; AVX-NEXT:    retq
2111  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
2112  ret <8 x i16> %shuffle
2113}
2114
; Mask <15,u,3,u,12,12,5,u> ("fu3ucc5u", u = undef): a two-input shuffle that
; takes words 7 and 4 (twice) of %b and words 3 and 5 of %a.
; All runs expect the same three-step sequence: a two-byte pslldq on %a so
; that words 3 and 5 land on even slots, a pshufhw that reorders the high
; words of %b, and a punpckhdq that interleaves the two high halves. The SSE
; runs additionally expect a movdqa to bring the result into xmm0.
2115define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
2116; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
2117; SSE:       # BB#0:
2118; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2119; SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2120; SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2121; SSE-NEXT:    movdqa %xmm1, %xmm0
2122; SSE-NEXT:    retq
2123;
2124; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
2125; AVX:       # BB#0:
2126; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2127; AVX-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2128; AVX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2129; AVX-NEXT:    retq
2130  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
2131  ret <8 x i16> %shuffle
2132}
2133
; Mask <8,0,1,2,3,4,5,u> ("8012345u", u = undef): index 8 selects element 0
; of the zeroinitializer operand, so slot 0 is zero, slots 1-6 hold words 0-5
; of %a and the last slot is undef. All runs expect a single whole-register
; byte shift left by two bytes (pslldq/vpslldq).
2134define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
2135; SSE-LABEL: shuffle_v8i16_8012345u:
2136; SSE:       # BB#0:
2137; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2138; SSE-NEXT:    retq
2139;
2140; AVX-LABEL: shuffle_v8i16_8012345u:
2141; AVX:       # BB#0:
2142; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2143; AVX-NEXT:    retq
2144  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>
2145
2146  ret <8 x i16> %shuffle
2147}
2148