1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
8
; Pin the module's data layout and target triple. The triple is the same
; x86_64-unknown-unknown value that every RUN line passes through -mtriple.
9target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10target triple = "x86_64-unknown-unknown"
11
; Single-input v4i32 shuffle with mask <0,0,0,1>. Every mask index is below 4,
; so only %a is read and %b is unused. All SSE runs are expected to emit a
; single pshufd and all AVX runs a single vpshufd with the same lane selection.
12define <4 x i32> @shuffle_v4i32_0001(<4 x i32> %a, <4 x i32> %b) {
13; SSE-LABEL: shuffle_v4i32_0001:
14; SSE:       # BB#0:
15; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
16; SSE-NEXT:    retq
17;
18; AVX-LABEL: shuffle_v4i32_0001:
19; AVX:       # BB#0:
20; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
21; AVX-NEXT:    retq
22  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
23  ret <4 x i32> %shuffle
24}
; Single-input v4i32 shuffle with mask <0,0,2,0>, reading only %a.
; Expected lowering is one pshufd on the SSE runs and one vpshufd on the AVX runs.
25define <4 x i32> @shuffle_v4i32_0020(<4 x i32> %a, <4 x i32> %b) {
26; SSE-LABEL: shuffle_v4i32_0020:
27; SSE:       # BB#0:
28; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
29; SSE-NEXT:    retq
30;
31; AVX-LABEL: shuffle_v4i32_0020:
32; AVX:       # BB#0:
33; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
34; AVX-NEXT:    retq
35  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
36  ret <4 x i32> %shuffle
37}
; Single-input v4i32 shuffle with mask <0,1,1,2>, reading only %a.
; Expected lowering is one pshufd on the SSE runs and one vpshufd on the AVX runs.
38define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) {
39; SSE-LABEL: shuffle_v4i32_0112:
40; SSE:       # BB#0:
41; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
42; SSE-NEXT:    retq
43;
44; AVX-LABEL: shuffle_v4i32_0112:
45; AVX:       # BB#0:
46; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
47; AVX-NEXT:    retq
48  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
49  ret <4 x i32> %shuffle
50}
; Single-input v4i32 shuffle with mask <0,3,0,0>, reading only %a.
; Expected lowering is one pshufd on the SSE runs and one vpshufd on the AVX runs.
51define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) {
52; SSE-LABEL: shuffle_v4i32_0300:
53; SSE:       # BB#0:
54; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
55; SSE-NEXT:    retq
56;
57; AVX-LABEL: shuffle_v4i32_0300:
58; AVX:       # BB#0:
59; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
60; AVX-NEXT:    retq
61  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
62  ret <4 x i32> %shuffle
63}
; Single-input v4i32 shuffle with mask <1,0,0,0>, reading only %a.
; Expected lowering is one pshufd on the SSE runs and one vpshufd on the AVX runs.
64define <4 x i32> @shuffle_v4i32_1000(<4 x i32> %a, <4 x i32> %b) {
65; SSE-LABEL: shuffle_v4i32_1000:
66; SSE:       # BB#0:
67; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
68; SSE-NEXT:    retq
69;
70; AVX-LABEL: shuffle_v4i32_1000:
71; AVX:       # BB#0:
72; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
73; AVX-NEXT:    retq
74  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
75  ret <4 x i32> %shuffle
76}
; Single-input v4i32 shuffle with mask <2,2,0,0>, reading only %a.
; Expected lowering is one pshufd on the SSE runs and one vpshufd on the AVX runs.
77define <4 x i32> @shuffle_v4i32_2200(<4 x i32> %a, <4 x i32> %b) {
78; SSE-LABEL: shuffle_v4i32_2200:
79; SSE:       # BB#0:
80; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
81; SSE-NEXT:    retq
82;
83; AVX-LABEL: shuffle_v4i32_2200:
84; AVX:       # BB#0:
85; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
86; AVX-NEXT:    retq
87  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
88  ret <4 x i32> %shuffle
89}
; Single-input v4i32 shuffle with mask <3,3,3,0>, reading only %a.
; Expected lowering is one pshufd on the SSE runs and one vpshufd on the AVX runs.
90define <4 x i32> @shuffle_v4i32_3330(<4 x i32> %a, <4 x i32> %b) {
91; SSE-LABEL: shuffle_v4i32_3330:
92; SSE:       # BB#0:
93; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
94; SSE-NEXT:    retq
95;
96; AVX-LABEL: shuffle_v4i32_3330:
97; AVX:       # BB#0:
98; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
99; AVX-NEXT:    retq
100  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
101  ret <4 x i32> %shuffle
102}
; Single-input v4i32 shuffle with mask <3,2,1,0>, a full lane reversal of %a.
; Expected lowering is one pshufd on the SSE runs and one vpshufd on the AVX runs.
103define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) {
104; SSE-LABEL: shuffle_v4i32_3210:
105; SSE:       # BB#0:
106; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
107; SSE-NEXT:    retq
108;
109; AVX-LABEL: shuffle_v4i32_3210:
110; AVX:       # BB#0:
111; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
112; AVX-NEXT:    retq
113  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
114  ret <4 x i32> %shuffle
115}
116
; Single-input v4i32 shuffle with mask <2,1,2,1>, reading only %a.
; Expected lowering is one pshufd on the SSE runs and one vpshufd on the AVX runs.
117define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) {
118; SSE-LABEL: shuffle_v4i32_2121:
119; SSE:       # BB#0:
120; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
121; SSE-NEXT:    retq
122;
123; AVX-LABEL: shuffle_v4i32_2121:
124; AVX:       # BB#0:
125; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
126; AVX-NEXT:    retq
127  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
128  ret <4 x i32> %shuffle
129}
130
; Single-input v4f32 shuffle with mask <0,0,0,1>. Every mask index is below 4,
; so only %a is read. The SSE runs are expected to use a one-register shufps and
; the AVX runs a vpermilps with the same lane selection.
131define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
132; SSE-LABEL: shuffle_v4f32_0001:
133; SSE:       # BB#0:
134; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,1]
135; SSE-NEXT:    retq
136;
137; AVX-LABEL: shuffle_v4f32_0001:
138; AVX:       # BB#0:
139; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,1]
140; AVX-NEXT:    retq
141  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
142  ret <4 x float> %shuffle
143}
; Single-input v4f32 shuffle with mask <0,0,2,0>, reading only %a.
; Expected lowering is one shufps on the SSE runs and one vpermilps on the AVX runs.
144define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) {
145; SSE-LABEL: shuffle_v4f32_0020:
146; SSE:       # BB#0:
147; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,2,0]
148; SSE-NEXT:    retq
149;
150; AVX-LABEL: shuffle_v4f32_0020:
151; AVX:       # BB#0:
152; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,0]
153; AVX-NEXT:    retq
154  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
155  ret <4 x float> %shuffle
156}
; Single-input v4f32 shuffle with mask <0,3,0,0>, reading only %a.
; Expected lowering is one shufps on the SSE runs and one vpermilps on the AVX runs.
157define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) {
158; SSE-LABEL: shuffle_v4f32_0300:
159; SSE:       # BB#0:
160; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3,0,0]
161; SSE-NEXT:    retq
162;
163; AVX-LABEL: shuffle_v4f32_0300:
164; AVX:       # BB#0:
165; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,3,0,0]
166; AVX-NEXT:    retq
167  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
168  ret <4 x float> %shuffle
169}
; Single-input v4f32 shuffle with mask <1,0,0,0>, reading only %a.
; Expected lowering is one shufps on the SSE runs and one vpermilps on the AVX runs.
170define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) {
171; SSE-LABEL: shuffle_v4f32_1000:
172; SSE:       # BB#0:
173; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0,0,0]
174; SSE-NEXT:    retq
175;
176; AVX-LABEL: shuffle_v4f32_1000:
177; AVX:       # BB#0:
178; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
179; AVX-NEXT:    retq
180  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
181  ret <4 x float> %shuffle
182}
; Single-input v4f32 shuffle with mask <2,2,0,0>, reading only %a.
; Expected lowering is one shufps on the SSE runs and one vpermilps on the AVX runs.
183define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) {
184; SSE-LABEL: shuffle_v4f32_2200:
185; SSE:       # BB#0:
186; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,2,0,0]
187; SSE-NEXT:    retq
188;
189; AVX-LABEL: shuffle_v4f32_2200:
190; AVX:       # BB#0:
191; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,0,0]
192; AVX-NEXT:    retq
193  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
194  ret <4 x float> %shuffle
195}
; Single-input v4f32 shuffle with mask <3,3,3,0>, reading only %a.
; Expected lowering is one shufps on the SSE runs and one vpermilps on the AVX runs.
196define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) {
197; SSE-LABEL: shuffle_v4f32_3330:
198; SSE:       # BB#0:
199; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,0]
200; SSE-NEXT:    retq
201;
202; AVX-LABEL: shuffle_v4f32_3330:
203; AVX:       # BB#0:
204; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,0]
205; AVX-NEXT:    retq
206  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
207  ret <4 x float> %shuffle
208}
; Single-input v4f32 shuffle with mask <3,2,1,0>, a full lane reversal of %a.
; Expected lowering is one shufps on the SSE runs and one vpermilps on the AVX runs.
209define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
210; SSE-LABEL: shuffle_v4f32_3210:
211; SSE:       # BB#0:
212; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
213; SSE-NEXT:    retq
214;
215; AVX-LABEL: shuffle_v4f32_3210:
216; AVX:       # BB#0:
217; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
218; AVX-NEXT:    retq
219  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
220  ret <4 x float> %shuffle
221}
; Single-input v4f32 shuffle with mask <0,0,1,1>, duplicating each of the two
; low lanes of %a. The SSE runs are expected to use unpcklps of the register
; with itself, while the AVX runs are expected to use vpermilps.
222define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
223; SSE-LABEL: shuffle_v4f32_0011:
224; SSE:       # BB#0:
225; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
226; SSE-NEXT:    retq
227;
228; AVX-LABEL: shuffle_v4f32_0011:
229; AVX:       # BB#0:
230; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
231; AVX-NEXT:    retq
232  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
233  ret <4 x float> %shuffle
234}
; Single-input v4f32 shuffle with mask <2,2,3,3>, duplicating each of the two
; high lanes of %a. The SSE runs are expected to use unpckhps of the register
; with itself, while the AVX runs are expected to use vpermilps.
235define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
236; SSE-LABEL: shuffle_v4f32_2233:
237; SSE:       # BB#0:
238; SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
239; SSE-NEXT:    retq
240;
241; AVX-LABEL: shuffle_v4f32_2233:
242; AVX:       # BB#0:
243; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
244; AVX-NEXT:    retq
245  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
246  ret <4 x float> %shuffle
247}
; Single-input v4f32 shuffle with mask <0,0,2,2>, duplicating the even lanes
; of %a. The baseline SSE2 run is expected to fall back to shufps, the SSE3,
; SSSE3 and SSE4.1 runs are expected to use movsldup, and the AVX runs the
; VEX-encoded vmovsldup.
248define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
249; SSE2-LABEL: shuffle_v4f32_0022:
250; SSE2:       # BB#0:
251; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
252; SSE2-NEXT:    retq
253;
254; SSE3-LABEL: shuffle_v4f32_0022:
255; SSE3:       # BB#0:
256; SSE3-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
257; SSE3-NEXT:    retq
258;
259; SSSE3-LABEL: shuffle_v4f32_0022:
260; SSSE3:       # BB#0:
261; SSSE3-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
262; SSSE3-NEXT:    retq
263;
264; SSE41-LABEL: shuffle_v4f32_0022:
265; SSE41:       # BB#0:
266; SSE41-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
267; SSE41-NEXT:    retq
268;
269; AVX-LABEL: shuffle_v4f32_0022:
270; AVX:       # BB#0:
271; AVX-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
272; AVX-NEXT:    retq
273  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
274  ret <4 x float> %shuffle
275}
; Single-input v4f32 shuffle with mask <1,1,3,3>, duplicating the odd lanes
; of %a. The baseline SSE2 run is expected to fall back to shufps, the SSE3,
; SSSE3 and SSE4.1 runs are expected to use movshdup, and the AVX runs the
; VEX-encoded vmovshdup.
276define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
277; SSE2-LABEL: shuffle_v4f32_1133:
278; SSE2:       # BB#0:
279; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
280; SSE2-NEXT:    retq
281;
282; SSE3-LABEL: shuffle_v4f32_1133:
283; SSE3:       # BB#0:
284; SSE3-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
285; SSE3-NEXT:    retq
286;
287; SSSE3-LABEL: shuffle_v4f32_1133:
288; SSSE3:       # BB#0:
289; SSSE3-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
290; SSSE3-NEXT:    retq
291;
292; SSE41-LABEL: shuffle_v4f32_1133:
293; SSE41:       # BB#0:
294; SSE41-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
295; SSE41-NEXT:    retq
296;
297; AVX-LABEL: shuffle_v4f32_1133:
298; AVX:       # BB#0:
299; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
300; AVX-NEXT:    retq
301  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
302  ret <4 x float> %shuffle
303}
304
; Two-input v4f32 shuffle with mask <0,1,4,5>, which concatenates the low
; 64-bit half of %a with the low 64-bit half of %b. The checks expect it to be
; matched as a 64-bit unpack (unpcklpd on SSE, vunpcklpd on AVX).
305define <4 x float> @shuffle_v4f32_0145(<4 x float> %a, <4 x float> %b) {
306; SSE-LABEL: shuffle_v4f32_0145:
307; SSE:       # BB#0:
308; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
309; SSE-NEXT:    retq
310;
311; AVX-LABEL: shuffle_v4f32_0145:
312; AVX:       # BB#0:
313; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
314; AVX-NEXT:    retq
315  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
316  ret <4 x float> %shuffle
317}
318
; Two-input v4f32 shuffle with mask <6,7,2,3>, which places the high 64-bit
; half of %b ahead of the high 64-bit half of %a. On the SSE runs the unpckhpd
; result lands in xmm1 and an extra movapd copies it to xmm0. The AVX runs are
; expected to need only a single three-operand vunpckhpd.
319define <4 x float> @shuffle_v4f32_6723(<4 x float> %a, <4 x float> %b) {
320; SSE-LABEL: shuffle_v4f32_6723:
321; SSE:       # BB#0:
322; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
323; SSE-NEXT:    movapd %xmm1, %xmm0
324; SSE-NEXT:    retq
325;
326; AVX-LABEL: shuffle_v4f32_6723:
327; AVX:       # BB#0:
328; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
329; AVX-NEXT:    retq
330  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
331  ret <4 x float> %shuffle
332}
333
; Two-input v4i32 shuffle with mask <0,1,2,4>. It keeps lanes 0 to 2 of %a and
; puts lane 0 of %b into lane 3. Expected lowering per run
;   - SSE2, SSE3 and SSSE3 use a pair of shufps
;   - SSE4.1 and AVX1 use a pshufd of %b followed by a pblendw
;   - AVX2 uses vpbroadcastd of %b followed by a vpblendd
334define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) {
335; SSE2-LABEL: shuffle_v4i32_0124:
336; SSE2:       # BB#0:
337; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
338; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
339; SSE2-NEXT:    retq
340;
341; SSE3-LABEL: shuffle_v4i32_0124:
342; SSE3:       # BB#0:
343; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
344; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
345; SSE3-NEXT:    retq
346;
347; SSSE3-LABEL: shuffle_v4i32_0124:
348; SSSE3:       # BB#0:
349; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
350; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
351; SSSE3-NEXT:    retq
352;
353; SSE41-LABEL: shuffle_v4i32_0124:
354; SSE41:       # BB#0:
355; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
356; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
357; SSE41-NEXT:    retq
358;
359; AVX1-LABEL: shuffle_v4i32_0124:
360; AVX1:       # BB#0:
361; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
362; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
363; AVX1-NEXT:    retq
364;
365; AVX2-LABEL: shuffle_v4i32_0124:
366; AVX2:       # BB#0:
367; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
368; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
369; AVX2-NEXT:    retq
370  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
371  ret <4 x i32> %shuffle
372}
; Two-input v4i32 shuffle with mask <0,1,4,2>. The result is a0, a1, b0, a2,
; so lane 0 of %b is inserted at lane 2 and lane 2 of %a moves up to lane 3.
; Expected lowering per run
;   - SSE2, SSE3 and SSSE3 use a pair of shufps
;   - SSE4.1 and AVX1 use two pshufd followed by a pblendw
;   - AVX2 uses vpbroadcastq of %b, vpshufd of %a, then vpblendd
373define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
374; SSE2-LABEL: shuffle_v4i32_0142:
375; SSE2:       # BB#0:
376; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
377; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
378; SSE2-NEXT:    retq
379;
380; SSE3-LABEL: shuffle_v4i32_0142:
381; SSE3:       # BB#0:
382; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
383; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
384; SSE3-NEXT:    retq
385;
386; SSSE3-LABEL: shuffle_v4i32_0142:
387; SSSE3:       # BB#0:
388; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
389; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
390; SSSE3-NEXT:    retq
391;
392; SSE41-LABEL: shuffle_v4i32_0142:
393; SSE41:       # BB#0:
394; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
395; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
396; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
397; SSE41-NEXT:    retq
398;
399; AVX1-LABEL: shuffle_v4i32_0142:
400; AVX1:       # BB#0:
401; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
402; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
403; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
404; AVX1-NEXT:    retq
405;
406; AVX2-LABEL: shuffle_v4i32_0142:
407; AVX2:       # BB#0:
408; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
409; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
410; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
411; AVX2-NEXT:    retq
412  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
413  ret <4 x i32> %shuffle
414}
; Two-input v4i32 shuffle with mask <0,4,1,2>. The result is a0, b0, a1, a2,
; so lane 0 of %b is inserted at lane 1. Expected lowering per run
;   - SSE2, SSE3 and SSSE3 use two shufps into xmm1 plus a movaps back to xmm0
;   - SSE4.1 and AVX1 use two pshufd followed by a pblendw
;   - AVX2 uses vpbroadcastd of %b, vpshufd of %a, then vpblendd
415define <4 x i32> @shuffle_v4i32_0412(<4 x i32> %a, <4 x i32> %b) {
416; SSE2-LABEL: shuffle_v4i32_0412:
417; SSE2:       # BB#0:
418; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
419; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
420; SSE2-NEXT:    movaps %xmm1, %xmm0
421; SSE2-NEXT:    retq
422;
423; SSE3-LABEL: shuffle_v4i32_0412:
424; SSE3:       # BB#0:
425; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
426; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
427; SSE3-NEXT:    movaps %xmm1, %xmm0
428; SSE3-NEXT:    retq
429;
430; SSSE3-LABEL: shuffle_v4i32_0412:
431; SSSE3:       # BB#0:
432; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
433; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
434; SSSE3-NEXT:    movaps %xmm1, %xmm0
435; SSSE3-NEXT:    retq
436;
437; SSE41-LABEL: shuffle_v4i32_0412:
438; SSE41:       # BB#0:
439; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
440; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
441; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
442; SSE41-NEXT:    retq
443;
444; AVX1-LABEL: shuffle_v4i32_0412:
445; AVX1:       # BB#0:
446; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
447; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
448; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
449; AVX1-NEXT:    retq
450;
451; AVX2-LABEL: shuffle_v4i32_0412:
452; AVX2:       # BB#0:
453; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
454; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
455; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
456; AVX2-NEXT:    retq
457  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
458  ret <4 x i32> %shuffle
459}
; Two-input v4i32 shuffle with mask <4,0,1,2>. The result is b0, a0, a1, a2,
; so lane 0 of %b is inserted at lane 0 and %a is shifted up by one lane.
; Expected lowering per run
;   - SSE2, SSE3 and SSSE3 use two shufps into xmm1 plus a movaps back to xmm0
;   - SSE4.1 and AVX1 use a pshufd of %a followed by a pblendw
;   - AVX2 uses a vpshufd of %a followed by a vpblendd
460define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) {
461; SSE2-LABEL: shuffle_v4i32_4012:
462; SSE2:       # BB#0:
463; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
464; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
465; SSE2-NEXT:    movaps %xmm1, %xmm0
466; SSE2-NEXT:    retq
467;
468; SSE3-LABEL: shuffle_v4i32_4012:
469; SSE3:       # BB#0:
470; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
471; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
472; SSE3-NEXT:    movaps %xmm1, %xmm0
473; SSE3-NEXT:    retq
474;
475; SSSE3-LABEL: shuffle_v4i32_4012:
476; SSSE3:       # BB#0:
477; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
478; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
479; SSSE3-NEXT:    movaps %xmm1, %xmm0
480; SSSE3-NEXT:    retq
481;
482; SSE41-LABEL: shuffle_v4i32_4012:
483; SSE41:       # BB#0:
484; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
485; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
486; SSE41-NEXT:    retq
487;
488; AVX1-LABEL: shuffle_v4i32_4012:
489; AVX1:       # BB#0:
490; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
491; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
492; AVX1-NEXT:    retq
493;
494; AVX2-LABEL: shuffle_v4i32_4012:
495; AVX2:       # BB#0:
496; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
497; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
498; AVX2-NEXT:    retq
499  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
500  ret <4 x i32> %shuffle
501}
; Two-input v4i32 shuffle with mask <0,1,4,5>, which concatenates the low
; 64-bit half of %a with the low 64-bit half of %b. The checks expect a single
; 64-bit integer unpack (punpcklqdq on SSE, vpunpcklqdq on AVX).
502define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
503; SSE-LABEL: shuffle_v4i32_0145:
504; SSE:       # BB#0:
505; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
506; SSE-NEXT:    retq
507;
508; AVX-LABEL: shuffle_v4i32_0145:
509; AVX:       # BB#0:
510; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
511; AVX-NEXT:    retq
512  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
513  ret <4 x i32> %shuffle
514}
; Two-input v4i32 shuffle with mask <0,4,5,1>. The result is a0, b0, b1, a1.
; Expected lowering per run
;   - SSE2, SSE3 and SSSE3 interleave with punpckldq and fix the order with pshufd
;   - SSE4.1 and AVX1 use two pshufd followed by a pblendw
;   - AVX2 uses vpshufd of %b, vpbroadcastq of %a, then vpblendd
515define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
516; SSE2-LABEL: shuffle_v4i32_0451:
517; SSE2:       # BB#0:
518; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
519; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
520; SSE2-NEXT:    retq
521;
522; SSE3-LABEL: shuffle_v4i32_0451:
523; SSE3:       # BB#0:
524; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
525; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
526; SSE3-NEXT:    retq
527;
528; SSSE3-LABEL: shuffle_v4i32_0451:
529; SSSE3:       # BB#0:
530; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
531; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
532; SSSE3-NEXT:    retq
533;
534; SSE41-LABEL: shuffle_v4i32_0451:
535; SSE41:       # BB#0:
536; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
537; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
538; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
539; SSE41-NEXT:    retq
540;
541; AVX1-LABEL: shuffle_v4i32_0451:
542; AVX1:       # BB#0:
543; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
544; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
545; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
546; AVX1-NEXT:    retq
547;
548; AVX2-LABEL: shuffle_v4i32_0451:
549; AVX2:       # BB#0:
550; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
551; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
552; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
553; AVX2-NEXT:    retq
554  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
555  ret <4 x i32> %shuffle
556}
; Two-input v4i32 shuffle with mask <4,5,0,1>, which places the low 64-bit
; half of %b ahead of the low 64-bit half of %a. On the SSE runs the punpcklqdq
; result lands in xmm1 and a movdqa copies it to xmm0. The AVX runs are expected
; to need only a single three-operand vpunpcklqdq.
557define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
558; SSE-LABEL: shuffle_v4i32_4501:
559; SSE:       # BB#0:
560; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
561; SSE-NEXT:    movdqa %xmm1, %xmm0
562; SSE-NEXT:    retq
563;
564; AVX-LABEL: shuffle_v4i32_4501:
565; AVX:       # BB#0:
566; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
567; AVX-NEXT:    retq
568  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
569  ret <4 x i32> %shuffle
570}
; Two-input v4i32 shuffle with mask <4,0,1,5>. The result is b0, a0, a1, b1.
; Expected lowering per run
;   - SSE2, SSE3 and SSSE3 interleave with punpckldq and fix the order with pshufd
;   - SSE4.1 and AVX1 use two pshufd followed by a pblendw
;   - AVX2 uses vpbroadcastq of %b, vpshufd of %a, then vpblendd
571define <4 x i32> @shuffle_v4i32_4015(<4 x i32> %a, <4 x i32> %b) {
572; SSE2-LABEL: shuffle_v4i32_4015:
573; SSE2:       # BB#0:
574; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
575; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
576; SSE2-NEXT:    retq
577;
578; SSE3-LABEL: shuffle_v4i32_4015:
579; SSE3:       # BB#0:
580; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
581; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
582; SSE3-NEXT:    retq
583;
584; SSSE3-LABEL: shuffle_v4i32_4015:
585; SSSE3:       # BB#0:
586; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
587; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
588; SSSE3-NEXT:    retq
589;
590; SSE41-LABEL: shuffle_v4i32_4015:
591; SSE41:       # BB#0:
592; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
593; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
594; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
595; SSE41-NEXT:    retq
596;
597; AVX1-LABEL: shuffle_v4i32_4015:
598; AVX1:       # BB#0:
599; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
600; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
601; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
602; AVX1-NEXT:    retq
603;
604; AVX2-LABEL: shuffle_v4i32_4015:
605; AVX2:       # BB#0:
606; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
607; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
608; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
609; AVX2-NEXT:    retq
610  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
611  ret <4 x i32> %shuffle
612}
613
; Shuffle of a zero vector (first operand) with %a (second operand) using mask
; <4,1,2,3>. The result keeps lane 0 of %a and zeroes lanes 1 to 3.
; Expected lowering per run
;   - SSE2, SSE3 and SSSE3 zero xmm1 with xorps, merge with movss, then movaps back
;   - SSE4.1 uses xorps plus a blendps directly into xmm0
;   - the AVX runs use vxorps plus vblendps
614define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
615; SSE2-LABEL: shuffle_v4f32_4zzz:
616; SSE2:       # BB#0:
617; SSE2-NEXT:    xorps %xmm1, %xmm1
618; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
619; SSE2-NEXT:    movaps %xmm1, %xmm0
620; SSE2-NEXT:    retq
621;
622; SSE3-LABEL: shuffle_v4f32_4zzz:
623; SSE3:       # BB#0:
624; SSE3-NEXT:    xorps %xmm1, %xmm1
625; SSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
626; SSE3-NEXT:    movaps %xmm1, %xmm0
627; SSE3-NEXT:    retq
628;
629; SSSE3-LABEL: shuffle_v4f32_4zzz:
630; SSSE3:       # BB#0:
631; SSSE3-NEXT:    xorps %xmm1, %xmm1
632; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
633; SSSE3-NEXT:    movaps %xmm1, %xmm0
634; SSSE3-NEXT:    retq
635;
636; SSE41-LABEL: shuffle_v4f32_4zzz:
637; SSE41:       # BB#0:
638; SSE41-NEXT:    xorps %xmm1, %xmm1
639; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
640; SSE41-NEXT:    retq
641;
642; AVX-LABEL: shuffle_v4f32_4zzz:
643; AVX:       # BB#0:
644; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
645; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
646; AVX-NEXT:    retq
647  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
648  ret <4 x float> %shuffle
649}
650
; Shuffle of a zero vector (first operand) with %a (second operand) using mask
; <2,4,3,0>. Lane 1 receives lane 0 of %a and every other lane reads some lane
; of the zero vector, so the result is zero, a0, zero, zero.
; Expected lowering per run
;   - SSE2, SSE3 and SSSE3 use xorps plus a pair of shufps
;   - SSE4.1 and the AVX runs use a single insertps or vinsertps with zeroing
651define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
652; SSE2-LABEL: shuffle_v4f32_z4zz:
653; SSE2:       # BB#0:
654; SSE2-NEXT:    xorps %xmm1, %xmm1
655; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
656; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
657; SSE2-NEXT:    retq
658;
659; SSE3-LABEL: shuffle_v4f32_z4zz:
660; SSE3:       # BB#0:
661; SSE3-NEXT:    xorps %xmm1, %xmm1
662; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
663; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
664; SSE3-NEXT:    retq
665;
666; SSSE3-LABEL: shuffle_v4f32_z4zz:
667; SSSE3:       # BB#0:
668; SSSE3-NEXT:    xorps %xmm1, %xmm1
669; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
670; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
671; SSSE3-NEXT:    retq
672;
673; SSE41-LABEL: shuffle_v4f32_z4zz:
674; SSE41:       # BB#0:
675; SSE41-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
676; SSE41-NEXT:    retq
677;
678; AVX-LABEL: shuffle_v4f32_z4zz:
679; AVX:       # BB#0:
680; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
681; AVX-NEXT:    retq
682  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
683  ret <4 x float> %shuffle
684}
685
; Shuffle of a zero vector (first operand) with %a (second operand) using mask
; <0,0,4,0>. Lane 2 receives lane 0 of %a and the other lanes are zero.
; Expected lowering per run
;   - SSE2, SSE3 and SSSE3 use xorps, a pair of shufps, then movaps back to xmm0
;   - SSE4.1 and the AVX runs use a single insertps or vinsertps with zeroing
686define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
687; SSE2-LABEL: shuffle_v4f32_zz4z:
688; SSE2:       # BB#0:
689; SSE2-NEXT:    xorps %xmm1, %xmm1
690; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
691; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
692; SSE2-NEXT:    movaps %xmm1, %xmm0
693; SSE2-NEXT:    retq
694;
695; SSE3-LABEL: shuffle_v4f32_zz4z:
696; SSE3:       # BB#0:
697; SSE3-NEXT:    xorps %xmm1, %xmm1
698; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
699; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
700; SSE3-NEXT:    movaps %xmm1, %xmm0
701; SSE3-NEXT:    retq
702;
703; SSSE3-LABEL: shuffle_v4f32_zz4z:
704; SSSE3:       # BB#0:
705; SSSE3-NEXT:    xorps %xmm1, %xmm1
706; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
707; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
708; SSSE3-NEXT:    movaps %xmm1, %xmm0
709; SSSE3-NEXT:    retq
710;
711; SSE41-LABEL: shuffle_v4f32_zz4z:
712; SSE41:       # BB#0:
713; SSE41-NEXT:    insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
714; SSE41-NEXT:    retq
715;
716; AVX-LABEL: shuffle_v4f32_zz4z:
717; AVX:       # BB#0:
718; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
719; AVX-NEXT:    retq
720  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
721  ret <4 x float> %shuffle
722}
723
; Shuffle of a zero vector (first operand) with %a (second operand) using mask
; <0,undef,undef,4>. Lane 0 is zero, lane 3 receives lane 0 of %a, and lanes 1
; and 2 are undef so the backend may fill them with anything.
; Expected lowering per run
;   - SSE2, SSE3 and SSSE3 use xorps, a single shufps, then movaps back to xmm0
;   - SSE4.1 and the AVX runs use a single insertps or vinsertps, and the
;     expected output shows the undef lanes being zeroed as well
724define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) {
725; SSE2-LABEL: shuffle_v4f32_zuu4:
726; SSE2:       # BB#0:
727; SSE2-NEXT:    xorps %xmm1, %xmm1
728; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
729; SSE2-NEXT:    movaps %xmm1, %xmm0
730; SSE2-NEXT:    retq
731;
732; SSE3-LABEL: shuffle_v4f32_zuu4:
733; SSE3:       # BB#0:
734; SSE3-NEXT:    xorps %xmm1, %xmm1
735; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
736; SSE3-NEXT:    movaps %xmm1, %xmm0
737; SSE3-NEXT:    retq
738;
739; SSSE3-LABEL: shuffle_v4f32_zuu4:
740; SSSE3:       # BB#0:
741; SSSE3-NEXT:    xorps %xmm1, %xmm1
742; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
743; SSSE3-NEXT:    movaps %xmm1, %xmm0
744; SSSE3-NEXT:    retq
745;
746; SSE41-LABEL: shuffle_v4f32_zuu4:
747; SSE41:       # BB#0:
748; SSE41-NEXT:    insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
749; SSE41-NEXT:    retq
750;
751; AVX-LABEL: shuffle_v4f32_zuu4:
752; AVX:       # BB#0:
753; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
754; AVX-NEXT:    retq
755  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
756  ret <4 x float> %shuffle
757}
758
; Shuffle of a zero vector (first operand) with %a (second operand) using mask
; <0,1,2,7>. Lanes 0 to 2 are zero and lane 3 keeps lane 3 of %a.
; Expected lowering per run
;   - SSE2, SSE3 and SSSE3 use xorps, a pair of shufps, then movaps back to xmm0
;   - SSE4.1 uses xorps plus blendps
;   - the AVX runs use vxorps plus vblendps
759define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) {
760; SSE2-LABEL: shuffle_v4f32_zzz7:
761; SSE2:       # BB#0:
762; SSE2-NEXT:    xorps %xmm1, %xmm1
763; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
764; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
765; SSE2-NEXT:    movaps %xmm1, %xmm0
766; SSE2-NEXT:    retq
767;
768; SSE3-LABEL: shuffle_v4f32_zzz7:
769; SSE3:       # BB#0:
770; SSE3-NEXT:    xorps %xmm1, %xmm1
771; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
772; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
773; SSE3-NEXT:    movaps %xmm1, %xmm0
774; SSE3-NEXT:    retq
775;
776; SSSE3-LABEL: shuffle_v4f32_zzz7:
777; SSSE3:       # BB#0:
778; SSSE3-NEXT:    xorps %xmm1, %xmm1
779; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
780; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
781; SSSE3-NEXT:    movaps %xmm1, %xmm0
782; SSSE3-NEXT:    retq
783;
784; SSE41-LABEL: shuffle_v4f32_zzz7:
785; SSE41:       # BB#0:
786; SSE41-NEXT:    xorps %xmm1, %xmm1
787; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
788; SSE41-NEXT:    retq
789;
790; AVX-LABEL: shuffle_v4f32_zzz7:
791; AVX:       # BB#0:
792; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
793; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
794; AVX-NEXT:    retq
795  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
796  ret <4 x float> %shuffle
797}
798
; Shuffle of a zero vector (first operand) with %a (second operand) using mask
; <0,6,2,3>. Lane 1 receives lane 2 of %a and the other lanes are zero.
; Expected lowering per run
;   - SSE2, SSE3 and SSSE3 use xorps plus a pair of shufps
;   - SSE4.1 and the AVX runs use a single insertps or vinsertps with zeroing
799define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
800; SSE2-LABEL: shuffle_v4f32_z6zz:
801; SSE2:       # BB#0:
802; SSE2-NEXT:    xorps %xmm1, %xmm1
803; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
804; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
805; SSE2-NEXT:    retq
806;
807; SSE3-LABEL: shuffle_v4f32_z6zz:
808; SSE3:       # BB#0:
809; SSE3-NEXT:    xorps %xmm1, %xmm1
810; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
811; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
812; SSE3-NEXT:    retq
813;
814; SSSE3-LABEL: shuffle_v4f32_z6zz:
815; SSSE3:       # BB#0:
816; SSSE3-NEXT:    xorps %xmm1, %xmm1
817; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
818; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
819; SSSE3-NEXT:    retq
820;
821; SSE41-LABEL: shuffle_v4f32_z6zz:
822; SSE41:       # BB#0:
823; SSE41-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
824; SSE41-NEXT:    retq
825;
826; AVX-LABEL: shuffle_v4f32_z6zz:
827; AVX:       # BB#0:
828; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
829; AVX-NEXT:    retq
830  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
831  ret <4 x float> %shuffle
832}
833
; Shuffle of %a (first operand) with a zero vector (second operand) using mask
; <0,4,2,3>. Lane 1 is replaced by zero and lanes 0, 2 and 3 of %a are kept.
; Expected lowering per run
;   - SSE2, SSE3 and SSSE3 use xorps, a pair of shufps, then movaps back to xmm0
;   - SSE4.1 uses xorps plus blendps
;   - the AVX runs use vxorps plus vblendps
834define <4 x float> @shuffle_v4f32_0z23(<4 x float> %a) {
835; SSE2-LABEL: shuffle_v4f32_0z23:
836; SSE2:       # BB#0:
837; SSE2-NEXT:    xorps %xmm1, %xmm1
838; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
839; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
840; SSE2-NEXT:    movaps %xmm1, %xmm0
841; SSE2-NEXT:    retq
842;
843; SSE3-LABEL: shuffle_v4f32_0z23:
844; SSE3:       # BB#0:
845; SSE3-NEXT:    xorps %xmm1, %xmm1
846; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
847; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
848; SSE3-NEXT:    movaps %xmm1, %xmm0
849; SSE3-NEXT:    retq
850;
851; SSSE3-LABEL: shuffle_v4f32_0z23:
852; SSSE3:       # BB#0:
853; SSSE3-NEXT:    xorps %xmm1, %xmm1
854; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
855; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
856; SSSE3-NEXT:    movaps %xmm1, %xmm0
857; SSSE3-NEXT:    retq
858;
859; SSE41-LABEL: shuffle_v4f32_0z23:
860; SSE41:       # BB#0:
861; SSE41-NEXT:    xorps %xmm1, %xmm1
862; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
863; SSE41-NEXT:    retq
864;
865; AVX-LABEL: shuffle_v4f32_0z23:
866; AVX:       # BB#0:
867; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
868; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
869; AVX-NEXT:    retq
870  %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
871  ret <4 x float> %shuffle
872}
873
; Keeps lanes 0, 1 and 3 of %a and replaces lane 2 with zero
; (mask index 4 selects element 0 of the zeroinitializer second operand).
874define <4 x float> @shuffle_v4f32_01z3(<4 x float> %a) {
875; SSE2-LABEL: shuffle_v4f32_01z3:
876; SSE2:       # BB#0:
877; SSE2-NEXT:    xorps %xmm1, %xmm1
878; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
879; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
880; SSE2-NEXT:    retq
881;
882; SSE3-LABEL: shuffle_v4f32_01z3:
883; SSE3:       # BB#0:
884; SSE3-NEXT:    xorps %xmm1, %xmm1
885; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
886; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
887; SSE3-NEXT:    retq
888;
889; SSSE3-LABEL: shuffle_v4f32_01z3:
890; SSSE3:       # BB#0:
891; SSSE3-NEXT:    xorps %xmm1, %xmm1
892; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
893; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
894; SSSE3-NEXT:    retq
895;
896; SSE41-LABEL: shuffle_v4f32_01z3:
897; SSE41:       # BB#0:
898; SSE41-NEXT:    xorps %xmm1, %xmm1
899; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
900; SSE41-NEXT:    retq
901;
902; AVX-LABEL: shuffle_v4f32_01z3:
903; AVX:       # BB#0:
904; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
905; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
906; AVX-NEXT:    retq
907  %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
908  ret <4 x float> %shuffle
909}
910
; Keeps lanes 0-2 of %a and replaces lane 3 with zero
; (mask index 7 selects element 3 of the zeroinitializer second operand).
911define <4 x float> @shuffle_v4f32_012z(<4 x float> %a) {
912; SSE2-LABEL: shuffle_v4f32_012z:
913; SSE2:       # BB#0:
914; SSE2-NEXT:    xorps %xmm1, %xmm1
915; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
916; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
917; SSE2-NEXT:    retq
918;
919; SSE3-LABEL: shuffle_v4f32_012z:
920; SSE3:       # BB#0:
921; SSE3-NEXT:    xorps %xmm1, %xmm1
922; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
923; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
924; SSE3-NEXT:    retq
925;
926; SSSE3-LABEL: shuffle_v4f32_012z:
927; SSSE3:       # BB#0:
928; SSSE3-NEXT:    xorps %xmm1, %xmm1
929; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
930; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
931; SSSE3-NEXT:    retq
932;
933; SSE41-LABEL: shuffle_v4f32_012z:
934; SSE41:       # BB#0:
935; SSE41-NEXT:    xorps %xmm1, %xmm1
936; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
937; SSE41-NEXT:    retq
938;
939; AVX-LABEL: shuffle_v4f32_012z:
940; AVX:       # BB#0:
941; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
942; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
943; AVX-NEXT:    retq
944  %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
945  ret <4 x float> %shuffle
946}
947
; Keeps lanes 0 and 3 of %a and zeroes the two middle lanes
; (both use mask index 4, element 0 of the zeroinitializer second operand).
948define <4 x float> @shuffle_v4f32_0zz3(<4 x float> %a) {
949; SSE2-LABEL: shuffle_v4f32_0zz3:
950; SSE2:       # BB#0:
951; SSE2-NEXT:    xorps %xmm1, %xmm1
952; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
953; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
954; SSE2-NEXT:    retq
955;
956; SSE3-LABEL: shuffle_v4f32_0zz3:
957; SSE3:       # BB#0:
958; SSE3-NEXT:    xorps %xmm1, %xmm1
959; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
960; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
961; SSE3-NEXT:    retq
962;
963; SSSE3-LABEL: shuffle_v4f32_0zz3:
964; SSSE3:       # BB#0:
965; SSSE3-NEXT:    xorps %xmm1, %xmm1
966; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
967; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
968; SSSE3-NEXT:    retq
969;
970; SSE41-LABEL: shuffle_v4f32_0zz3:
971; SSE41:       # BB#0:
972; SSE41-NEXT:    xorps %xmm1, %xmm1
973; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
974; SSE41-NEXT:    retq
975;
976; AVX-LABEL: shuffle_v4f32_0zz3:
977; AVX:       # BB#0:
978; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
979; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
980; AVX-NEXT:    retq
981  %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3>
982  ret <4 x float> %shuffle
983}
984
; Keeps lanes 0 and 2 of %v and zeroes lanes 1 and 3. The second operand is
; <0.0, undef, undef, undef>; the mask only reads its element 0 (index 4),
; so the undef elements never reach the result.
985define <4 x float> @shuffle_v4f32_0z2z(<4 x float> %v) {
986; SSE2-LABEL: shuffle_v4f32_0z2z:
987; SSE2:       # BB#0:
988; SSE2-NEXT:    xorps %xmm1, %xmm1
989; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
990; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
991; SSE2-NEXT:    retq
992;
993; SSE3-LABEL: shuffle_v4f32_0z2z:
994; SSE3:       # BB#0:
995; SSE3-NEXT:    xorps %xmm1, %xmm1
996; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
997; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
998; SSE3-NEXT:    retq
999;
1000; SSSE3-LABEL: shuffle_v4f32_0z2z:
1001; SSSE3:       # BB#0:
1002; SSSE3-NEXT:    xorps %xmm1, %xmm1
1003; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
1004; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
1005; SSSE3-NEXT:    retq
1006;
1007; SSE41-LABEL: shuffle_v4f32_0z2z:
1008; SSE41:       # BB#0:
1009; SSE41-NEXT:    xorps %xmm1, %xmm1
1010; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1011; SSE41-NEXT:    retq
1012;
1013; AVX-LABEL: shuffle_v4f32_0z2z:
1014; AVX:       # BB#0:
1015; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1016; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1017; AVX-NEXT:    retq
1018  %shuffle = shufflevector <4 x float> %v, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
1019  ret <4 x float> %shuffle
1020}
1021
; Two-input float shuffle with an undef lane 0; lanes 1-3 are %a[0], %b[1],
; %a[1]. The expectations use (v)unpcklps of %b with %a, which places %b[0]
; in the undef lane.
1022define <4 x float> @shuffle_v4f32_u051(<4 x float> %a, <4 x float> %b) {
1023; SSE-LABEL: shuffle_v4f32_u051:
1024; SSE:       # BB#0:
1025; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1026; SSE-NEXT:    movaps %xmm1, %xmm0
1027; SSE-NEXT:    retq
1028;
1029; AVX-LABEL: shuffle_v4f32_u051:
1030; AVX:       # BB#0:
1031; AVX-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1032; AVX-NEXT:    retq
1033  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 undef, i32 0, i32 5, i32 1>
1034  ret <4 x float> %shuffle
1035}
1036
; Chain of two shuffles. The first builds <undef, 0, 0, %b[0]> from %b and
; a zero vector; the second takes lane 0 from %a and lanes 1-3 from that
; intermediate, giving <%a[0], 0, 0, %b[0]>.
1037define <4 x float> @shuffle_v4f32_0zz4(<4 x float> %a, <4 x float> %b) {
1038; SSE2-LABEL: shuffle_v4f32_0zz4:
1039; SSE2:       # BB#0:
1040; SSE2-NEXT:    xorps %xmm2, %xmm2
1041; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0]
1042; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
1043; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
1044; SSE2-NEXT:    movaps %xmm2, %xmm0
1045; SSE2-NEXT:    retq
1046;
1047; SSE3-LABEL: shuffle_v4f32_0zz4:
1048; SSE3:       # BB#0:
1049; SSE3-NEXT:    xorps %xmm2, %xmm2
1050; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0]
1051; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
1052; SSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
1053; SSE3-NEXT:    movaps %xmm2, %xmm0
1054; SSE3-NEXT:    retq
1055;
1056; SSSE3-LABEL: shuffle_v4f32_0zz4:
1057; SSSE3:       # BB#0:
1058; SSSE3-NEXT:    xorps %xmm2, %xmm2
1059; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0]
1060; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
1061; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
1062; SSSE3-NEXT:    movaps %xmm2, %xmm0
1063; SSSE3-NEXT:    retq
1064;
1065; SSE41-LABEL: shuffle_v4f32_0zz4:
1066; SSE41:       # BB#0:
1067; SSE41-NEXT:    insertps {{.*#+}} xmm1 = zero,zero,zero,xmm1[0]
1068; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1069; SSE41-NEXT:    retq
1070;
1071; AVX-LABEL: shuffle_v4f32_0zz4:
1072; AVX:       # BB#0:
1073; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = zero,zero,zero,xmm1[0]
1074; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1075; AVX-NEXT:    retq
1076  %shuffle = shufflevector <4 x float> %b, <4 x float> zeroinitializer, <4 x i32> <i32 undef, i32 5, i32 6, i32 0>
1077  %shuffle1 = shufflevector <4 x float> %a, <4 x float> %shuffle, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1078  ret <4 x float> %shuffle1
1079}
1080
; Chain of two shuffles. The first builds <%a[0], undef, undef, %b[2]>; the
; second keeps its lanes 0 and 3 and fills lanes 1 and 2 from a zero vector,
; giving <%a[0], 0, 0, %b[2]>.
1081define <4 x float> @shuffle_v4f32_0zz6(<4 x float> %a, <4 x float> %b) {
1082; SSE2-LABEL: shuffle_v4f32_0zz6:
1083; SSE2:       # BB#0:
1084; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
1085; SSE2-NEXT:    xorps %xmm1, %xmm1
1086; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3]
1087; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0,1,3]
1088; SSE2-NEXT:    movaps %xmm1, %xmm0
1089; SSE2-NEXT:    retq
1090;
1091; SSE3-LABEL: shuffle_v4f32_0zz6:
1092; SSE3:       # BB#0:
1093; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
1094; SSE3-NEXT:    xorps %xmm1, %xmm1
1095; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3]
1096; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0,1,3]
1097; SSE3-NEXT:    movaps %xmm1, %xmm0
1098; SSE3-NEXT:    retq
1099;
1100; SSSE3-LABEL: shuffle_v4f32_0zz6:
1101; SSSE3:       # BB#0:
1102; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
1103; SSSE3-NEXT:    xorps %xmm1, %xmm1
1104; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3]
1105; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0,1,3]
1106; SSSE3-NEXT:    movaps %xmm1, %xmm0
1107; SSSE3-NEXT:    retq
1108;
1109; SSE41-LABEL: shuffle_v4f32_0zz6:
1110; SSE41:       # BB#0:
1111; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2]
1112; SSE41-NEXT:    retq
1113;
1114; AVX-LABEL: shuffle_v4f32_0zz6:
1115; AVX:       # BB#0:
1116; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2]
1117; AVX-NEXT:    retq
1118  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 6>
1119  %shuffle1 = shufflevector <4 x float> zeroinitializer, <4 x float> %shuffle, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
1120  ret <4 x float> %shuffle1
1121}
1122
; Chain of two shuffles. The first builds <%a[0], undef, %a[2], %b[0]>; the
; second keeps its lanes 0, 2 and 3 and fills lane 1 from a zero vector,
; giving <%a[0], 0, %a[2], %b[0]>.
1123define <4 x float> @shuffle_v4f32_0z24(<4 x float> %a, <4 x float> %b) {
1124; SSE2-LABEL: shuffle_v4f32_0z24:
1125; SSE2:       # BB#0:
1126; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
1127; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
1128; SSE2-NEXT:    xorps %xmm1, %xmm1
1129; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1130; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
1131; SSE2-NEXT:    movaps %xmm1, %xmm0
1132; SSE2-NEXT:    retq
1133;
1134; SSE3-LABEL: shuffle_v4f32_0z24:
1135; SSE3:       # BB#0:
1136; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
1137; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
1138; SSE3-NEXT:    xorps %xmm1, %xmm1
1139; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1140; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
1141; SSE3-NEXT:    movaps %xmm1, %xmm0
1142; SSE3-NEXT:    retq
1143;
1144; SSSE3-LABEL: shuffle_v4f32_0z24:
1145; SSSE3:       # BB#0:
1146; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
1147; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
1148; SSSE3-NEXT:    xorps %xmm1, %xmm1
1149; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1150; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
1151; SSSE3-NEXT:    movaps %xmm1, %xmm0
1152; SSSE3-NEXT:    retq
1153;
1154; SSE41-LABEL: shuffle_v4f32_0z24:
1155; SSE41:       # BB#0:
1156; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
1157; SSE41-NEXT:    retq
1158;
1159; AVX-LABEL: shuffle_v4f32_0z24:
1160; AVX:       # BB#0:
1161; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
1162; AVX-NEXT:    retq
1163  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 undef, i32 2, i32 4>
1164  %shuffle1 = shufflevector <4 x float> zeroinitializer, <4 x float> %shuffle, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
1165  ret <4 x float> %shuffle1
1166}
1167
; Integer shuffle with a zero first operand: lane 0 takes %a[0] (mask
; index 4) and lanes 1-3 stay zero.
1168define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) {
1169; SSE2-LABEL: shuffle_v4i32_4zzz:
1170; SSE2:       # BB#0:
1171; SSE2-NEXT:    xorps %xmm1, %xmm1
1172; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1173; SSE2-NEXT:    movaps %xmm1, %xmm0
1174; SSE2-NEXT:    retq
1175;
1176; SSE3-LABEL: shuffle_v4i32_4zzz:
1177; SSE3:       # BB#0:
1178; SSE3-NEXT:    xorps %xmm1, %xmm1
1179; SSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1180; SSE3-NEXT:    movaps %xmm1, %xmm0
1181; SSE3-NEXT:    retq
1182;
1183; SSSE3-LABEL: shuffle_v4i32_4zzz:
1184; SSSE3:       # BB#0:
1185; SSSE3-NEXT:    xorps %xmm1, %xmm1
1186; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1187; SSSE3-NEXT:    movaps %xmm1, %xmm0
1188; SSSE3-NEXT:    retq
1189;
1190; SSE41-LABEL: shuffle_v4i32_4zzz:
1191; SSE41:       # BB#0:
1192; SSE41-NEXT:    pxor %xmm1, %xmm1
1193; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1194; SSE41-NEXT:    retq
1195;
1196; AVX-LABEL: shuffle_v4i32_4zzz:
1197; AVX:       # BB#0:
1198; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1199; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1200; AVX-NEXT:    retq
1201  %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1202  ret <4 x i32> %shuffle
1203}
1204
; Integer shuffle with a zero first operand: lane 1 takes %a[0] (mask
; index 4); lanes 0, 2 and 3 read elements 2, 3 and 0 of the zero vector.
; The expectations first isolate %a[0] in lane 0, then permute it into
; lane 1 with (v)pshufd.
1205define <4 x i32> @shuffle_v4i32_z4zz(<4 x i32> %a) {
1206; SSE2-LABEL: shuffle_v4i32_z4zz:
1207; SSE2:       # BB#0:
1208; SSE2-NEXT:    xorps %xmm1, %xmm1
1209; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1210; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1211; SSE2-NEXT:    retq
1212;
1213; SSE3-LABEL: shuffle_v4i32_z4zz:
1214; SSE3:       # BB#0:
1215; SSE3-NEXT:    xorps %xmm1, %xmm1
1216; SSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1217; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1218; SSE3-NEXT:    retq
1219;
1220; SSSE3-LABEL: shuffle_v4i32_z4zz:
1221; SSSE3:       # BB#0:
1222; SSSE3-NEXT:    xorps %xmm1, %xmm1
1223; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1224; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1225; SSSE3-NEXT:    retq
1226;
1227; SSE41-LABEL: shuffle_v4i32_z4zz:
1228; SSE41:       # BB#0:
1229; SSE41-NEXT:    pxor %xmm1, %xmm1
1230; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1231; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1232; SSE41-NEXT:    retq
1233;
1234; AVX-LABEL: shuffle_v4i32_z4zz:
1235; AVX:       # BB#0:
1236; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1237; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1238; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
1239; AVX-NEXT:    retq
1240  %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
1241  ret <4 x i32> %shuffle
1242}
1243
; Integer shuffle with a zero first operand: lane 2 takes %a[0] (mask
; index 4); lanes 0, 1 and 3 all read element 0 of the zero vector.
; The expectations first isolate %a[0] in lane 0, then permute it into
; lane 2 with (v)pshufd.
1244define <4 x i32> @shuffle_v4i32_zz4z(<4 x i32> %a) {
1245; SSE2-LABEL: shuffle_v4i32_zz4z:
1246; SSE2:       # BB#0:
1247; SSE2-NEXT:    xorps %xmm1, %xmm1
1248; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1249; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1250; SSE2-NEXT:    retq
1251;
1252; SSE3-LABEL: shuffle_v4i32_zz4z:
1253; SSE3:       # BB#0:
1254; SSE3-NEXT:    xorps %xmm1, %xmm1
1255; SSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1256; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1257; SSE3-NEXT:    retq
1258;
1259; SSSE3-LABEL: shuffle_v4i32_zz4z:
1260; SSSE3:       # BB#0:
1261; SSSE3-NEXT:    xorps %xmm1, %xmm1
1262; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1263; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1264; SSSE3-NEXT:    retq
1265;
1266; SSE41-LABEL: shuffle_v4i32_zz4z:
1267; SSE41:       # BB#0:
1268; SSE41-NEXT:    pxor %xmm1, %xmm1
1269; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1270; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1271; SSE41-NEXT:    retq
1272;
1273; AVX-LABEL: shuffle_v4i32_zz4z:
1274; AVX:       # BB#0:
1275; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1276; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1277; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
1278; AVX-NEXT:    retq
1279  %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
1280  ret <4 x i32> %shuffle
1281}
1282
; Lane 0 is zero, lanes 1 and 2 are undef, lane 3 takes %a[0] (mask index 4).
; The expectations use a single byte shift, (v)pslldq, which moves %a[0]
; into the top lane and fills the lower twelve bytes with zero.
1283define <4 x i32> @shuffle_v4i32_zuu4(<4 x i32> %a) {
1284; SSE-LABEL: shuffle_v4i32_zuu4:
1285; SSE:       # BB#0:
1286; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
1287; SSE-NEXT:    retq
1288;
1289; AVX-LABEL: shuffle_v4i32_zuu4:
1290; AVX:       # BB#0:
1291; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
1292; AVX-NEXT:    retq
1293  %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
1294  ret <4 x i32> %shuffle
1295}
1296
; Integer counterpart of shuffle_v4f32_z6zz: lane 1 takes %a[2] (mask
; index 6); lanes 0, 2 and 3 read the zeroinitializer first operand.
; AVX1 and AVX2 have separate expectations here because the final blend
; differs (vpblendw versus vpblendd).
1297define <4 x i32> @shuffle_v4i32_z6zz(<4 x i32> %a) {
1298; SSE2-LABEL: shuffle_v4i32_z6zz:
1299; SSE2:       # BB#0:
1300; SSE2-NEXT:    xorps %xmm1, %xmm1
1301; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
1302; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1303; SSE2-NEXT:    retq
1304;
1305; SSE3-LABEL: shuffle_v4i32_z6zz:
1306; SSE3:       # BB#0:
1307; SSE3-NEXT:    xorps %xmm1, %xmm1
1308; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
1309; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1310; SSE3-NEXT:    retq
1311;
1312; SSSE3-LABEL: shuffle_v4i32_z6zz:
1313; SSSE3:       # BB#0:
1314; SSSE3-NEXT:    xorps %xmm1, %xmm1
1315; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
1316; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1317; SSSE3-NEXT:    retq
1318;
1319; SSE41-LABEL: shuffle_v4i32_z6zz:
1320; SSE41:       # BB#0:
1321; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
1322; SSE41-NEXT:    pxor %xmm0, %xmm0
1323; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1324; SSE41-NEXT:    retq
1325;
1326; AVX1-LABEL: shuffle_v4i32_z6zz:
1327; AVX1:       # BB#0:
1328; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1329; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1330; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1331; AVX1-NEXT:    retq
1332;
1333; AVX2-LABEL: shuffle_v4i32_z6zz:
1334; AVX2:       # BB#0:
1335; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1336; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1337; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1338; AVX2-NEXT:    retq
1339  %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
1340  ret <4 x i32> %shuffle
1341}
1342
; Result is <%b[3], %a[0], %a[1], %a[2]>: four consecutive elements taken
; across the %b/%a boundary. SSSE3 and later expect one (v)palignr; SSE2
; and SSE3 expect a two-shufps sequence.
1343define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) {
1344; SSE2-LABEL: shuffle_v4i32_7012:
1345; SSE2:       # BB#0:
1346; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
1347; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
1348; SSE2-NEXT:    movaps %xmm1, %xmm0
1349; SSE2-NEXT:    retq
1350;
1351; SSE3-LABEL: shuffle_v4i32_7012:
1352; SSE3:       # BB#0:
1353; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
1354; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
1355; SSE3-NEXT:    movaps %xmm1, %xmm0
1356; SSE3-NEXT:    retq
1357;
1358; SSSE3-LABEL: shuffle_v4i32_7012:
1359; SSSE3:       # BB#0:
1360; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1361; SSSE3-NEXT:    retq
1362;
1363; SSE41-LABEL: shuffle_v4i32_7012:
1364; SSE41:       # BB#0:
1365; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1366; SSE41-NEXT:    retq
1367;
1368; AVX-LABEL: shuffle_v4i32_7012:
1369; AVX:       # BB#0:
1370; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1371; AVX-NEXT:    retq
1372  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
1373  ret <4 x i32> %shuffle
1374}
1375
; Result is <%b[2], %b[3], %a[0], %a[1]>: the high half of %b followed by
; the low half of %a. SSSE3 and later expect one (v)palignr; SSE2 and SSE3
; expect shufpd plus a register copy.
1376define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) {
1377; SSE2-LABEL: shuffle_v4i32_6701:
1378; SSE2:       # BB#0:
1379; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
1380; SSE2-NEXT:    movapd %xmm1, %xmm0
1381; SSE2-NEXT:    retq
1382;
1383; SSE3-LABEL: shuffle_v4i32_6701:
1384; SSE3:       # BB#0:
1385; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
1386; SSE3-NEXT:    movapd %xmm1, %xmm0
1387; SSE3-NEXT:    retq
1388;
1389; SSSE3-LABEL: shuffle_v4i32_6701:
1390; SSSE3:       # BB#0:
1391; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1392; SSSE3-NEXT:    retq
1393;
1394; SSE41-LABEL: shuffle_v4i32_6701:
1395; SSE41:       # BB#0:
1396; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1397; SSE41-NEXT:    retq
1398;
1399; AVX-LABEL: shuffle_v4i32_6701:
1400; AVX:       # BB#0:
1401; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1402; AVX-NEXT:    retq
1403  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
1404  ret <4 x i32> %shuffle
1405}
1406
; Result is <%b[1], %b[2], %b[3], %a[0]>: the top three elements of %b
; followed by the first element of %a. SSSE3 and later expect one
; (v)palignr; SSE2 and SSE3 expect a two-shufps sequence.
1407define <4 x i32> @shuffle_v4i32_5670(<4 x i32> %a, <4 x i32> %b) {
1408; SSE2-LABEL: shuffle_v4i32_5670:
1409; SSE2:       # BB#0:
1410; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
1411; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
1412; SSE2-NEXT:    movaps %xmm1, %xmm0
1413; SSE2-NEXT:    retq
1414;
1415; SSE3-LABEL: shuffle_v4i32_5670:
1416; SSE3:       # BB#0:
1417; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
1418; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
1419; SSE3-NEXT:    movaps %xmm1, %xmm0
1420; SSE3-NEXT:    retq
1421;
1422; SSSE3-LABEL: shuffle_v4i32_5670:
1423; SSSE3:       # BB#0:
1424; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
1425; SSSE3-NEXT:    retq
1426;
1427; SSE41-LABEL: shuffle_v4i32_5670:
1428; SSE41:       # BB#0:
1429; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
1430; SSE41-NEXT:    retq
1431;
1432; AVX-LABEL: shuffle_v4i32_5670:
1433; AVX:       # BB#0:
1434; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
1435; AVX-NEXT:    retq
1436  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
1437  ret <4 x i32> %shuffle
1438}
1439
; Result is <%a[1], %a[2], %a[3], %b[0]>: the top three elements of %a
; followed by the first element of %b. SSSE3 and later expect (v)palignr;
; SSE2 and SSE3 expect a two-shufps sequence.
1440define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) {
1441; SSE2-LABEL: shuffle_v4i32_1234:
1442; SSE2:       # BB#0:
1443; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
1444; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
1445; SSE2-NEXT:    retq
1446;
1447; SSE3-LABEL: shuffle_v4i32_1234:
1448; SSE3:       # BB#0:
1449; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
1450; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
1451; SSE3-NEXT:    retq
1452;
1453; SSSE3-LABEL: shuffle_v4i32_1234:
1454; SSSE3:       # BB#0:
1455; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
1456; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1457; SSSE3-NEXT:    retq
1458;
1459; SSE41-LABEL: shuffle_v4i32_1234:
1460; SSE41:       # BB#0:
1461; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
1462; SSE41-NEXT:    movdqa %xmm1, %xmm0
1463; SSE41-NEXT:    retq
1464;
1465; AVX-LABEL: shuffle_v4i32_1234:
1466; AVX:       # BB#0:
1467; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
1468; AVX-NEXT:    retq
1469  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
1470  ret <4 x i32> %shuffle
1471}
1472
; Result is <%a[2], %a[3], %b[0], %b[1]>: the high half of %a followed by
; the low half of %b. SSSE3 and later expect (v)palignr; SSE2 and SSE3
; expect a single shufpd.
1473define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) {
1474; SSE2-LABEL: shuffle_v4i32_2345:
1475; SSE2:       # BB#0:
1476; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1477; SSE2-NEXT:    retq
1478;
1479; SSE3-LABEL: shuffle_v4i32_2345:
1480; SSE3:       # BB#0:
1481; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1482; SSE3-NEXT:    retq
1483;
1484; SSSE3-LABEL: shuffle_v4i32_2345:
1485; SSSE3:       # BB#0:
1486; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
1487; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1488; SSSE3-NEXT:    retq
1489;
1490; SSE41-LABEL: shuffle_v4i32_2345:
1491; SSE41:       # BB#0:
1492; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
1493; SSE41-NEXT:    movdqa %xmm1, %xmm0
1494; SSE41-NEXT:    retq
1495;
1496; AVX-LABEL: shuffle_v4i32_2345:
1497; AVX:       # BB#0:
1498; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
1499; AVX-NEXT:    retq
1500  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1501  ret <4 x i32> %shuffle
1502}
1503
; Result is <%b[0], %a[0], undef, %a[1]>. The expectations use
; (v)punpckldq of %b with %a, which places %b[1] in the undef lane.
1504define <4 x i32> @shuffle_v4i32_40u1(<4 x i32> %a, <4 x i32> %b) {
1505; SSE-LABEL: shuffle_v4i32_40u1:
1506; SSE:       # BB#0:
1507; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1508; SSE-NEXT:    movdqa %xmm1, %xmm0
1509; SSE-NEXT:    retq
1510;
1511; AVX-LABEL: shuffle_v4i32_40u1:
1512; AVX:       # BB#0:
1513; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1514; AVX-NEXT:    retq
1515  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 1>
1516  ret <4 x i32> %shuffle
1517}
1518
; Result is <%a[3], %b[0], %b[1], %b[2]>: the last element of %a followed
; by the first three elements of %b. SSSE3 and later expect (v)palignr;
; SSE2 and SSE3 expect a two-shufps sequence.
1519define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) {
1520; SSE2-LABEL: shuffle_v4i32_3456:
1521; SSE2:       # BB#0:
1522; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
1523; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
1524; SSE2-NEXT:    retq
1525;
1526; SSE3-LABEL: shuffle_v4i32_3456:
1527; SSE3:       # BB#0:
1528; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
1529; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
1530; SSE3-NEXT:    retq
1531;
1532; SSSE3-LABEL: shuffle_v4i32_3456:
1533; SSSE3:       # BB#0:
1534; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
1535; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1536; SSSE3-NEXT:    retq
1537;
1538; SSE41-LABEL: shuffle_v4i32_3456:
1539; SSE41:       # BB#0:
1540; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
1541; SSE41-NEXT:    movdqa %xmm1, %xmm0
1542; SSE41-NEXT:    retq
1543;
1544; AVX-LABEL: shuffle_v4i32_3456:
1545; AVX:       # BB#0:
1546; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
1547; AVX-NEXT:    retq
1548  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1549  ret <4 x i32> %shuffle
1550}
1551
; Result is <%a[0], undef, %a[1], undef>; the mask never reads %b.
; Below SSE4.1 the expectations use pshufd; SSE4.1 and AVX use (v)pmovzxdq,
; which fills the undef lanes with zero.
1552define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
1553; SSE2-LABEL: shuffle_v4i32_0u1u:
1554; SSE2:       # BB#0:
1555; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1556; SSE2-NEXT:    retq
1557;
1558; SSE3-LABEL: shuffle_v4i32_0u1u:
1559; SSE3:       # BB#0:
1560; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1561; SSE3-NEXT:    retq
1562;
1563; SSSE3-LABEL: shuffle_v4i32_0u1u:
1564; SSSE3:       # BB#0:
1565; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1566; SSSE3-NEXT:    retq
1567;
1568; SSE41-LABEL: shuffle_v4i32_0u1u:
1569; SSE41:       # BB#0:
1570; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1571; SSE41-NEXT:    retq
1572;
1573; AVX-LABEL: shuffle_v4i32_0u1u:
1574; AVX:       # BB#0:
1575; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1576; AVX-NEXT:    retq
1577  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
1578  ret <4 x i32> %shuffle
1579}
1580
; Result is <%a[0], 0, %a[1], 0>: the low two elements of %a interleaved
; with zeros (mask indices 5 and 7 read the zeroinitializer operand).
; SSE4.1 and AVX expect (v)pmovzxdq; older levels expect pxor plus punpckldq.
1581define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
1582; SSE2-LABEL: shuffle_v4i32_0z1z:
1583; SSE2:       # BB#0:
1584; SSE2-NEXT:    pxor %xmm1, %xmm1
1585; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1586; SSE2-NEXT:    retq
1587;
1588; SSE3-LABEL: shuffle_v4i32_0z1z:
1589; SSE3:       # BB#0:
1590; SSE3-NEXT:    pxor %xmm1, %xmm1
1591; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1592; SSE3-NEXT:    retq
1593;
1594; SSSE3-LABEL: shuffle_v4i32_0z1z:
1595; SSSE3:       # BB#0:
1596; SSSE3-NEXT:    pxor %xmm1, %xmm1
1597; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1598; SSSE3-NEXT:    retq
1599;
1600; SSE41-LABEL: shuffle_v4i32_0z1z:
1601; SSE41:       # BB#0:
1602; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1603; SSE41-NEXT:    retq
1604;
1605; AVX-LABEL: shuffle_v4i32_0z1z:
1606; AVX:       # BB#0:
1607; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1608; AVX-NEXT:    retq
1609  %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
1610  ret <4 x i32> %shuffle
1611}
1612
; Result is <%a[0], %a[1], 0, undef>. The expectations use a single
; (v)movq, which keeps the low 64 bits and zeroes the upper half,
; including the undef lane.
1613define <4 x i32> @shuffle_v4i32_01zu(<4 x i32> %a) {
1614; SSE-LABEL: shuffle_v4i32_01zu:
1615; SSE:       # BB#0:
1616; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
1617; SSE-NEXT:    retq
1618;
1619; AVX-LABEL: shuffle_v4i32_01zu:
1620; AVX:       # BB#0:
1621; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1622; AVX-NEXT:    retq
1623  %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 7, i32 undef>
1624  ret <4 x i32> %shuffle
1625}
1626
; Integer counterpart of shuffle_v4f32_0z23: keeps lanes 0, 2 and 3 of %a
; and zeroes lane 1 (mask index 4). Below SSE4.1 the expectations use an
; andps with a RIP-relative memory operand; SSE4.1 and AVX1 use a word
; blend, and AVX2 uses vpblendd.
1627define <4 x i32> @shuffle_v4i32_0z23(<4 x i32> %a) {
1628; SSE2-LABEL: shuffle_v4i32_0z23:
1629; SSE2:       # BB#0:
1630; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
1631; SSE2-NEXT:    retq
1632;
1633; SSE3-LABEL: shuffle_v4i32_0z23:
1634; SSE3:       # BB#0:
1635; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
1636; SSE3-NEXT:    retq
1637;
1638; SSSE3-LABEL: shuffle_v4i32_0z23:
1639; SSSE3:       # BB#0:
1640; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
1641; SSSE3-NEXT:    retq
1642;
1643; SSE41-LABEL: shuffle_v4i32_0z23:
1644; SSE41:       # BB#0:
1645; SSE41-NEXT:    pxor %xmm1, %xmm1
1646; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1647; SSE41-NEXT:    retq
1648;
1649; AVX1-LABEL: shuffle_v4i32_0z23:
1650; AVX1:       # BB#0:
1651; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1652; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1653; AVX1-NEXT:    retq
1654;
1655; AVX2-LABEL: shuffle_v4i32_0z23:
1656; AVX2:       # BB#0:
1657; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1658; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
1659; AVX2-NEXT:    retq
1660  %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
1661  ret <4 x i32> %shuffle
1662}
1663
; Integer counterpart of shuffle_v4f32_01z3: keeps lanes 0, 1 and 3 of %a
; and zeroes lane 2 (mask index 4). Below SSE4.1 the expectations use an
; andps with a RIP-relative memory operand; SSE4.1 and AVX1 use a word
; blend, and AVX2 uses vpblendd.
1664define <4 x i32> @shuffle_v4i32_01z3(<4 x i32> %a) {
1665; SSE2-LABEL: shuffle_v4i32_01z3:
1666; SSE2:       # BB#0:
1667; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
1668; SSE2-NEXT:    retq
1669;
1670; SSE3-LABEL: shuffle_v4i32_01z3:
1671; SSE3:       # BB#0:
1672; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
1673; SSE3-NEXT:    retq
1674;
1675; SSSE3-LABEL: shuffle_v4i32_01z3:
1676; SSSE3:       # BB#0:
1677; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
1678; SSSE3-NEXT:    retq
1679;
1680; SSE41-LABEL: shuffle_v4i32_01z3:
1681; SSE41:       # BB#0:
1682; SSE41-NEXT:    pxor %xmm1, %xmm1
1683; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1684; SSE41-NEXT:    retq
1685;
1686; AVX1-LABEL: shuffle_v4i32_01z3:
1687; AVX1:       # BB#0:
1688; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1689; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1690; AVX1-NEXT:    retq
1691;
1692; AVX2-LABEL: shuffle_v4i32_01z3:
1693; AVX2:       # BB#0:
1694; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1695; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
1696; AVX2-NEXT:    retq
1697  %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
1698  ret <4 x i32> %shuffle
1699}
1700
; Shuffle %a with an all-zero vector using mask <0, 1, 2, 7>, i.e. zero only
; element 3. The SSE2, SSE3 and SSSE3 runs expect an andps with a RIP-relative
; memory operand (presumably a constant mask); the SSE4.1 and AVX runs expect a
; zeroed register blended in with pblendw (SSE4.1, AVX1) or vpblendd (AVX2).
1701define <4 x i32> @shuffle_v4i32_012z(<4 x i32> %a) {
1702; SSE2-LABEL: shuffle_v4i32_012z:
1703; SSE2:       # BB#0:
1704; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
1705; SSE2-NEXT:    retq
1706;
1707; SSE3-LABEL: shuffle_v4i32_012z:
1708; SSE3:       # BB#0:
1709; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
1710; SSE3-NEXT:    retq
1711;
1712; SSSE3-LABEL: shuffle_v4i32_012z:
1713; SSSE3:       # BB#0:
1714; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
1715; SSSE3-NEXT:    retq
1716;
1717; SSE41-LABEL: shuffle_v4i32_012z:
1718; SSE41:       # BB#0:
1719; SSE41-NEXT:    pxor %xmm1, %xmm1
1720; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
1721; SSE41-NEXT:    retq
1722;
1723; AVX1-LABEL: shuffle_v4i32_012z:
1724; AVX1:       # BB#0:
1725; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1726; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
1727; AVX1-NEXT:    retq
1728;
1729; AVX2-LABEL: shuffle_v4i32_012z:
1730; AVX2:       # BB#0:
1731; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1732; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
1733; AVX2-NEXT:    retq
1734  %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
1735  ret <4 x i32> %shuffle
1736}
1737
; Shuffle %a with an all-zero vector using mask <0, 4, 4, 3>, i.e. zero the two
; middle elements and keep elements 0 and 3. The SSE2, SSE3 and SSSE3 runs
; expect an andps with a RIP-relative memory operand (presumably a constant
; mask); the SSE4.1 and AVX runs expect a zeroed register blended in with
; pblendw (SSE4.1, AVX1) or vpblendd (AVX2).
1738define <4 x i32> @shuffle_v4i32_0zz3(<4 x i32> %a) {
1739; SSE2-LABEL: shuffle_v4i32_0zz3:
1740; SSE2:       # BB#0:
1741; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
1742; SSE2-NEXT:    retq
1743;
1744; SSE3-LABEL: shuffle_v4i32_0zz3:
1745; SSE3:       # BB#0:
1746; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
1747; SSE3-NEXT:    retq
1748;
1749; SSSE3-LABEL: shuffle_v4i32_0zz3:
1750; SSSE3:       # BB#0:
1751; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
1752; SSSE3-NEXT:    retq
1753;
1754; SSE41-LABEL: shuffle_v4i32_0zz3:
1755; SSE41:       # BB#0:
1756; SSE41-NEXT:    pxor %xmm1, %xmm1
1757; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
1758; SSE41-NEXT:    retq
1759;
1760; AVX1-LABEL: shuffle_v4i32_0zz3:
1761; AVX1:       # BB#0:
1762; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1763; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
1764; AVX1-NEXT:    retq
1765;
1766; AVX2-LABEL: shuffle_v4i32_0zz3:
1767; AVX2:       # BB#0:
1768; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1769; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
1770; AVX2-NEXT:    retq
1771  %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3>
1772  ret <4 x i32> %shuffle
1773}
1774
; Two shuffles separated by bitcasts. The first interleaves %a and %b with mask
; <1, 5, 0, 4>; the result is then viewed as <2 x double> and its two 64-bit
; halves are swapped. Composed, the result is <a0, b0, a1, b1> (mask
; <0, 4, 1, 5>), and every run expects this to fold to a single
; punpckldq/vpunpckldq.
1775define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b) {
1776; SSE-LABEL: shuffle_v4i32_bitcast_0415:
1777; SSE:       # BB#0:
1778; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1779; SSE-NEXT:    retq
1780;
1781; AVX-LABEL: shuffle_v4i32_bitcast_0415:
1782; AVX:       # BB#0:
1783; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1784; AVX-NEXT:    retq
1785  %shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 4>
1786  %bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double>
1787  %shuffle64 = shufflevector <2 x double> %bitcast64, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1788  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x i32>
1789  ret <4 x i32> %bitcast32
1790}
1791
; Mixed-type shuffle chain. %b is first shuffled to <b0, b0, b1, b1>, then both
; it and %a are viewed as <2 x double> and combined with mask <0, 2>, which
; takes the low 64 bits of each. The float result is <b0, b0, a0, a1>, i.e.
; mask <4, 4, 0, 1> with %a as the first source. The runs expect a pshufd of
; %b followed by unpcklpd (plus a register copy in the SSE runs).
1792define <4 x float> @shuffle_v4f32_bitcast_4401(<4 x float> %a, <4 x i32> %b) {
1793; SSE-LABEL: shuffle_v4f32_bitcast_4401:
1794; SSE:       # BB#0:
1795; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1796; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1797; SSE-NEXT:    movapd %xmm1, %xmm0
1798; SSE-NEXT:    retq
1799;
1800; AVX-LABEL: shuffle_v4f32_bitcast_4401:
1801; AVX:       # BB#0:
1802; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1803; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1804; AVX-NEXT:    retq
1805  %1 = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
1806  %2 = bitcast <4 x i32> %1 to <2 x double>
1807  %3 = bitcast <4 x float> %a to <2 x double>
1808  %4 = shufflevector <2 x double> %2, <2 x double> %3, <2 x i32> <i32 0, i32 2>
1809  %5 = bitcast <2 x double> %4 to <4 x float>
1810  ret <4 x float> %5
1811}
1812
; %a is first shuffled to <a0, a0, a1, a1>; that value is then shuffled with
; %b (bitcast from <4 x i32> to <4 x float>) using mask <1, 0, 4, 5>. Elements
; 0 and 1 of the intermediate are both a0, so the result is <a0, a0, b0, b1>,
; i.e. mask <0, 0, 4, 5> on the original inputs. Every run expects one
; shufps/vshufps.
1813define <4 x float> @shuffle_v4f32_bitcast_0045(<4 x float> %a, <4 x i32> %b) {
1814; SSE-LABEL: shuffle_v4f32_bitcast_0045:
1815; SSE:       # BB#0:
1816; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
1817; SSE-NEXT:    retq
1818;
1819; AVX-LABEL: shuffle_v4f32_bitcast_0045:
1820; AVX:       # BB#0:
1821; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
1822; AVX-NEXT:    retq
1823  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
1824  %2 = bitcast <4 x i32> %b to <4 x float>
1825  %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 1, i32 0, i32 4, i32 5>
1826  ret <4 x float> %3
1827}
1828
; Blend written as bitwise logic instead of shufflevector. %a is masked to keep
; elements 1-2, %b is masked to keep elements 0 and 3, and the two are OR-ed,
; giving <b0, a1, a2, b3> (mask <4, 1, 2, 7> with %a as the first source).
; The SSE2, SSE3 and SSSE3 runs expect two shufps plus a register copy; the
; SSE4.1 and AVX runs expect a single blendps/vblendps.
1829define <4 x float> @mask_v4f32_4127(<4 x float> %a, <4 x float> %b) {
1830; SSE2-LABEL: mask_v4f32_4127:
1831; SSE2:       # BB#0:
1832; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2]
1833; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
1834; SSE2-NEXT:    movaps %xmm1, %xmm0
1835; SSE2-NEXT:    retq
1836;
1837; SSE3-LABEL: mask_v4f32_4127:
1838; SSE3:       # BB#0:
1839; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2]
1840; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
1841; SSE3-NEXT:    movaps %xmm1, %xmm0
1842; SSE3-NEXT:    retq
1843;
1844; SSSE3-LABEL: mask_v4f32_4127:
1845; SSSE3:       # BB#0:
1846; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2]
1847; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
1848; SSSE3-NEXT:    movaps %xmm1, %xmm0
1849; SSSE3-NEXT:    retq
1850;
1851; SSE41-LABEL: mask_v4f32_4127:
1852; SSE41:       # BB#0:
1853; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
1854; SSE41-NEXT:    retq
1855;
1856; AVX-LABEL: mask_v4f32_4127:
1857; AVX:       # BB#0:
1858; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
1859; AVX-NEXT:    retq
1860  %1 = bitcast <4 x float> %a to <4 x i32>
1861  %2 = bitcast <4 x float> %b to <4 x i32>
1862  %3 = and <4 x i32> %1, <i32  0, i32 -1, i32 -1, i32  0>
1863  %4 = and <4 x i32> %2, <i32 -1, i32  0, i32  0, i32 -1>
1864  %5 = or <4 x i32> %4, %3
1865  %6 = bitcast <4 x i32> %5 to <4 x float>
1866  ret <4 x float> %6
1867}
1868
; Blend written as 64-bit bitwise logic. Both inputs are viewed as <2 x i64>.
; %a is AND-ed with <0, -4294967296>, which keeps only the upper 32 bits of its
; high quadword (float element 3 on this little-endian target). %b is AND-ed
; with <-1, 4294967295>, which keeps its float elements 0-2. OR-ing gives
; <b0, b1, b2, a3>. The SSE2, SSE3 and SSSE3 runs expect two shufps plus a
; register copy; the SSE4.1 and AVX runs expect a single blendps/vblendps.
; NOTE(review): the digits in the function name match the result only if %b
; is counted as the first source; confirm that naming is intended.
1869define <4 x float> @mask_v4f32_0127(<4 x float> %a, <4 x float> %b) {
1870; SSE2-LABEL: mask_v4f32_0127:
1871; SSE2:       # BB#0:
1872; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
1873; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
1874; SSE2-NEXT:    movaps %xmm1, %xmm0
1875; SSE2-NEXT:    retq
1876;
1877; SSE3-LABEL: mask_v4f32_0127:
1878; SSE3:       # BB#0:
1879; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
1880; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
1881; SSE3-NEXT:    movaps %xmm1, %xmm0
1882; SSE3-NEXT:    retq
1883;
1884; SSSE3-LABEL: mask_v4f32_0127:
1885; SSSE3:       # BB#0:
1886; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
1887; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
1888; SSSE3-NEXT:    movaps %xmm1, %xmm0
1889; SSSE3-NEXT:    retq
1890;
1891; SSE41-LABEL: mask_v4f32_0127:
1892; SSE41:       # BB#0:
1893; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
1894; SSE41-NEXT:    retq
1895;
1896; AVX-LABEL: mask_v4f32_0127:
1897; AVX:       # BB#0:
1898; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
1899; AVX-NEXT:    retq
1900  %1 = bitcast <4 x float> %a to <2 x i64>
1901  %2 = bitcast <4 x float> %b to <2 x i64>
1902  %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
1903  %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>
1904  %5 = or <2 x i64> %4, %3
1905  %6 = bitcast <2 x i64> %5 to <4 x float>
1906  ret <4 x float> %6
1907}
1908
; Integer variant of the 64-bit and/or blend. Both inputs are viewed as
; <2 x i64>. %a is AND-ed with <0, -4294967296>, which keeps only the upper
; 32 bits of its high quadword (i32 element 3 on this little-endian target).
; %b is AND-ed with <-1, 4294967295>, which keeps its i32 elements 0-2. OR-ing
; gives <b0, b1, b2, a3>. The SSE2, SSE3 and SSSE3 runs expect two shufps plus
; a register copy; SSE4.1 and AVX1 expect one pblendw/vpblendw; AVX2 expects
; one vpblendd.
; NOTE(review): the digits in the function name match the result only if %b
; is counted as the first source; confirm that naming is intended.
1909define <4 x i32> @mask_v4i32_0127(<4 x i32> %a, <4 x i32> %b) {
1910; SSE2-LABEL: mask_v4i32_0127:
1911; SSE2:       # BB#0:
1912; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
1913; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
1914; SSE2-NEXT:    movaps %xmm1, %xmm0
1915; SSE2-NEXT:    retq
1916;
1917; SSE3-LABEL: mask_v4i32_0127:
1918; SSE3:       # BB#0:
1919; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
1920; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
1921; SSE3-NEXT:    movaps %xmm1, %xmm0
1922; SSE3-NEXT:    retq
1923;
1924; SSSE3-LABEL: mask_v4i32_0127:
1925; SSSE3:       # BB#0:
1926; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
1927; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
1928; SSSE3-NEXT:    movaps %xmm1, %xmm0
1929; SSSE3-NEXT:    retq
1930;
1931; SSE41-LABEL: mask_v4i32_0127:
1932; SSE41:       # BB#0:
1933; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
1934; SSE41-NEXT:    retq
1935;
1936; AVX1-LABEL: mask_v4i32_0127:
1937; AVX1:       # BB#0:
1938; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
1939; AVX1-NEXT:    retq
1940;
1941; AVX2-LABEL: mask_v4i32_0127:
1942; AVX2:       # BB#0:
1943; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
1944; AVX2-NEXT:    retq
1945  %1 = bitcast <4 x i32> %a to <2 x i64>
1946  %2 = bitcast <4 x i32> %b to <2 x i64>
1947  %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
1948  %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>
1949  %5 = or <2 x i64> %4, %3
1950  %6 = bitcast <2 x i64> %5 to <4 x i32>
1951  ret <4 x i32> %6
1952}
1953
; Loads a <2 x float> (align 1) and widens it to <4 x float> by repeating the
; pair with mask <0, 1, 0, 1>. The SSE2 run expects a movq load followed by
; pshufd; the SSE3, SSSE3, SSE4.1 and AVX runs expect the load and duplicate to
; fold into one movddup/vmovddup from memory.
1954define <4 x float> @broadcast_v4f32_0101_from_v2f32(<2 x float>* %x) {
1955; SSE2-LABEL: broadcast_v4f32_0101_from_v2f32:
1956; SSE2:       # BB#0:
1957; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1958; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1959; SSE2-NEXT:    retq
1960;
1961; SSE3-LABEL: broadcast_v4f32_0101_from_v2f32:
1962; SSE3:       # BB#0:
1963; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1964; SSE3-NEXT:    retq
1965;
1966; SSSE3-LABEL: broadcast_v4f32_0101_from_v2f32:
1967; SSSE3:       # BB#0:
1968; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1969; SSSE3-NEXT:    retq
1970;
1971; SSE41-LABEL: broadcast_v4f32_0101_from_v2f32:
1972; SSE41:       # BB#0:
1973; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1974; SSE41-NEXT:    retq
1975;
1976; AVX-LABEL: broadcast_v4f32_0101_from_v2f32:
1977; AVX:       # BB#0:
1978; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
1979; AVX-NEXT:    retq
1980  %1 = load <2 x float>, <2 x float>* %x, align 1
1981  %2 = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1982  ret <4 x float> %2
1983}
1984
; Inserts the scalar i32 argument into element 0 of an undef vector, then
; shuffles with an all-zero vector (mask <0, 5, 6, 7>) so elements 1-3 are
; zero. Every run expects a single movd/vmovd from %edi.
1985define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
1986; SSE-LABEL: insert_reg_and_zero_v4i32:
1987; SSE:       # BB#0:
1988; SSE-NEXT:    movd %edi, %xmm0
1989; SSE-NEXT:    retq
1990;
1991; AVX-LABEL: insert_reg_and_zero_v4i32:
1992; AVX:       # BB#0:
1993; AVX-NEXT:    vmovd %edi, %xmm0
1994; AVX-NEXT:    retq
1995  %v = insertelement <4 x i32> undef, i32 %a, i32 0
1996  %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1997  ret <4 x i32> %shuffle
1998}
1999
; Memory variant of the i32 insert-and-zero case. A loaded i32 is placed in
; element 0 and elements 1-3 are taken from an all-zero vector (mask
; <0, 5, 6, 7>). Every run expects the load to fold into one movd/vmovd that
; zeroes the upper elements.
2000define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) {
2001; SSE-LABEL: insert_mem_and_zero_v4i32:
2002; SSE:       # BB#0:
2003; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2004; SSE-NEXT:    retq
2005;
2006; AVX-LABEL: insert_mem_and_zero_v4i32:
2007; AVX:       # BB#0:
2008; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2009; AVX-NEXT:    retq
2010  %a = load i32, i32* %ptr
2011  %v = insertelement <4 x i32> undef, i32 %a, i32 0
2012  %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
2013  ret <4 x i32> %shuffle
2014}
2015
; Inserts the scalar float argument into element 0 of an undef vector, then
; shuffles with an all-zero vector (mask <0, 5, 6, 7>) so elements 1-3 are
; zero. The SSE2, SSE3 and SSSE3 runs expect xorps + movss + a register copy;
; the SSE4.1 run expects xorps + blendps; the AVX runs expect vxorps + vmovss.
2016define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
2017; SSE2-LABEL: insert_reg_and_zero_v4f32:
2018; SSE2:       # BB#0:
2019; SSE2-NEXT:    xorps %xmm1, %xmm1
2020; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
2021; SSE2-NEXT:    movaps %xmm1, %xmm0
2022; SSE2-NEXT:    retq
2023;
2024; SSE3-LABEL: insert_reg_and_zero_v4f32:
2025; SSE3:       # BB#0:
2026; SSE3-NEXT:    xorps %xmm1, %xmm1
2027; SSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
2028; SSE3-NEXT:    movaps %xmm1, %xmm0
2029; SSE3-NEXT:    retq
2030;
2031; SSSE3-LABEL: insert_reg_and_zero_v4f32:
2032; SSSE3:       # BB#0:
2033; SSSE3-NEXT:    xorps %xmm1, %xmm1
2034; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
2035; SSSE3-NEXT:    movaps %xmm1, %xmm0
2036; SSSE3-NEXT:    retq
2037;
2038; SSE41-LABEL: insert_reg_and_zero_v4f32:
2039; SSE41:       # BB#0:
2040; SSE41-NEXT:    xorps %xmm1, %xmm1
2041; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
2042; SSE41-NEXT:    retq
2043;
2044; AVX-LABEL: insert_reg_and_zero_v4f32:
2045; AVX:       # BB#0:
2046; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
2047; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
2048; AVX-NEXT:    retq
2049  %v = insertelement <4 x float> undef, float %a, i32 0
2050  %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
2051  ret <4 x float> %shuffle
2052}
2053
; Memory variant of the float insert-and-zero case. A loaded float is placed in
; element 0 and elements 1-3 are taken from an all-zero vector (mask
; <0, 5, 6, 7>). Every run expects the load to fold into one movss/vmovss that
; zeroes the upper elements.
2054define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
2055; SSE-LABEL: insert_mem_and_zero_v4f32:
2056; SSE:       # BB#0:
2057; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2058; SSE-NEXT:    retq
2059;
2060; AVX-LABEL: insert_mem_and_zero_v4f32:
2061; AVX:       # BB#0:
2062; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2063; AVX-NEXT:    retq
2064  %a = load float, float* %ptr
2065  %v = insertelement <4 x float> undef, float %a, i32 0
2066  %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
2067  ret <4 x float> %shuffle
2068}
2069
; Replaces the low 64 bits of %b with a value held in a general-purpose
; register. The i64 argument is bitcast to <2 x i32>, widened to four elements
; (upper two undef), and shuffled with %b using mask <0, 1, 6, 7>, so elements
; 0-1 come from %a and elements 2-3 from %b. Every run first moves %rdi into
; an xmm register, then merges: movsd in the SSE2, SSE3 and SSSE3 runs,
; pblendw/vpblendw in the SSE4.1 and AVX1 runs, vpblendd in the AVX2 run.
2070define <4 x i32> @insert_reg_lo_v4i32(i64 %a, <4 x i32> %b) {
2071; SSE2-LABEL: insert_reg_lo_v4i32:
2072; SSE2:       # BB#0:
2073; SSE2-NEXT:    movd %rdi, %xmm1
2074; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2075; SSE2-NEXT:    retq
2076;
2077; SSE3-LABEL: insert_reg_lo_v4i32:
2078; SSE3:       # BB#0:
2079; SSE3-NEXT:    movd %rdi, %xmm1
2080; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2081; SSE3-NEXT:    retq
2082;
2083; SSSE3-LABEL: insert_reg_lo_v4i32:
2084; SSSE3:       # BB#0:
2085; SSSE3-NEXT:    movd %rdi, %xmm1
2086; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2087; SSSE3-NEXT:    retq
2088;
2089; SSE41-LABEL: insert_reg_lo_v4i32:
2090; SSE41:       # BB#0:
2091; SSE41-NEXT:    movd %rdi, %xmm1
2092; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
2093; SSE41-NEXT:    retq
2094;
2095; AVX1-LABEL: insert_reg_lo_v4i32:
2096; AVX1:       # BB#0:
2097; AVX1-NEXT:    vmovq %rdi, %xmm1
2098; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
2099; AVX1-NEXT:    retq
2100;
2101; AVX2-LABEL: insert_reg_lo_v4i32:
2102; AVX2:       # BB#0:
2103; AVX2-NEXT:    vmovq %rdi, %xmm1
2104; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
2105; AVX2-NEXT:    retq
2106  %a.cast = bitcast i64 %a to <2 x i32>
2107  %v = shufflevector <2 x i32> %a.cast, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2108  %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
2109  ret <4 x i32> %shuffle
2110}
2111
; Replaces the low 64 bits of %b with a <2 x i32> loaded from memory. The
; loaded pair is widened to four elements (upper two undef) and shuffled with
; %b using mask <0, 1, 6, 7>. The SSE2, SSE3 and SSSE3 runs expect the load to
; fold into a single movlpd; the SSE4.1 and AVX runs expect a movq/vmovq load
; followed by pblendw/vpblendw (SSE4.1, AVX1) or vpblendd (AVX2).
2112define <4 x i32> @insert_mem_lo_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
2113; SSE2-LABEL: insert_mem_lo_v4i32:
2114; SSE2:       # BB#0:
2115; SSE2-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
2116; SSE2-NEXT:    retq
2117;
2118; SSE3-LABEL: insert_mem_lo_v4i32:
2119; SSE3:       # BB#0:
2120; SSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
2121; SSE3-NEXT:    retq
2122;
2123; SSSE3-LABEL: insert_mem_lo_v4i32:
2124; SSSE3:       # BB#0:
2125; SSSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
2126; SSSE3-NEXT:    retq
2127;
2128; SSE41-LABEL: insert_mem_lo_v4i32:
2129; SSE41:       # BB#0:
2130; SSE41-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
2131; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
2132; SSE41-NEXT:    retq
2133;
2134; AVX1-LABEL: insert_mem_lo_v4i32:
2135; AVX1:       # BB#0:
2136; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
2137; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
2138; AVX1-NEXT:    retq
2139;
2140; AVX2-LABEL: insert_mem_lo_v4i32:
2141; AVX2:       # BB#0:
2142; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
2143; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
2144; AVX2-NEXT:    retq
2145  %a = load <2 x i32>, <2 x i32>* %ptr
2146  %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2147  %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
2148  ret <4 x i32> %shuffle
2149}
2150
; Replaces the high 64 bits of %b with a value held in a general-purpose
; register. The i64 argument is bitcast to <2 x i32>, widened to four elements
; (upper two undef), and shuffled with %b using mask <4, 5, 0, 1>, so elements
; 0-1 come from %b and elements 2-3 from %a. Every run expects a move of %rdi
; into an xmm register followed by punpcklqdq/vpunpcklqdq.
2151define <4 x i32> @insert_reg_hi_v4i32(i64 %a, <4 x i32> %b) {
2152; SSE-LABEL: insert_reg_hi_v4i32:
2153; SSE:       # BB#0:
2154; SSE-NEXT:    movd %rdi, %xmm1
2155; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2156; SSE-NEXT:    retq
2157;
2158; AVX-LABEL: insert_reg_hi_v4i32:
2159; AVX:       # BB#0:
2160; AVX-NEXT:    vmovq %rdi, %xmm1
2161; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2162; AVX-NEXT:    retq
2163  %a.cast = bitcast i64 %a to <2 x i32>
2164  %v = shufflevector <2 x i32> %a.cast, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2165  %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
2166  ret <4 x i32> %shuffle
2167}
2168
; Replaces the high 64 bits of %b with a <2 x i32> loaded from memory. The
; loaded pair is widened to four elements (upper two undef) and shuffled with
; %b using mask <4, 5, 0, 1>, so elements 0-1 come from %b and elements 2-3
; from memory. Every run expects a movq/vmovq load followed by
; punpcklqdq/vpunpcklqdq.
2169define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
2170; SSE-LABEL: insert_mem_hi_v4i32:
2171; SSE:       # BB#0:
2172; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
2173; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2174; SSE-NEXT:    retq
2175;
2176; AVX-LABEL: insert_mem_hi_v4i32:
2177; AVX:       # BB#0:
2178; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
2179; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2180; AVX-NEXT:    retq
2181  %a = load <2 x i32>, <2 x i32>* %ptr
2182  %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2183  %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
2184  ret <4 x i32> %shuffle
2185}
2186
; Replaces the low 64 bits of %b with a double argument reinterpreted as
; <2 x float>. The pair is widened to four elements (upper two undef) and
; shuffled with %b using mask <0, 1, 6, 7>. The SSE runs expect movsd plus a
; register copy; the AVX runs expect a single vmovsd.
2187define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
2188; SSE-LABEL: insert_reg_lo_v4f32:
2189; SSE:       # BB#0:
2190; SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2191; SSE-NEXT:    movapd %xmm1, %xmm0
2192; SSE-NEXT:    retq
2193;
2194; AVX-LABEL: insert_reg_lo_v4f32:
2195; AVX:       # BB#0:
2196; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
2197; AVX-NEXT:    retq
2198  %a.cast = bitcast double %a to <2 x float>
2199  %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2200  %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
2201  ret <4 x float> %shuffle
2202}
2203
; Replaces the low 64 bits of %b with a <2 x float> loaded from memory. The
; loaded pair is widened to four elements (upper two undef) and shuffled with
; %b using mask <0, 1, 6, 7>. Every run expects the load to fold into a single
; movlpd/vmovlpd.
2204define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
2205; SSE-LABEL: insert_mem_lo_v4f32:
2206; SSE:       # BB#0:
2207; SSE-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
2208; SSE-NEXT:    retq
2209;
2210; AVX-LABEL: insert_mem_lo_v4f32:
2211; AVX:       # BB#0:
2212; AVX-NEXT:    vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
2213; AVX-NEXT:    retq
2214  %a = load <2 x float>, <2 x float>* %ptr
2215  %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2216  %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
2217  ret <4 x float> %shuffle
2218}
2219
; Replaces the high 64 bits of %b with a double argument reinterpreted as
; <2 x float>. The pair is widened to four elements (upper two undef) and
; shuffled with %b using mask <4, 5, 0, 1>, so elements 0-1 come from %b and
; elements 2-3 from %a. The SSE runs expect unpcklpd plus a register copy; the
; AVX runs expect a single vunpcklpd.
2220define <4 x float> @insert_reg_hi_v4f32(double %a, <4 x float> %b) {
2221; SSE-LABEL: insert_reg_hi_v4f32:
2222; SSE:       # BB#0:
2223; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2224; SSE-NEXT:    movapd %xmm1, %xmm0
2225; SSE-NEXT:    retq
2226;
2227; AVX-LABEL: insert_reg_hi_v4f32:
2228; AVX:       # BB#0:
2229; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2230; AVX-NEXT:    retq
2231  %a.cast = bitcast double %a to <2 x float>
2232  %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2233  %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
2234  ret <4 x float> %shuffle
2235}
2236
; Replaces the high 64 bits of %b with a <2 x float> loaded from memory. The
; loaded pair is widened to four elements (upper two undef) and shuffled with
; %b using mask <4, 5, 0, 1>, so elements 0-1 come from %b and elements 2-3
; from memory. Every run expects the load to fold into a single movhpd/vmovhpd.
2237define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) {
2238; SSE-LABEL: insert_mem_hi_v4f32:
2239; SSE:       # BB#0:
2240; SSE-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
2241; SSE-NEXT:    retq
2242;
2243; AVX-LABEL: insert_mem_hi_v4f32:
2244; AVX:       # BB#0:
2245; AVX-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
2246; AVX-NEXT:    retq
2247  %a = load <2 x float>, <2 x float>* %ptr
2248  %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2249  %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
2250  ret <4 x float> %shuffle
2251}
2252
; Loads a <4 x float> and reverses its element order (mask <3, 2, 1, 0>). The
; SSE runs expect a movaps load followed by a register shufps; the AVX runs
; expect the load to fold into the memory operand of a single vpermilps.
2253define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
2254; SSE-LABEL: shuffle_mem_v4f32_3210:
2255; SSE:       # BB#0:
2256; SSE-NEXT:    movaps (%rdi), %xmm0
2257; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
2258; SSE-NEXT:    retq
2259;
2260; AVX-LABEL: shuffle_mem_v4f32_3210:
2261; AVX:       # BB#0:
2262; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
2263; AVX-NEXT:    retq
2264  %a = load <4 x float>, <4 x float>* %ptr
2265  %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
2266  ret <4 x float> %shuffle
2267}
2268
; Loads an i32 (align 4), inserts it into element 0 of an all-zero vector, and
; splats element 0 to every lane (the shuffle mask is zeroinitializer, i.e.
; <0, 0, 0, 0>). The SSE runs expect a movd load followed by pshufd; the AVX
; runs expect a single vbroadcastss straight from memory.
2269define <4 x i32> @insert_dup_mem_v4i32(i32* %ptr) {
2270; SSE-LABEL: insert_dup_mem_v4i32:
2271; SSE:       # BB#0:
2272; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2273; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2274; SSE-NEXT:    retq
2275;
2276; AVX-LABEL: insert_dup_mem_v4i32:
2277; AVX:       # BB#0:
2278; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
2279; AVX-NEXT:    retq
2280  %tmp = load i32, i32* %ptr, align 4
2281  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2282  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
2283  ret <4 x i32> %tmp2
2284}
2285
2286;
2287; Shuffle to logical bit shifts
2288;
2289
; Shuffle %a with an all-zero vector using mask <4, 0, 4, undef>, giving
; <zero, a0, zero, undef>. Because element 3 is undefined, every run expects
; this to lower to one psllq/vpsllq by 32 bits.
2290define <4 x i32> @shuffle_v4i32_z0zX(<4 x i32> %a) {
2291; SSE-LABEL: shuffle_v4i32_z0zX:
2292; SSE:       # BB#0:
2293; SSE-NEXT:    psllq $32, %xmm0
2294; SSE-NEXT:    retq
2295;
2296; AVX-LABEL: shuffle_v4i32_z0zX:
2297; AVX:       # BB#0:
2298; AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
2299; AVX-NEXT:    retq
2300  %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 undef>
2301  ret <4 x i32> %shuffle
2302}
2303
; Shuffle %a with an all-zero vector using mask <1, 4, 3, 4>, giving
; <a1, zero, a3, zero>. Every run expects this to lower to one psrlq/vpsrlq by
; 32 bits.
2304define <4 x i32> @shuffle_v4i32_1z3z(<4 x i32> %a) {
2305; SSE-LABEL: shuffle_v4i32_1z3z:
2306; SSE:       # BB#0:
2307; SSE-NEXT:    psrlq $32, %xmm0
2308; SSE-NEXT:    retq
2309;
2310; AVX-LABEL: shuffle_v4i32_1z3z:
2311; AVX:       # BB#0:
2312; AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
2313; AVX-NEXT:    retq
2314  %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
2315  ret <4 x i32> %shuffle
2316}
2317