1; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
2; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
3
4target triple = "x86_64-unknown-unknown"
5
6define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
7; AVX1-LABEL: shuffle_v8f32_00000000:
8; AVX1:       # BB#0:
9; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
10; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
11; AVX1-NEXT:    retq
12;
13; AVX2-LABEL: shuffle_v8f32_00000000:
14; AVX2:       # BB#0:
15; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
16; AVX2-NEXT:    retq
17  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
18  ret <8 x float> %shuffle
19}
20
21define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
22; AVX1-LABEL: shuffle_v8f32_00000010:
23; AVX1:       # BB#0:
24; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
25; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
26; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
27; AVX1-NEXT:    retq
28;
29; AVX2-LABEL: shuffle_v8f32_00000010:
30; AVX2:       # BB#0:
31; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
32; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
33; AVX2-NEXT:    retq
34  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
35  ret <8 x float> %shuffle
36}
37
38define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
39; AVX1-LABEL: shuffle_v8f32_00000200:
40; AVX1:       # BB#0:
41; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
42; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
43; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
44; AVX1-NEXT:    retq
45;
46; AVX2-LABEL: shuffle_v8f32_00000200:
47; AVX2:       # BB#0:
48; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
49; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
50; AVX2-NEXT:    retq
51  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
52  ret <8 x float> %shuffle
53}
54
55define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
56; AVX1-LABEL: shuffle_v8f32_00003000:
57; AVX1:       # BB#0:
58; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
59; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
60; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
61; AVX1-NEXT:    retq
62;
63; AVX2-LABEL: shuffle_v8f32_00003000:
64; AVX2:       # BB#0:
65; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
66; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
67; AVX2-NEXT:    retq
68  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
69  ret <8 x float> %shuffle
70}
71
72define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
73; AVX1-LABEL: shuffle_v8f32_00040000:
74; AVX1:       # BB#0:
75; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
76; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
77; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
78; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
79; AVX1-NEXT:    retq
80;
81; AVX2-LABEL: shuffle_v8f32_00040000:
82; AVX2:       # BB#0:
83; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
84; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
85; AVX2-NEXT:    retq
86  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
87  ret <8 x float> %shuffle
88}
89
90define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
91; AVX1-LABEL: shuffle_v8f32_00500000:
92; AVX1:       # BB#0:
93; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
94; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
95; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
96; AVX1-NEXT:    retq
97;
98; AVX2-LABEL: shuffle_v8f32_00500000:
99; AVX2:       # BB#0:
100; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
101; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
102; AVX2-NEXT:    retq
103  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
104  ret <8 x float> %shuffle
105}
106
107define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
108; AVX1-LABEL: shuffle_v8f32_06000000:
109; AVX1:       # BB#0:
110; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
111; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
112; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
113; AVX1-NEXT:    retq
114;
115; AVX2-LABEL: shuffle_v8f32_06000000:
116; AVX2:       # BB#0:
117; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
118; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
119; AVX2-NEXT:    retq
120  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
121  ret <8 x float> %shuffle
122}
123
124define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
125; AVX1-LABEL: shuffle_v8f32_70000000:
126; AVX1:       # BB#0:
127; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
128; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
129; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
130; AVX1-NEXT:    retq
131;
132; AVX2-LABEL: shuffle_v8f32_70000000:
133; AVX2:       # BB#0:
134; AVX2-NEXT:    movl $7, %eax
135; AVX2-NEXT:    vmovd %eax, %xmm1
136; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
137; AVX2-NEXT:    retq
138  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
139  ret <8 x float> %shuffle
140}
141
142define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
143; ALL-LABEL: shuffle_v8f32_01014545:
144; ALL:       # BB#0:
145; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
146; ALL-NEXT:    retq
147  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
148  ret <8 x float> %shuffle
149}
150
151define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
152; AVX1-LABEL: shuffle_v8f32_00112233:
153; AVX1:       # BB#0:
154; AVX1-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
155; AVX1-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
156; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
157; AVX1-NEXT:    retq
158;
159; AVX2-LABEL: shuffle_v8f32_00112233:
160; AVX2:       # BB#0:
161; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
162; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
163; AVX2-NEXT:    retq
164  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
165  ret <8 x float> %shuffle
166}
167
168define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
169; AVX1-LABEL: shuffle_v8f32_00001111:
170; AVX1:       # BB#0:
171; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
172; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
173; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
174; AVX1-NEXT:    retq
175;
176; AVX2-LABEL: shuffle_v8f32_00001111:
177; AVX2:       # BB#0:
178; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
179; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
180; AVX2-NEXT:    retq
181  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
182  ret <8 x float> %shuffle
183}
184
185define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
186; ALL-LABEL: shuffle_v8f32_81a3c5e7:
187; ALL:       # BB#0:
188; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
189; ALL-NEXT:    retq
190  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
191  ret <8 x float> %shuffle
192}
193
194define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
195; AVX1-LABEL: shuffle_v8f32_08080808:
196; AVX1:       # BB#0:
197; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
198; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
199; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
200; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
201; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
202; AVX1-NEXT:    retq
203;
204; AVX2-LABEL: shuffle_v8f32_08080808:
205; AVX2:       # BB#0:
206; AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
207; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
208; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
209; AVX2-NEXT:    retq
210  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
211  ret <8 x float> %shuffle
212}
213
214define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
215; ALL-LABEL: shuffle_v8f32_08084c4c:
216; ALL:       # BB#0:
217; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
218; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
219; ALL-NEXT:    retq
220  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
221  ret <8 x float> %shuffle
222}
223
224define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
225; ALL-LABEL: shuffle_v8f32_8823cc67:
226; ALL:       # BB#0:
227; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
228; ALL-NEXT:    retq
229  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
230  ret <8 x float> %shuffle
231}
232
233define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
234; ALL-LABEL: shuffle_v8f32_9832dc76:
235; ALL:       # BB#0:
236; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
237; ALL-NEXT:    retq
238  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
239  ret <8 x float> %shuffle
240}
241
242define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
243; ALL-LABEL: shuffle_v8f32_9810dc54:
244; ALL:       # BB#0:
245; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
246; ALL-NEXT:    retq
247  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
248  ret <8 x float> %shuffle
249}
250
251define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
252; ALL-LABEL: shuffle_v8f32_08194c5d:
253; ALL:       # BB#0:
254; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
255; ALL-NEXT:    retq
256  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
257  ret <8 x float> %shuffle
258}
259
260define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
261; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
262; ALL:       # BB#0:
263; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
264; ALL-NEXT:    retq
265  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
266  ret <8 x float> %shuffle
267}
268
269define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
270; AVX1-LABEL: shuffle_v8f32_08192a3b:
271; AVX1:       # BB#0:
272; AVX1-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
273; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
274; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
275; AVX1-NEXT:    retq
276;
277; AVX2-LABEL: shuffle_v8f32_08192a3b:
278; AVX2:       # BB#0:
279; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
280; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
281; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
282; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
283; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
284; AVX2-NEXT:    retq
285  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
286  ret <8 x float> %shuffle
287}
288
289define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
290; AVX1-LABEL: shuffle_v8f32_08991abb:
291; AVX1:       # BB#0:
292; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
293; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
294; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
295; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
296; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
297; AVX1-NEXT:    retq
298;
299; AVX2-LABEL: shuffle_v8f32_08991abb:
300; AVX2:       # BB#0:
301; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
302; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
303; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
304; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
305; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
306; AVX2-NEXT:    retq
307  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
308  ret <8 x float> %shuffle
309}
310
311define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
312; AVX1-LABEL: shuffle_v8f32_091b2d3f:
313; AVX1:       # BB#0:
314; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
315; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
316; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
317; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
318; AVX1-NEXT:    retq
319;
320; AVX2-LABEL: shuffle_v8f32_091b2d3f:
321; AVX2:       # BB#0:
322; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
323; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
324; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
325; AVX2-NEXT:    retq
326  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
327  ret <8 x float> %shuffle
328}
329
330define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
331; AVX1-LABEL: shuffle_v8f32_09ab1def:
332; AVX1:       # BB#0:
333; AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
334; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
335; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
336; AVX1-NEXT:    retq
337;
338; AVX2-LABEL: shuffle_v8f32_09ab1def:
339; AVX2:       # BB#0:
340; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
341; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
342; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
343; AVX2-NEXT:    retq
344  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
345  ret <8 x float> %shuffle
346}
347
348define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
349; ALL-LABEL: shuffle_v8f32_00014445:
350; ALL:       # BB#0:
351; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
352; ALL-NEXT:    retq
353  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
354  ret <8 x float> %shuffle
355}
356
357define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
358; ALL-LABEL: shuffle_v8f32_00204464:
359; ALL:       # BB#0:
360; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
361; ALL-NEXT:    retq
362  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
363  ret <8 x float> %shuffle
364}
365
366define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
367; ALL-LABEL: shuffle_v8f32_03004744:
368; ALL:       # BB#0:
369; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
370; ALL-NEXT:    retq
371  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
372  ret <8 x float> %shuffle
373}
374
375define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
376; ALL-LABEL: shuffle_v8f32_10005444:
377; ALL:       # BB#0:
378; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
379; ALL-NEXT:    retq
380  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
381  ret <8 x float> %shuffle
382}
383
384define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
385; ALL-LABEL: shuffle_v8f32_22006644:
386; ALL:       # BB#0:
387; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
388; ALL-NEXT:    retq
389  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
390  ret <8 x float> %shuffle
391}
392
393define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
394; ALL-LABEL: shuffle_v8f32_33307774:
395; ALL:       # BB#0:
396; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
397; ALL-NEXT:    retq
398  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
399  ret <8 x float> %shuffle
400}
401
402define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
403; ALL-LABEL: shuffle_v8f32_32107654:
404; ALL:       # BB#0:
405; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
406; ALL-NEXT:    retq
407  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
408  ret <8 x float> %shuffle
409}
410
411define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
412; ALL-LABEL: shuffle_v8f32_00234467:
413; ALL:       # BB#0:
414; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
415; ALL-NEXT:    retq
416  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
417  ret <8 x float> %shuffle
418}
419
420define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
421; ALL-LABEL: shuffle_v8f32_00224466:
422; ALL:       # BB#0:
423; ALL-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
424; ALL-NEXT:    retq
425  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
426  ret <8 x float> %shuffle
427}
428
429define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
430; ALL-LABEL: shuffle_v8f32_10325476:
431; ALL:       # BB#0:
432; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
433; ALL-NEXT:    retq
434  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
435  ret <8 x float> %shuffle
436}
437
438define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
439; ALL-LABEL: shuffle_v8f32_11335577:
440; ALL:       # BB#0:
441; ALL-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
442; ALL-NEXT:    retq
443  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
444  ret <8 x float> %shuffle
445}
446
447define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
448; ALL-LABEL: shuffle_v8f32_10235467:
449; ALL:       # BB#0:
450; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
451; ALL-NEXT:    retq
452  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
453  ret <8 x float> %shuffle
454}
455
456define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
457; ALL-LABEL: shuffle_v8f32_10225466:
458; ALL:       # BB#0:
459; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
460; ALL-NEXT:    retq
461  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
462  ret <8 x float> %shuffle
463}
464
465define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
466; ALL-LABEL: shuffle_v8f32_00015444:
467; ALL:       # BB#0:
468; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
469; ALL-NEXT:    retq
470  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
471  ret <8 x float> %shuffle
472}
473
474define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
475; ALL-LABEL: shuffle_v8f32_00204644:
476; ALL:       # BB#0:
477; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
478; ALL-NEXT:    retq
479  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
480  ret <8 x float> %shuffle
481}
482
483define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
484; ALL-LABEL: shuffle_v8f32_03004474:
485; ALL:       # BB#0:
486; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
487; ALL-NEXT:    retq
488  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
489  ret <8 x float> %shuffle
490}
491
492define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
493; ALL-LABEL: shuffle_v8f32_10004444:
494; ALL:       # BB#0:
495; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
496; ALL-NEXT:    retq
497  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
498  ret <8 x float> %shuffle
499}
500
501define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
502; ALL-LABEL: shuffle_v8f32_22006446:
503; ALL:       # BB#0:
504; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
505; ALL-NEXT:    retq
506  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
507  ret <8 x float> %shuffle
508}
509
510define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
511; ALL-LABEL: shuffle_v8f32_33307474:
512; ALL:       # BB#0:
513; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
514; ALL-NEXT:    retq
515  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
516  ret <8 x float> %shuffle
517}
518
519define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
520; ALL-LABEL: shuffle_v8f32_32104567:
521; ALL:       # BB#0:
522; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
523; ALL-NEXT:    retq
524  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
525  ret <8 x float> %shuffle
526}
527
528define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
529; ALL-LABEL: shuffle_v8f32_00236744:
530; ALL:       # BB#0:
531; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
532; ALL-NEXT:    retq
533  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
534  ret <8 x float> %shuffle
535}
536
537define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
538; ALL-LABEL: shuffle_v8f32_00226644:
539; ALL:       # BB#0:
540; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
541; ALL-NEXT:    retq
542  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
543  ret <8 x float> %shuffle
544}
545
546define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
547; ALL-LABEL: shuffle_v8f32_10324567:
548; ALL:       # BB#0:
549; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
550; ALL-NEXT:    retq
551  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
552  ret <8 x float> %shuffle
553}
554
555define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
556; ALL-LABEL: shuffle_v8f32_11334567:
557; ALL:       # BB#0:
558; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
559; ALL-NEXT:    retq
560  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
561  ret <8 x float> %shuffle
562}
563
564define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
565; ALL-LABEL: shuffle_v8f32_01235467:
566; ALL:       # BB#0:
567; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
568; ALL-NEXT:    retq
569  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
570  ret <8 x float> %shuffle
571}
572
573define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
574; ALL-LABEL: shuffle_v8f32_01235466:
575; ALL:       # BB#0:
576; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
577; ALL-NEXT:    retq
578  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
579  ret <8 x float> %shuffle
580}
581
582define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
583; ALL-LABEL: shuffle_v8f32_002u6u44:
584; ALL:       # BB#0:
585; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
586; ALL-NEXT:    retq
587  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
588  ret <8 x float> %shuffle
589}
590
591define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
592; ALL-LABEL: shuffle_v8f32_00uu66uu:
593; ALL:       # BB#0:
594; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
595; ALL-NEXT:    retq
596  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
597  ret <8 x float> %shuffle
598}
599
600define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
601; ALL-LABEL: shuffle_v8f32_103245uu:
602; ALL:       # BB#0:
603; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
604; ALL-NEXT:    retq
605  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
606  ret <8 x float> %shuffle
607}
608
609define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
610; ALL-LABEL: shuffle_v8f32_1133uu67:
611; ALL:       # BB#0:
612; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
613; ALL-NEXT:    retq
614  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
615  ret <8 x float> %shuffle
616}
617
618define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
619; ALL-LABEL: shuffle_v8f32_0uu354uu:
620; ALL:       # BB#0:
621; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
622; ALL-NEXT:    retq
623  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
624  ret <8 x float> %shuffle
625}
626
627define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
628; ALL-LABEL: shuffle_v8f32_uuu3uu66:
629; ALL:       # BB#0:
630; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
631; ALL-NEXT:    retq
632  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
633  ret <8 x float> %shuffle
634}
635
636define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
637; AVX1-LABEL: shuffle_v8f32_c348cda0:
638; AVX1:       # BB#0:
639; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
640; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
641; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
642; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
643; AVX1-NEXT:    vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
644; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
645; AVX1-NEXT:    retq
646;
647; AVX2-LABEL: shuffle_v8f32_c348cda0:
648; AVX2:       # BB#0:
649; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0>
650; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
651; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
652; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
653; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
654; AVX2-NEXT:    retq
655  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
656  ret <8 x float> %shuffle
657}
658
659define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
660; AVX1-LABEL: shuffle_v8f32_f511235a:
661; AVX1:       # BB#0:
662; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
663; AVX1-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
664; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
665; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
666; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
667; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
668; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
669; AVX1-NEXT:    retq
670;
671; AVX2-LABEL: shuffle_v8f32_f511235a:
672; AVX2:       # BB#0:
673; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2>
674; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
675; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
676; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
677; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
678; AVX2-NEXT:    retq
679  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
680  ret <8 x float> %shuffle
681}
682
683define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
684; AVX1-LABEL: shuffle_v8f32_32103210:
685; AVX1:       # BB#0:
686; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
687; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
688; AVX1-NEXT:    retq
689;
690; AVX2-LABEL: shuffle_v8f32_32103210:
691; AVX2:       # BB#0:
692; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
693; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
694; AVX2-NEXT:    retq
695  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
696  ret <8 x float> %shuffle
697}
698
699define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
700; AVX1-LABEL: shuffle_v8f32_76547654:
701; AVX1:       # BB#0:
702; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
703; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
704; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
705; AVX1-NEXT:    retq
706;
707; AVX2-LABEL: shuffle_v8f32_76547654:
708; AVX2:       # BB#0:
709; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
710; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
711; AVX2-NEXT:    retq
712  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
713  ret <8 x float> %shuffle
714}
715
716define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
717; AVX1-LABEL: shuffle_v8f32_76543210:
718; AVX1:       # BB#0:
719; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
720; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
721; AVX1-NEXT:    retq
722;
723; AVX2-LABEL: shuffle_v8f32_76543210:
724; AVX2:       # BB#0:
725; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
726; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
727; AVX2-NEXT:    retq
728  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
729  ret <8 x float> %shuffle
730}
731
732define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
733; ALL-LABEL: shuffle_v8f32_3210ba98:
734; ALL:       # BB#0:
735; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
736; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
737; ALL-NEXT:    retq
738  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
739  ret <8 x float> %shuffle
740}
741
742define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
743; ALL-LABEL: shuffle_v8f32_3210fedc:
744; ALL:       # BB#0:
745; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
746; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
747; ALL-NEXT:    retq
748  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
749  ret <8 x float> %shuffle
750}
751
752define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
753; ALL-LABEL: shuffle_v8f32_7654fedc:
754; ALL:       # BB#0:
755; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
756; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
757; ALL-NEXT:    retq
758  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
759  ret <8 x float> %shuffle
760}
761
762define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
763; ALL-LABEL: shuffle_v8f32_fedc7654:
764; ALL:       # BB#0:
765; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
766; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
767; ALL-NEXT:    retq
768  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
769  ret <8 x float> %shuffle
770}
771
772define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
773; AVX1-LABEL: PR21138:
774; AVX1:       # BB#0:
775; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
776; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
777; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
778; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
779; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
780; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
781; AVX1-NEXT:    retq
782;
783; AVX2-LABEL: PR21138:
784; AVX2:       # BB#0:
785; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,u,u,u,1,3,5,7>
786; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
787; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <1,3,5,7,u,u,u,u>
788; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
789; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
790; AVX2-NEXT:    retq
791  %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
792  ret <8 x float> %shuffle
793}
794
795define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
796; ALL-LABEL: shuffle_v8f32_ba987654:
797; ALL:       # BB#0:
798; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
799; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
800; ALL-NEXT:    retq
801  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
802  ret <8 x float> %shuffle
803}
804
805define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
806; ALL-LABEL: shuffle_v8f32_ba983210:
807; ALL:       # BB#0:
808; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
809; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
810; ALL-NEXT:    retq
811  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
812  ret <8 x float> %shuffle
813}
814
815define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) {
816; ALL-LABEL: shuffle_v8f32_80u1c4u5:
817; ALL:       # BB#0:
818; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
819; ALL-NEXT:    retq
820  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5>
821  ret <8 x float> %shuffle
822}
823
824define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
825; ALL-LABEL: shuffle_v8f32_a2u3e6f7:
826; ALL:       # BB#0:
827; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
828; ALL-NEXT:    retq
829  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7>
830  ret <8 x float> %shuffle
831}
832
833define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
834; AVX1-LABEL: shuffle_v8i32_00000000:
835; AVX1:       # BB#0:
836; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
837; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
838; AVX1-NEXT:    retq
839;
840; AVX2-LABEL: shuffle_v8i32_00000000:
841; AVX2:       # BB#0:
842; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
843; AVX2-NEXT:    retq
844  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
845  ret <8 x i32> %shuffle
846}
847
848define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
849; AVX1-LABEL: shuffle_v8i32_00000010:
850; AVX1:       # BB#0:
851; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
852; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
853; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
854; AVX1-NEXT:    retq
855;
856; AVX2-LABEL: shuffle_v8i32_00000010:
857; AVX2:       # BB#0:
858; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
859; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
860; AVX2-NEXT:    retq
861  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
862  ret <8 x i32> %shuffle
863}
864
865define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
866; AVX1-LABEL: shuffle_v8i32_00000200:
867; AVX1:       # BB#0:
868; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
869; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
870; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
871; AVX1-NEXT:    retq
872;
873; AVX2-LABEL: shuffle_v8i32_00000200:
874; AVX2:       # BB#0:
875; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
876; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
877; AVX2-NEXT:    retq
878  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
879  ret <8 x i32> %shuffle
880}
881
882define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
883; AVX1-LABEL: shuffle_v8i32_00003000:
884; AVX1:       # BB#0:
885; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
886; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
887; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
888; AVX1-NEXT:    retq
889;
890; AVX2-LABEL: shuffle_v8i32_00003000:
891; AVX2:       # BB#0:
892; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
893; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
894; AVX2-NEXT:    retq
895  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
896  ret <8 x i32> %shuffle
897}
898
899define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
900; AVX1-LABEL: shuffle_v8i32_00040000:
901; AVX1:       # BB#0:
902; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
903; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
904; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
905; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
906; AVX1-NEXT:    retq
907;
908; AVX2-LABEL: shuffle_v8i32_00040000:
909; AVX2:       # BB#0:
910; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
911; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
912; AVX2-NEXT:    retq
913  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
914  ret <8 x i32> %shuffle
915}
916
917define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
918; AVX1-LABEL: shuffle_v8i32_00500000:
919; AVX1:       # BB#0:
920; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
921; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
922; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
923; AVX1-NEXT:    retq
924;
925; AVX2-LABEL: shuffle_v8i32_00500000:
926; AVX2:       # BB#0:
927; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
928; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
929; AVX2-NEXT:    retq
930  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
931  ret <8 x i32> %shuffle
932}
933
934define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
935; AVX1-LABEL: shuffle_v8i32_06000000:
936; AVX1:       # BB#0:
937; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
938; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
939; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
940; AVX1-NEXT:    retq
941;
942; AVX2-LABEL: shuffle_v8i32_06000000:
943; AVX2:       # BB#0:
944; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
945; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
946; AVX2-NEXT:    retq
947  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
948  ret <8 x i32> %shuffle
949}
950
951define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
952; AVX1-LABEL: shuffle_v8i32_70000000:
953; AVX1:       # BB#0:
954; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
955; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
956; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
957; AVX1-NEXT:    retq
958;
959; AVX2-LABEL: shuffle_v8i32_70000000:
960; AVX2:       # BB#0:
961; AVX2-NEXT:    movl $7, %eax
962; AVX2-NEXT:    vmovd %eax, %xmm1
963; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
964; AVX2-NEXT:    retq
965  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
966  ret <8 x i32> %shuffle
967}
968
969define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
970; AVX1-LABEL: shuffle_v8i32_01014545:
971; AVX1:       # BB#0:
972; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
973; AVX1-NEXT:    retq
974;
975; AVX2-LABEL: shuffle_v8i32_01014545:
976; AVX2:       # BB#0:
977; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
978; AVX2-NEXT:    retq
979  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
980  ret <8 x i32> %shuffle
981}
982
983define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
984; AVX1-LABEL: shuffle_v8i32_00112233:
985; AVX1:       # BB#0:
986; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
987; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
988; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
989; AVX1-NEXT:    retq
990;
991; AVX2-LABEL: shuffle_v8i32_00112233:
992; AVX2:       # BB#0:
993; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
994; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
995; AVX2-NEXT:    retq
996  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
997  ret <8 x i32> %shuffle
998}
999
1000define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
1001; AVX1-LABEL: shuffle_v8i32_00001111:
1002; AVX1:       # BB#0:
1003; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
1004; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1005; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1006; AVX1-NEXT:    retq
1007;
1008; AVX2-LABEL: shuffle_v8i32_00001111:
1009; AVX2:       # BB#0:
1010; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
1011; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1012; AVX2-NEXT:    retq
1013  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
1014  ret <8 x i32> %shuffle
1015}
1016
1017define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
1018; AVX1-LABEL: shuffle_v8i32_81a3c5e7:
1019; AVX1:       # BB#0:
1020; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1021; AVX1-NEXT:    retq
1022;
1023; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
1024; AVX2:       # BB#0:
1025; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1026; AVX2-NEXT:    retq
1027  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
1028  ret <8 x i32> %shuffle
1029}
1030
1031define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
1032; AVX1-LABEL: shuffle_v8i32_08080808:
1033; AVX1:       # BB#0:
1034; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
1035; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
1036; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1037; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1038; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1039; AVX1-NEXT:    retq
1040;
1041; AVX2-LABEL: shuffle_v8i32_08080808:
1042; AVX2:       # BB#0:
1043; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
1044; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
1045; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1046; AVX2-NEXT:    retq
1047  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
1048  ret <8 x i32> %shuffle
1049}
1050
1051define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
1052; AVX1-LABEL: shuffle_v8i32_08084c4c:
1053; AVX1:       # BB#0:
1054; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
1055; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1056; AVX1-NEXT:    retq
1057;
1058; AVX2-LABEL: shuffle_v8i32_08084c4c:
1059; AVX2:       # BB#0:
1060; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
1061; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1062; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1063; AVX2-NEXT:    retq
1064  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
1065  ret <8 x i32> %shuffle
1066}
1067
1068define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
1069; AVX1-LABEL: shuffle_v8i32_8823cc67:
1070; AVX1:       # BB#0:
1071; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
1072; AVX1-NEXT:    retq
1073;
1074; AVX2-LABEL: shuffle_v8i32_8823cc67:
1075; AVX2:       # BB#0:
1076; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
1077; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1078; AVX2-NEXT:    retq
1079  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
1080  ret <8 x i32> %shuffle
1081}
1082
1083define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
1084; AVX1-LABEL: shuffle_v8i32_9832dc76:
1085; AVX1:       # BB#0:
1086; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
1087; AVX1-NEXT:    retq
1088;
1089; AVX2-LABEL: shuffle_v8i32_9832dc76:
1090; AVX2:       # BB#0:
1091; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1092; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1093; AVX2-NEXT:    retq
1094  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
1095  ret <8 x i32> %shuffle
1096}
1097
1098define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
1099; AVX1-LABEL: shuffle_v8i32_9810dc54:
1100; AVX1:       # BB#0:
1101; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
1102; AVX1-NEXT:    retq
1103;
1104; AVX2-LABEL: shuffle_v8i32_9810dc54:
1105; AVX2:       # BB#0:
1106; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
1107; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
1108; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1109; AVX2-NEXT:    retq
1110  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
1111  ret <8 x i32> %shuffle
1112}
1113
1114define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
1115; AVX1-LABEL: shuffle_v8i32_08194c5d:
1116; AVX1:       # BB#0:
1117; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1118; AVX1-NEXT:    retq
1119;
1120; AVX2-LABEL: shuffle_v8i32_08194c5d:
1121; AVX2:       # BB#0:
1122; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1123; AVX2-NEXT:    retq
1124  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
1125  ret <8 x i32> %shuffle
1126}
1127
1128define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
1129; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
1130; AVX1:       # BB#0:
1131; AVX1-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1132; AVX1-NEXT:    retq
1133;
1134; AVX2-LABEL: shuffle_v8i32_2a3b6e7f:
1135; AVX2:       # BB#0:
1136; AVX2-NEXT:    vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1137; AVX2-NEXT:    retq
1138  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1139  ret <8 x i32> %shuffle
1140}
1141
1142define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
1143; AVX1-LABEL: shuffle_v8i32_08192a3b:
1144; AVX1:       # BB#0:
1145; AVX1-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1146; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1147; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1148; AVX1-NEXT:    retq
1149;
1150; AVX2-LABEL: shuffle_v8i32_08192a3b:
1151; AVX2:       # BB#0:
1152; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
1153; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
1154; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1155; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1156; AVX2-NEXT:    retq
1157  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1158  ret <8 x i32> %shuffle
1159}
1160
1161define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
1162; AVX1-LABEL: shuffle_v8i32_08991abb:
1163; AVX1:       # BB#0:
1164; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
1165; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
1166; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1167; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
1168; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
1169; AVX1-NEXT:    retq
1170;
1171; AVX2-LABEL: shuffle_v8i32_08991abb:
1172; AVX2:       # BB#0:
1173; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1174; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
1175; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
1176; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
1177; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1178; AVX2-NEXT:    retq
1179  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
1180  ret <8 x i32> %shuffle
1181}
1182
1183define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
1184; AVX1-LABEL: shuffle_v8i32_091b2d3f:
1185; AVX1:       # BB#0:
1186; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
1187; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
1188; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
1189; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1190; AVX1-NEXT:    retq
1191;
1192; AVX2-LABEL: shuffle_v8i32_091b2d3f:
1193; AVX2:       # BB#0:
1194; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1195; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1196; AVX2-NEXT:    retq
1197  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
1198  ret <8 x i32> %shuffle
1199}
1200
1201define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
1202; AVX1-LABEL: shuffle_v8i32_09ab1def:
1203; AVX1:       # BB#0:
1204; AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
1205; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1206; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1207; AVX1-NEXT:    retq
1208;
1209; AVX2-LABEL: shuffle_v8i32_09ab1def:
1210; AVX2:       # BB#0:
1211; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1212; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
1213; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1214; AVX2-NEXT:    retq
1215  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
1216  ret <8 x i32> %shuffle
1217}
1218
1219define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
1220; AVX1-LABEL: shuffle_v8i32_00014445:
1221; AVX1:       # BB#0:
1222; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1223; AVX1-NEXT:    retq
1224;
1225; AVX2-LABEL: shuffle_v8i32_00014445:
1226; AVX2:       # BB#0:
1227; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1228; AVX2-NEXT:    retq
1229  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
1230  ret <8 x i32> %shuffle
1231}
1232
1233define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
1234; AVX1-LABEL: shuffle_v8i32_00204464:
1235; AVX1:       # BB#0:
1236; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1237; AVX1-NEXT:    retq
1238;
1239; AVX2-LABEL: shuffle_v8i32_00204464:
1240; AVX2:       # BB#0:
1241; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1242; AVX2-NEXT:    retq
1243  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
1244  ret <8 x i32> %shuffle
1245}
1246
1247define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
1248; AVX1-LABEL: shuffle_v8i32_03004744:
1249; AVX1:       # BB#0:
1250; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1251; AVX1-NEXT:    retq
1252;
1253; AVX2-LABEL: shuffle_v8i32_03004744:
1254; AVX2:       # BB#0:
1255; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1256; AVX2-NEXT:    retq
1257  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
1258  ret <8 x i32> %shuffle
1259}
1260
1261define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
1262; AVX1-LABEL: shuffle_v8i32_10005444:
1263; AVX1:       # BB#0:
1264; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1265; AVX1-NEXT:    retq
1266;
1267; AVX2-LABEL: shuffle_v8i32_10005444:
1268; AVX2:       # BB#0:
1269; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1270; AVX2-NEXT:    retq
1271  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
1272  ret <8 x i32> %shuffle
1273}
1274
1275define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
1276; AVX1-LABEL: shuffle_v8i32_22006644:
1277; AVX1:       # BB#0:
1278; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1279; AVX1-NEXT:    retq
1280;
1281; AVX2-LABEL: shuffle_v8i32_22006644:
1282; AVX2:       # BB#0:
1283; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1284; AVX2-NEXT:    retq
1285  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
1286  ret <8 x i32> %shuffle
1287}
1288
1289define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
1290; AVX1-LABEL: shuffle_v8i32_33307774:
1291; AVX1:       # BB#0:
1292; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1293; AVX1-NEXT:    retq
1294;
1295; AVX2-LABEL: shuffle_v8i32_33307774:
1296; AVX2:       # BB#0:
1297; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1298; AVX2-NEXT:    retq
1299  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
1300  ret <8 x i32> %shuffle
1301}
1302
1303define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
1304; AVX1-LABEL: shuffle_v8i32_32107654:
1305; AVX1:       # BB#0:
1306; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1307; AVX1-NEXT:    retq
1308;
1309; AVX2-LABEL: shuffle_v8i32_32107654:
1310; AVX2:       # BB#0:
1311; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1312; AVX2-NEXT:    retq
1313  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1314  ret <8 x i32> %shuffle
1315}
1316
1317define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
1318; AVX1-LABEL: shuffle_v8i32_00234467:
1319; AVX1:       # BB#0:
1320; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1321; AVX1-NEXT:    retq
1322;
1323; AVX2-LABEL: shuffle_v8i32_00234467:
1324; AVX2:       # BB#0:
1325; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1326; AVX2-NEXT:    retq
1327  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
1328  ret <8 x i32> %shuffle
1329}
1330
1331define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
1332; AVX1-LABEL: shuffle_v8i32_00224466:
1333; AVX1:       # BB#0:
1334; AVX1-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1335; AVX1-NEXT:    retq
1336;
1337; AVX2-LABEL: shuffle_v8i32_00224466:
1338; AVX2:       # BB#0:
1339; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1340; AVX2-NEXT:    retq
1341  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1342  ret <8 x i32> %shuffle
1343}
1344
1345define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
1346; AVX1-LABEL: shuffle_v8i32_10325476:
1347; AVX1:       # BB#0:
1348; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1349; AVX1-NEXT:    retq
1350;
1351; AVX2-LABEL: shuffle_v8i32_10325476:
1352; AVX2:       # BB#0:
1353; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1354; AVX2-NEXT:    retq
1355  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
1356  ret <8 x i32> %shuffle
1357}
1358
1359define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
1360; AVX1-LABEL: shuffle_v8i32_11335577:
1361; AVX1:       # BB#0:
1362; AVX1-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1363; AVX1-NEXT:    retq
1364;
1365; AVX2-LABEL: shuffle_v8i32_11335577:
1366; AVX2:       # BB#0:
1367; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1368; AVX2-NEXT:    retq
1369  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
1370  ret <8 x i32> %shuffle
1371}
1372
1373define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
1374; AVX1-LABEL: shuffle_v8i32_10235467:
1375; AVX1:       # BB#0:
1376; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1377; AVX1-NEXT:    retq
1378;
1379; AVX2-LABEL: shuffle_v8i32_10235467:
1380; AVX2:       # BB#0:
1381; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1382; AVX2-NEXT:    retq
1383  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1384  ret <8 x i32> %shuffle
1385}
1386
1387define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
1388; AVX1-LABEL: shuffle_v8i32_10225466:
1389; AVX1:       # BB#0:
1390; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1391; AVX1-NEXT:    retq
1392;
1393; AVX2-LABEL: shuffle_v8i32_10225466:
1394; AVX2:       # BB#0:
1395; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1396; AVX2-NEXT:    retq
1397  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
1398  ret <8 x i32> %shuffle
1399}
1400
1401define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
1402; AVX1-LABEL: shuffle_v8i32_00015444:
1403; AVX1:       # BB#0:
1404; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
1405; AVX1-NEXT:    retq
1406;
1407; AVX2-LABEL: shuffle_v8i32_00015444:
1408; AVX2:       # BB#0:
1409; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
1410; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1411; AVX2-NEXT:    retq
1412  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
1413  ret <8 x i32> %shuffle
1414}
1415
1416define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
1417; AVX1-LABEL: shuffle_v8i32_00204644:
1418; AVX1:       # BB#0:
1419; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
1420; AVX1-NEXT:    retq
1421;
1422; AVX2-LABEL: shuffle_v8i32_00204644:
1423; AVX2:       # BB#0:
1424; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
1425; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1426; AVX2-NEXT:    retq
1427  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
1428  ret <8 x i32> %shuffle
1429}
1430
1431define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
1432; AVX1-LABEL: shuffle_v8i32_03004474:
1433; AVX1:       # BB#0:
1434; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
1435; AVX1-NEXT:    retq
1436;
1437; AVX2-LABEL: shuffle_v8i32_03004474:
1438; AVX2:       # BB#0:
1439; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
1440; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1441; AVX2-NEXT:    retq
1442  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
1443  ret <8 x i32> %shuffle
1444}
1445
1446define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
1447; AVX1-LABEL: shuffle_v8i32_10004444:
1448; AVX1:       # BB#0:
1449; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
1450; AVX1-NEXT:    retq
1451;
1452; AVX2-LABEL: shuffle_v8i32_10004444:
1453; AVX2:       # BB#0:
1454; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
1455; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1456; AVX2-NEXT:    retq
1457  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
1458  ret <8 x i32> %shuffle
1459}
1460
1461define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
1462; AVX1-LABEL: shuffle_v8i32_22006446:
1463; AVX1:       # BB#0:
1464; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
1465; AVX1-NEXT:    retq
1466;
1467; AVX2-LABEL: shuffle_v8i32_22006446:
1468; AVX2:       # BB#0:
1469; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
1470; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1471; AVX2-NEXT:    retq
1472  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
1473  ret <8 x i32> %shuffle
1474}
1475
1476define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
1477; AVX1-LABEL: shuffle_v8i32_33307474:
1478; AVX1:       # BB#0:
1479; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
1480; AVX1-NEXT:    retq
1481;
1482; AVX2-LABEL: shuffle_v8i32_33307474:
1483; AVX2:       # BB#0:
1484; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
1485; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1486; AVX2-NEXT:    retq
1487  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
1488  ret <8 x i32> %shuffle
1489}
1490
1491define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
1492; AVX1-LABEL: shuffle_v8i32_32104567:
1493; AVX1:       # BB#0:
1494; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
1495; AVX1-NEXT:    retq
1496;
1497; AVX2-LABEL: shuffle_v8i32_32104567:
1498; AVX2:       # BB#0:
1499; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
1500; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1501; AVX2-NEXT:    retq
1502  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
1503  ret <8 x i32> %shuffle
1504}
1505
1506define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
1507; AVX1-LABEL: shuffle_v8i32_00236744:
1508; AVX1:       # BB#0:
1509; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
1510; AVX1-NEXT:    retq
1511;
1512; AVX2-LABEL: shuffle_v8i32_00236744:
1513; AVX2:       # BB#0:
1514; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
1515; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1516; AVX2-NEXT:    retq
1517  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
1518  ret <8 x i32> %shuffle
1519}
1520
1521define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
1522; AVX1-LABEL: shuffle_v8i32_00226644:
1523; AVX1:       # BB#0:
1524; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
1525; AVX1-NEXT:    retq
1526;
1527; AVX2-LABEL: shuffle_v8i32_00226644:
1528; AVX2:       # BB#0:
1529; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
1530; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1531; AVX2-NEXT:    retq
1532  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
1533  ret <8 x i32> %shuffle
1534}
1535
1536define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
1537; AVX1-LABEL: shuffle_v8i32_10324567:
1538; AVX1:       # BB#0:
1539; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
1540; AVX1-NEXT:    retq
1541;
1542; AVX2-LABEL: shuffle_v8i32_10324567:
1543; AVX2:       # BB#0:
1544; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
1545; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1546; AVX2-NEXT:    retq
1547  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1548  ret <8 x i32> %shuffle
1549}
1550
1551define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
1552; AVX1-LABEL: shuffle_v8i32_11334567:
1553; AVX1:       # BB#0:
1554; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
1555; AVX1-NEXT:    retq
1556;
1557; AVX2-LABEL: shuffle_v8i32_11334567:
1558; AVX2:       # BB#0:
1559; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
1560; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1561; AVX2-NEXT:    retq
1562  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
1563  ret <8 x i32> %shuffle
1564}
1565
1566define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
1567; AVX1-LABEL: shuffle_v8i32_01235467:
1568; AVX1:       # BB#0:
1569; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
1570; AVX1-NEXT:    retq
1571;
1572; AVX2-LABEL: shuffle_v8i32_01235467:
1573; AVX2:       # BB#0:
1574; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
1575; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1576; AVX2-NEXT:    retq
1577  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1578  ret <8 x i32> %shuffle
1579}
1580
1581define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
1582; AVX1-LABEL: shuffle_v8i32_01235466:
1583; AVX1:       # BB#0:
1584; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
1585; AVX1-NEXT:    retq
1586;
1587; AVX2-LABEL: shuffle_v8i32_01235466:
1588; AVX2:       # BB#0:
1589; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
1590; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1591; AVX2-NEXT:    retq
1592  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
1593  ret <8 x i32> %shuffle
1594}
1595
1596define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
1597; AVX1-LABEL: shuffle_v8i32_002u6u44:
1598; AVX1:       # BB#0:
1599; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
1600; AVX1-NEXT:    retq
1601;
1602; AVX2-LABEL: shuffle_v8i32_002u6u44:
1603; AVX2:       # BB#0:
1604; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
1605; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1606; AVX2-NEXT:    retq
1607  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
1608  ret <8 x i32> %shuffle
1609}
1610
1611define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
1612; AVX1-LABEL: shuffle_v8i32_00uu66uu:
1613; AVX1:       # BB#0:
1614; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
1615; AVX1-NEXT:    retq
1616;
1617; AVX2-LABEL: shuffle_v8i32_00uu66uu:
1618; AVX2:       # BB#0:
1619; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
1620; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1621; AVX2-NEXT:    retq
1622  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
1623  ret <8 x i32> %shuffle
1624}
1625
1626define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
1627; AVX1-LABEL: shuffle_v8i32_103245uu:
1628; AVX1:       # BB#0:
1629; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
1630; AVX1-NEXT:    retq
1631;
1632; AVX2-LABEL: shuffle_v8i32_103245uu:
1633; AVX2:       # BB#0:
1634; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
1635; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1636; AVX2-NEXT:    retq
1637  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
1638  ret <8 x i32> %shuffle
1639}
1640
1641define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
1642; AVX1-LABEL: shuffle_v8i32_1133uu67:
1643; AVX1:       # BB#0:
1644; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
1645; AVX1-NEXT:    retq
1646;
1647; AVX2-LABEL: shuffle_v8i32_1133uu67:
1648; AVX2:       # BB#0:
1649; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
1650; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1651; AVX2-NEXT:    retq
1652  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
1653  ret <8 x i32> %shuffle
1654}
1655
1656define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
1657; AVX1-LABEL: shuffle_v8i32_0uu354uu:
1658; AVX1:       # BB#0:
1659; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
1660; AVX1-NEXT:    retq
1661;
1662; AVX2-LABEL: shuffle_v8i32_0uu354uu:
1663; AVX2:       # BB#0:
1664; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
1665; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1666; AVX2-NEXT:    retq
1667  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
1668  ret <8 x i32> %shuffle
1669}
1670
1671define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
1672; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
1673; AVX1:       # BB#0:
1674; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
1675; AVX1-NEXT:    retq
1676;
1677; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
1678; AVX2:       # BB#0:
1679; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
1680; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1681; AVX2-NEXT:    retq
1682  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
1683  ret <8 x i32> %shuffle
1684}
1685
1686define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
1687; AVX1-LABEL: shuffle_v8i32_6caa87e5:
1688; AVX1:       # BB#0:
1689; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
1690; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
1691; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1692; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1693; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1694; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1695; AVX1-NEXT:    retq
1696;
1697; AVX2-LABEL: shuffle_v8i32_6caa87e5:
1698; AVX2:       # BB#0:
1699; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u>
1700; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
1701; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
1702; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1703; AVX2-NEXT:    retq
1704  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
1705  ret <8 x i32> %shuffle
1706}
1707
1708define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
1709; AVX1-LABEL: shuffle_v8i32_32103210:
1710; AVX1:       # BB#0:
1711; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1712; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1713; AVX1-NEXT:    retq
1714;
1715; AVX2-LABEL: shuffle_v8i32_32103210:
1716; AVX2:       # BB#0:
1717; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
1718; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1719; AVX2-NEXT:    retq
1720  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
1721  ret <8 x i32> %shuffle
1722}
1723
1724define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
1725; AVX1-LABEL: shuffle_v8i32_76547654:
1726; AVX1:       # BB#0:
1727; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1728; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1729; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1730; AVX1-NEXT:    retq
1731;
1732; AVX2-LABEL: shuffle_v8i32_76547654:
1733; AVX2:       # BB#0:
1734; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
1735; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1736; AVX2-NEXT:    retq
1737  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
1738  ret <8 x i32> %shuffle
1739}
1740
1741define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
1742; AVX1-LABEL: shuffle_v8i32_76543210:
1743; AVX1:       # BB#0:
1744; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1745; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1746; AVX1-NEXT:    retq
1747;
1748; AVX2-LABEL: shuffle_v8i32_76543210:
1749; AVX2:       # BB#0:
1750; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
1751; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1752; AVX2-NEXT:    retq
1753  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1754  ret <8 x i32> %shuffle
1755}
1756
1757define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
1758; AVX1-LABEL: shuffle_v8i32_3210ba98:
1759; AVX1:       # BB#0:
1760; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1761; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1762; AVX1-NEXT:    retq
1763;
1764; AVX2-LABEL: shuffle_v8i32_3210ba98:
1765; AVX2:       # BB#0:
1766; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1767; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1768; AVX2-NEXT:    retq
1769  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
1770  ret <8 x i32> %shuffle
1771}
1772
1773define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
1774; AVX1-LABEL: shuffle_v8i32_3210fedc:
1775; AVX1:       # BB#0:
1776; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1777; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1778; AVX1-NEXT:    retq
1779;
1780; AVX2-LABEL: shuffle_v8i32_3210fedc:
1781; AVX2:       # BB#0:
1782; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1783; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1784; AVX2-NEXT:    retq
1785  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
1786  ret <8 x i32> %shuffle
1787}
1788
1789define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
1790; AVX1-LABEL: shuffle_v8i32_7654fedc:
1791; AVX1:       # BB#0:
1792; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1793; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1794; AVX1-NEXT:    retq
1795;
1796; AVX2-LABEL: shuffle_v8i32_7654fedc:
1797; AVX2:       # BB#0:
1798; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1799; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1800; AVX2-NEXT:    retq
1801  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
1802  ret <8 x i32> %shuffle
1803}
1804
1805define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
1806; AVX1-LABEL: shuffle_v8i32_fedc7654:
1807; AVX1:       # BB#0:
1808; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1809; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1810; AVX1-NEXT:    retq
1811;
1812; AVX2-LABEL: shuffle_v8i32_fedc7654:
1813; AVX2:       # BB#0:
1814; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1815; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1816; AVX2-NEXT:    retq
1817  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
1818  ret <8 x i32> %shuffle
1819}
1820
1821define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
1822; AVX1-LABEL: shuffle_v8i32_ba987654:
1823; AVX1:       # BB#0:
1824; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
1825; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1826; AVX1-NEXT:    retq
1827;
1828; AVX2-LABEL: shuffle_v8i32_ba987654:
1829; AVX2:       # BB#0:
1830; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1831; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1832; AVX2-NEXT:    retq
1833  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
1834  ret <8 x i32> %shuffle
1835}
1836
1837define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
1838; AVX1-LABEL: shuffle_v8i32_ba983210:
1839; AVX1:       # BB#0:
1840; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
1841; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1842; AVX1-NEXT:    retq
1843;
1844; AVX2-LABEL: shuffle_v8i32_ba983210:
1845; AVX2:       # BB#0:
1846; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1847; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1848; AVX2-NEXT:    retq
1849  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
1850  ret <8 x i32> %shuffle
1851}
1852
1853define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
1854; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
1855; AVX1:       # BB#0:
1856; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1857; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
1858; AVX1-NEXT:    retq
1859;
1860; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
1861; AVX2:       # BB#0:
1862; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
1863; AVX2-NEXT:    retq
1864  %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
1865  ret <8 x i32> %shuffle
1866}
1867
1868define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
1869; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
1870; AVX1:       # BB#0:
1871; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1872; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
1873; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
1874; AVX1-NEXT:    retq
1875;
1876; AVX2-LABEL: shuffle_v8i32_9ubzdefz:
1877; AVX2:       # BB#0:
1878; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
1879; AVX2-NEXT:    retq
1880  %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
1881  ret <8 x i32> %shuffle
1882}
1883
1884define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
1885; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
1886; AVX1:       # BB#0:
1887; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1888; AVX1-NEXT:    retq
1889;
1890; AVX2-LABEL: shuffle_v8i32_80u1b4uu:
1891; AVX2:       # BB#0:
1892; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1893; AVX2-NEXT:    retq
1894  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
1895  ret <8 x i32> %shuffle
1896}
1897
1898define <8 x float> @splat_mem_v8f32_2(float* %p) {
1899; ALL-LABEL: splat_mem_v8f32_2:
1900; ALL:       # BB#0:
1901; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
1902; ALL-NEXT:    retq
1903  %1 = load float, float* %p
1904  %2 = insertelement <4 x float> undef, float %1, i32 0
1905  %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
1906  ret <8 x float> %3
1907}
1908
1909define <8 x float> @splat_v8f32(<4 x float> %r) {
1910; AVX1-LABEL: splat_v8f32:
1911; AVX1:       # BB#0:
1912; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
1913; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1914; AVX1-NEXT:    retq
1915;
1916; AVX2-LABEL: splat_v8f32:
1917; AVX2:       # BB#0:
1918; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
1919; AVX2-NEXT:    retq
1920  %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
1921  ret <8 x float> %1
1922}
1923
1924;
1925; Shuffle to logical bit shifts
1926;
1927
1928define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
1929; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
1930; AVX1:       # BB#0:
1931; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1932; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
1933; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
1934; AVX1-NEXT:    retq
1935;
1936; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
1937; AVX2:       # BB#0:
1938; AVX2-NEXT:    vpsllq $32, %ymm0, %ymm0
1939; AVX2-NEXT:    retq
1940  %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
1941  ret <8 x i32> %shuffle
1942}
1943
1944define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
1945; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
1946; AVX1:       # BB#0:
1947; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1948; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
1949; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1950; AVX1-NEXT:    retq
1951;
1952; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
1953; AVX2:       # BB#0:
1954; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
1955; AVX2-NEXT:    retq
1956  %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
1957  ret <8 x i32> %shuffle
1958}
1959
1960define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
1961; AVX1-LABEL: shuffle_v8i32_B012F456:
1962; AVX1:       # BB#0:
1963; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
1964; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
1965; AVX1-NEXT:    retq
1966;
1967; AVX2-LABEL: shuffle_v8i32_B012F456:
1968; AVX2:       # BB#0:
1969; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
1970; AVX2-NEXT:    retq
1971  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
1972  ret <8 x i32> %shuffle
1973}
1974
1975define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
1976; AVX1-LABEL: shuffle_v8i32_1238567C:
1977; AVX1:       # BB#0:
1978; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
1979; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
1980; AVX1-NEXT:    retq
1981;
1982; AVX2-LABEL: shuffle_v8i32_1238567C:
1983; AVX2:       # BB#0:
1984; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
1985; AVX2-NEXT:    retq
1986  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
1987  ret <8 x i32> %shuffle
1988}
1989
1990define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
1991; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
1992; AVX1:       # BB#0:
1993; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
1994; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
1995; AVX1-NEXT:    retq
1996;
1997; AVX2-LABEL: shuffle_v8i32_9AB0DEF4:
1998; AVX2:       # BB#0:
1999; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
2000; AVX2-NEXT:    retq
2001  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
2002  ret <8 x i32> %shuffle
2003}
2004
2005define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
2006; AVX1-LABEL: shuffle_v8i32_389A7CDE:
2007; AVX1:       # BB#0:
2008; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
2009; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
2010; AVX1-NEXT:    retq
2011;
2012; AVX2-LABEL: shuffle_v8i32_389A7CDE:
2013; AVX2:       # BB#0:
2014; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
2015; AVX2-NEXT:    retq
2016  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
2017  ret <8 x i32> %shuffle
2018}
2019
2020define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
2021; AVX1-LABEL: shuffle_v8i32_30127456:
2022; AVX1:       # BB#0:
2023; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2024; AVX1-NEXT:    retq
2025;
2026; AVX2-LABEL: shuffle_v8i32_30127456:
2027; AVX2:       # BB#0:
2028; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2029; AVX2-NEXT:    retq
2030  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
2031  ret <8 x i32> %shuffle
2032}
2033
2034define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
2035; AVX1-LABEL: shuffle_v8i32_12305674:
2036; AVX1:       # BB#0:
2037; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2038; AVX1-NEXT:    retq
2039;
2040; AVX2-LABEL: shuffle_v8i32_12305674:
2041; AVX2:       # BB#0:
2042; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2043; AVX2-NEXT:    retq
2044  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
2045  ret <8 x i32> %shuffle
2046}
2047
2048define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2049; ALL-LABEL: concat_v2f32_1:
2050; ALL:       # BB#0: # %entry
2051; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
2052; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
2053; ALL-NEXT:    retq
2054entry:
2055  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2056  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2057  %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2058  %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2059  %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
2060  ret <8 x float> %tmp76
2061}
2062
2063define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2064; ALL-LABEL: concat_v2f32_2:
2065; ALL:       # BB#0: # %entry
2066; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
2067; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
2068; ALL-NEXT:    retq
2069entry:
2070  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2071  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2072  %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
2073  ret <8 x float> %tmp76
2074}
2075
2076define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2077; ALL-LABEL: concat_v2f32_3:
2078; ALL:       # BB#0: # %entry
2079; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
2080; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
2081; ALL-NEXT:    retq
2082entry:
2083  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2084  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2085  %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2086  %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
2087  ret <8 x float> %res
2088}
2089
2090define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
2091; AVX1-LABEL: insert_mem_and_zero_v8i32:
2092; AVX1:       # BB#0:
2093; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2094; AVX1-NEXT:    retq
2095;
2096; AVX2-LABEL: insert_mem_and_zero_v8i32:
2097; AVX2:       # BB#0:
2098; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2099; AVX2-NEXT:    retq
2100  %a = load i32, i32* %ptr
2101  %v = insertelement <8 x i32> undef, i32 %a, i32 0
2102  %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2103  ret <8 x i32> %shuffle
2104}
2105
2106