1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
8
; Zero-extends the low eight bytes of a <16 x i8> argument to <8 x i16>.
; The shufflevector keeps elements 0-7 of %A, so only the low half is widened.
; Per the checks below, SSE2 and SSSE3 are expected to interleave with a
; zeroed register (pxor + punpcklbw), SSE4.1 to use a single pmovzxbw, and the
; shared AVX checks to use vpmovzxbw.
9define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
10; SSE2-LABEL: zext_16i8_to_8i16:
11; SSE2:       # BB#0: # %entry
12; SSE2-NEXT:    pxor %xmm1, %xmm1
13; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
14; SSE2-NEXT:    retq
15;
16; SSSE3-LABEL: zext_16i8_to_8i16:
17; SSSE3:       # BB#0: # %entry
18; SSSE3-NEXT:    pxor %xmm1, %xmm1
19; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
20; SSSE3-NEXT:    retq
21;
22; SSE41-LABEL: zext_16i8_to_8i16:
23; SSE41:       # BB#0: # %entry
24; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
25; SSE41-NEXT:    retq
26;
27; AVX-LABEL: zext_16i8_to_8i16:
28; AVX:       # BB#0: # %entry
29; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
30; AVX-NEXT:    retq
31entry:
32  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
33  %C = zext <8 x i8> %B to <8 x i16>
34  ret <8 x i16> %C
35}
36
37; PR17654
; Zero-extends all sixteen bytes of %A to <16 x i16>, producing a 256-bit result.
; Per the checks below, SSE2 and SSSE3 are expected to split the work into
; punpcklbw (low half, xmm0) and punpckhbw (high half, xmm1) against a zeroed
; register; SSE4.1 swaps the low half for pmovzxbw; AVX1 widens each half
; separately and joins them with vinsertf128; AVX2 and AVX512F use one
; vpmovzxbw writing ymm0.
38define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
39; SSE2-LABEL: zext_16i8_to_16i16:
40; SSE2:       # BB#0: # %entry
41; SSE2-NEXT:    movdqa %xmm0, %xmm1
42; SSE2-NEXT:    pxor %xmm2, %xmm2
43; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
44; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
45; SSE2-NEXT:    retq
46;
47; SSSE3-LABEL: zext_16i8_to_16i16:
48; SSSE3:       # BB#0: # %entry
49; SSSE3-NEXT:    movdqa %xmm0, %xmm1
50; SSSE3-NEXT:    pxor %xmm2, %xmm2
51; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
52; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
53; SSSE3-NEXT:    retq
54;
55; SSE41-LABEL: zext_16i8_to_16i16:
56; SSE41:       # BB#0: # %entry
57; SSE41-NEXT:    movdqa %xmm0, %xmm1
58; SSE41-NEXT:    pxor %xmm2, %xmm2
59; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
60; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
61; SSE41-NEXT:    retq
62;
63; AVX1-LABEL: zext_16i8_to_16i16:
64; AVX1:       # BB#0: # %entry
65; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
66; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
67; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
68; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
69; AVX1-NEXT:    retq
70;
71; AVX2-LABEL: zext_16i8_to_16i16:
72; AVX2:       # BB#0: # %entry
73; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
74; AVX2-NEXT:    retq
75;
76; AVX512-LABEL: zext_16i8_to_16i16:
77; AVX512:       # BB#0: # %entry
78; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
79; AVX512-NEXT:    retq
80entry:
81  %B = zext <16 x i8> %A to <16 x i16>
82  ret <16 x i16> %B
83}
84
; Zero-extends the low four bytes of a <16 x i8> argument to <4 x i32>.
; The shufflevector keeps elements 0-3 of %A before the zext.
; Per the checks below, SSE2 and SSSE3 are expected to widen in two unpack
; steps (punpcklbw, then punpcklwd) against a zeroed register, SSE4.1 to use a
; single pmovzxbd, and the shared AVX checks to use vpmovzxbd.
85define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
86; SSE2-LABEL: zext_16i8_to_4i32:
87; SSE2:       # BB#0: # %entry
88; SSE2-NEXT:    pxor %xmm1, %xmm1
89; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
90; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
91; SSE2-NEXT:    retq
92;
93; SSSE3-LABEL: zext_16i8_to_4i32:
94; SSSE3:       # BB#0: # %entry
95; SSSE3-NEXT:    pxor %xmm1, %xmm1
96; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
97; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
98; SSSE3-NEXT:    retq
99;
100; SSE41-LABEL: zext_16i8_to_4i32:
101; SSE41:       # BB#0: # %entry
102; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
103; SSE41-NEXT:    retq
104;
105; AVX-LABEL: zext_16i8_to_4i32:
106; AVX:       # BB#0: # %entry
107; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
108; AVX-NEXT:    retq
109entry:
110  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
111  %C = zext <4 x i8> %B to <4 x i32>
112  ret <4 x i32> %C
113}
114
; Zero-extends the low eight bytes of a <16 x i8> argument to <8 x i32>,
; producing a 256-bit result. The shufflevector keeps elements 0-7 of %A.
; Per the checks below, SSE2 and SSSE3 are expected to widen bytes to words
; with punpcklbw and then split into punpcklwd (xmm0) and punpckhwd (xmm1);
; SSE4.1 uses pmovzxbd twice with a pshufd in between; AVX1 does the same and
; joins the halves with vinsertf128; AVX2 and AVX512F use one vpmovzxbd
; writing ymm0.
115define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
116; SSE2-LABEL: zext_16i8_to_8i32:
117; SSE2:       # BB#0: # %entry
118; SSE2-NEXT:    movdqa %xmm0, %xmm1
119; SSE2-NEXT:    pxor %xmm2, %xmm2
120; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
121; SSE2-NEXT:    movdqa %xmm1, %xmm0
122; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
123; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
124; SSE2-NEXT:    retq
125;
126; SSSE3-LABEL: zext_16i8_to_8i32:
127; SSSE3:       # BB#0: # %entry
128; SSSE3-NEXT:    movdqa %xmm0, %xmm1
129; SSSE3-NEXT:    pxor %xmm2, %xmm2
130; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
131; SSSE3-NEXT:    movdqa %xmm1, %xmm0
132; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
133; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
134; SSSE3-NEXT:    retq
135;
136; SSE41-LABEL: zext_16i8_to_8i32:
137; SSE41:       # BB#0: # %entry
138; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
139; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
140; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
141; SSE41-NEXT:    movdqa %xmm2, %xmm0
142; SSE41-NEXT:    retq
143;
144; AVX1-LABEL: zext_16i8_to_8i32:
145; AVX1:       # BB#0: # %entry
146; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
147; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
148; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
149; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
150; AVX1-NEXT:    retq
151;
152; AVX2-LABEL: zext_16i8_to_8i32:
153; AVX2:       # BB#0: # %entry
154; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
155; AVX2-NEXT:    retq
156;
157; AVX512-LABEL: zext_16i8_to_8i32:
158; AVX512:       # BB#0: # %entry
159; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
160; AVX512-NEXT:    retq
161entry:
162  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
163  %C = zext <8 x i8> %B to <8 x i32>
164  ret <8 x i32> %C
165}
166
; Zero-extends the low two bytes of a <16 x i8> argument to <2 x i64>.
; The shufflevector keeps elements 0-1 of %A before the zext.
; Per the checks below, SSE2 is expected to use a three-step unpack chain
; (punpcklbw, punpcklwd, punpckldq) against a zeroed register, SSSE3 a single
; pshufb, SSE4.1 a single pmovzxbq, and the shared AVX checks vpmovzxbq.
167define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
168; SSE2-LABEL: zext_16i8_to_2i64:
169; SSE2:       # BB#0: # %entry
170; SSE2-NEXT:    pxor %xmm1, %xmm1
171; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
172; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
173; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
174; SSE2-NEXT:    retq
175;
176; SSSE3-LABEL: zext_16i8_to_2i64:
177; SSSE3:       # BB#0: # %entry
178; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
179; SSSE3-NEXT:    retq
180;
181; SSE41-LABEL: zext_16i8_to_2i64:
182; SSE41:       # BB#0: # %entry
183; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
184; SSE41-NEXT:    retq
185;
186; AVX-LABEL: zext_16i8_to_2i64:
187; AVX:       # BB#0: # %entry
188; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
189; AVX-NEXT:    retq
190entry:
191  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
192  %C = zext <2 x i8> %B to <2 x i64>
193  ret <2 x i64> %C
194}
195
; Zero-extends the low four bytes of a <16 x i8> argument to <4 x i64>,
; producing a 256-bit result. The shufflevector keeps elements 0-3 of %A.
; Per the checks below, SSE2 is expected to unpack bytes to dwords and then
; split into punpckldq (xmm0) and punpckhdq (xmm1); SSSE3 uses one pshufb per
; half; SSE4.1 uses pmovzxbq twice with a psrld $16 in between; AVX1 does the
; same and joins the halves with vinsertf128; AVX2 and AVX512F use one
; vpmovzxbq writing ymm0.
196define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
197; SSE2-LABEL: zext_16i8_to_4i64:
198; SSE2:       # BB#0: # %entry
199; SSE2-NEXT:    movdqa %xmm0, %xmm1
200; SSE2-NEXT:    pxor %xmm2, %xmm2
201; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
202; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
203; SSE2-NEXT:    movdqa %xmm1, %xmm0
204; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
205; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
206; SSE2-NEXT:    retq
207;
208; SSSE3-LABEL: zext_16i8_to_4i64:
209; SSSE3:       # BB#0: # %entry
210; SSSE3-NEXT:    movdqa %xmm0, %xmm1
211; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
212; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
213; SSSE3-NEXT:    retq
214;
215; SSE41-LABEL: zext_16i8_to_4i64:
216; SSE41:       # BB#0: # %entry
217; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
218; SSE41-NEXT:    psrld $16, %xmm0
219; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
220; SSE41-NEXT:    movdqa %xmm2, %xmm0
221; SSE41-NEXT:    retq
222;
223; AVX1-LABEL: zext_16i8_to_4i64:
224; AVX1:       # BB#0: # %entry
225; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
226; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
227; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
228; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
229; AVX1-NEXT:    retq
230;
231; AVX2-LABEL: zext_16i8_to_4i64:
232; AVX2:       # BB#0: # %entry
233; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
234; AVX2-NEXT:    retq
235;
236; AVX512-LABEL: zext_16i8_to_4i64:
237; AVX512:       # BB#0: # %entry
238; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
239; AVX512-NEXT:    retq
240entry:
241  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
242  %C = zext <4 x i8> %B to <4 x i64>
243  ret <4 x i64> %C
244}
245
; Zero-extends the low four words of an <8 x i16> argument to <4 x i32>.
; The shufflevector keeps elements 0-3 of %A before the zext.
; Per the checks below, SSE2 and SSSE3 are expected to interleave with a
; zeroed register (pxor + punpcklwd), SSE4.1 to use a single pmovzxwd, and the
; shared AVX checks to use vpmovzxwd.
246define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
247; SSE2-LABEL: zext_8i16_to_4i32:
248; SSE2:       # BB#0: # %entry
249; SSE2-NEXT:    pxor %xmm1, %xmm1
250; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
251; SSE2-NEXT:    retq
252;
253; SSSE3-LABEL: zext_8i16_to_4i32:
254; SSSE3:       # BB#0: # %entry
255; SSSE3-NEXT:    pxor %xmm1, %xmm1
256; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
257; SSSE3-NEXT:    retq
258;
259; SSE41-LABEL: zext_8i16_to_4i32:
260; SSE41:       # BB#0: # %entry
261; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
262; SSE41-NEXT:    retq
263;
264; AVX-LABEL: zext_8i16_to_4i32:
265; AVX:       # BB#0: # %entry
266; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
267; AVX-NEXT:    retq
268entry:
269  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
270  %C = zext <4 x i16> %B to <4 x i32>
271  ret <4 x i32> %C
272}
273
; Zero-extends all eight words of %A to <8 x i32>, producing a 256-bit result.
; Per the checks below, SSE2 and SSSE3 are expected to split the work into
; punpcklwd (low half, xmm0) and punpckhwd (high half, xmm1) against a zeroed
; register; SSE4.1 swaps the low half for pmovzxwd; AVX1 widens each half
; separately and joins them with vinsertf128; AVX2 and AVX512F use one
; vpmovzxwd writing ymm0.
274define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
275; SSE2-LABEL: zext_8i16_to_8i32:
276; SSE2:       # BB#0: # %entry
277; SSE2-NEXT:    movdqa %xmm0, %xmm1
278; SSE2-NEXT:    pxor %xmm2, %xmm2
279; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
280; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
281; SSE2-NEXT:    retq
282;
283; SSSE3-LABEL: zext_8i16_to_8i32:
284; SSSE3:       # BB#0: # %entry
285; SSSE3-NEXT:    movdqa %xmm0, %xmm1
286; SSSE3-NEXT:    pxor %xmm2, %xmm2
287; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
288; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
289; SSSE3-NEXT:    retq
290;
291; SSE41-LABEL: zext_8i16_to_8i32:
292; SSE41:       # BB#0: # %entry
293; SSE41-NEXT:    movdqa %xmm0, %xmm1
294; SSE41-NEXT:    pxor %xmm2, %xmm2
295; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
296; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
297; SSE41-NEXT:    retq
298;
299; AVX1-LABEL: zext_8i16_to_8i32:
300; AVX1:       # BB#0: # %entry
301; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
302; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
303; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
304; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
305; AVX1-NEXT:    retq
306;
307; AVX2-LABEL: zext_8i16_to_8i32:
308; AVX2:       # BB#0: # %entry
309; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
310; AVX2-NEXT:    retq
311;
312; AVX512-LABEL: zext_8i16_to_8i32:
313; AVX512:       # BB#0: # %entry
314; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
315; AVX512-NEXT:    retq
316entry:
317  %B = zext <8 x i16> %A to <8 x i32>
318  ret <8 x i32>%B
319}
320
; Zero-extends the low two words of an <8 x i16> argument to <2 x i64>.
; The shufflevector keeps elements 0-1 of %A before the zext.
; Per the checks below, SSE2 and SSSE3 are expected to widen in two unpack
; steps (punpcklwd, then punpckldq) against a zeroed register, SSE4.1 to use a
; single pmovzxwq, and the shared AVX checks to use vpmovzxwq.
321define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
322; SSE2-LABEL: zext_8i16_to_2i64:
323; SSE2:       # BB#0: # %entry
324; SSE2-NEXT:    pxor %xmm1, %xmm1
325; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
326; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
327; SSE2-NEXT:    retq
328;
329; SSSE3-LABEL: zext_8i16_to_2i64:
330; SSSE3:       # BB#0: # %entry
331; SSSE3-NEXT:    pxor %xmm1, %xmm1
332; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
333; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
334; SSSE3-NEXT:    retq
335;
336; SSE41-LABEL: zext_8i16_to_2i64:
337; SSE41:       # BB#0: # %entry
338; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
339; SSE41-NEXT:    retq
340;
341; AVX-LABEL: zext_8i16_to_2i64:
342; AVX:       # BB#0: # %entry
343; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
344; AVX-NEXT:    retq
345entry:
346  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
347  %C = zext <2 x i16> %B to <2 x i64>
348  ret <2 x i64> %C
349}
350
; Zero-extends the low four words of an <8 x i16> argument to <4 x i64>,
; producing a 256-bit result. The shufflevector keeps elements 0-3 of %A.
; Per the checks below, SSE2 and SSSE3 are expected to widen words to dwords
; with punpcklwd and then split into punpckldq (xmm0) and punpckhdq (xmm1);
; SSE4.1 uses pmovzxwq twice with a pshufd in between; AVX1 does the same and
; joins the halves with vinsertf128; AVX2 and AVX512F use one vpmovzxwq
; writing ymm0.
351define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
352; SSE2-LABEL: zext_8i16_to_4i64:
353; SSE2:       # BB#0: # %entry
354; SSE2-NEXT:    movdqa %xmm0, %xmm1
355; SSE2-NEXT:    pxor %xmm2, %xmm2
356; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
357; SSE2-NEXT:    movdqa %xmm1, %xmm0
358; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
359; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
360; SSE2-NEXT:    retq
361;
362; SSSE3-LABEL: zext_8i16_to_4i64:
363; SSSE3:       # BB#0: # %entry
364; SSSE3-NEXT:    movdqa %xmm0, %xmm1
365; SSSE3-NEXT:    pxor %xmm2, %xmm2
366; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
367; SSSE3-NEXT:    movdqa %xmm1, %xmm0
368; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
369; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
370; SSSE3-NEXT:    retq
371;
372; SSE41-LABEL: zext_8i16_to_4i64:
373; SSE41:       # BB#0: # %entry
374; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
375; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
376; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
377; SSE41-NEXT:    movdqa %xmm2, %xmm0
378; SSE41-NEXT:    retq
379;
380; AVX1-LABEL: zext_8i16_to_4i64:
381; AVX1:       # BB#0: # %entry
382; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
383; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
384; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
385; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
386; AVX1-NEXT:    retq
387;
388; AVX2-LABEL: zext_8i16_to_4i64:
389; AVX2:       # BB#0: # %entry
390; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
391; AVX2-NEXT:    retq
392;
393; AVX512-LABEL: zext_8i16_to_4i64:
394; AVX512:       # BB#0: # %entry
395; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
396; AVX512-NEXT:    retq
397entry:
398  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
399  %C = zext <4 x i16> %B to <4 x i64>
400  ret <4 x i64> %C
401}
402
; Zero-extends the low two dwords of a <4 x i32> argument to <2 x i64>.
; The shufflevector keeps elements 0-1 of %A before the zext.
; Per the checks below, SSE2 and SSSE3 are expected to interleave with a
; zeroed register (pxor + punpckldq), SSE4.1 to use a single pmovzxdq, and the
; shared AVX checks to use vpmovzxdq.
403define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
404; SSE2-LABEL: zext_4i32_to_2i64:
405; SSE2:       # BB#0: # %entry
406; SSE2-NEXT:    pxor %xmm1, %xmm1
407; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
408; SSE2-NEXT:    retq
409;
410; SSSE3-LABEL: zext_4i32_to_2i64:
411; SSSE3:       # BB#0: # %entry
412; SSSE3-NEXT:    pxor %xmm1, %xmm1
413; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
414; SSSE3-NEXT:    retq
415;
416; SSE41-LABEL: zext_4i32_to_2i64:
417; SSE41:       # BB#0: # %entry
418; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
419; SSE41-NEXT:    retq
420;
421; AVX-LABEL: zext_4i32_to_2i64:
422; AVX:       # BB#0: # %entry
423; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
424; AVX-NEXT:    retq
425entry:
426  %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
427  %C = zext <2 x i32> %B to <2 x i64>
428  ret <2 x i64> %C
429}
430
; Zero-extends all four dwords of %A to <4 x i64>, producing a 256-bit result.
; Per the checks below, SSE2 and SSSE3 are expected to split the work into
; punpckldq (low half, xmm0) and punpckhdq (high half, xmm1) against a zeroed
; register; SSE4.1 swaps the low half for pmovzxdq; AVX1 widens each half
; separately and joins them with vinsertf128; AVX2 and AVX512F use one
; vpmovzxdq writing ymm0.
431define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
432; SSE2-LABEL: zext_4i32_to_4i64:
433; SSE2:       # BB#0: # %entry
434; SSE2-NEXT:    movdqa %xmm0, %xmm1
435; SSE2-NEXT:    pxor %xmm2, %xmm2
436; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
437; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
438; SSE2-NEXT:    retq
439;
440; SSSE3-LABEL: zext_4i32_to_4i64:
441; SSSE3:       # BB#0: # %entry
442; SSSE3-NEXT:    movdqa %xmm0, %xmm1
443; SSSE3-NEXT:    pxor %xmm2, %xmm2
444; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
445; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
446; SSSE3-NEXT:    retq
447;
448; SSE41-LABEL: zext_4i32_to_4i64:
449; SSE41:       # BB#0: # %entry
450; SSE41-NEXT:    movdqa %xmm0, %xmm1
451; SSE41-NEXT:    pxor %xmm2, %xmm2
452; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
453; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
454; SSE41-NEXT:    retq
455;
456; AVX1-LABEL: zext_4i32_to_4i64:
457; AVX1:       # BB#0: # %entry
458; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
459; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
460; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
461; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
462; AVX1-NEXT:    retq
463;
464; AVX2-LABEL: zext_4i32_to_4i64:
465; AVX2:       # BB#0: # %entry
466; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
467; AVX2-NEXT:    retq
468;
469; AVX512-LABEL: zext_4i32_to_4i64:
470; AVX512:       # BB#0: # %entry
471; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
472; AVX512-NEXT:    retq
473entry:
474  %B = zext <4 x i32> %A to <4 x i64>
475  ret <4 x i64>%B
476}
477
; Loads a <2 x i8> vector from %ptr and zero-extends it to <2 x i64>.
; Per the checks below, SSE2 and SSSE3 are expected to load the two bytes
; through a scalar movzwl + movd and then widen in-register (unpack chain on
; SSE2, pshufb on SSSE3), while SSE4.1 and the shared AVX checks expect the
; load to be folded into pmovzxbq / vpmovzxbq as a memory operand.
478define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
479; SSE2-LABEL: load_zext_2i8_to_2i64:
480; SSE2:       # BB#0: # %entry
481; SSE2-NEXT:    movzwl (%rdi), %eax
482; SSE2-NEXT:    movd %eax, %xmm0
483; SSE2-NEXT:    pxor %xmm1, %xmm1
484; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
485; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
486; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
487; SSE2-NEXT:    retq
488;
489; SSSE3-LABEL: load_zext_2i8_to_2i64:
490; SSSE3:       # BB#0: # %entry
491; SSSE3-NEXT:    movzwl (%rdi), %eax
492; SSSE3-NEXT:    movd %eax, %xmm0
493; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
494; SSSE3-NEXT:    retq
495;
496; SSE41-LABEL: load_zext_2i8_to_2i64:
497; SSE41:       # BB#0: # %entry
498; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
499; SSE41-NEXT:    retq
500;
501; AVX-LABEL: load_zext_2i8_to_2i64:
502; AVX:       # BB#0: # %entry
503; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
504; AVX-NEXT:    retq
505entry:
506 %X = load <2 x i8>, <2 x i8>* %ptr
507 %Y = zext <2 x i8> %X to <2 x i64>
508 ret <2 x i64> %Y
509}
510
; Loads a <4 x i8> vector from %ptr and zero-extends it to <4 x i32>.
; Per the checks below, SSE2 and SSSE3 are expected to load the four bytes
; with movd and then widen in two unpack steps (punpcklbw, punpcklwd) against
; a zeroed register, while SSE4.1 and the shared AVX checks expect the load to
; be folded into pmovzxbd / vpmovzxbd as a memory operand.
511define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
512; SSE2-LABEL: load_zext_4i8_to_4i32:
513; SSE2:       # BB#0: # %entry
514; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
515; SSE2-NEXT:    pxor %xmm1, %xmm1
516; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
517; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
518; SSE2-NEXT:    retq
519;
520; SSSE3-LABEL: load_zext_4i8_to_4i32:
521; SSSE3:       # BB#0: # %entry
522; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
523; SSSE3-NEXT:    pxor %xmm1, %xmm1
524; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
525; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
526; SSSE3-NEXT:    retq
527;
528; SSE41-LABEL: load_zext_4i8_to_4i32:
529; SSE41:       # BB#0: # %entry
530; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
531; SSE41-NEXT:    retq
532;
533; AVX-LABEL: load_zext_4i8_to_4i32:
534; AVX:       # BB#0: # %entry
535; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
536; AVX-NEXT:    retq
537entry:
538 %X = load <4 x i8>, <4 x i8>* %ptr
539 %Y = zext <4 x i8> %X to <4 x i32>
540 ret <4 x i32> %Y
541}
542
; Loads a <4 x i8> vector from %ptr and zero-extends it to <4 x i64>,
; producing a 256-bit result.
; Per the checks below, SSE2 and SSSE3 are expected to load the four bytes
; with movd and widen in-register (unpack chain on SSE2, one pshufb per half
; on SSSE3); SSE4.1 uses two pmovzxbq with memory operands; AVX1 does the same
; and joins the halves with vinsertf128; AVX2 and AVX512F use one vpmovzxbq
; from memory writing ymm0.
; NOTE(review): in the SSE4.1 and AVX1 checks both memory loads print the same
; mem[0]/mem[1] element list; presumably the differing address operands are
; swallowed by the {{.*#+}} wildcard - confirm against raw llc output.
543define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
544; SSE2-LABEL: load_zext_4i8_to_4i64:
545; SSE2:       # BB#0: # %entry
546; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
547; SSE2-NEXT:    pxor %xmm2, %xmm2
548; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
549; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
550; SSE2-NEXT:    movdqa %xmm1, %xmm0
551; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
552; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
553; SSE2-NEXT:    retq
554;
555; SSSE3-LABEL: load_zext_4i8_to_4i64:
556; SSSE3:       # BB#0: # %entry
557; SSSE3-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
558; SSSE3-NEXT:    movdqa %xmm1, %xmm0
559; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
560; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
561; SSSE3-NEXT:    retq
562;
563; SSE41-LABEL: load_zext_4i8_to_4i64:
564; SSE41:       # BB#0: # %entry
565; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
566; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
567; SSE41-NEXT:    retq
568;
569; AVX1-LABEL: load_zext_4i8_to_4i64:
570; AVX1:       # BB#0: # %entry
571; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
572; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
573; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
574; AVX1-NEXT:    retq
575;
576; AVX2-LABEL: load_zext_4i8_to_4i64:
577; AVX2:       # BB#0: # %entry
578; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
579; AVX2-NEXT:    retq
580;
581; AVX512-LABEL: load_zext_4i8_to_4i64:
582; AVX512:       # BB#0: # %entry
583; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
584; AVX512-NEXT:    retq
585entry:
586 %X = load <4 x i8>, <4 x i8>* %ptr
587 %Y = zext <4 x i8> %X to <4 x i64>
588 ret <4 x i64> %Y
589}
590
; Loads an <8 x i8> vector from %ptr and zero-extends it to <8 x i16>.
; Per the checks below, SSE2 and SSSE3 are expected to load the eight bytes
; with movq and interleave them with a zeroed register (pxor + punpcklbw),
; while SSE4.1 and the shared AVX checks expect the load to be folded into
; pmovzxbw / vpmovzxbw as a memory operand.
591define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
592; SSE2-LABEL: load_zext_8i8_to_8i16:
593; SSE2:       # BB#0: # %entry
594; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
595; SSE2-NEXT:    pxor %xmm1, %xmm1
596; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
597; SSE2-NEXT:    retq
598;
599; SSSE3-LABEL: load_zext_8i8_to_8i16:
600; SSSE3:       # BB#0: # %entry
601; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
602; SSSE3-NEXT:    pxor %xmm1, %xmm1
603; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
604; SSSE3-NEXT:    retq
605;
606; SSE41-LABEL: load_zext_8i8_to_8i16:
607; SSE41:       # BB#0: # %entry
608; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
609; SSE41-NEXT:    retq
610;
611; AVX-LABEL: load_zext_8i8_to_8i16:
612; AVX:       # BB#0: # %entry
613; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
614; AVX-NEXT:    retq
615entry:
616 %X = load <8 x i8>, <8 x i8>* %ptr
617 %Y = zext <8 x i8> %X to <8 x i16>
618 ret <8 x i16> %Y
619}
620
; Loads an <8 x i8> vector from %ptr and zero-extends every lane to i32.
; The 256-bit result is expected in xmm0/xmm1 on the SSE targets and in ymm0 on
; the AVX targets. SSE2/SSSE3 widen in two unpack-with-zero steps (bytes to
; words, then words to dwords); SSE4.1 and AVX1 expect two memory-operand
; pmovzxbd / vpmovzxbd (AVX1 then joins the halves with vinsertf128); AVX2 and
; AVX512 expect a single ymm vpmovzxbd straight from memory.
define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_8i8_to_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_8i8_to_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_8i8_to_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_8i8_to_8i32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <8 x i8>, <8 x i8>* %ptr
 %Y = zext <8 x i8> %X to <8 x i32>
 ret <8 x i32> %Y
}
669
; Loads a full <16 x i8> vector from %ptr, keeps only its low eight lanes via a
; shufflevector, and zero-extends those lanes to i32.
; The SSE2, SSSE3, SSE4.1 and AVX1 assertions expect the whole 16-byte vector
; to be loaded with (v)movdqa first and then widened in registers (SSE4.1/AVX1
; use pmovzxbd on the low dword, a pshufd, and a second pmovzxbd). AVX2 and
; AVX512 expect the load to be folded into one ymm vpmovzxbd.
define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) {
; SSE2-LABEL: load_zext_16i8_to_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa (%rdi), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_16i8_to_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa (%rdi), %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_16i8_to_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa (%rdi), %xmm1
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_16i8_to_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_16i8_to_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_16i8_to_8i32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <16 x i8>, <16 x i8>* %ptr
 %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 %Z = zext <8 x i8> %Y to <8 x i32>
 ret <8 x i32> %Z
}
723
; Loads an <8 x i8> vector from %ptr and zero-extends every lane to i64.
; The 512-bit result is expected in xmm0-xmm3 on the SSE targets, ymm0/ymm1 on
; AVX1 and AVX2, and zmm0 on AVX512. SSE2 widens through three unpack-with-zero
; levels; SSSE3 uses pshufb with two constant byte masks; SSE4.1 and later are
; expected to use memory-operand pmovzxbq / vpmovzxbq (a single zmm instruction
; on AVX512).
define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm4, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
; SSSE3-NEXT:    pshufb %xmm5, %xmm1
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    pshufb %xmm4, %xmm2
; SSSE3-NEXT:    pshufb %xmm5, %xmm3
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_8i8_to_8i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_8i8_to_8i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_8i8_to_8i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_8i8_to_8i64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <8 x i8>, <8 x i8>* %ptr
 %Y = zext <8 x i8> %X to <8 x i64>
 ret <8 x i64> %Y
}
789
; Loads a <16 x i8> vector from %ptr and zero-extends every lane to i16.
; SSE2/SSSE3 are expected to load the vector with movdqa and split it into low
; and high halves with punpcklbw / punpckhbw against a zeroed register.
; SSE4.1 and AVX1 expect two memory-operand pmovzxbw / vpmovzxbw (AVX1 joins
; them with vinsertf128); AVX2 and AVX512 expect one ymm vpmovzxbw.
define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
; SSE2-LABEL: load_zext_16i8_to_16i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa (%rdi), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_16i8_to_16i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa (%rdi), %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_16i8_to_16i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_16i8_to_16i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_16i8_to_16i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_16i8_to_16i16:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512-NEXT:    retq
entry:
 %X = load <16 x i8>, <16 x i8>* %ptr
 %Y = zext <16 x i8> %X to <16 x i16>
 ret <16 x i16> %Y
}
836
; Loads a <2 x i16> vector from %ptr and zero-extends both lanes to i64.
; SSE2/SSSE3 are expected to use a 32-bit movd load followed by punpcklwd and
; punpckldq against a zeroed register; SSE4.1 and the AVX targets expect a
; single memory-operand pmovzxwq / vpmovzxwq.
define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
; SSE2-LABEL: load_zext_2i16_to_2i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_2i16_to_2i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_2i16_to_2i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_2i16_to_2i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX-NEXT:    retq
entry:
 %X = load <2 x i16>, <2 x i16>* %ptr
 %Y = zext <2 x i16> %X to <2 x i64>
 ret <2 x i64> %Y
}
868
; Loads a <4 x i16> vector from %ptr and zero-extends every lane to i32.
; SSE2/SSSE3 are expected to use a 64-bit movq load and punpcklwd against a
; zeroed register; SSE4.1 and the AVX targets expect a single memory-operand
; pmovzxwd / vpmovzxwd.
define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
; SSE2-LABEL: load_zext_4i16_to_4i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_4i16_to_4i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_4i16_to_4i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_4i16_to_4i32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX-NEXT:    retq
entry:
 %X = load <4 x i16>, <4 x i16>* %ptr
 %Y = zext <4 x i16> %X to <4 x i32>
 ret <4 x i32> %Y
}
898
; Loads a <4 x i16> vector from %ptr and zero-extends every lane to i64.
; The 256-bit result is expected in xmm0/xmm1 on the SSE targets and in ymm0 on
; the AVX targets. SSE2/SSSE3 widen words to dwords (punpcklwd) and then split
; into two qword halves (punpckldq / punpckhdq) against a zeroed register;
; SSE4.1 and AVX1 expect two memory-operand pmovzxwq / vpmovzxwq; AVX2 and
; AVX512 expect one ymm vpmovzxwq.
define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
; SSE2-LABEL: load_zext_4i16_to_4i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_4i16_to_4i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_4i16_to_4i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_4i16_to_4i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_4i16_to_4i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_4i16_to_4i64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <4 x i16>, <4 x i16>* %ptr
 %Y = zext <4 x i16> %X to <4 x i64>
 ret <4 x i64> %Y
}
947
; Loads an <8 x i16> vector from %ptr and zero-extends every lane to i32.
; SSE2/SSSE3 are expected to load the vector with movdqa and split it into low
; and high halves with punpcklwd / punpckhwd against a zeroed register.
; SSE4.1 and AVX1 expect two memory-operand pmovzxwd / vpmovzxwd (AVX1 joins
; them with vinsertf128); AVX2 and AVX512 expect one ymm vpmovzxwd.
define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
; SSE2-LABEL: load_zext_8i16_to_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa (%rdi), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_8i16_to_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa (%rdi), %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_8i16_to_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_8i16_to_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_8i16_to_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_8i16_to_8i32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX512-NEXT:    retq
entry:
 %X = load <8 x i16>, <8 x i16>* %ptr
 %Y = zext <8 x i16> %X to <8 x i32>
 ret <8 x i32> %Y
}
994
; Loads a <2 x i32> vector from %ptr and zero-extends both lanes to i64.
; SSE2/SSSE3 are expected to use a 64-bit movq load and punpckldq against a
; zeroed register; SSE4.1 and the AVX targets expect a single memory-operand
; pmovzxdq / vpmovzxdq.
define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
; SSE2-LABEL: load_zext_2i32_to_2i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_2i32_to_2i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_2i32_to_2i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_2i32_to_2i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX-NEXT:    retq
entry:
 %X = load <2 x i32>, <2 x i32>* %ptr
 %Y = zext <2 x i32> %X to <2 x i64>
 ret <2 x i64> %Y
}
1024
; Loads a <4 x i32> vector from %ptr and zero-extends every lane to i64.
; SSE2/SSSE3 are expected to load the vector with movdqa and split it into low
; and high halves with punpckldq / punpckhdq against a zeroed register.
; SSE4.1 and AVX1 expect two memory-operand pmovzxdq / vpmovzxdq (AVX1 joins
; them with vinsertf128); AVX2 and AVX512 expect one ymm vpmovzxdq.
define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
; SSE2-LABEL: load_zext_4i32_to_4i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa (%rdi), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_4i32_to_4i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa (%rdi), %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_4i32_to_4i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_4i32_to_4i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_4i32_to_4i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_4i32_to_4i64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX512-NEXT:    retq
entry:
 %X = load <4 x i32>, <4 x i32>* %ptr
 %Y = zext <4 x i32> %X to <4 x i64>
 ret <4 x i64> %Y
}
1071
; Zero-extends an <8 x i8> register argument %z to <8 x i32>.
; Every target's assertions start by masking the incoming xmm0 with a
; rip-relative constant (pand / vpand) and then widen from 16-bit lanes
; (punpcklwd / punpckhwd or pmovzxwd), so the argument is presumably passed
; widened to <8 x i16> with undefined high bytes -- confirm against the
; type-legalisation rules in effect when these assertions were generated.
define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
; SSE2-LABEL: zext_8i8_to_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i8_to_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i8_to_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_8i8_to_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_8i8_to_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_8i8_to_8i32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    retq
entry:
  %t = zext <8 x i8> %z to <8 x i32>
  ret <8 x i32> %t
}
1126
; Zero extension written as a shuffle: each i16 lane of %A is interleaved with
; lane 8 (the first lane of the zeroinitializer operand) and the <16 x i16>
; result is bitcast to <8 x i32>.
; The assertions expect the same lowering a plain zext would get: unpack with a
; zeroed register on SSE2/SSSE3, pmovzxwd plus punpckhwd on SSE4.1,
; vpunpckhwd + vpmovzxwd + vinsertf128 on AVX1, and a single ymm vpmovzxwd on
; AVX2 and AVX512.
define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_8i16_to_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_8i16_to_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shuf_zext_8i16_to_8i32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
  %Z = bitcast <16 x i16> %B to <8 x i32>
  ret <8 x i32> %Z
}
1174
; Zero extension written as a shuffle: each i32 lane of %A is interleaved with
; lane 4 (the first lane of the zeroinitializer operand) and the <8 x i32>
; result is bitcast to <4 x i64>.
; SSE2/SSSE3 expect punpckldq / punpckhdq against a zeroed register; SSE4.1
; expects pmovzxdq for the low half plus punpckhdq for the high half; AVX2 and
; AVX512 expect a single ymm vpmovzxdq. The AVX1 assertions pin a longer
; floating-point-domain sequence (vinsertps, vxorpd, vblendpd, vpermilps,
; vinsertf128) rather than a vpmovzxdq-based one.
define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_4i32_to_4i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_4i32_to_4i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_4i32_to_4i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuf_zext_4i32_to_4i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shuf_zext_4i32_to_4i64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
  %Z = bitcast <8 x i32> %B to <4 x i64>
  ret <4 x i64> %Z
}
1223
; Zero extension written as a shuffle on an <8 x i8> register argument: each
; source byte of %A is followed by three copies of lane 8 (the first lane of
; the zeroinitializer operand), and the <32 x i8> result is bitcast to
; <8 x i32>.
; The assertions expect the even bytes of xmm0 to be gathered first (pand +
; packuswb on SSE2, pshufb elsewhere) before widening: unpack with zero on
; SSE2/SSSE3, two pmovzxbd with a pshufd between on SSE4.1 and AVX1, and one
; ymm vpmovzxbd on AVX2 and AVX512.
define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
; SSE2-LABEL: shuf_zext_8i8_to_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    packuswb %xmm1, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0],zero,xmm1[2],zero,xmm1[4],zero,xmm1[6],zero,xmm1[8],zero,xmm1[10],zero,xmm1[12],zero,xmm1[14],zero
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_8i8_to_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_8i8_to_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuf_zext_8i8_to_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shuf_zext_8i8_to_8i32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
  %Z = bitcast <32 x i8> %B to <8 x i32>
  ret <8 x i32> %Z
}
1281
; Zero extension from a non-zero starting element, written as a shuffle:
; bytes 6 and 7 of %A are each followed by seven lanes taken from the
; zeroinitializer operand (mask index 16), and the <16 x i8> result is
; bitcast to <2 x i64>.
; The per-feature assertions below are autogenerated (see the note on the
; first line of the file); regenerate them instead of editing by hand.
1282define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
1283; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
1284; SSE2:       # BB#0: # %entry
1285; SSE2-NEXT:    pxor %xmm1, %xmm1
1286; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1287; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1288; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1289; SSE2-NEXT:    retq
1290;
1291; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
1292; SSSE3:       # BB#0: # %entry
1293; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1294; SSSE3-NEXT:    retq
1295;
1296; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
1297; SSE41:       # BB#0: # %entry
1298; SSE41-NEXT:    psrlq $48, %xmm0
1299; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1300; SSE41-NEXT:    retq
1301;
1302; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6:
1303; AVX:       # BB#0: # %entry
1304; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm0
1305; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1306; AVX-NEXT:    retq
1307entry:
1308  %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1309  %Z = bitcast <16 x i8> %B to <2 x i64>
1310  ret <2 x i64> %Z
1311}
1312
; Zero extension from a non-zero starting element, written as a shuffle:
; bytes 11, 12, 13 and 14 of %A are each followed by seven lanes taken from
; the zeroinitializer operand (mask index 16), and the <32 x i8> result is
; bitcast to <4 x i64>. The result is wider than one 128-bit register, so the
; assertions without 256-bit registers return it in two xmm registers.
; The per-feature assertions below are autogenerated (see the note on the
; first line of the file); regenerate them instead of editing by hand.
1313define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
1314; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1315; SSE2:       # BB#0: # %entry
1316; SSE2-NEXT:    movdqa %xmm0, %xmm1
1317; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
1318; SSE2-NEXT:    pxor %xmm2, %xmm2
1319; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1320; SSE2-NEXT:    movdqa %xmm1, %xmm0
1321; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1322; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1323; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1324; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1325; SSE2-NEXT:    retq
1326;
1327; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
1328; SSSE3:       # BB#0: # %entry
1329; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1330; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
1331; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
1332; SSSE3-NEXT:    retq
1333;
1334; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
1335; SSE41:       # BB#0: # %entry
1336; SSE41-NEXT:    movdqa %xmm0, %xmm1
1337; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1338; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1339; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1340; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1341; SSE41-NEXT:    movdqa %xmm2, %xmm0
1342; SSE41-NEXT:    retq
1343;
1344; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
1345; AVX1:       # BB#0: # %entry
1346; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1347; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1348; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1349; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1350; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1351; AVX1-NEXT:    retq
1352;
1353; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1354; AVX2:       # BB#0: # %entry
1355; AVX2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1356; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1357; AVX2-NEXT:    retq
1358;
1359; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11:
1360; AVX512:       # BB#0: # %entry
1361; AVX512-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1362; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1363; AVX512-NEXT:    retq
1364entry:
1365  %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1366  %Z = bitcast <32 x i8> %B to <4 x i64>
1367  ret <4 x i64> %Z
1368}
1369
; Zero extension from a non-zero starting element, written as a shuffle:
; 16-bit elements 3 and 4 of %A are each followed by three lanes taken from
; the zeroinitializer operand (mask index 8), and the <8 x i16> result is
; bitcast to <2 x i64>.
; NOTE(review): the mask starts at element 3, i.e. byte 6 of the input, so the
; "offset6" in the name is presumably a byte offset here - confirm before
; relying on the name.
; The per-feature assertions below are autogenerated (see the note on the
; first line of the file); regenerate them instead of editing by hand.
1370define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
1371; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
1372; SSE2:       # BB#0: # %entry
1373; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1374; SSE2-NEXT:    pxor %xmm1, %xmm1
1375; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1376; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1377; SSE2-NEXT:    retq
1378;
1379; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
1380; SSSE3:       # BB#0: # %entry
1381; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1382; SSSE3-NEXT:    retq
1383;
1384; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
1385; SSE41:       # BB#0: # %entry
1386; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1387; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1388; SSE41-NEXT:    retq
1389;
1390; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6:
1391; AVX:       # BB#0: # %entry
1392; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1393; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1394; AVX-NEXT:    retq
1395entry:
1396  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
1397  %Z = bitcast <8 x i16> %B to <2 x i64>
1398  ret <2 x i64> %Z
1399}
1400
; Zero extension from a non-zero starting element, written as a shuffle:
; 16-bit elements 2, 3, 4 and 5 of %A are each followed by three lanes taken
; from the zeroinitializer operand (mask index 8), and the <16 x i16> result
; is bitcast to <4 x i64>.
; The per-feature assertions below are autogenerated (see the note on the
; first line of the file); regenerate them instead of editing by hand.
1401define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
1402; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1403; SSE2:       # BB#0: # %entry
1404; SSE2-NEXT:    movdqa %xmm0, %xmm1
1405; SSE2-NEXT:    pxor %xmm2, %xmm2
1406; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1407; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1408; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1409; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1410; SSE2-NEXT:    retq
1411;
1412; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
1413; SSSE3:       # BB#0: # %entry
1414; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1415; SSSE3-NEXT:    pxor %xmm2, %xmm2
1416; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1417; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1418; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1419; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1420; SSSE3-NEXT:    retq
1421;
1422; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
1423; SSE41:       # BB#0: # %entry
1424; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1425; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1426; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1427; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1428; SSE41-NEXT:    movdqa %xmm2, %xmm0
1429; SSE41-NEXT:    retq
1430;
1431; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
1432; AVX1:       # BB#0: # %entry
1433; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1434; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1435; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1436; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1437; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1438; AVX1-NEXT:    retq
1439;
1440; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1441; AVX2:       # BB#0: # %entry
1442; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
1443; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1444; AVX2-NEXT:    retq
1445;
1446; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2:
1447; AVX512:       # BB#0: # %entry
1448; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
1449; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1450; AVX512-NEXT:    retq
1451entry:
1452  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
1453  %Z = bitcast <16 x i16> %B to <4 x i64>
1454  ret <4 x i64> %Z
1455}
1456
; Zero extension from a non-zero starting element, written as a shuffle:
; 16-bit elements 1, 2, 3 and 4 of %A are each followed by one lane taken
; from the zeroinitializer operand (mask index 8), and the <8 x i16> result
; is bitcast to <4 x i32>.
; The assertions below are autogenerated (see the note on the first line of
; the file); regenerate them instead of editing by hand.
1457define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
1458; SSE-LABEL: shuf_zext_8i16_to_4i32_offset1:
1459; SSE:       # BB#0: # %entry
1460; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1461; SSE-NEXT:    pxor %xmm1, %xmm1
1462; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1463; SSE-NEXT:    retq
1464;
1465; AVX-LABEL: shuf_zext_8i16_to_4i32_offset1:
1466; AVX:       # BB#0: # %entry
1467; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1468; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1469; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1470; AVX-NEXT:    retq
1471entry:
1472  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
1473  %Z = bitcast <8 x i16> %B to <4 x i32>
1474  ret <4 x i32> %Z
1475}
1476
; Zero extension from a non-zero starting element with undef tail lanes:
; 16-bit elements 3, 4, 5, 6 and 7 of %A, followed by three undef lanes, are
; each paired with one lane taken from the zeroinitializer operand (mask
; index 8). The <16 x i16> result is bitcast to <8 x i32>, so only the low
; halves of the last three 32-bit lanes are unspecified.
; The per-feature assertions below are autogenerated (see the note on the
; first line of the file); regenerate them instead of editing by hand.
1477define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
1478; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
1479; SSE2:       # BB#0: # %entry
1480; SSE2-NEXT:    movdqa %xmm0, %xmm1
1481; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1482; SSE2-NEXT:    pxor %xmm2, %xmm2
1483; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1484; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1485; SSE2-NEXT:    retq
1486;
1487; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
1488; SSSE3:       # BB#0: # %entry
1489; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1490; SSSE3-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1491; SSSE3-NEXT:    pxor %xmm2, %xmm2
1492; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1493; SSSE3-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1494; SSSE3-NEXT:    retq
1495;
1496; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
1497; SSE41:       # BB#0: # %entry
1498; SSE41-NEXT:    movdqa %xmm0, %xmm1
1499; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1500; SSE41-NEXT:    pxor %xmm2, %xmm2
1501; SSE41-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1502; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1503; SSE41-NEXT:    retq
1504;
1505; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
1506; AVX1:       # BB#0: # %entry
1507; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1508; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1509; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1510; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1511; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1512; AVX1-NEXT:    retq
1513;
1514; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
1515; AVX2:       # BB#0: # %entry
1516; AVX2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1517; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1518; AVX2-NEXT:    retq
1519;
1520; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3:
1521; AVX512:       # BB#0: # %entry
1522; AVX512-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1523; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1524; AVX512-NEXT:    retq
1525entry:
1526  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
1527  %Z = bitcast <16 x i16> %B to <8 x i32>
1528  ret <8 x i32> %Z
1529}
1530
; Zero extension of the upper half of a 256-bit input, with two undef lanes:
; the mask selects 16-bit elements 8, 9, 10, 11, 12, undef, 14, undef of %A,
; each paired with one lane taken from the zeroinitializer operand (mask
; index 16). The <16 x i16> result is bitcast to <8 x i32>.
; The per-feature assertions below are autogenerated (see the note on the
; first line of the file); regenerate them instead of editing by hand.
1531define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
1532; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
1533; SSE2:       # BB#0: # %entry
1534; SSE2-NEXT:    pxor %xmm2, %xmm2
1535; SSE2-NEXT:    movdqa %xmm1, %xmm0
1536; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1537; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1538; SSE2-NEXT:    retq
1539;
1540; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
1541; SSSE3:       # BB#0: # %entry
1542; SSSE3-NEXT:    pxor %xmm2, %xmm2
1543; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1544; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1545; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1546; SSSE3-NEXT:    retq
1547;
1548; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
1549; SSE41:       # BB#0: # %entry
1550; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1551; SSE41-NEXT:    pxor %xmm2, %xmm2
1552; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
1553; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
1554; SSE41-NEXT:    movdqa %xmm2, %xmm1
1555; SSE41-NEXT:    retq
1556;
1557; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
1558; AVX1:       # BB#0: # %entry
1559; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1560; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
1561; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1562; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
1563; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1564; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1565; AVX1-NEXT:    retq
1566;
1567; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
1568; AVX2:       # BB#0: # %entry
1569; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
1570; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1571; AVX2-NEXT:    retq
1572;
1573; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8:
1574; AVX512:       # BB#0: # %entry
1575; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
1576; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1577; AVX512-NEXT:    retq
1578entry:
1579  %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
1580  %Z = bitcast <16 x i16> %B to <8 x i32>
1581  ret <8 x i32> %Z
1582}
1583
; Zero extension of the upper two 32-bit elements, written as a shuffle:
; elements 2 and 3 of %A are each followed by one lane taken from the
; zeroinitializer operand (mask index 4), and the <4 x i32> result is bitcast
; to <2 x i64>.
; The assertions below are autogenerated (see the note on the first line of
; the file); regenerate them instead of editing by hand.
1584define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
1585; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
1586; SSE:       # BB#0: # %entry
1587; SSE-NEXT:    pxor %xmm1, %xmm1
1588; SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1589; SSE-NEXT:    retq
1590;
1591; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
1592; AVX:       # BB#0: # %entry
1593; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1594; AVX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1595; AVX-NEXT:    retq
1596entry:
1597  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
1598  %Z = bitcast <4 x i32> %B to <2 x i64>
1599  ret <2 x i64> %Z
1600}
1601
; Zero extension from a non-zero starting element with undef end lanes:
; the mask selects undef, element 2, element 3, undef from %A, each followed
; by one lane taken from the zeroinitializer operand (mask index 4). The
; <8 x i32> result is bitcast to <4 x i64>, so only the middle two 64-bit
; lanes have fully specified contents.
; NOTE(review): the undef positions presumably stand for elements 1 and 4 of
; an offset-1 sequence (4 would be past the end of %A) - confirm the intent.
; The per-feature assertions below are autogenerated (see the note on the
; first line of the file); regenerate them instead of editing by hand.
1602define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
1603; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
1604; SSE2:       # BB#0: # %entry
1605; SSE2-NEXT:    movdqa %xmm0, %xmm1
1606; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
1607; SSE2-NEXT:    pand %xmm1, %xmm0
1608; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1609; SSE2-NEXT:    retq
1610;
1611; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
1612; SSSE3:       # BB#0: # %entry
1613; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1614; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
1615; SSSE3-NEXT:    pand %xmm1, %xmm0
1616; SSSE3-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1617; SSSE3-NEXT:    retq
1618;
1619; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
1620; SSE41:       # BB#0: # %entry
1621; SSE41-NEXT:    movdqa %xmm0, %xmm1
1622; SSE41-NEXT:    pxor %xmm0, %xmm0
1623; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1624; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1625; SSE41-NEXT:    retq
1626;
1627; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
1628; AVX1:       # BB#0: # %entry
1629; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[3],zero,zero,zero
1630; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1631; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
1632; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1633; AVX1-NEXT:    retq
1634;
1635; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
1636; AVX2:       # BB#0: # %entry
1637; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
1638; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1639; AVX2-NEXT:    retq
1640;
1641; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1:
1642; AVX512:       # BB#0: # %entry
1643; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
1644; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1645; AVX512-NEXT:    retq
1646entry:
1647  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
1648  %Z = bitcast <8 x i32> %B to <4 x i64>
1649  ret <4 x i64> %Z
1650}
1651