1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-SLOW
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
10
11define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
12; SSE2-LABEL: zext_16i8_to_8i16:
13; SSE2:       # %bb.0: # %entry
14; SSE2-NEXT:    pxor %xmm1, %xmm1
15; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
16; SSE2-NEXT:    retq
17;
18; SSSE3-LABEL: zext_16i8_to_8i16:
19; SSSE3:       # %bb.0: # %entry
20; SSSE3-NEXT:    pxor %xmm1, %xmm1
21; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
22; SSSE3-NEXT:    retq
23;
24; SSE41-LABEL: zext_16i8_to_8i16:
25; SSE41:       # %bb.0: # %entry
26; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
27; SSE41-NEXT:    retq
28;
29; AVX-LABEL: zext_16i8_to_8i16:
30; AVX:       # %bb.0: # %entry
31; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
32; AVX-NEXT:    retq
33entry:
34  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
35  %C = zext <8 x i8> %B to <8 x i16>
36  ret <8 x i16> %C
37}
38
39; PR17654
40define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
41; SSE2-LABEL: zext_16i8_to_16i16:
42; SSE2:       # %bb.0: # %entry
43; SSE2-NEXT:    movdqa %xmm0, %xmm1
44; SSE2-NEXT:    pxor %xmm2, %xmm2
45; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
46; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
47; SSE2-NEXT:    retq
48;
49; SSSE3-LABEL: zext_16i8_to_16i16:
50; SSSE3:       # %bb.0: # %entry
51; SSSE3-NEXT:    movdqa %xmm0, %xmm1
52; SSSE3-NEXT:    pxor %xmm2, %xmm2
53; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
54; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
55; SSSE3-NEXT:    retq
56;
57; SSE41-LABEL: zext_16i8_to_16i16:
58; SSE41:       # %bb.0: # %entry
59; SSE41-NEXT:    movdqa %xmm0, %xmm1
60; SSE41-NEXT:    pxor %xmm2, %xmm2
61; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
62; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
63; SSE41-NEXT:    retq
64;
65; AVX1-LABEL: zext_16i8_to_16i16:
66; AVX1:       # %bb.0: # %entry
67; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
68; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
69; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
70; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
71; AVX1-NEXT:    retq
72;
73; AVX2-LABEL: zext_16i8_to_16i16:
74; AVX2:       # %bb.0: # %entry
75; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
76; AVX2-NEXT:    retq
77;
78; AVX512-LABEL: zext_16i8_to_16i16:
79; AVX512:       # %bb.0: # %entry
80; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
81; AVX512-NEXT:    retq
82entry:
83  %B = zext <16 x i8> %A to <16 x i16>
84  ret <16 x i16> %B
85}
86
87define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) {
88; SSE2-LABEL: zext_32i8_to_32i16:
89; SSE2:       # %bb.0: # %entry
90; SSE2-NEXT:    movdqa %xmm1, %xmm3
91; SSE2-NEXT:    movdqa %xmm0, %xmm1
92; SSE2-NEXT:    pxor %xmm4, %xmm4
93; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
94; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
95; SSE2-NEXT:    movdqa %xmm3, %xmm2
96; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
97; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
98; SSE2-NEXT:    retq
99;
100; SSSE3-LABEL: zext_32i8_to_32i16:
101; SSSE3:       # %bb.0: # %entry
102; SSSE3-NEXT:    movdqa %xmm1, %xmm3
103; SSSE3-NEXT:    movdqa %xmm0, %xmm1
104; SSSE3-NEXT:    pxor %xmm4, %xmm4
105; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
106; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
107; SSSE3-NEXT:    movdqa %xmm3, %xmm2
108; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
109; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
110; SSSE3-NEXT:    retq
111;
112; SSE41-LABEL: zext_32i8_to_32i16:
113; SSE41:       # %bb.0: # %entry
114; SSE41-NEXT:    movdqa %xmm1, %xmm3
115; SSE41-NEXT:    movdqa %xmm0, %xmm1
116; SSE41-NEXT:    pxor %xmm4, %xmm4
117; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
118; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
119; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
120; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
121; SSE41-NEXT:    retq
122;
123; AVX1-LABEL: zext_32i8_to_32i16:
124; AVX1:       # %bb.0: # %entry
125; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
126; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
127; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
128; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
129; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
130; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
131; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
132; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
133; AVX1-NEXT:    vmovaps %ymm2, %ymm0
134; AVX1-NEXT:    retq
135;
136; AVX2-LABEL: zext_32i8_to_32i16:
137; AVX2:       # %bb.0: # %entry
138; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
139; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
140; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
141; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
142; AVX2-NEXT:    retq
143;
144; AVX512F-LABEL: zext_32i8_to_32i16:
145; AVX512F:       # %bb.0: # %entry
146; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
147; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
148; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
149; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
150; AVX512F-NEXT:    retq
151;
152; AVX512BW-LABEL: zext_32i8_to_32i16:
153; AVX512BW:       # %bb.0: # %entry
154; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
155; AVX512BW-NEXT:    retq
156entry:
157  %B = zext <32 x i8> %A to <32 x i16>
158  ret <32 x i16> %B
159}
160
161define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
162; SSE2-LABEL: zext_16i8_to_4i32:
163; SSE2:       # %bb.0: # %entry
164; SSE2-NEXT:    pxor %xmm1, %xmm1
165; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
166; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
167; SSE2-NEXT:    retq
168;
169; SSSE3-LABEL: zext_16i8_to_4i32:
170; SSSE3:       # %bb.0: # %entry
171; SSSE3-NEXT:    pxor %xmm1, %xmm1
172; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
173; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
174; SSSE3-NEXT:    retq
175;
176; SSE41-LABEL: zext_16i8_to_4i32:
177; SSE41:       # %bb.0: # %entry
178; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
179; SSE41-NEXT:    retq
180;
181; AVX-LABEL: zext_16i8_to_4i32:
182; AVX:       # %bb.0: # %entry
183; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
184; AVX-NEXT:    retq
185entry:
186  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
187  %C = zext <4 x i8> %B to <4 x i32>
188  ret <4 x i32> %C
189}
190
191define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
192; SSE2-LABEL: zext_16i8_to_8i32:
193; SSE2:       # %bb.0: # %entry
194; SSE2-NEXT:    movdqa %xmm0, %xmm1
195; SSE2-NEXT:    pxor %xmm2, %xmm2
196; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
197; SSE2-NEXT:    movdqa %xmm1, %xmm0
198; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
199; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
200; SSE2-NEXT:    retq
201;
202; SSSE3-LABEL: zext_16i8_to_8i32:
203; SSSE3:       # %bb.0: # %entry
204; SSSE3-NEXT:    movdqa %xmm0, %xmm1
205; SSSE3-NEXT:    pxor %xmm2, %xmm2
206; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
207; SSSE3-NEXT:    movdqa %xmm1, %xmm0
208; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
209; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
210; SSSE3-NEXT:    retq
211;
212; SSE41-LABEL: zext_16i8_to_8i32:
213; SSE41:       # %bb.0: # %entry
214; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
215; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
216; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
217; SSE41-NEXT:    movdqa %xmm2, %xmm0
218; SSE41-NEXT:    retq
219;
220; AVX1-LABEL: zext_16i8_to_8i32:
221; AVX1:       # %bb.0: # %entry
222; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
223; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
224; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
225; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
226; AVX1-NEXT:    retq
227;
228; AVX2-LABEL: zext_16i8_to_8i32:
229; AVX2:       # %bb.0: # %entry
230; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
231; AVX2-NEXT:    retq
232;
233; AVX512-LABEL: zext_16i8_to_8i32:
234; AVX512:       # %bb.0: # %entry
235; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
236; AVX512-NEXT:    retq
237entry:
238  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
239  %C = zext <8 x i8> %B to <8 x i32>
240  ret <8 x i32> %C
241}
242
243define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp {
244; SSE2-LABEL: zext_16i8_to_16i32:
245; SSE2:       # %bb.0: # %entry
246; SSE2-NEXT:    movdqa %xmm0, %xmm3
247; SSE2-NEXT:    pxor %xmm4, %xmm4
248; SSE2-NEXT:    movdqa %xmm0, %xmm1
249; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
250; SSE2-NEXT:    movdqa %xmm1, %xmm0
251; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
252; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
253; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
254; SSE2-NEXT:    movdqa %xmm3, %xmm2
255; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
256; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
257; SSE2-NEXT:    retq
258;
259; SSSE3-LABEL: zext_16i8_to_16i32:
260; SSSE3:       # %bb.0: # %entry
261; SSSE3-NEXT:    movdqa %xmm0, %xmm3
262; SSSE3-NEXT:    pxor %xmm4, %xmm4
263; SSSE3-NEXT:    movdqa %xmm0, %xmm1
264; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
265; SSSE3-NEXT:    movdqa %xmm1, %xmm0
266; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
267; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
268; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
269; SSSE3-NEXT:    movdqa %xmm3, %xmm2
270; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
271; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
272; SSSE3-NEXT:    retq
273;
274; SSE41-LABEL: zext_16i8_to_16i32:
275; SSE41:       # %bb.0: # %entry
276; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
277; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
278; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
279; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
280; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
281; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
282; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
283; SSE41-NEXT:    movdqa %xmm4, %xmm0
284; SSE41-NEXT:    retq
285;
286; AVX1-LABEL: zext_16i8_to_16i32:
287; AVX1:       # %bb.0: # %entry
288; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
289; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
290; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
291; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
292; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
293; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
294; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
295; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
296; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
297; AVX1-NEXT:    vmovaps %ymm2, %ymm0
298; AVX1-NEXT:    retq
299;
300; AVX2-LABEL: zext_16i8_to_16i32:
301; AVX2:       # %bb.0: # %entry
302; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
303; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
304; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
305; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
306; AVX2-NEXT:    retq
307;
308; AVX512-LABEL: zext_16i8_to_16i32:
309; AVX512:       # %bb.0: # %entry
310; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
311; AVX512-NEXT:    retq
312entry:
313  %B = zext <16 x i8> %A to <16 x i32>
314  ret <16 x i32> %B
315}
316
317define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
318; SSE2-LABEL: zext_16i8_to_2i64:
319; SSE2:       # %bb.0: # %entry
320; SSE2-NEXT:    pxor %xmm1, %xmm1
321; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
322; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
323; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
324; SSE2-NEXT:    retq
325;
326; SSSE3-LABEL: zext_16i8_to_2i64:
327; SSSE3:       # %bb.0: # %entry
328; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
329; SSSE3-NEXT:    retq
330;
331; SSE41-LABEL: zext_16i8_to_2i64:
332; SSE41:       # %bb.0: # %entry
333; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
334; SSE41-NEXT:    retq
335;
336; AVX-LABEL: zext_16i8_to_2i64:
337; AVX:       # %bb.0: # %entry
338; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
339; AVX-NEXT:    retq
340entry:
341  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
342  %C = zext <2 x i8> %B to <2 x i64>
343  ret <2 x i64> %C
344}
345
346define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
347; SSE2-LABEL: zext_16i8_to_4i64:
348; SSE2:       # %bb.0: # %entry
349; SSE2-NEXT:    movdqa %xmm0, %xmm1
350; SSE2-NEXT:    pxor %xmm2, %xmm2
351; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
352; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
353; SSE2-NEXT:    movdqa %xmm1, %xmm0
354; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
355; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
356; SSE2-NEXT:    retq
357;
358; SSSE3-LABEL: zext_16i8_to_4i64:
359; SSSE3:       # %bb.0: # %entry
360; SSSE3-NEXT:    movdqa %xmm0, %xmm1
361; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
362; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
363; SSSE3-NEXT:    retq
364;
365; SSE41-LABEL: zext_16i8_to_4i64:
366; SSE41:       # %bb.0: # %entry
367; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
368; SSE41-NEXT:    psrld $16, %xmm0
369; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
370; SSE41-NEXT:    movdqa %xmm2, %xmm0
371; SSE41-NEXT:    retq
372;
373; AVX1-LABEL: zext_16i8_to_4i64:
374; AVX1:       # %bb.0: # %entry
375; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
376; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
377; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
378; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
379; AVX1-NEXT:    retq
380;
381; AVX2-LABEL: zext_16i8_to_4i64:
382; AVX2:       # %bb.0: # %entry
383; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
384; AVX2-NEXT:    retq
385;
386; AVX512-LABEL: zext_16i8_to_4i64:
387; AVX512:       # %bb.0: # %entry
388; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
389; AVX512-NEXT:    retq
390entry:
391  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
392  %C = zext <4 x i8> %B to <4 x i64>
393  ret <4 x i64> %C
394}
395
396define <8 x i64> @zext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp {
397; SSE2-LABEL: zext_16i8_to_8i64:
398; SSE2:       # %bb.0: # %entry
399; SSE2-NEXT:    movdqa %xmm0, %xmm3
400; SSE2-NEXT:    pxor %xmm4, %xmm4
401; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
402; SSE2-NEXT:    movdqa %xmm3, %xmm1
403; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
404; SSE2-NEXT:    movdqa %xmm1, %xmm0
405; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
406; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
407; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
408; SSE2-NEXT:    movdqa %xmm3, %xmm2
409; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
410; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
411; SSE2-NEXT:    retq
412;
413; SSSE3-LABEL: zext_16i8_to_8i64:
414; SSSE3:       # %bb.0: # %entry
415; SSSE3-NEXT:    movdqa %xmm0, %xmm3
416; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
417; SSSE3-NEXT:    movdqa %xmm3, %xmm1
418; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
419; SSSE3-NEXT:    movdqa %xmm3, %xmm2
420; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[4],zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero,zero,zero
421; SSSE3-NEXT:    pshufb {{.*#+}} xmm3 = xmm3[6],zero,zero,zero,zero,zero,zero,zero,xmm3[7],zero,zero,zero,zero,zero,zero,zero
422; SSSE3-NEXT:    retq
423;
424; SSE41-LABEL: zext_16i8_to_8i64:
425; SSE41:       # %bb.0: # %entry
426; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
427; SSE41-NEXT:    movdqa %xmm0, %xmm1
428; SSE41-NEXT:    psrld $16, %xmm1
429; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
430; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
431; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
432; SSE41-NEXT:    psrlq $48, %xmm0
433; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
434; SSE41-NEXT:    movdqa %xmm4, %xmm0
435; SSE41-NEXT:    retq
436;
437; AVX1-LABEL: zext_16i8_to_8i64:
438; AVX1:       # %bb.0: # %entry
439; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
440; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm2
441; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
442; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
443; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
444; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
445; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
446; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
447; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
448; AVX1-NEXT:    vmovaps %ymm2, %ymm0
449; AVX1-NEXT:    retq
450;
451; AVX2-LABEL: zext_16i8_to_8i64:
452; AVX2:       # %bb.0: # %entry
453; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
454; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
455; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
456; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
457; AVX2-NEXT:    retq
458;
459; AVX512-LABEL: zext_16i8_to_8i64:
460; AVX512:       # %bb.0: # %entry
461; AVX512-NEXT:    vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
462; AVX512-NEXT:    retq
463entry:
464  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
465  %C = zext <8 x i8> %B to <8 x i64>
466  ret <8 x i64> %C
467}
468
469define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
470; SSE2-LABEL: zext_8i16_to_4i32:
471; SSE2:       # %bb.0: # %entry
472; SSE2-NEXT:    pxor %xmm1, %xmm1
473; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
474; SSE2-NEXT:    retq
475;
476; SSSE3-LABEL: zext_8i16_to_4i32:
477; SSSE3:       # %bb.0: # %entry
478; SSSE3-NEXT:    pxor %xmm1, %xmm1
479; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
480; SSSE3-NEXT:    retq
481;
482; SSE41-LABEL: zext_8i16_to_4i32:
483; SSE41:       # %bb.0: # %entry
484; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
485; SSE41-NEXT:    retq
486;
487; AVX-LABEL: zext_8i16_to_4i32:
488; AVX:       # %bb.0: # %entry
489; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
490; AVX-NEXT:    retq
491entry:
492  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
493  %C = zext <4 x i16> %B to <4 x i32>
494  ret <4 x i32> %C
495}
496
497define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
498; SSE2-LABEL: zext_8i16_to_8i32:
499; SSE2:       # %bb.0: # %entry
500; SSE2-NEXT:    movdqa %xmm0, %xmm1
501; SSE2-NEXT:    pxor %xmm2, %xmm2
502; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
503; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
504; SSE2-NEXT:    retq
505;
506; SSSE3-LABEL: zext_8i16_to_8i32:
507; SSSE3:       # %bb.0: # %entry
508; SSSE3-NEXT:    movdqa %xmm0, %xmm1
509; SSSE3-NEXT:    pxor %xmm2, %xmm2
510; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
511; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
512; SSSE3-NEXT:    retq
513;
514; SSE41-LABEL: zext_8i16_to_8i32:
515; SSE41:       # %bb.0: # %entry
516; SSE41-NEXT:    movdqa %xmm0, %xmm1
517; SSE41-NEXT:    pxor %xmm2, %xmm2
518; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
519; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
520; SSE41-NEXT:    retq
521;
522; AVX1-LABEL: zext_8i16_to_8i32:
523; AVX1:       # %bb.0: # %entry
524; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
525; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
526; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
527; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
528; AVX1-NEXT:    retq
529;
530; AVX2-LABEL: zext_8i16_to_8i32:
531; AVX2:       # %bb.0: # %entry
532; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
533; AVX2-NEXT:    retq
534;
535; AVX512-LABEL: zext_8i16_to_8i32:
536; AVX512:       # %bb.0: # %entry
537; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
538; AVX512-NEXT:    retq
539entry:
540  %B = zext <8 x i16> %A to <8 x i32>
541  ret <8 x i32>%B
542}
543
544define <16 x i32> @zext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp {
545; SSE2-LABEL: zext_16i16_to_16i32:
546; SSE2:       # %bb.0: # %entry
547; SSE2-NEXT:    movdqa %xmm1, %xmm3
548; SSE2-NEXT:    movdqa %xmm0, %xmm1
549; SSE2-NEXT:    pxor %xmm4, %xmm4
550; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
551; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
552; SSE2-NEXT:    movdqa %xmm3, %xmm2
553; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
554; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
555; SSE2-NEXT:    retq
556;
557; SSSE3-LABEL: zext_16i16_to_16i32:
558; SSSE3:       # %bb.0: # %entry
559; SSSE3-NEXT:    movdqa %xmm1, %xmm3
560; SSSE3-NEXT:    movdqa %xmm0, %xmm1
561; SSSE3-NEXT:    pxor %xmm4, %xmm4
562; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
563; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
564; SSSE3-NEXT:    movdqa %xmm3, %xmm2
565; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
566; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
567; SSSE3-NEXT:    retq
568;
569; SSE41-LABEL: zext_16i16_to_16i32:
570; SSE41:       # %bb.0: # %entry
571; SSE41-NEXT:    movdqa %xmm1, %xmm3
572; SSE41-NEXT:    movdqa %xmm0, %xmm1
573; SSE41-NEXT:    pxor %xmm4, %xmm4
574; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
575; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
576; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
577; SSE41-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
578; SSE41-NEXT:    retq
579;
580; AVX1-LABEL: zext_16i16_to_16i32:
581; AVX1:       # %bb.0: # %entry
582; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
583; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
584; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
585; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
586; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
587; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
588; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
589; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
590; AVX1-NEXT:    vmovaps %ymm2, %ymm0
591; AVX1-NEXT:    retq
592;
593; AVX2-LABEL: zext_16i16_to_16i32:
594; AVX2:       # %bb.0: # %entry
595; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
596; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
597; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
598; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
599; AVX2-NEXT:    retq
600;
601; AVX512-LABEL: zext_16i16_to_16i32:
602; AVX512:       # %bb.0: # %entry
603; AVX512-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
604; AVX512-NEXT:    retq
605entry:
606  %B = zext <16 x i16> %A to <16 x i32>
607  ret <16 x i32> %B
608}
609
610define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
611; SSE2-LABEL: zext_8i16_to_2i64:
612; SSE2:       # %bb.0: # %entry
613; SSE2-NEXT:    pxor %xmm1, %xmm1
614; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
615; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
616; SSE2-NEXT:    retq
617;
618; SSSE3-LABEL: zext_8i16_to_2i64:
619; SSSE3:       # %bb.0: # %entry
620; SSSE3-NEXT:    pxor %xmm1, %xmm1
621; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
622; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
623; SSSE3-NEXT:    retq
624;
625; SSE41-LABEL: zext_8i16_to_2i64:
626; SSE41:       # %bb.0: # %entry
627; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
628; SSE41-NEXT:    retq
629;
630; AVX-LABEL: zext_8i16_to_2i64:
631; AVX:       # %bb.0: # %entry
632; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
633; AVX-NEXT:    retq
634entry:
635  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
636  %C = zext <2 x i16> %B to <2 x i64>
637  ret <2 x i64> %C
638}
639
640define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
641; SSE2-LABEL: zext_8i16_to_4i64:
642; SSE2:       # %bb.0: # %entry
643; SSE2-NEXT:    movdqa %xmm0, %xmm1
644; SSE2-NEXT:    pxor %xmm2, %xmm2
645; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
646; SSE2-NEXT:    movdqa %xmm1, %xmm0
647; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
648; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
649; SSE2-NEXT:    retq
650;
651; SSSE3-LABEL: zext_8i16_to_4i64:
652; SSSE3:       # %bb.0: # %entry
653; SSSE3-NEXT:    movdqa %xmm0, %xmm1
654; SSSE3-NEXT:    pxor %xmm2, %xmm2
655; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
656; SSSE3-NEXT:    movdqa %xmm1, %xmm0
657; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
658; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
659; SSSE3-NEXT:    retq
660;
661; SSE41-LABEL: zext_8i16_to_4i64:
662; SSE41:       # %bb.0: # %entry
663; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
664; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
665; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
666; SSE41-NEXT:    movdqa %xmm2, %xmm0
667; SSE41-NEXT:    retq
668;
669; AVX1-LABEL: zext_8i16_to_4i64:
670; AVX1:       # %bb.0: # %entry
671; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
672; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
673; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
674; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
675; AVX1-NEXT:    retq
676;
677; AVX2-LABEL: zext_8i16_to_4i64:
678; AVX2:       # %bb.0: # %entry
679; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
680; AVX2-NEXT:    retq
681;
682; AVX512-LABEL: zext_8i16_to_4i64:
683; AVX512:       # %bb.0: # %entry
684; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
685; AVX512-NEXT:    retq
686entry:
687  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
688  %C = zext <4 x i16> %B to <4 x i64>
689  ret <4 x i64> %C
690}
691
692define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp {
693; SSE2-LABEL: zext_8i16_to_8i64:
694; SSE2:       # %bb.0: # %entry
695; SSE2-NEXT:    movdqa %xmm0, %xmm3
696; SSE2-NEXT:    pxor %xmm4, %xmm4
697; SSE2-NEXT:    movdqa %xmm0, %xmm1
698; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
699; SSE2-NEXT:    movdqa %xmm1, %xmm0
700; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
701; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
702; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
703; SSE2-NEXT:    movdqa %xmm3, %xmm2
704; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
705; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
706; SSE2-NEXT:    retq
707;
708; SSSE3-LABEL: zext_8i16_to_8i64:
709; SSSE3:       # %bb.0: # %entry
710; SSSE3-NEXT:    movdqa %xmm0, %xmm3
711; SSSE3-NEXT:    pxor %xmm4, %xmm4
712; SSSE3-NEXT:    movdqa %xmm0, %xmm1
713; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
714; SSSE3-NEXT:    movdqa %xmm1, %xmm0
715; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
716; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
717; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
718; SSSE3-NEXT:    movdqa %xmm3, %xmm2
719; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
720; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
721; SSSE3-NEXT:    retq
722;
723; SSE41-LABEL: zext_8i16_to_8i64:
724; SSE41:       # %bb.0: # %entry
725; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
726; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
727; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
728; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
729; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
730; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
731; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
732; SSE41-NEXT:    movdqa %xmm4, %xmm0
733; SSE41-NEXT:    retq
734;
735; AVX1-LABEL: zext_8i16_to_8i64:
736; AVX1:       # %bb.0: # %entry
737; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
738; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
739; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
740; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
741; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
742; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
743; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
744; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
745; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
746; AVX1-NEXT:    vmovaps %ymm2, %ymm0
747; AVX1-NEXT:    retq
748;
749; AVX2-LABEL: zext_8i16_to_8i64:
750; AVX2:       # %bb.0: # %entry
751; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
752; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
753; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
754; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
755; AVX2-NEXT:    retq
756;
757; AVX512-LABEL: zext_8i16_to_8i64:
758; AVX512:       # %bb.0: # %entry
759; AVX512-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
760; AVX512-NEXT:    retq
761entry:
762  %B = zext <8 x i16> %A to <8 x i64>
763  ret <8 x i64> %B
764}
765
766define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
767; SSE2-LABEL: zext_4i32_to_2i64:
768; SSE2:       # %bb.0: # %entry
769; SSE2-NEXT:    xorps %xmm1, %xmm1
770; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
771; SSE2-NEXT:    retq
772;
773; SSSE3-LABEL: zext_4i32_to_2i64:
774; SSSE3:       # %bb.0: # %entry
775; SSSE3-NEXT:    xorps %xmm1, %xmm1
776; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
777; SSSE3-NEXT:    retq
778;
779; SSE41-LABEL: zext_4i32_to_2i64:
780; SSE41:       # %bb.0: # %entry
781; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
782; SSE41-NEXT:    retq
783;
784; AVX-LABEL: zext_4i32_to_2i64:
785; AVX:       # %bb.0: # %entry
786; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
787; AVX-NEXT:    retq
788entry:
789  %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
790  %C = zext <2 x i32> %B to <2 x i64>
791  ret <2 x i64> %C
792}
793
794define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
795; SSE2-LABEL: zext_4i32_to_4i64:
796; SSE2:       # %bb.0: # %entry
797; SSE2-NEXT:    movaps %xmm0, %xmm1
798; SSE2-NEXT:    xorps %xmm2, %xmm2
799; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
800; SSE2-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
801; SSE2-NEXT:    retq
802;
803; SSSE3-LABEL: zext_4i32_to_4i64:
804; SSSE3:       # %bb.0: # %entry
805; SSSE3-NEXT:    movaps %xmm0, %xmm1
806; SSSE3-NEXT:    xorps %xmm2, %xmm2
807; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
808; SSSE3-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
809; SSSE3-NEXT:    retq
810;
811; SSE41-LABEL: zext_4i32_to_4i64:
812; SSE41:       # %bb.0: # %entry
813; SSE41-NEXT:    movdqa %xmm0, %xmm1
814; SSE41-NEXT:    pxor %xmm2, %xmm2
815; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
816; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
817; SSE41-NEXT:    retq
818;
819; AVX1-LABEL: zext_4i32_to_4i64:
820; AVX1:       # %bb.0: # %entry
821; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
822; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
823; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
824; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
825; AVX1-NEXT:    retq
826;
827; AVX2-LABEL: zext_4i32_to_4i64:
828; AVX2:       # %bb.0: # %entry
829; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
830; AVX2-NEXT:    retq
831;
832; AVX512-LABEL: zext_4i32_to_4i64:
833; AVX512:       # %bb.0: # %entry
834; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
835; AVX512-NEXT:    retq
836entry:
837  %B = zext <4 x i32> %A to <4 x i64>
838  ret <4 x i64>%B
839}
840
841define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp {
842; SSE2-LABEL: zext_8i32_to_8i64:
843; SSE2:       # %bb.0: # %entry
844; SSE2-NEXT:    movaps %xmm1, %xmm3
845; SSE2-NEXT:    movaps %xmm0, %xmm1
846; SSE2-NEXT:    xorps %xmm4, %xmm4
847; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
848; SSE2-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
849; SSE2-NEXT:    movaps %xmm3, %xmm2
850; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
851; SSE2-NEXT:    unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
852; SSE2-NEXT:    retq
853;
854; SSSE3-LABEL: zext_8i32_to_8i64:
855; SSSE3:       # %bb.0: # %entry
856; SSSE3-NEXT:    movaps %xmm1, %xmm3
857; SSSE3-NEXT:    movaps %xmm0, %xmm1
858; SSSE3-NEXT:    xorps %xmm4, %xmm4
859; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
860; SSSE3-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
861; SSSE3-NEXT:    movaps %xmm3, %xmm2
862; SSSE3-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
863; SSSE3-NEXT:    unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
864; SSSE3-NEXT:    retq
865;
866; SSE41-LABEL: zext_8i32_to_8i64:
867; SSE41:       # %bb.0: # %entry
868; SSE41-NEXT:    movdqa %xmm1, %xmm3
869; SSE41-NEXT:    movdqa %xmm0, %xmm1
870; SSE41-NEXT:    pxor %xmm4, %xmm4
871; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
872; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
873; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero
874; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
875; SSE41-NEXT:    retq
876;
877; AVX1-LABEL: zext_8i32_to_8i64:
878; AVX1:       # %bb.0: # %entry
879; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
880; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
881; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
882; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
883; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
884; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
885; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
886; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
887; AVX1-NEXT:    vmovaps %ymm2, %ymm0
888; AVX1-NEXT:    retq
889;
890; AVX2-LABEL: zext_8i32_to_8i64:
891; AVX2:       # %bb.0: # %entry
892; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
893; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
894; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
895; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
896; AVX2-NEXT:    retq
897;
898; AVX512-LABEL: zext_8i32_to_8i64:
899; AVX512:       # %bb.0: # %entry
900; AVX512-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
901; AVX512-NEXT:    retq
902entry:
903  %B = zext <8 x i32> %A to <8 x i64>
904  ret <8 x i64>%B
905}
906
907define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
908; SSE2-LABEL: load_zext_2i8_to_2i64:
909; SSE2:       # %bb.0: # %entry
910; SSE2-NEXT:    movzwl (%rdi), %eax
911; SSE2-NEXT:    movd %eax, %xmm0
912; SSE2-NEXT:    pxor %xmm1, %xmm1
913; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
914; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
915; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
916; SSE2-NEXT:    retq
917;
918; SSSE3-LABEL: load_zext_2i8_to_2i64:
919; SSSE3:       # %bb.0: # %entry
920; SSSE3-NEXT:    movzwl (%rdi), %eax
921; SSSE3-NEXT:    movd %eax, %xmm0
922; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
923; SSSE3-NEXT:    retq
924;
925; SSE41-LABEL: load_zext_2i8_to_2i64:
926; SSE41:       # %bb.0: # %entry
927; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
928; SSE41-NEXT:    retq
929;
930; AVX-LABEL: load_zext_2i8_to_2i64:
931; AVX:       # %bb.0: # %entry
932; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
933; AVX-NEXT:    retq
934entry:
935 %X = load <2 x i8>, <2 x i8>* %ptr
936 %Y = zext <2 x i8> %X to <2 x i64>
937 ret <2 x i64> %Y
938}
939
940define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
941; SSE2-LABEL: load_zext_4i8_to_4i32:
942; SSE2:       # %bb.0: # %entry
943; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
944; SSE2-NEXT:    pxor %xmm1, %xmm1
945; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
946; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
947; SSE2-NEXT:    retq
948;
949; SSSE3-LABEL: load_zext_4i8_to_4i32:
950; SSSE3:       # %bb.0: # %entry
951; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
952; SSSE3-NEXT:    pxor %xmm1, %xmm1
953; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
954; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
955; SSSE3-NEXT:    retq
956;
957; SSE41-LABEL: load_zext_4i8_to_4i32:
958; SSE41:       # %bb.0: # %entry
959; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
960; SSE41-NEXT:    retq
961;
962; AVX-LABEL: load_zext_4i8_to_4i32:
963; AVX:       # %bb.0: # %entry
964; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
965; AVX-NEXT:    retq
966entry:
967 %X = load <4 x i8>, <4 x i8>* %ptr
968 %Y = zext <4 x i8> %X to <4 x i32>
969 ret <4 x i32> %Y
970}
971
972define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
973; SSE2-LABEL: load_zext_4i8_to_4i64:
974; SSE2:       # %bb.0: # %entry
975; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
976; SSE2-NEXT:    pxor %xmm2, %xmm2
977; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
978; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
979; SSE2-NEXT:    movdqa %xmm1, %xmm0
980; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
981; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
982; SSE2-NEXT:    retq
983;
984; SSSE3-LABEL: load_zext_4i8_to_4i64:
985; SSSE3:       # %bb.0: # %entry
986; SSSE3-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
987; SSSE3-NEXT:    movdqa %xmm1, %xmm0
988; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
989; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
990; SSSE3-NEXT:    retq
991;
992; SSE41-LABEL: load_zext_4i8_to_4i64:
993; SSE41:       # %bb.0: # %entry
994; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
995; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
996; SSE41-NEXT:    retq
997;
998; AVX1-LABEL: load_zext_4i8_to_4i64:
999; AVX1:       # %bb.0: # %entry
1000; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1001; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1002; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1003; AVX1-NEXT:    retq
1004;
1005; AVX2-LABEL: load_zext_4i8_to_4i64:
1006; AVX2:       # %bb.0: # %entry
1007; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1008; AVX2-NEXT:    retq
1009;
1010; AVX512-LABEL: load_zext_4i8_to_4i64:
1011; AVX512:       # %bb.0: # %entry
1012; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1013; AVX512-NEXT:    retq
1014entry:
1015 %X = load <4 x i8>, <4 x i8>* %ptr
1016 %Y = zext <4 x i8> %X to <4 x i64>
1017 ret <4 x i64> %Y
1018}
1019
1020define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
1021; SSE2-LABEL: load_zext_8i8_to_8i16:
1022; SSE2:       # %bb.0: # %entry
1023; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1024; SSE2-NEXT:    pxor %xmm1, %xmm1
1025; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1026; SSE2-NEXT:    retq
1027;
1028; SSSE3-LABEL: load_zext_8i8_to_8i16:
1029; SSSE3:       # %bb.0: # %entry
1030; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1031; SSSE3-NEXT:    pxor %xmm1, %xmm1
1032; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1033; SSSE3-NEXT:    retq
1034;
1035; SSE41-LABEL: load_zext_8i8_to_8i16:
1036; SSE41:       # %bb.0: # %entry
1037; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1038; SSE41-NEXT:    retq
1039;
1040; AVX-LABEL: load_zext_8i8_to_8i16:
1041; AVX:       # %bb.0: # %entry
1042; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1043; AVX-NEXT:    retq
1044entry:
1045 %X = load <8 x i8>, <8 x i8>* %ptr
1046 %Y = zext <8 x i8> %X to <8 x i16>
1047 ret <8 x i16> %Y
1048}
1049
1050define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
1051; SSE2-LABEL: load_zext_8i8_to_8i32:
1052; SSE2:       # %bb.0: # %entry
1053; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1054; SSE2-NEXT:    pxor %xmm2, %xmm2
1055; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1056; SSE2-NEXT:    movdqa %xmm1, %xmm0
1057; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1058; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1059; SSE2-NEXT:    retq
1060;
1061; SSSE3-LABEL: load_zext_8i8_to_8i32:
1062; SSSE3:       # %bb.0: # %entry
1063; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1064; SSSE3-NEXT:    pxor %xmm2, %xmm2
1065; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1066; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1067; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1068; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1069; SSSE3-NEXT:    retq
1070;
1071; SSE41-LABEL: load_zext_8i8_to_8i32:
1072; SSE41:       # %bb.0: # %entry
1073; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1074; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1075; SSE41-NEXT:    retq
1076;
1077; AVX1-LABEL: load_zext_8i8_to_8i32:
1078; AVX1:       # %bb.0: # %entry
1079; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1080; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1081; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1082; AVX1-NEXT:    retq
1083;
1084; AVX2-LABEL: load_zext_8i8_to_8i32:
1085; AVX2:       # %bb.0: # %entry
1086; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1087; AVX2-NEXT:    retq
1088;
1089; AVX512-LABEL: load_zext_8i8_to_8i32:
1090; AVX512:       # %bb.0: # %entry
1091; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1092; AVX512-NEXT:    retq
1093entry:
1094 %X = load <8 x i8>, <8 x i8>* %ptr
1095 %Y = zext <8 x i8> %X to <8 x i32>
1096 ret <8 x i32> %Y
1097}
1098
1099define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) {
1100; SSE2-LABEL: load_zext_16i8_to_8i32:
1101; SSE2:       # %bb.0: # %entry
1102; SSE2-NEXT:    movdqa (%rdi), %xmm1
1103; SSE2-NEXT:    pxor %xmm2, %xmm2
1104; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1105; SSE2-NEXT:    movdqa %xmm1, %xmm0
1106; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1107; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1108; SSE2-NEXT:    retq
1109;
1110; SSSE3-LABEL: load_zext_16i8_to_8i32:
1111; SSSE3:       # %bb.0: # %entry
1112; SSSE3-NEXT:    movdqa (%rdi), %xmm1
1113; SSSE3-NEXT:    pxor %xmm2, %xmm2
1114; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1115; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1116; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1117; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1118; SSSE3-NEXT:    retq
1119;
1120; SSE41-LABEL: load_zext_16i8_to_8i32:
1121; SSE41:       # %bb.0: # %entry
1122; SSE41-NEXT:    movdqa (%rdi), %xmm1
1123; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1124; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
1125; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1126; SSE41-NEXT:    retq
1127;
1128; AVX1-LABEL: load_zext_16i8_to_8i32:
1129; AVX1:       # %bb.0: # %entry
1130; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
1131; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1132; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1133; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1134; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1135; AVX1-NEXT:    retq
1136;
1137; AVX2-LABEL: load_zext_16i8_to_8i32:
1138; AVX2:       # %bb.0: # %entry
1139; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1140; AVX2-NEXT:    retq
1141;
1142; AVX512-LABEL: load_zext_16i8_to_8i32:
1143; AVX512:       # %bb.0: # %entry
1144; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1145; AVX512-NEXT:    retq
1146entry:
1147 %X = load <16 x i8>, <16 x i8>* %ptr
1148 %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1149 %Z = zext <8 x i8> %Y to <8 x i32>
1150 ret <8 x i32> %Z
1151}
1152
1153define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) {
1154; SSE2-LABEL: load_zext_8i8_to_8i64:
1155; SSE2:       # %bb.0: # %entry
1156; SSE2-NEXT:    movq {{.*#+}} xmm3 = mem[0],zero
1157; SSE2-NEXT:    pxor %xmm4, %xmm4
1158; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
1159; SSE2-NEXT:    movdqa %xmm3, %xmm1
1160; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1161; SSE2-NEXT:    movdqa %xmm1, %xmm0
1162; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
1163; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1164; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
1165; SSE2-NEXT:    movdqa %xmm3, %xmm2
1166; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
1167; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
1168; SSE2-NEXT:    retq
1169;
1170; SSSE3-LABEL: load_zext_8i8_to_8i64:
1171; SSSE3:       # %bb.0: # %entry
1172; SSSE3-NEXT:    movq {{.*#+}} xmm3 = mem[0],zero
1173; SSSE3-NEXT:    movdqa %xmm3, %xmm0
1174; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1175; SSSE3-NEXT:    movdqa %xmm3, %xmm1
1176; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
1177; SSSE3-NEXT:    movdqa %xmm3, %xmm2
1178; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[4],zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero,zero,zero
1179; SSSE3-NEXT:    pshufb {{.*#+}} xmm3 = xmm3[6],zero,zero,zero,zero,zero,zero,zero,xmm3[7],zero,zero,zero,zero,zero,zero,zero
1180; SSSE3-NEXT:    retq
1181;
1182; SSE41-LABEL: load_zext_8i8_to_8i64:
1183; SSE41:       # %bb.0: # %entry
1184; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1185; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1186; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1187; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1188; SSE41-NEXT:    retq
1189;
1190; AVX1-LABEL: load_zext_8i8_to_8i64:
1191; AVX1:       # %bb.0: # %entry
1192; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1193; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1194; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1195; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1196; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
1197; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
1198; AVX1-NEXT:    retq
1199;
1200; AVX2-LABEL: load_zext_8i8_to_8i64:
1201; AVX2:       # %bb.0: # %entry
1202; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1203; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1204; AVX2-NEXT:    retq
1205;
1206; AVX512-LABEL: load_zext_8i8_to_8i64:
1207; AVX512:       # %bb.0: # %entry
1208; AVX512-NEXT:    vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
1209; AVX512-NEXT:    retq
1210entry:
1211 %X = load <8 x i8>, <8 x i8>* %ptr
1212 %Y = zext <8 x i8> %X to <8 x i64>
1213 ret <8 x i64> %Y
1214}
1215
1216define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
1217; SSE2-LABEL: load_zext_16i8_to_16i16:
1218; SSE2:       # %bb.0: # %entry
1219; SSE2-NEXT:    movdqa (%rdi), %xmm1
1220; SSE2-NEXT:    pxor %xmm2, %xmm2
1221; SSE2-NEXT:    movdqa %xmm1, %xmm0
1222; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1223; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1224; SSE2-NEXT:    retq
1225;
1226; SSSE3-LABEL: load_zext_16i8_to_16i16:
1227; SSSE3:       # %bb.0: # %entry
1228; SSSE3-NEXT:    movdqa (%rdi), %xmm1
1229; SSSE3-NEXT:    pxor %xmm2, %xmm2
1230; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1231; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1232; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1233; SSSE3-NEXT:    retq
1234;
1235; SSE41-LABEL: load_zext_16i8_to_16i16:
1236; SSE41:       # %bb.0: # %entry
1237; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1238; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1239; SSE41-NEXT:    retq
1240;
1241; AVX1-LABEL: load_zext_16i8_to_16i16:
1242; AVX1:       # %bb.0: # %entry
1243; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1244; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1245; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1246; AVX1-NEXT:    retq
1247;
1248; AVX2-LABEL: load_zext_16i8_to_16i16:
1249; AVX2:       # %bb.0: # %entry
1250; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1251; AVX2-NEXT:    retq
1252;
1253; AVX512-LABEL: load_zext_16i8_to_16i16:
1254; AVX512:       # %bb.0: # %entry
1255; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1256; AVX512-NEXT:    retq
1257entry:
1258 %X = load <16 x i8>, <16 x i8>* %ptr
1259 %Y = zext <16 x i8> %X to <16 x i16>
1260 ret <16 x i16> %Y
1261}
1262
1263define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
1264; SSE2-LABEL: load_zext_2i16_to_2i64:
1265; SSE2:       # %bb.0: # %entry
1266; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1267; SSE2-NEXT:    pxor %xmm1, %xmm1
1268; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1269; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1270; SSE2-NEXT:    retq
1271;
1272; SSSE3-LABEL: load_zext_2i16_to_2i64:
1273; SSSE3:       # %bb.0: # %entry
1274; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1275; SSSE3-NEXT:    pxor %xmm1, %xmm1
1276; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1277; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1278; SSSE3-NEXT:    retq
1279;
1280; SSE41-LABEL: load_zext_2i16_to_2i64:
1281; SSE41:       # %bb.0: # %entry
1282; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1283; SSE41-NEXT:    retq
1284;
1285; AVX-LABEL: load_zext_2i16_to_2i64:
1286; AVX:       # %bb.0: # %entry
1287; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1288; AVX-NEXT:    retq
1289entry:
1290 %X = load <2 x i16>, <2 x i16>* %ptr
1291 %Y = zext <2 x i16> %X to <2 x i64>
1292 ret <2 x i64> %Y
1293}
1294
1295define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
1296; SSE2-LABEL: load_zext_4i16_to_4i32:
1297; SSE2:       # %bb.0: # %entry
1298; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1299; SSE2-NEXT:    pxor %xmm1, %xmm1
1300; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1301; SSE2-NEXT:    retq
1302;
1303; SSSE3-LABEL: load_zext_4i16_to_4i32:
1304; SSSE3:       # %bb.0: # %entry
1305; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1306; SSSE3-NEXT:    pxor %xmm1, %xmm1
1307; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1308; SSSE3-NEXT:    retq
1309;
1310; SSE41-LABEL: load_zext_4i16_to_4i32:
1311; SSE41:       # %bb.0: # %entry
1312; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1313; SSE41-NEXT:    retq
1314;
1315; AVX-LABEL: load_zext_4i16_to_4i32:
1316; AVX:       # %bb.0: # %entry
1317; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1318; AVX-NEXT:    retq
1319entry:
1320 %X = load <4 x i16>, <4 x i16>* %ptr
1321 %Y = zext <4 x i16> %X to <4 x i32>
1322 ret <4 x i32> %Y
1323}
1324
1325define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
1326; SSE2-LABEL: load_zext_4i16_to_4i64:
1327; SSE2:       # %bb.0: # %entry
1328; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1329; SSE2-NEXT:    pxor %xmm2, %xmm2
1330; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1331; SSE2-NEXT:    movdqa %xmm1, %xmm0
1332; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1333; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1334; SSE2-NEXT:    retq
1335;
1336; SSSE3-LABEL: load_zext_4i16_to_4i64:
1337; SSSE3:       # %bb.0: # %entry
1338; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1339; SSSE3-NEXT:    pxor %xmm2, %xmm2
1340; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1341; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1342; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1343; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1344; SSSE3-NEXT:    retq
1345;
1346; SSE41-LABEL: load_zext_4i16_to_4i64:
1347; SSE41:       # %bb.0: # %entry
1348; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1349; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1350; SSE41-NEXT:    retq
1351;
1352; AVX1-LABEL: load_zext_4i16_to_4i64:
1353; AVX1:       # %bb.0: # %entry
1354; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1355; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1356; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1357; AVX1-NEXT:    retq
1358;
1359; AVX2-LABEL: load_zext_4i16_to_4i64:
1360; AVX2:       # %bb.0: # %entry
1361; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1362; AVX2-NEXT:    retq
1363;
1364; AVX512-LABEL: load_zext_4i16_to_4i64:
1365; AVX512:       # %bb.0: # %entry
1366; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1367; AVX512-NEXT:    retq
1368entry:
1369 %X = load <4 x i16>, <4 x i16>* %ptr
1370 %Y = zext <4 x i16> %X to <4 x i64>
1371 ret <4 x i64> %Y
1372}
1373
1374define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
1375; SSE2-LABEL: load_zext_8i16_to_8i32:
1376; SSE2:       # %bb.0: # %entry
1377; SSE2-NEXT:    movdqa (%rdi), %xmm1
1378; SSE2-NEXT:    pxor %xmm2, %xmm2
1379; SSE2-NEXT:    movdqa %xmm1, %xmm0
1380; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1381; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1382; SSE2-NEXT:    retq
1383;
1384; SSSE3-LABEL: load_zext_8i16_to_8i32:
1385; SSSE3:       # %bb.0: # %entry
1386; SSSE3-NEXT:    movdqa (%rdi), %xmm1
1387; SSSE3-NEXT:    pxor %xmm2, %xmm2
1388; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1389; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1390; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1391; SSSE3-NEXT:    retq
1392;
1393; SSE41-LABEL: load_zext_8i16_to_8i32:
1394; SSE41:       # %bb.0: # %entry
1395; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1396; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1397; SSE41-NEXT:    retq
1398;
1399; AVX1-LABEL: load_zext_8i16_to_8i32:
1400; AVX1:       # %bb.0: # %entry
1401; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1402; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1403; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1404; AVX1-NEXT:    retq
1405;
1406; AVX2-LABEL: load_zext_8i16_to_8i32:
1407; AVX2:       # %bb.0: # %entry
1408; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1409; AVX2-NEXT:    retq
1410;
1411; AVX512-LABEL: load_zext_8i16_to_8i32:
1412; AVX512:       # %bb.0: # %entry
1413; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1414; AVX512-NEXT:    retq
1415entry:
1416 %X = load <8 x i16>, <8 x i16>* %ptr
1417 %Y = zext <8 x i16> %X to <8 x i32>
1418 ret <8 x i32> %Y
1419}
1420
1421define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
1422; SSE2-LABEL: load_zext_2i32_to_2i64:
1423; SSE2:       # %bb.0: # %entry
1424; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1425; SSE2-NEXT:    xorps %xmm1, %xmm1
1426; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1427; SSE2-NEXT:    retq
1428;
1429; SSSE3-LABEL: load_zext_2i32_to_2i64:
1430; SSSE3:       # %bb.0: # %entry
1431; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1432; SSSE3-NEXT:    xorps %xmm1, %xmm1
1433; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1434; SSSE3-NEXT:    retq
1435;
1436; SSE41-LABEL: load_zext_2i32_to_2i64:
1437; SSE41:       # %bb.0: # %entry
1438; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1439; SSE41-NEXT:    retq
1440;
1441; AVX-LABEL: load_zext_2i32_to_2i64:
1442; AVX:       # %bb.0: # %entry
1443; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1444; AVX-NEXT:    retq
1445entry:
1446 %X = load <2 x i32>, <2 x i32>* %ptr
1447 %Y = zext <2 x i32> %X to <2 x i64>
1448 ret <2 x i64> %Y
1449}
1450
1451define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
1452; SSE2-LABEL: load_zext_4i32_to_4i64:
1453; SSE2:       # %bb.0: # %entry
1454; SSE2-NEXT:    movaps (%rdi), %xmm1
1455; SSE2-NEXT:    xorps %xmm2, %xmm2
1456; SSE2-NEXT:    movaps %xmm1, %xmm0
1457; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1458; SSE2-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1459; SSE2-NEXT:    retq
1460;
1461; SSSE3-LABEL: load_zext_4i32_to_4i64:
1462; SSSE3:       # %bb.0: # %entry
1463; SSSE3-NEXT:    movaps (%rdi), %xmm1
1464; SSSE3-NEXT:    xorps %xmm2, %xmm2
1465; SSSE3-NEXT:    movaps %xmm1, %xmm0
1466; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1467; SSSE3-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1468; SSSE3-NEXT:    retq
1469;
1470; SSE41-LABEL: load_zext_4i32_to_4i64:
1471; SSE41:       # %bb.0: # %entry
1472; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1473; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1474; SSE41-NEXT:    retq
1475;
1476; AVX1-LABEL: load_zext_4i32_to_4i64:
1477; AVX1:       # %bb.0: # %entry
1478; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1479; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1480; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1481; AVX1-NEXT:    retq
1482;
1483; AVX2-LABEL: load_zext_4i32_to_4i64:
1484; AVX2:       # %bb.0: # %entry
1485; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1486; AVX2-NEXT:    retq
1487;
1488; AVX512-LABEL: load_zext_4i32_to_4i64:
1489; AVX512:       # %bb.0: # %entry
1490; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1491; AVX512-NEXT:    retq
1492entry:
1493 %X = load <4 x i32>, <4 x i32>* %ptr
1494 %Y = zext <4 x i32> %X to <4 x i64>
1495 ret <4 x i64> %Y
1496}
1497
1498define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
1499; SSE2-LABEL: zext_8i8_to_8i32:
1500; SSE2:       # %bb.0: # %entry
1501; SSE2-NEXT:    movdqa %xmm0, %xmm1
1502; SSE2-NEXT:    pxor %xmm2, %xmm2
1503; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1504; SSE2-NEXT:    movdqa %xmm1, %xmm0
1505; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1506; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1507; SSE2-NEXT:    retq
1508;
1509; SSSE3-LABEL: zext_8i8_to_8i32:
1510; SSSE3:       # %bb.0: # %entry
1511; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1512; SSSE3-NEXT:    pxor %xmm2, %xmm2
1513; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1514; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1515; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1516; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1517; SSSE3-NEXT:    retq
1518;
1519; SSE41-LABEL: zext_8i8_to_8i32:
1520; SSE41:       # %bb.0: # %entry
1521; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1522; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1523; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1524; SSE41-NEXT:    movdqa %xmm2, %xmm0
1525; SSE41-NEXT:    retq
1526;
1527; AVX1-LABEL: zext_8i8_to_8i32:
1528; AVX1:       # %bb.0: # %entry
1529; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1530; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1531; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1532; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1533; AVX1-NEXT:    retq
1534;
1535; AVX2-LABEL: zext_8i8_to_8i32:
1536; AVX2:       # %bb.0: # %entry
1537; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1538; AVX2-NEXT:    retq
1539;
1540; AVX512-LABEL: zext_8i8_to_8i32:
1541; AVX512:       # %bb.0: # %entry
1542; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1543; AVX512-NEXT:    retq
1544entry:
1545  %t = zext <8 x i8> %z to <8 x i32>
1546  ret <8 x i32> %t
1547}
1548
1549define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
1550; SSE2-LABEL: shuf_zext_8i16_to_8i32:
1551; SSE2:       # %bb.0: # %entry
1552; SSE2-NEXT:    movdqa %xmm0, %xmm1
1553; SSE2-NEXT:    pxor %xmm2, %xmm2
1554; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1555; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1556; SSE2-NEXT:    retq
1557;
1558; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
1559; SSSE3:       # %bb.0: # %entry
1560; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1561; SSSE3-NEXT:    pxor %xmm2, %xmm2
1562; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1563; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1564; SSSE3-NEXT:    retq
1565;
1566; SSE41-LABEL: shuf_zext_8i16_to_8i32:
1567; SSE41:       # %bb.0: # %entry
1568; SSE41-NEXT:    movdqa %xmm0, %xmm1
1569; SSE41-NEXT:    pxor %xmm2, %xmm2
1570; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1571; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1572; SSE41-NEXT:    retq
1573;
1574; AVX1-LABEL: shuf_zext_8i16_to_8i32:
1575; AVX1:       # %bb.0: # %entry
1576; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1577; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1578; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1579; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1580; AVX1-NEXT:    retq
1581;
1582; AVX2-LABEL: shuf_zext_8i16_to_8i32:
1583; AVX2:       # %bb.0: # %entry
1584; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1585; AVX2-NEXT:    retq
1586;
1587; AVX512-LABEL: shuf_zext_8i16_to_8i32:
1588; AVX512:       # %bb.0: # %entry
1589; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1590; AVX512-NEXT:    retq
1591entry:
1592  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
1593  %Z = bitcast <16 x i16> %B to <8 x i32>
1594  ret <8 x i32> %Z
1595}
1596
1597define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
1598; SSE2-LABEL: shuf_zext_4i32_to_4i64:
1599; SSE2:       # %bb.0: # %entry
1600; SSE2-NEXT:    movaps %xmm0, %xmm1
1601; SSE2-NEXT:    xorps %xmm2, %xmm2
1602; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1603; SSE2-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1604; SSE2-NEXT:    retq
1605;
1606; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
1607; SSSE3:       # %bb.0: # %entry
1608; SSSE3-NEXT:    movaps %xmm0, %xmm1
1609; SSSE3-NEXT:    xorps %xmm2, %xmm2
1610; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1611; SSSE3-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1612; SSSE3-NEXT:    retq
1613;
1614; SSE41-LABEL: shuf_zext_4i32_to_4i64:
1615; SSE41:       # %bb.0: # %entry
1616; SSE41-NEXT:    movdqa %xmm0, %xmm1
1617; SSE41-NEXT:    pxor %xmm2, %xmm2
1618; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1619; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1620; SSE41-NEXT:    retq
1621;
1622; AVX1-LABEL: shuf_zext_4i32_to_4i64:
1623; AVX1:       # %bb.0: # %entry
1624; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1625; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1626; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1627; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1628; AVX1-NEXT:    retq
1629;
1630; AVX2-LABEL: shuf_zext_4i32_to_4i64:
1631; AVX2:       # %bb.0: # %entry
1632; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1633; AVX2-NEXT:    retq
1634;
1635; AVX512-LABEL: shuf_zext_4i32_to_4i64:
1636; AVX512:       # %bb.0: # %entry
1637; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1638; AVX512-NEXT:    retq
1639entry:
1640  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
1641  %Z = bitcast <8 x i32> %B to <4 x i64>
1642  ret <4 x i64> %Z
1643}
1644
1645define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
1646; SSE2-LABEL: shuf_zext_8i8_to_8i32:
1647; SSE2:       # %bb.0: # %entry
1648; SSE2-NEXT:    movdqa %xmm0, %xmm1
1649; SSE2-NEXT:    pxor %xmm2, %xmm2
1650; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1651; SSE2-NEXT:    movdqa %xmm1, %xmm0
1652; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1653; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1654; SSE2-NEXT:    retq
1655;
1656; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
1657; SSSE3:       # %bb.0: # %entry
1658; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1659; SSSE3-NEXT:    pxor %xmm2, %xmm2
1660; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1661; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1662; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1663; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1664; SSSE3-NEXT:    retq
1665;
1666; SSE41-LABEL: shuf_zext_8i8_to_8i32:
1667; SSE41:       # %bb.0: # %entry
1668; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1669; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1670; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1671; SSE41-NEXT:    movdqa %xmm2, %xmm0
1672; SSE41-NEXT:    retq
1673;
1674; AVX1-LABEL: shuf_zext_8i8_to_8i32:
1675; AVX1:       # %bb.0: # %entry
1676; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1677; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1678; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1679; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1680; AVX1-NEXT:    retq
1681;
1682; AVX2-LABEL: shuf_zext_8i8_to_8i32:
1683; AVX2:       # %bb.0: # %entry
1684; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1685; AVX2-NEXT:    retq
1686;
1687; AVX512-LABEL: shuf_zext_8i8_to_8i32:
1688; AVX512:       # %bb.0: # %entry
1689; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1690; AVX512-NEXT:    retq
1691entry:
1692  %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
1693  %Z = bitcast <32 x i8> %B to <8 x i32>
1694  ret <8 x i32> %Z
1695}
1696
1697define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
1698; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
1699; SSE2:       # %bb.0: # %entry
1700; SSE2-NEXT:    pxor %xmm1, %xmm1
1701; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1702; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1703; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1704; SSE2-NEXT:    retq
1705;
1706; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
1707; SSSE3:       # %bb.0: # %entry
1708; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1709; SSSE3-NEXT:    retq
1710;
1711; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
1712; SSE41:       # %bb.0: # %entry
1713; SSE41-NEXT:    psrlq $48, %xmm0
1714; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1715; SSE41-NEXT:    retq
1716;
1717; AVX1-LABEL: shuf_zext_16i8_to_2i64_offset6:
1718; AVX1:       # %bb.0: # %entry
1719; AVX1-NEXT:    vpsrlq $48, %xmm0, %xmm0
1720; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1721; AVX1-NEXT:    retq
1722;
1723; AVX2-SLOW-LABEL: shuf_zext_16i8_to_2i64_offset6:
1724; AVX2-SLOW:       # %bb.0: # %entry
1725; AVX2-SLOW-NEXT:    vpsrlq $48, %xmm0, %xmm0
1726; AVX2-SLOW-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1727; AVX2-SLOW-NEXT:    retq
1728;
1729; AVX2-FAST-LABEL: shuf_zext_16i8_to_2i64_offset6:
1730; AVX2-FAST:       # %bb.0: # %entry
1731; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1732; AVX2-FAST-NEXT:    retq
1733;
1734; AVX512F-LABEL: shuf_zext_16i8_to_2i64_offset6:
1735; AVX512F:       # %bb.0: # %entry
1736; AVX512F-NEXT:    vpsrlq $48, %xmm0, %xmm0
1737; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1738; AVX512F-NEXT:    retq
1739;
1740; AVX512BW-LABEL: shuf_zext_16i8_to_2i64_offset6:
1741; AVX512BW:       # %bb.0: # %entry
1742; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1743; AVX512BW-NEXT:    retq
1744entry:
1745  %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1746  %Z = bitcast <16 x i8> %B to <2 x i64>
1747  ret <2 x i64> %Z
1748}
1749
1750define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
1751; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1752; SSE2:       # %bb.0: # %entry
1753; SSE2-NEXT:    movdqa %xmm0, %xmm1
1754; SSE2-NEXT:    psrlq $8, %xmm1
1755; SSE2-NEXT:    pxor %xmm2, %xmm2
1756; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1757; SSE2-NEXT:    movdqa %xmm1, %xmm0
1758; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1759; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1760; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1761; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1762; SSE2-NEXT:    retq
1763;
1764; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
1765; SSSE3:       # %bb.0: # %entry
1766; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1767; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
1768; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
1769; SSSE3-NEXT:    retq
1770;
1771; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
1772; SSE41:       # %bb.0: # %entry
1773; SSE41-NEXT:    movdqa %xmm0, %xmm1
1774; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1775; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1776; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1777; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1778; SSE41-NEXT:    movdqa %xmm2, %xmm0
1779; SSE41-NEXT:    retq
1780;
1781; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
1782; AVX1:       # %bb.0: # %entry
1783; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1784; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1785; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1786; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1787; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1788; AVX1-NEXT:    retq
1789;
1790; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1791; AVX2:       # %bb.0: # %entry
1792; AVX2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1793; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1794; AVX2-NEXT:    retq
1795;
1796; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11:
1797; AVX512:       # %bb.0: # %entry
1798; AVX512-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1799; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1800; AVX512-NEXT:    retq
1801entry:
1802  %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1803  %Z = bitcast <32 x i8> %B to <4 x i64>
1804  ret <4 x i64> %Z
1805}
1806
1807define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
1808; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
1809; SSE2:       # %bb.0: # %entry
1810; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1811; SSE2-NEXT:    pxor %xmm1, %xmm1
1812; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1813; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1814; SSE2-NEXT:    retq
1815;
1816; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
1817; SSSE3:       # %bb.0: # %entry
1818; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1819; SSSE3-NEXT:    retq
1820;
1821; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
1822; SSE41:       # %bb.0: # %entry
1823; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1824; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1825; SSE41-NEXT:    retq
1826;
1827; AVX1-LABEL: shuf_zext_8i16_to_2i64_offset6:
1828; AVX1:       # %bb.0: # %entry
1829; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1830; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1831; AVX1-NEXT:    retq
1832;
1833; AVX2-SLOW-LABEL: shuf_zext_8i16_to_2i64_offset6:
1834; AVX2-SLOW:       # %bb.0: # %entry
1835; AVX2-SLOW-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1836; AVX2-SLOW-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1837; AVX2-SLOW-NEXT:    retq
1838;
1839; AVX2-FAST-LABEL: shuf_zext_8i16_to_2i64_offset6:
1840; AVX2-FAST:       # %bb.0: # %entry
1841; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1842; AVX2-FAST-NEXT:    retq
1843;
1844; AVX512F-LABEL: shuf_zext_8i16_to_2i64_offset6:
1845; AVX512F:       # %bb.0: # %entry
1846; AVX512F-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1847; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1848; AVX512F-NEXT:    retq
1849;
1850; AVX512BW-LABEL: shuf_zext_8i16_to_2i64_offset6:
1851; AVX512BW:       # %bb.0: # %entry
1852; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
1853; AVX512BW-NEXT:    retq
1854entry:
1855  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
1856  %Z = bitcast <8 x i16> %B to <2 x i64>
1857  ret <2 x i64> %Z
1858}
1859
1860define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
1861; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1862; SSE2:       # %bb.0: # %entry
1863; SSE2-NEXT:    movdqa %xmm0, %xmm1
1864; SSE2-NEXT:    pxor %xmm2, %xmm2
1865; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1866; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1867; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1868; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1869; SSE2-NEXT:    retq
1870;
1871; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
1872; SSSE3:       # %bb.0: # %entry
1873; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1874; SSSE3-NEXT:    pxor %xmm2, %xmm2
1875; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1876; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1877; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1878; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1879; SSSE3-NEXT:    retq
1880;
1881; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
1882; SSE41:       # %bb.0: # %entry
1883; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1884; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1885; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1886; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1887; SSE41-NEXT:    movdqa %xmm2, %xmm0
1888; SSE41-NEXT:    retq
1889;
1890; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
1891; AVX1:       # %bb.0: # %entry
1892; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1893; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1894; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1895; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1896; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1897; AVX1-NEXT:    retq
1898;
1899; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1900; AVX2:       # %bb.0: # %entry
1901; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
1902; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1903; AVX2-NEXT:    retq
1904;
1905; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2:
1906; AVX512:       # %bb.0: # %entry
1907; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
1908; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1909; AVX512-NEXT:    retq
1910entry:
1911  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
1912  %Z = bitcast <16 x i16> %B to <4 x i64>
1913  ret <4 x i64> %Z
1914}
1915
1916define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
1917; SSE2-LABEL: shuf_zext_8i16_to_4i32_offset1:
1918; SSE2:       # %bb.0: # %entry
1919; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1920; SSE2-NEXT:    pxor %xmm1, %xmm1
1921; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1922; SSE2-NEXT:    retq
1923;
1924; SSSE3-LABEL: shuf_zext_8i16_to_4i32_offset1:
1925; SSSE3:       # %bb.0: # %entry
1926; SSSE3-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1927; SSSE3-NEXT:    pxor %xmm1, %xmm1
1928; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1929; SSSE3-NEXT:    retq
1930;
1931; SSE41-LABEL: shuf_zext_8i16_to_4i32_offset1:
1932; SSE41:       # %bb.0: # %entry
1933; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1934; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1935; SSE41-NEXT:    retq
1936;
1937; AVX1-LABEL: shuf_zext_8i16_to_4i32_offset1:
1938; AVX1:       # %bb.0: # %entry
1939; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1940; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1941; AVX1-NEXT:    retq
1942;
1943; AVX2-SLOW-LABEL: shuf_zext_8i16_to_4i32_offset1:
1944; AVX2-SLOW:       # %bb.0: # %entry
1945; AVX2-SLOW-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1946; AVX2-SLOW-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1947; AVX2-SLOW-NEXT:    retq
1948;
1949; AVX2-FAST-LABEL: shuf_zext_8i16_to_4i32_offset1:
1950; AVX2-FAST:       # %bb.0: # %entry
1951; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
1952; AVX2-FAST-NEXT:    retq
1953;
1954; AVX512F-LABEL: shuf_zext_8i16_to_4i32_offset1:
1955; AVX512F:       # %bb.0: # %entry
1956; AVX512F-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1957; AVX512F-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1958; AVX512F-NEXT:    retq
1959;
1960; AVX512BW-LABEL: shuf_zext_8i16_to_4i32_offset1:
1961; AVX512BW:       # %bb.0: # %entry
1962; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
1963; AVX512BW-NEXT:    retq
1964entry:
1965  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
1966  %Z = bitcast <8 x i16> %B to <4 x i32>
1967  ret <4 x i32> %Z
1968}
1969
1970define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
1971; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
1972; SSE2:       # %bb.0: # %entry
1973; SSE2-NEXT:    movdqa %xmm0, %xmm1
1974; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1975; SSE2-NEXT:    pxor %xmm2, %xmm2
1976; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1977; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1978; SSE2-NEXT:    retq
1979;
1980; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
1981; SSSE3:       # %bb.0: # %entry
1982; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1983; SSSE3-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1984; SSSE3-NEXT:    pxor %xmm2, %xmm2
1985; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1986; SSSE3-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1987; SSSE3-NEXT:    retq
1988;
1989; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
1990; SSE41:       # %bb.0: # %entry
1991; SSE41-NEXT:    movdqa %xmm0, %xmm1
1992; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1993; SSE41-NEXT:    pxor %xmm2, %xmm2
1994; SSE41-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1995; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1996; SSE41-NEXT:    retq
1997;
1998; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
1999; AVX1:       # %bb.0: # %entry
2000; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2001; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
2002; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2003; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2004; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
2005; AVX1-NEXT:    retq
2006;
2007; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
2008; AVX2:       # %bb.0: # %entry
2009; AVX2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2010; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2011; AVX2-NEXT:    retq
2012;
2013; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3:
2014; AVX512:       # %bb.0: # %entry
2015; AVX512-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
2016; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2017; AVX512-NEXT:    retq
2018entry:
2019  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
2020  %Z = bitcast <16 x i16> %B to <8 x i32>
2021  ret <8 x i32> %Z
2022}
2023
2024define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
2025; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
2026; SSE2:       # %bb.0: # %entry
2027; SSE2-NEXT:    pxor %xmm2, %xmm2
2028; SSE2-NEXT:    movdqa %xmm1, %xmm0
2029; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2030; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2031; SSE2-NEXT:    retq
2032;
2033; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
2034; SSSE3:       # %bb.0: # %entry
2035; SSSE3-NEXT:    pxor %xmm2, %xmm2
2036; SSSE3-NEXT:    movdqa %xmm1, %xmm0
2037; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2038; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2039; SSSE3-NEXT:    retq
2040;
2041; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
2042; SSE41:       # %bb.0: # %entry
2043; SSE41-NEXT:    pxor %xmm2, %xmm2
2044; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
2045; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2046; SSE41-NEXT:    retq
2047;
2048; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
2049; AVX1:       # %bb.0: # %entry
2050; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
2051; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2052; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2053; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2054; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2055; AVX1-NEXT:    retq
2056;
2057; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
2058; AVX2:       # %bb.0: # %entry
2059; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
2060; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2061; AVX2-NEXT:    retq
2062;
2063; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8:
2064; AVX512:       # %bb.0: # %entry
2065; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
2066; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2067; AVX512-NEXT:    retq
2068entry:
2069  %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
2070  %Z = bitcast <16 x i16> %B to <8 x i32>
2071  ret <8 x i32> %Z
2072}
2073
2074define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
2075; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
2076; SSE:       # %bb.0: # %entry
2077; SSE-NEXT:    xorps %xmm1, %xmm1
2078; SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2079; SSE-NEXT:    retq
2080;
2081; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
2082; AVX:       # %bb.0: # %entry
2083; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
2084; AVX-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2085; AVX-NEXT:    retq
2086entry:
2087  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
2088  %Z = bitcast <4 x i32> %B to <2 x i64>
2089  ret <2 x i64> %Z
2090}
2091
2092define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
2093; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
2094; SSE2:       # %bb.0: # %entry
2095; SSE2-NEXT:    movdqa %xmm0, %xmm1
2096; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
2097; SSE2-NEXT:    pand %xmm1, %xmm0
2098; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2099; SSE2-NEXT:    retq
2100;
2101; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
2102; SSSE3:       # %bb.0: # %entry
2103; SSSE3-NEXT:    movdqa %xmm0, %xmm1
2104; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
2105; SSSE3-NEXT:    pand %xmm1, %xmm0
2106; SSSE3-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2107; SSSE3-NEXT:    retq
2108;
2109; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
2110; SSE41:       # %bb.0: # %entry
2111; SSE41-NEXT:    movdqa %xmm0, %xmm1
2112; SSE41-NEXT:    pxor %xmm0, %xmm0
2113; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
2114; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2115; SSE41-NEXT:    retq
2116;
2117; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
2118; AVX1:       # %bb.0: # %entry
2119; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2120; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
2121; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2122; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
2123; AVX1-NEXT:    retq
2124;
2125; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
2126; AVX2:       # %bb.0: # %entry
2127; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
2128; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2129; AVX2-NEXT:    retq
2130;
2131; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1:
2132; AVX512:       # %bb.0: # %entry
2133; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
2134; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2135; AVX512-NEXT:    retq
2136entry:
2137  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
2138  %Z = bitcast <8 x i32> %B to <4 x i64>
2139  ret <4 x i64> %Z
2140}
2141
2142define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) {
2143; SSE2-LABEL: zext_32i8_to_32i32:
2144; SSE2:       # %bb.0:
2145; SSE2-NEXT:    movq %rdi, %rax
2146; SSE2-NEXT:    pxor %xmm2, %xmm2
2147; SSE2-NEXT:    movdqa %xmm0, %xmm3
2148; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2149; SSE2-NEXT:    movdqa %xmm3, %xmm8
2150; SSE2-NEXT:    punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
2151; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2152; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
2153; SSE2-NEXT:    movdqa %xmm0, %xmm5
2154; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
2155; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2156; SSE2-NEXT:    movdqa %xmm1, %xmm6
2157; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2158; SSE2-NEXT:    movdqa %xmm6, %xmm7
2159; SSE2-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
2160; SSE2-NEXT:    punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2161; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2162; SSE2-NEXT:    movdqa %xmm1, %xmm4
2163; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2164; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2165; SSE2-NEXT:    movdqa %xmm1, 112(%rdi)
2166; SSE2-NEXT:    movdqa %xmm4, 96(%rdi)
2167; SSE2-NEXT:    movdqa %xmm6, 80(%rdi)
2168; SSE2-NEXT:    movdqa %xmm7, 64(%rdi)
2169; SSE2-NEXT:    movdqa %xmm0, 48(%rdi)
2170; SSE2-NEXT:    movdqa %xmm5, 32(%rdi)
2171; SSE2-NEXT:    movdqa %xmm3, 16(%rdi)
2172; SSE2-NEXT:    movdqa %xmm8, (%rdi)
2173; SSE2-NEXT:    retq
2174;
2175; SSSE3-LABEL: zext_32i8_to_32i32:
2176; SSSE3:       # %bb.0:
2177; SSSE3-NEXT:    movq %rdi, %rax
2178; SSSE3-NEXT:    pxor %xmm2, %xmm2
2179; SSSE3-NEXT:    movdqa %xmm0, %xmm3
2180; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2181; SSSE3-NEXT:    movdqa %xmm3, %xmm8
2182; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
2183; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2184; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
2185; SSSE3-NEXT:    movdqa %xmm0, %xmm5
2186; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
2187; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2188; SSSE3-NEXT:    movdqa %xmm1, %xmm6
2189; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2190; SSSE3-NEXT:    movdqa %xmm6, %xmm7
2191; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
2192; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2193; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2194; SSSE3-NEXT:    movdqa %xmm1, %xmm4
2195; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2196; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2197; SSSE3-NEXT:    movdqa %xmm1, 112(%rdi)
2198; SSSE3-NEXT:    movdqa %xmm4, 96(%rdi)
2199; SSSE3-NEXT:    movdqa %xmm6, 80(%rdi)
2200; SSSE3-NEXT:    movdqa %xmm7, 64(%rdi)
2201; SSSE3-NEXT:    movdqa %xmm0, 48(%rdi)
2202; SSSE3-NEXT:    movdqa %xmm5, 32(%rdi)
2203; SSSE3-NEXT:    movdqa %xmm3, 16(%rdi)
2204; SSSE3-NEXT:    movdqa %xmm8, (%rdi)
2205; SSSE3-NEXT:    retq
2206;
2207; SSE41-LABEL: zext_32i8_to_32i32:
2208; SSE41:       # %bb.0:
2209; SSE41-NEXT:    movq %rdi, %rax
2210; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2211; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1]
2212; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2213; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
2214; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
2215; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
2216; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2217; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm5 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2218; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[1,1,1,1]
2219; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
2220; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
2221; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
2222; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
2223; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2224; SSE41-NEXT:    movdqa %xmm1, 112(%rdi)
2225; SSE41-NEXT:    movdqa %xmm7, 96(%rdi)
2226; SSE41-NEXT:    movdqa %xmm6, 80(%rdi)
2227; SSE41-NEXT:    movdqa %xmm5, 64(%rdi)
2228; SSE41-NEXT:    movdqa %xmm0, 48(%rdi)
2229; SSE41-NEXT:    movdqa %xmm4, 32(%rdi)
2230; SSE41-NEXT:    movdqa %xmm3, 16(%rdi)
2231; SSE41-NEXT:    movdqa %xmm2, (%rdi)
2232; SSE41-NEXT:    retq
2233;
2234; AVX1-LABEL: zext_32i8_to_32i32:
2235; AVX1:       # %bb.0:
2236; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2237; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
2238; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2239; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm4
2240; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2241; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2242; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm3[1,1,1,1]
2243; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2244; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
2245; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2246; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2247; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
2248; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2249; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
2250; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
2251; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2252; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
2253; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2254; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm3
2255; AVX1-NEXT:    vmovaps %ymm4, %ymm0
2256; AVX1-NEXT:    retq
2257;
2258; AVX2-LABEL: zext_32i8_to_32i32:
2259; AVX2:       # %bb.0:
2260; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2261; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
2262; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
2263; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2264; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2265; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
2266; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2267; AVX2-NEXT:    vmovdqa %ymm4, %ymm0
2268; AVX2-NEXT:    retq
2269;
2270; AVX512-LABEL: zext_32i8_to_32i32:
2271; AVX512:       # %bb.0:
2272; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2273; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
2274; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2275; AVX512-NEXT:    vmovdqa64 %zmm2, %zmm0
2276; AVX512-NEXT:    retq
2277  %res = zext <32 x i8>%x to <32 x i32>
2278  ret <32 x i32> %res
2279}
2280
2281define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) {
2282; SSE2-LABEL: zext_2i8_to_2i32:
2283; SSE2:       # %bb.0:
2284; SSE2-NEXT:    movzwl (%rdi), %eax
2285; SSE2-NEXT:    movd %eax, %xmm0
2286; SSE2-NEXT:    pxor %xmm1, %xmm1
2287; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2288; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2289; SSE2-NEXT:    paddd %xmm0, %xmm0
2290; SSE2-NEXT:    retq
2291;
2292; SSSE3-LABEL: zext_2i8_to_2i32:
2293; SSSE3:       # %bb.0:
2294; SSSE3-NEXT:    movzwl (%rdi), %eax
2295; SSSE3-NEXT:    movd %eax, %xmm0
2296; SSSE3-NEXT:    pxor %xmm1, %xmm1
2297; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2298; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2299; SSSE3-NEXT:    paddd %xmm0, %xmm0
2300; SSSE3-NEXT:    retq
2301;
2302; SSE41-LABEL: zext_2i8_to_2i32:
2303; SSE41:       # %bb.0:
2304; SSE41-NEXT:    movzwl (%rdi), %eax
2305; SSE41-NEXT:    movd %eax, %xmm0
2306; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2307; SSE41-NEXT:    paddd %xmm0, %xmm0
2308; SSE41-NEXT:    retq
2309;
2310; AVX-LABEL: zext_2i8_to_2i32:
2311; AVX:       # %bb.0:
2312; AVX-NEXT:    movzwl (%rdi), %eax
2313; AVX-NEXT:    vmovd %eax, %xmm0
2314; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2315; AVX-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
2316; AVX-NEXT:    retq
2317  %x = load <2 x i8>, <2 x i8>* %addr, align 1
2318  %y = zext <2 x i8> %x to <2 x i32>
2319  %z = add <2 x i32>%y, %y
2320  ret <2 x i32>%z
2321}
2322
2323define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) {
2324; SSE2-LABEL: zext_4i17_to_4i32:
2325; SSE2:       # %bb.0:
2326; SSE2-NEXT:    movq (%rdi), %rax
2327; SSE2-NEXT:    movd %eax, %xmm0
2328; SSE2-NEXT:    movq %rax, %rcx
2329; SSE2-NEXT:    shrq $17, %rcx
2330; SSE2-NEXT:    movd %ecx, %xmm1
2331; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2332; SSE2-NEXT:    movl 8(%rdi), %ecx
2333; SSE2-NEXT:    shll $13, %ecx
2334; SSE2-NEXT:    movq %rax, %rdx
2335; SSE2-NEXT:    shrq $51, %rdx
2336; SSE2-NEXT:    orl %ecx, %edx
2337; SSE2-NEXT:    movd %edx, %xmm1
2338; SSE2-NEXT:    shrq $34, %rax
2339; SSE2-NEXT:    movd %eax, %xmm2
2340; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2341; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2342; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
2343; SSE2-NEXT:    retq
2344;
2345; SSSE3-LABEL: zext_4i17_to_4i32:
2346; SSSE3:       # %bb.0:
2347; SSSE3-NEXT:    movq (%rdi), %rax
2348; SSSE3-NEXT:    movd %eax, %xmm0
2349; SSSE3-NEXT:    movq %rax, %rcx
2350; SSSE3-NEXT:    shrq $17, %rcx
2351; SSSE3-NEXT:    movd %ecx, %xmm1
2352; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2353; SSSE3-NEXT:    movl 8(%rdi), %ecx
2354; SSSE3-NEXT:    shll $13, %ecx
2355; SSSE3-NEXT:    movq %rax, %rdx
2356; SSSE3-NEXT:    shrq $51, %rdx
2357; SSSE3-NEXT:    orl %ecx, %edx
2358; SSSE3-NEXT:    movd %edx, %xmm1
2359; SSSE3-NEXT:    shrq $34, %rax
2360; SSSE3-NEXT:    movd %eax, %xmm2
2361; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2362; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2363; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm0
2364; SSSE3-NEXT:    retq
2365;
2366; SSE41-LABEL: zext_4i17_to_4i32:
2367; SSE41:       # %bb.0:
2368; SSE41-NEXT:    movl 8(%rdi), %eax
2369; SSE41-NEXT:    shll $13, %eax
2370; SSE41-NEXT:    movq (%rdi), %rcx
2371; SSE41-NEXT:    movq %rcx, %rdx
2372; SSE41-NEXT:    shrq $51, %rdx
2373; SSE41-NEXT:    orl %eax, %edx
2374; SSE41-NEXT:    movq %rcx, %rax
2375; SSE41-NEXT:    shrq $17, %rax
2376; SSE41-NEXT:    movd %ecx, %xmm0
2377; SSE41-NEXT:    pinsrd $1, %eax, %xmm0
2378; SSE41-NEXT:    shrq $34, %rcx
2379; SSE41-NEXT:    pinsrd $2, %ecx, %xmm0
2380; SSE41-NEXT:    pinsrd $3, %edx, %xmm0
2381; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
2382; SSE41-NEXT:    retq
2383;
2384; AVX1-LABEL: zext_4i17_to_4i32:
2385; AVX1:       # %bb.0:
2386; AVX1-NEXT:    movl 8(%rdi), %eax
2387; AVX1-NEXT:    shll $13, %eax
2388; AVX1-NEXT:    movq (%rdi), %rcx
2389; AVX1-NEXT:    movq %rcx, %rdx
2390; AVX1-NEXT:    shrq $51, %rdx
2391; AVX1-NEXT:    orl %eax, %edx
2392; AVX1-NEXT:    movq %rcx, %rax
2393; AVX1-NEXT:    shrq $17, %rax
2394; AVX1-NEXT:    vmovd %ecx, %xmm0
2395; AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
2396; AVX1-NEXT:    shrq $34, %rcx
2397; AVX1-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
2398; AVX1-NEXT:    vpinsrd $3, %edx, %xmm0, %xmm0
2399; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
2400; AVX1-NEXT:    retq
2401;
2402; AVX2-LABEL: zext_4i17_to_4i32:
2403; AVX2:       # %bb.0:
2404; AVX2-NEXT:    movl 8(%rdi), %eax
2405; AVX2-NEXT:    shll $13, %eax
2406; AVX2-NEXT:    movq (%rdi), %rcx
2407; AVX2-NEXT:    movq %rcx, %rdx
2408; AVX2-NEXT:    shrq $51, %rdx
2409; AVX2-NEXT:    orl %eax, %edx
2410; AVX2-NEXT:    movq %rcx, %rax
2411; AVX2-NEXT:    shrq $17, %rax
2412; AVX2-NEXT:    vmovd %ecx, %xmm0
2413; AVX2-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
2414; AVX2-NEXT:    shrq $34, %rcx
2415; AVX2-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
2416; AVX2-NEXT:    vpinsrd $3, %edx, %xmm0, %xmm0
2417; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
2418; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
2419; AVX2-NEXT:    retq
2420;
2421; AVX512-LABEL: zext_4i17_to_4i32:
2422; AVX512:       # %bb.0:
2423; AVX512-NEXT:    movl 8(%rdi), %eax
2424; AVX512-NEXT:    shll $13, %eax
2425; AVX512-NEXT:    movq (%rdi), %rcx
2426; AVX512-NEXT:    movq %rcx, %rdx
2427; AVX512-NEXT:    shrq $51, %rdx
2428; AVX512-NEXT:    orl %eax, %edx
2429; AVX512-NEXT:    movq %rcx, %rax
2430; AVX512-NEXT:    shrq $17, %rax
2431; AVX512-NEXT:    vmovd %ecx, %xmm0
2432; AVX512-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
2433; AVX512-NEXT:    shrq $34, %rcx
2434; AVX512-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
2435; AVX512-NEXT:    vpinsrd $3, %edx, %xmm0, %xmm0
2436; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
2437; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
2438; AVX512-NEXT:    retq
2439  %a = load <4 x i17>, <4 x i17>* %ptr
2440  %b = zext <4 x i17> %a to <4 x i32>
2441  ret <4 x i32> %b
2442}
2443
2444define <8 x i64> @zext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp {
2445; SSE2-LABEL: zext_8i6_to_8i64:
2446; SSE2:       # %bb.0: # %entry
2447; SSE2-NEXT:    movd %edi, %xmm0
2448; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2449; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2450; SSE2-NEXT:    paddw {{.*}}(%rip), %xmm3
2451; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
2452; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2453; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [63,63]
2454; SSE2-NEXT:    pand %xmm4, %xmm0
2455; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
2456; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
2457; SSE2-NEXT:    pand %xmm4, %xmm1
2458; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
2459; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
2460; SSE2-NEXT:    pand %xmm4, %xmm2
2461; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
2462; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
2463; SSE2-NEXT:    pand %xmm4, %xmm3
2464; SSE2-NEXT:    retq
2465;
2466; SSSE3-LABEL: zext_8i6_to_8i64:
2467; SSSE3:       # %bb.0: # %entry
2468; SSSE3-NEXT:    movd %edi, %xmm0
2469; SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2470; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2471; SSSE3-NEXT:    paddw {{.*}}(%rip), %xmm3
2472; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
2473; SSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2474; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [63,63]
2475; SSSE3-NEXT:    pand %xmm4, %xmm0
2476; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
2477; SSSE3-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
2478; SSSE3-NEXT:    pand %xmm4, %xmm1
2479; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
2480; SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
2481; SSSE3-NEXT:    pand %xmm4, %xmm2
2482; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
2483; SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
2484; SSSE3-NEXT:    pand %xmm4, %xmm3
2485; SSSE3-NEXT:    retq
2486;
2487; SSE41-LABEL: zext_8i6_to_8i64:
2488; SSE41:       # %bb.0: # %entry
2489; SSE41-NEXT:    movd %edi, %xmm0
2490; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2491; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2492; SSE41-NEXT:    paddw {{.*}}(%rip), %xmm3
2493; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
2494; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [63,63]
2495; SSE41-NEXT:    pand %xmm4, %xmm0
2496; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
2497; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2498; SSE41-NEXT:    pand %xmm4, %xmm1
2499; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
2500; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2501; SSE41-NEXT:    pand %xmm4, %xmm2
2502; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
2503; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
2504; SSE41-NEXT:    pand %xmm4, %xmm3
2505; SSE41-NEXT:    retq
2506;
2507; AVX1-LABEL: zext_8i6_to_8i64:
2508; AVX1:       # %bb.0: # %entry
2509; AVX1-NEXT:    vmovd %edi, %xmm0
2510; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2511; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2512; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
2513; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm1
2514; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2515; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
2516; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2517; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2518; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
2519; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2520; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
2521; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2522; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
2523; AVX1-NEXT:    retq
2524;
2525; AVX2-LABEL: zext_8i6_to_8i64:
2526; AVX2:       # %bb.0: # %entry
2527; AVX2-NEXT:    vmovd %edi, %xmm0
2528; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
2529; AVX2-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
2530; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm1
2531; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2532; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
2533; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2534; AVX2-NEXT:    retq
2535;
2536; AVX512-LABEL: zext_8i6_to_8i64:
2537; AVX512:       # %bb.0: # %entry
2538; AVX512-NEXT:    vmovd %edi, %xmm0
2539; AVX512-NEXT:    vpbroadcastw %xmm0, %xmm0
2540; AVX512-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
2541; AVX512-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2542; AVX512-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
2543; AVX512-NEXT:    retq
2544entry:
2545  %a = trunc i32 %x to i6
2546  %b = insertelement <8 x i6> undef, i6 %a, i32 0
2547  %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x i32> zeroinitializer
2548  %d = add <8 x i6> %c, <i6 0, i6 1, i6 2, i6 3, i6 4, i6 5, i6 6, i6 7>
2549  %e = zext <8 x i6> %d to <8 x i64>
2550  ret <8 x i64> %e
2551}
2552
2553define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
2554; SSE2-LABEL: splatshuf_zext_v4i64:
2555; SSE2:       # %bb.0:
2556; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2557; SSE2-NEXT:    pxor %xmm1, %xmm1
2558; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2559; SSE2-NEXT:    movdqa %xmm0, %xmm1
2560; SSE2-NEXT:    retq
2561;
2562; SSSE3-LABEL: splatshuf_zext_v4i64:
2563; SSSE3:       # %bb.0:
2564; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2565; SSSE3-NEXT:    pxor %xmm1, %xmm1
2566; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2567; SSSE3-NEXT:    movdqa %xmm0, %xmm1
2568; SSSE3-NEXT:    retq
2569;
2570; SSE41-LABEL: splatshuf_zext_v4i64:
2571; SSE41:       # %bb.0:
2572; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
2573; SSE41-NEXT:    pxor %xmm2, %xmm2
2574; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
2575; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2576; SSE41-NEXT:    retq
2577;
2578; AVX1-LABEL: splatshuf_zext_v4i64:
2579; AVX1:       # %bb.0:
2580; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2581; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
2582; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2583; AVX1-NEXT:    retq
2584;
2585; AVX2-LABEL: splatshuf_zext_v4i64:
2586; AVX2:       # %bb.0:
2587; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
2588; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2589; AVX2-NEXT:    retq
2590;
2591; AVX512-LABEL: splatshuf_zext_v4i64:
2592; AVX512:       # %bb.0:
2593; AVX512-NEXT:    vpbroadcastd %xmm0, %xmm0
2594; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2595; AVX512-NEXT:    retq
2596  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
2597  %ext = zext <4 x i32> %shuf to <4 x i64>
2598  ret <4 x i64> %ext
2599}
2600
2601define <8 x i32> @splatshuf_zext_v8i32_matching_undefs(<8 x i16> %x) {
2602; SSE2-LABEL: splatshuf_zext_v8i32_matching_undefs:
2603; SSE2:       # %bb.0:
2604; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
2605; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
2606; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
2607; SSE2-NEXT:    movdqa %xmm0, %xmm1
2608; SSE2-NEXT:    retq
2609;
2610; SSSE3-LABEL: splatshuf_zext_v8i32_matching_undefs:
2611; SSSE3:       # %bb.0:
2612; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[u,u],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2613; SSSE3-NEXT:    movdqa %xmm0, %xmm1
2614; SSSE3-NEXT:    retq
2615;
2616; SSE41-LABEL: splatshuf_zext_v8i32_matching_undefs:
2617; SSE41:       # %bb.0:
2618; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[6,7],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2619; SSE41-NEXT:    movdqa %xmm0, %xmm1
2620; SSE41-NEXT:    retq
2621;
2622; AVX1-LABEL: splatshuf_zext_v8i32_matching_undefs:
2623; AVX1:       # %bb.0:
2624; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[6,7],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2625; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2626; AVX1-NEXT:    retq
2627;
2628; AVX2-LABEL: splatshuf_zext_v8i32_matching_undefs:
2629; AVX2:       # %bb.0:
2630; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
2631; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2632; AVX2-NEXT:    retq
2633;
2634; AVX512-LABEL: splatshuf_zext_v8i32_matching_undefs:
2635; AVX512:       # %bb.0:
2636; AVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
2637; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2638; AVX512-NEXT:    retq
2639  %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 3, i32 7, i32 0, i32 undef, i32 3, i32 7>
2640  %ext = zext <8 x i16> %shuf to <8 x i32>
2641  ret <8 x i32> %ext
2642}
2643
2644define <8 x i32> @splatshuf_zext_v8i32_unmatched_undef(<8 x i16> %x) {
2645; SSE2-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2646; SSE2:       # %bb.0:
2647; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
2648; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7]
2649; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2650; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7]
2651; SSE2-NEXT:    pxor %xmm1, %xmm1
2652; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2653; SSE2-NEXT:    movdqa %xmm0, %xmm1
2654; SSE2-NEXT:    retq
2655;
2656; SSSE3-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2657; SSSE3:       # %bb.0:
2658; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2659; SSSE3-NEXT:    movdqa %xmm0, %xmm1
2660; SSSE3-NEXT:    retq
2661;
2662; SSE41-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2663; SSE41:       # %bb.0:
2664; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2665; SSE41-NEXT:    movdqa %xmm0, %xmm1
2666; SSE41-NEXT:    retq
2667;
2668; AVX1-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2669; AVX1:       # %bb.0:
2670; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2671; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2672; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2673; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2674; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2675; AVX1-NEXT:    retq
2676;
2677; AVX2-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2678; AVX2:       # %bb.0:
2679; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2680; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2681; AVX2-NEXT:    retq
2682;
2683; AVX512-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2684; AVX512:       # %bb.0:
2685; AVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2686; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2687; AVX512-NEXT:    retq
2688  %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 7, i32 0, i32 undef, i32 3, i32 7>
2689  %ext = zext <8 x i16> %shuf to <8 x i32>
2690  ret <8 x i32> %ext
2691}
2692
2693define <16 x i16> @splatshuf_zext_v16i16(<16 x i8> %x) {
2694; SSE2-LABEL: splatshuf_zext_v16i16:
2695; SSE2:       # %bb.0:
2696; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2697; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
2698; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
2699; SSE2-NEXT:    pxor %xmm1, %xmm1
2700; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2701; SSE2-NEXT:    movdqa %xmm0, %xmm1
2702; SSE2-NEXT:    retq
2703;
2704; SSSE3-LABEL: splatshuf_zext_v16i16:
2705; SSSE3:       # %bb.0:
2706; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero
2707; SSSE3-NEXT:    movdqa %xmm0, %xmm1
2708; SSSE3-NEXT:    retq
2709;
2710; SSE41-LABEL: splatshuf_zext_v16i16:
2711; SSE41:       # %bb.0:
2712; SSE41-NEXT:    movdqa %xmm0, %xmm1
2713; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14]
2714; SSE41-NEXT:    pxor %xmm2, %xmm2
2715; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
2716; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2717; SSE41-NEXT:    retq
2718;
2719; AVX1-LABEL: splatshuf_zext_v16i16:
2720; AVX1:       # %bb.0:
2721; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero
2722; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2723; AVX1-NEXT:    retq
2724;
2725; AVX2-LABEL: splatshuf_zext_v16i16:
2726; AVX2:       # %bb.0:
2727; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14]
2728; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2729; AVX2-NEXT:    retq
2730;
2731; AVX512-LABEL: splatshuf_zext_v16i16:
2732; AVX512:       # %bb.0:
2733; AVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14]
2734; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2735; AVX512-NEXT:    retq
2736  %shuf = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14>
2737  %ext = zext <16 x i8> %shuf to <16 x i16>
2738  ret <16 x i16> %ext
2739}
2740