1; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
6;
7; Just one 32-bit run to make sure we do reasonable things there.
8; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=i686 -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41
9
; Sign-extend <8 x i16> to <8 x i32>. Pre-SSE4.1 targets split into low/high
; halves with punpcklwd/punpckhwd and materialize the sign bits via
; pslld $16 + psrad $16; AVX1 uses two vpmovsxwd + vinsertf128; AVX2 does it
; with a single vpmovsxwd to a ymm register.
define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_8i16_to_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:   # kill: XMM0<def> XMM1<kill>
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: sext_8i16_to_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:   # kill: XMM0<def> XMM1<kill>
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT:    pslld $16, %xmm0
; SSSE3-NEXT:    psrad $16, %xmm0
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSSE3-NEXT:    pslld $16, %xmm1
; SSSE3-NEXT:    psrad $16, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: sext_8i16_to_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pmovzxwd %xmm1, %xmm0
; SSE41-NEXT:    pslld $16, %xmm0
; SSE41-NEXT:    psrad $16, %xmm0
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE41-NEXT:    pslld $16, %xmm1
; SSE41-NEXT:    psrad $16, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: sext_8i16_to_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sext_8i16_to_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; X32-SSE41-LABEL: sext_8i16_to_8i32:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    movdqa %xmm0, %xmm1
; X32-SSE41-NEXT:    pmovzxwd %xmm1, %xmm0
; X32-SSE41-NEXT:    pslld $16, %xmm0
; X32-SSE41-NEXT:    psrad $16, %xmm0
; X32-SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; X32-SSE41-NEXT:    pslld $16, %xmm1
; X32-SSE41-NEXT:    psrad $16, %xmm1
; X32-SSE41-NEXT:    retl
entry:
  %B = sext <8 x i16> %A to <8 x i32>
  ret <8 x i32>%B
}
73
; Sign-extend <4 x i32> to <4 x i64>. SSE paths scalarize each lane through
; GPRs (movd + cltq, rebuilt with punpcklqdq); the 32-bit SSE4.1 run widens
; with pmovzxdq and fills the high halves via sarl $31 + pinsrd; AVX1 uses two
; vpmovsxdq + vinsertf128; AVX2 is a single vpmovsxdq to ymm.
define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_4i32_to_4i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
; SSE2-NEXT:    movd %xmm1, %rax
; SSE2-NEXT:    cltq
; SSE2-NEXT:    movd %rax, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT:    movd %xmm1, %rax
; SSE2-NEXT:    cltq
; SSE2-NEXT:    movd %rax, %xmm1
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    cltq
; SSE2-NEXT:    movd %rax, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    cltq
; SSE2-NEXT:    movd %rax, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: sext_4i32_to_4i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
; SSSE3-NEXT:    movd %xmm1, %rax
; SSSE3-NEXT:    cltq
; SSSE3-NEXT:    movd %rax, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSSE3-NEXT:    movd %xmm1, %rax
; SSSE3-NEXT:    cltq
; SSSE3-NEXT:    movd %rax, %xmm1
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    cltq
; SSSE3-NEXT:    movd %rax, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    cltq
; SSSE3-NEXT:    movd %rax, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSSE3-NEXT:    movdqa %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: sext_4i32_to_4i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxdq %xmm0, %xmm1
; SSE41-NEXT:    pextrq $1, %xmm1, %rax
; SSE41-NEXT:    cltq
; SSE41-NEXT:    movd %rax, %xmm3
; SSE41-NEXT:    movd %xmm1, %rax
; SSE41-NEXT:    cltq
; SSE41-NEXT:    movd %rax, %xmm2
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; SSE41-NEXT:    pextrq $1, %xmm0, %rax
; SSE41-NEXT:    cltq
; SSE41-NEXT:    movd %rax, %xmm3
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    cltq
; SSE41-NEXT:    movd %rax, %xmm1
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: sext_4i32_to_4i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sext_4i32_to_4i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; X32-SSE41-LABEL: sext_4i32_to_4i64:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    pmovzxdq %xmm0, %xmm2
; X32-SSE41-NEXT:    movd %xmm2, %eax
; X32-SSE41-NEXT:    sarl $31, %eax
; X32-SSE41-NEXT:    pextrd $2, %xmm2, %ecx
; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm2
; X32-SSE41-NEXT:    sarl $31, %ecx
; X32-SSE41-NEXT:    pinsrd $3, %ecx, %xmm2
; X32-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
; X32-SSE41-NEXT:    movd %xmm1, %eax
; X32-SSE41-NEXT:    sarl $31, %eax
; X32-SSE41-NEXT:    pextrd $2, %xmm1, %ecx
; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm1
; X32-SSE41-NEXT:    sarl $31, %ecx
; X32-SSE41-NEXT:    pinsrd $3, %ecx, %xmm1
; X32-SSE41-NEXT:    movdqa %xmm2, %xmm0
; X32-SSE41-NEXT:    retl
entry:
  %B = sext <4 x i32> %A to <4 x i64>
  ret <4 x i64>%B
}
177
; Load <4 x i16> and sign-extend to <4 x i32>. SSE2/SSSE3 use an unpack plus
; psrad $16; SSE4.1+ fold the load directly into pmovsxwd.
define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) {
; SSE2-LABEL: load_sext_test1:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movq (%rdi), %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_sext_test1:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movq (%rdi), %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT:    psrad $16, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_sext_test1:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovsxwd (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_sext_test1:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovsxwd (%rdi), %xmm0
; AVX-NEXT:    retq
;
; X32-SSE41-LABEL: load_sext_test1:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT:    pmovsxwd (%eax), %xmm0
; X32-SSE41-NEXT:    retl
entry:
 %X = load <4 x i16>, <4 x i16>* %ptr
 %Y = sext <4 x i16> %X to <4 x i32>
 ret <4 x i32>%Y
}
213
; Load <4 x i8> and sign-extend to <4 x i32>. SSE2/SSSE3 unpack twice then
; psrad $24; SSE4.1+ fold the load into pmovsxbd.
define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) {
; SSE2-LABEL: load_sext_test2:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movd (%rdi), %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    psrad $24, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_sext_test2:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movd (%rdi), %xmm0
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT:    psrad $24, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_sext_test2:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovsxbd (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_sext_test2:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovsxbd (%rdi), %xmm0
; AVX-NEXT:    retq
;
; X32-SSE41-LABEL: load_sext_test2:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT:    pmovsxbd (%eax), %xmm0
; X32-SSE41-NEXT:    retl
entry:
 %X = load <4 x i8>, <4 x i8>* %ptr
 %Y = sext <4 x i8> %X to <4 x i32>
 ret <4 x i32>%Y
}
251
; Load <2 x i8> and sign-extend to <2 x i64>. Pre-SSE4.1 scalarizes with two
; movsbq loads rebuilt via punpcklqdq; SSE4.1+ fold the load into pmovsxbq.
define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) {
; SSE2-LABEL: load_sext_test3:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movsbq 1(%rdi), %rax
; SSE2-NEXT:    movd %rax, %xmm1
; SSE2-NEXT:    movsbq (%rdi), %rax
; SSE2-NEXT:    movd %rax, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_sext_test3:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movsbq 1(%rdi), %rax
; SSSE3-NEXT:    movd %rax, %xmm1
; SSSE3-NEXT:    movsbq (%rdi), %rax
; SSSE3-NEXT:    movd %rax, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_sext_test3:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovsxbq (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_sext_test3:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovsxbq (%rdi), %xmm0
; AVX-NEXT:    retq
;
; X32-SSE41-LABEL: load_sext_test3:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT:    pmovsxbq (%eax), %xmm0
; X32-SSE41-NEXT:    retl
entry:
 %X = load <2 x i8>, <2 x i8>* %ptr
 %Y = sext <2 x i8> %X to <2 x i64>
 ret <2 x i64>%Y
}
291
; Load <2 x i16> and sign-extend to <2 x i64>. Pre-SSE4.1 scalarizes with two
; movswq loads rebuilt via punpcklqdq; SSE4.1+ fold the load into pmovsxwq.
define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) {
; SSE2-LABEL: load_sext_test4:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movswq 2(%rdi), %rax
; SSE2-NEXT:    movd %rax, %xmm1
; SSE2-NEXT:    movswq (%rdi), %rax
; SSE2-NEXT:    movd %rax, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_sext_test4:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movswq 2(%rdi), %rax
; SSSE3-NEXT:    movd %rax, %xmm1
; SSSE3-NEXT:    movswq (%rdi), %rax
; SSSE3-NEXT:    movd %rax, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_sext_test4:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovsxwq (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_sext_test4:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovsxwq (%rdi), %xmm0
; AVX-NEXT:    retq
;
; X32-SSE41-LABEL: load_sext_test4:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT:    pmovsxwq (%eax), %xmm0
; X32-SSE41-NEXT:    retl
entry:
 %X = load <2 x i16>, <2 x i16>* %ptr
 %Y = sext <2 x i16> %X to <2 x i64>
 ret <2 x i64>%Y
}
331
; Load <2 x i32> and sign-extend to <2 x i64>. Pre-SSE4.1 scalarizes with two
; movslq loads rebuilt via punpcklqdq; SSE4.1+ fold the load into pmovsxdq.
define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) {
; SSE2-LABEL: load_sext_test5:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movslq 4(%rdi), %rax
; SSE2-NEXT:    movd %rax, %xmm1
; SSE2-NEXT:    movslq (%rdi), %rax
; SSE2-NEXT:    movd %rax, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_sext_test5:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movslq 4(%rdi), %rax
; SSSE3-NEXT:    movd %rax, %xmm1
; SSSE3-NEXT:    movslq (%rdi), %rax
; SSSE3-NEXT:    movd %rax, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_sext_test5:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovsxdq (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_sext_test5:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovsxdq (%rdi), %xmm0
; AVX-NEXT:    retq
;
; X32-SSE41-LABEL: load_sext_test5:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT:    pmovsxdq (%eax), %xmm0
; X32-SSE41-NEXT:    retl
entry:
 %X = load <2 x i32>, <2 x i32>* %ptr
 %Y = sext <2 x i32> %X to <2 x i64>
 ret <2 x i64>%Y
}
371
; Load <8 x i8> and sign-extend to <8 x i16>. SSE2/SSSE3 use punpcklbw plus
; psraw $8; SSE4.1+ fold the load into pmovsxbw.
define <8 x i16> @load_sext_test6(<8 x i8> *%ptr) {
; SSE2-LABEL: load_sext_test6:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movq (%rdi), %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_sext_test6:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movq (%rdi), %xmm0
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT:    psraw $8, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_sext_test6:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovsxbw (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_sext_test6:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovsxbw (%rdi), %xmm0
; AVX-NEXT:    retq
;
; X32-SSE41-LABEL: load_sext_test6:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT:    pmovsxbw (%eax), %xmm0
; X32-SSE41-NEXT:    retl
entry:
 %X = load <8 x i8>, <8 x i8>* %ptr
 %Y = sext <8 x i8> %X to <8 x i16>
 ret <8 x i16>%Y
}
407
; Sign-extend a <4 x i1> mask to <4 x i64>. All targets first materialize the
; mask bit in each 32-bit lane with pslld $31 + psrad $31, then widen exactly
; as in sext_4i32_to_4i64 (scalarized on SSE, vpmovsxdq on AVX).
define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
; SSE2-LABEL: sext_4i1_to_4i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
; SSE2-NEXT:    movd %xmm1, %rax
; SSE2-NEXT:    cltq
; SSE2-NEXT:    movd %rax, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT:    movd %xmm1, %rax
; SSE2-NEXT:    cltq
; SSE2-NEXT:    movd %rax, %xmm1
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    cltq
; SSE2-NEXT:    movd %rax, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    cltq
; SSE2-NEXT:    movd %rax, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: sext_4i1_to_4i64:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pslld $31, %xmm0
; SSSE3-NEXT:    psrad $31, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
; SSSE3-NEXT:    movd %xmm1, %rax
; SSSE3-NEXT:    cltq
; SSSE3-NEXT:    movd %rax, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSSE3-NEXT:    movd %xmm1, %rax
; SSSE3-NEXT:    cltq
; SSSE3-NEXT:    movd %rax, %xmm1
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    cltq
; SSSE3-NEXT:    movd %rax, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    cltq
; SSSE3-NEXT:    movd %rax, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSSE3-NEXT:    movdqa %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: sext_4i1_to_4i64:
; SSE41:       # BB#0:
; SSE41-NEXT:    pslld $31, %xmm0
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    pmovzxdq %xmm0, %xmm1
; SSE41-NEXT:    pextrq $1, %xmm1, %rax
; SSE41-NEXT:    cltq
; SSE41-NEXT:    movd %rax, %xmm3
; SSE41-NEXT:    movd %xmm1, %rax
; SSE41-NEXT:    cltq
; SSE41-NEXT:    movd %rax, %xmm2
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; SSE41-NEXT:    pextrq $1, %xmm0, %rax
; SSE41-NEXT:    cltq
; SSE41-NEXT:    movd %rax, %xmm3
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    cltq
; SSE41-NEXT:    movd %rax, %xmm1
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: sext_4i1_to_4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sext_4i1_to_4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; X32-SSE41-LABEL: sext_4i1_to_4i64:
; X32-SSE41:       # BB#0:
; X32-SSE41-NEXT:    pslld $31, %xmm0
; X32-SSE41-NEXT:    psrad $31, %xmm0
; X32-SSE41-NEXT:    pmovzxdq %xmm0, %xmm2
; X32-SSE41-NEXT:    movd %xmm2, %eax
; X32-SSE41-NEXT:    sarl $31, %eax
; X32-SSE41-NEXT:    pextrd $2, %xmm2, %ecx
; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm2
; X32-SSE41-NEXT:    sarl $31, %ecx
; X32-SSE41-NEXT:    pinsrd $3, %ecx, %xmm2
; X32-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
; X32-SSE41-NEXT:    movd %xmm1, %eax
; X32-SSE41-NEXT:    sarl $31, %eax
; X32-SSE41-NEXT:    pextrd $2, %xmm1, %ecx
; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm1
; X32-SSE41-NEXT:    sarl $31, %ecx
; X32-SSE41-NEXT:    pinsrd $3, %ecx, %xmm1
; X32-SSE41-NEXT:    movdqa %xmm2, %xmm0
; X32-SSE41-NEXT:    retl
  %extmask = sext <4 x i1> %mask to <4 x i64>
  ret <4 x i64> %extmask
}
522
; Load <16 x i8> and sign-extend to <16 x i16>. SSE targets produce the two
; halves (punpcklbw + psraw $8, or paired pmovsxbw loads); AVX1 combines two
; vpmovsxbw with vinsertf128; AVX2 uses a single ymm vpmovsxbw.
define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {
; SSE2-LABEL: sext_16i8_to_16i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movq (%rdi), %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    movq 8(%rdi), %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psraw $8, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: sext_16i8_to_16i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movq (%rdi), %xmm0
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT:    psraw $8, %xmm0
; SSSE3-NEXT:    movq 8(%rdi), %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT:    psraw $8, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: sext_16i8_to_16i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovsxbw (%rdi), %xmm0
; SSE41-NEXT:    pmovsxbw 8(%rdi), %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: sext_16i8_to_16i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovsxbw (%rdi), %xmm0
; AVX1-NEXT:    vpmovsxbw 8(%rdi), %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sext_16i8_to_16i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovsxbw (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; X32-SSE41-LABEL: sext_16i8_to_16i16:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT:    pmovsxbw (%eax), %xmm0
; X32-SSE41-NEXT:    pmovsxbw 8(%eax), %xmm1
; X32-SSE41-NEXT:    retl
entry:
 %X = load <16 x i8>, <16 x i8>* %ptr
 %Y = sext <16 x i8> %X to <16 x i16>
 ret <16 x i16> %Y
}
573
; Sign-extend <4 x i8> (in 32-bit lanes) to <4 x i64>. All targets first
; sign-fill each lane with pslld $24 + psrad $24, then widen exactly as in
; sext_4i32_to_4i64 (scalarized on SSE, vpmovsxdq on AVX).
define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
; SSE2-LABEL: sext_4i8_to_4i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    pslld $24, %xmm0
; SSE2-NEXT:    psrad $24, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
; SSE2-NEXT:    movd %xmm1, %rax
; SSE2-NEXT:    cltq
; SSE2-NEXT:    movd %rax, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT:    movd %xmm1, %rax
; SSE2-NEXT:    cltq
; SSE2-NEXT:    movd %rax, %xmm1
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    cltq
; SSE2-NEXT:    movd %rax, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    cltq
; SSE2-NEXT:    movd %rax, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: sext_4i8_to_4i64:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pslld $24, %xmm0
; SSSE3-NEXT:    psrad $24, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
; SSSE3-NEXT:    movd %xmm1, %rax
; SSSE3-NEXT:    cltq
; SSSE3-NEXT:    movd %rax, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSSE3-NEXT:    movd %xmm1, %rax
; SSSE3-NEXT:    cltq
; SSSE3-NEXT:    movd %rax, %xmm1
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    cltq
; SSSE3-NEXT:    movd %rax, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    cltq
; SSSE3-NEXT:    movd %rax, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSSE3-NEXT:    movdqa %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: sext_4i8_to_4i64:
; SSE41:       # BB#0:
; SSE41-NEXT:    pslld $24, %xmm0
; SSE41-NEXT:    psrad $24, %xmm0
; SSE41-NEXT:    pmovzxdq %xmm0, %xmm1
; SSE41-NEXT:    pextrq $1, %xmm1, %rax
; SSE41-NEXT:    cltq
; SSE41-NEXT:    movd %rax, %xmm3
; SSE41-NEXT:    movd %xmm1, %rax
; SSE41-NEXT:    cltq
; SSE41-NEXT:    movd %rax, %xmm2
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; SSE41-NEXT:    pextrq $1, %xmm0, %rax
; SSE41-NEXT:    cltq
; SSE41-NEXT:    movd %rax, %xmm3
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    cltq
; SSE41-NEXT:    movd %rax, %xmm1
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: sext_4i8_to_4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpslld $24, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $24, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sext_4i8_to_4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpslld $24, %xmm0, %xmm0
; AVX2-NEXT:    vpsrad $24, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; X32-SSE41-LABEL: sext_4i8_to_4i64:
; X32-SSE41:       # BB#0:
; X32-SSE41-NEXT:    pslld $24, %xmm0
; X32-SSE41-NEXT:    psrad $24, %xmm0
; X32-SSE41-NEXT:    pmovzxdq %xmm0, %xmm2
; X32-SSE41-NEXT:    movd %xmm2, %eax
; X32-SSE41-NEXT:    sarl $31, %eax
; X32-SSE41-NEXT:    pextrd $2, %xmm2, %ecx
; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm2
; X32-SSE41-NEXT:    sarl $31, %ecx
; X32-SSE41-NEXT:    pinsrd $3, %ecx, %xmm2
; X32-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
; X32-SSE41-NEXT:    movd %xmm1, %eax
; X32-SSE41-NEXT:    sarl $31, %eax
; X32-SSE41-NEXT:    pextrd $2, %xmm1, %ecx
; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm1
; X32-SSE41-NEXT:    sarl $31, %ecx
; X32-SSE41-NEXT:    pinsrd $3, %ecx, %xmm1
; X32-SSE41-NEXT:    movdqa %xmm2, %xmm0
; X32-SSE41-NEXT:    retl
  %extmask = sext <4 x i8> %mask to <4 x i64>
  ret <4 x i64> %extmask
}
688
; Load <4 x i8> and sign-extend to <4 x i64>. Pre-SSE4.1 scalarizes with four
; movsbq loads; SSE4.1 uses two pmovsxbq loads; AVX1 goes i8->i32->i64 via
; vpmovsxbd/vpmovsxdq; AVX2 is a single vpmovsxbq to ymm.
define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) {
; SSE2-LABEL: load_sext_4i8_to_4i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movsbq 1(%rdi), %rax
; SSE2-NEXT:    movd %rax, %xmm1
; SSE2-NEXT:    movsbq (%rdi), %rax
; SSE2-NEXT:    movd %rax, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    movsbq 3(%rdi), %rax
; SSE2-NEXT:    movd %rax, %xmm2
; SSE2-NEXT:    movsbq 2(%rdi), %rax
; SSE2-NEXT:    movd %rax, %xmm1
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_sext_4i8_to_4i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movsbq 1(%rdi), %rax
; SSSE3-NEXT:    movd %rax, %xmm1
; SSSE3-NEXT:    movsbq (%rdi), %rax
; SSSE3-NEXT:    movd %rax, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    movsbq 3(%rdi), %rax
; SSSE3-NEXT:    movd %rax, %xmm2
; SSSE3-NEXT:    movsbq 2(%rdi), %rax
; SSSE3-NEXT:    movd %rax, %xmm1
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_sext_4i8_to_4i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovsxbq (%rdi), %xmm0
; SSE41-NEXT:    pmovsxbq 2(%rdi), %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_sext_4i8_to_4i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovsxbd (%rdi), %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_sext_4i8_to_4i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovsxbq (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; X32-SSE41-LABEL: load_sext_4i8_to_4i64:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT:    pmovsxbq (%eax), %xmm0
; X32-SSE41-NEXT:    pmovsxbq 2(%eax), %xmm1
; X32-SSE41-NEXT:    retl
entry:
 %X = load <4 x i8>, <4 x i8>* %ptr
 %Y = sext <4 x i8> %X to <4 x i64>
 ret <4 x i64>%Y
}
749
; Load <4 x i16> and sign-extend to <4 x i64>. Pre-SSE4.1 scalarizes with four
; movswq loads; SSE4.1 uses two pmovsxwq loads; AVX1 goes i16->i32->i64 via
; vpmovsxwd/vpmovsxdq; AVX2 is a single vpmovsxwq to ymm.
define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) {
; SSE2-LABEL: load_sext_4i16_to_4i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movswq 2(%rdi), %rax
; SSE2-NEXT:    movd %rax, %xmm1
; SSE2-NEXT:    movswq (%rdi), %rax
; SSE2-NEXT:    movd %rax, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    movswq 6(%rdi), %rax
; SSE2-NEXT:    movd %rax, %xmm2
; SSE2-NEXT:    movswq 4(%rdi), %rax
; SSE2-NEXT:    movd %rax, %xmm1
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_sext_4i16_to_4i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movswq 2(%rdi), %rax
; SSSE3-NEXT:    movd %rax, %xmm1
; SSSE3-NEXT:    movswq (%rdi), %rax
; SSSE3-NEXT:    movd %rax, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    movswq 6(%rdi), %rax
; SSSE3-NEXT:    movd %rax, %xmm2
; SSSE3-NEXT:    movswq 4(%rdi), %rax
; SSSE3-NEXT:    movd %rax, %xmm1
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_sext_4i16_to_4i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovsxwq (%rdi), %xmm0
; SSE41-NEXT:    pmovsxwq 4(%rdi), %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_sext_4i16_to_4i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_sext_4i16_to_4i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovsxwq (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; X32-SSE41-LABEL: load_sext_4i16_to_4i64:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT:    pmovsxwq (%eax), %xmm0
; X32-SSE41-NEXT:    pmovsxwq 4(%eax), %xmm1
; X32-SSE41-NEXT:    retl
entry:
 %X = load <4 x i16>, <4 x i16>* %ptr
 %Y = sext <4 x i16> %X to <4 x i64>
 ret <4 x i64>%Y
}
810