; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

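; AVX2 adds per-element variable shifts: vpsllvd/vpsllvq for shl,
; vpsrlvd/vpsrlvq for lshr, and vpsravd for ashr (32-bit elements only).
; The tests below exercise the 128-bit and 256-bit form of each.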
define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
; X32-LABEL: variable_shl0:
; X32:       # %bb.0:
; X32-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl0:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = shl <4 x i32> %x, %y
  ret <4 x i32> %k
}

define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: variable_shl1:
; X32:       # %bb.0:
; X32-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl1:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = shl <8 x i32> %x, %y
  ret <8 x i32> %k
}

define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
; X32-LABEL: variable_shl2:
; X32:       # %bb.0:
; X32-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl2:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = shl <2 x i64> %x, %y
  ret <2 x i64> %k
}

define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
; X32-LABEL: variable_shl3:
; X32:       # %bb.0:
; X32-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl3:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = shl <4 x i64> %x, %y
  ret <4 x i64> %k
}

define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
; X32-LABEL: variable_srl0:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl0:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = lshr <4 x i32> %x, %y
  ret <4 x i32> %k
}

define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: variable_srl1:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl1:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = lshr <8 x i32> %x, %y
  ret <8 x i32> %k
}

define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
; X32-LABEL: variable_srl2:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl2:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = lshr <2 x i64> %x, %y
  ret <2 x i64> %k
}

define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
; X32-LABEL: variable_srl3:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl3:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = lshr <4 x i64> %x, %y
  ret <4 x i64> %k
}

define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
; X32-LABEL: variable_sra0:
; X32:       # %bb.0:
; X32-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_sra0:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = ashr <4 x i32> %x, %y
  ret <4 x i32> %k
}

define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: variable_sra1:
; X32:       # %bb.0:
; X32-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_sra1:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = ashr <8 x i32> %x, %y
  ret <8 x i32> %k
}

;;; Shift left
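; Splat-constant shifts should select the immediate forms of the shift
; instructions (vpslld/vpsllw/vpsllq and friends).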

define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
; X32-LABEL: vshift00:
; X32:       # %bb.0:
; X32-NEXT:    vpslld $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift00:
; X64:       # %bb.0:
; X64-NEXT:    vpslld $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
; X32-LABEL: vshift01:
; X32:       # %bb.0:
; X32-NEXT:    vpsllw $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift01:
; X64:       # %bb.0:
; X64-NEXT:    vpsllw $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
; X32-LABEL: vshift02:
; X32:       # %bb.0:
; X32-NEXT:    vpsllq $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift02:
; X64:       # %bb.0:
; X64-NEXT:    vpsllq $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Logical Shift right

define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
; X32-LABEL: vshift03:
; X32:       # %bb.0:
; X32-NEXT:    vpsrld $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift03:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
; X32-LABEL: vshift04:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlw $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift04:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlw $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
; X32-LABEL: vshift05:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlq $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift05:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Arithmetic Shift right
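; Note: AVX2 has no vpsraq, so there is no single-instruction immediate
; ashr for 64-bit elements; only i32 and i16 are covered here.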

define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
; X32-LABEL: vshift06:
; X32:       # %bb.0:
; X32-NEXT:    vpsrad $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift06:
; X64:       # %bb.0:
; X64-NEXT:    vpsrad $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
; X32-LABEL: vshift07:
; X32:       # %bb.0:
; X32-NEXT:    vpsraw $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift07:
; X64:       # %bb.0:
; X64-NEXT:    vpsraw $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

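; The same variable shifts with the shift amount loaded from memory: the
; load should fold into the shift instruction's memory operand instead of
; being emitted as a separate vector load.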
define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
; X32-LABEL: variable_sra0_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsravd (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_sra0_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = ashr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
; X32-LABEL: variable_sra1_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsravd (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_sra1_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = ashr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
; X32-LABEL: variable_shl0_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsllvd (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl0_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = shl <4 x i32> %x, %y1
  ret <4 x i32> %k
}

define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
; X32-LABEL: variable_shl1_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsllvd (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl1_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = shl <8 x i32> %x, %y1
  ret <8 x i32> %k
}

define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
; X32-LABEL: variable_shl2_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsllvq (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl2_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = shl <2 x i64> %x, %y1
  ret <2 x i64> %k
}

define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
; X32-LABEL: variable_shl3_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsllvq (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl3_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = shl <4 x i64> %x, %y1
  ret <4 x i64> %k
}

define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
; X32-LABEL: variable_srl0_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsrlvd (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl0_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = lshr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
; X32-LABEL: variable_srl1_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsrlvd (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl1_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = lshr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
; X32-LABEL: variable_srl2_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsrlvq (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl2_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = lshr <2 x i64> %x, %y1
  ret <2 x i64> %k
}

define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
; X32-LABEL: variable_srl3_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsrlvq (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl3_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = lshr <4 x i64> %x, %y1
  ret <4 x i64> %k
}

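; There are no byte-element shift instructions, so a splat-constant v32i8
; shift is lowered as the equivalent v16i16 shift plus a mask that clears
; the bits shifted across byte boundaries.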
define <32 x i8> @shl9(<32 x i8> %A) nounwind {
; X32-LABEL: shl9:
; X32:       # %bb.0:
; X32-NEXT:    vpsllw $3, %ymm0, %ymm0
; X32-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: shl9:
; X64:       # %bb.0:
; X64-NEXT:    vpsllw $3, %ymm0, %ymm0
; X64-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}

define <32 x i8> @shr9(<32 x i8> %A) nounwind {
; X32-LABEL: shr9:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X32-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: shr9:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X64-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}

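; An ashr by 7 replicates each byte's sign bit, which matches a compare
; against zero (vpcmpgtb) rather than any shift sequence.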
define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
; X32-LABEL: sra_v32i8_7:
; X32:       # %bb.0:
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: sra_v32i8_7:
; X64:       # %bb.0:
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %B
}

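; General v32i8 ashr is emulated: logical shift right and mask as above,
; then sign-extend from the shifted sign-bit position using the
; (x ^ 16) - 16 identity (16 is where the sign bit lands after a shift by 3).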
define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
; X32-LABEL: sra_v32i8:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X32-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X32-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X32-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: sra_v32i8:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X64-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X64-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X64-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}

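; A trunc followed by a sext of the same value lowers to a shift-left /
; arithmetic-shift-right pair within the wider element type.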
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
; X32-LABEL: sext_v16i16:
; X32:       # %bb.0:
; X32-NEXT:    vpsllw $8, %ymm0, %ymm0
; X32-NEXT:    vpsraw $8, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: sext_v16i16:
; X64:       # %bb.0:
; X64-NEXT:    vpsllw $8, %ymm0, %ymm0
; X64-NEXT:    vpsraw $8, %ymm0, %ymm0
; X64-NEXT:    retq
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}

define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
; X32-LABEL: sext_v8i32:
; X32:       # %bb.0:
; X32-NEXT:    vpslld $16, %ymm0, %ymm0
; X32-NEXT:    vpsrad $16, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: sext_v8i32:
; X64:       # %bb.0:
; X64-NEXT:    vpslld $16, %ymm0, %ymm0
; X64-NEXT:    vpsrad $16, %ymm0, %ymm0
; X64-NEXT:    retq
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}

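; AVX2 has no variable shifts for 16-bit elements, so the last three tests
; zero/sign-extend to <8 x i32>, use the 32-bit variable shift, and then
; truncate back to <8 x i16>.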
define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
; X32-LABEL: variable_shl16:
; X32:       # %bb.0:
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X32-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; X32-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl16:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; X64-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = shl <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}

define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; X32-LABEL: variable_ashr16:
; X32:       # %bb.0:
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT:    vpmovsxwd %xmm0, %ymm0
; X32-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; X32-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X32-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: variable_ashr16:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT:    vpmovsxwd %xmm0, %ymm0
; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = ashr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}

define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; X32-LABEL: variable_lshr16:
; X32:       # %bb.0:
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X32-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; X32-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X32-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: variable_lshr16:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = lshr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}