; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
2
; Per-lane variable left shift of <4 x i32> by a register operand.
; Expected to select the AVX2 variable-shift instruction vpsllvd.
define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: variable_shl0:
; CHECK: vpsllvd
; CHECK: ret
  %k = shl <4 x i32> %x, %y
  ret <4 x i32> %k
}
; Per-lane variable left shift of <8 x i32> by a register operand.
; Expected to select the AVX2 variable-shift instruction vpsllvd.
define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: variable_shl1:
; CHECK: vpsllvd
; CHECK: ret
  %k = shl <8 x i32> %x, %y
  ret <8 x i32> %k
}
; Per-lane variable left shift of <2 x i64> by a register operand.
; Expected to select the AVX2 variable-shift instruction vpsllvq.
define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: variable_shl2:
; CHECK: vpsllvq
; CHECK: ret
  %k = shl <2 x i64> %x, %y
  ret <2 x i64> %k
}
; Per-lane variable left shift of <4 x i64> by a register operand.
; Expected to select the AVX2 variable-shift instruction vpsllvq.
define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
; CHECK-LABEL: variable_shl3:
; CHECK: vpsllvq
; CHECK: ret
  %k = shl <4 x i64> %x, %y
  ret <4 x i64> %k
}
; Per-lane variable logical right shift of <4 x i32> by a register operand.
; Expected to select the AVX2 variable-shift instruction vpsrlvd.
define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: variable_srl0:
; CHECK: vpsrlvd
; CHECK: ret
  %k = lshr <4 x i32> %x, %y
  ret <4 x i32> %k
}
; Per-lane variable logical right shift of <8 x i32> by a register operand.
; Expected to select the AVX2 variable-shift instruction vpsrlvd.
define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: variable_srl1:
; CHECK: vpsrlvd
; CHECK: ret
  %k = lshr <8 x i32> %x, %y
  ret <8 x i32> %k
}
; Per-lane variable logical right shift of <2 x i64> by a register operand.
; Expected to select the AVX2 variable-shift instruction vpsrlvq.
define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: variable_srl2:
; CHECK: vpsrlvq
; CHECK: ret
  %k = lshr <2 x i64> %x, %y
  ret <2 x i64> %k
}
; Per-lane variable logical right shift of <4 x i64> by a register operand.
; Expected to select the AVX2 variable-shift instruction vpsrlvq.
define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
; CHECK-LABEL: variable_srl3:
; CHECK: vpsrlvq
; CHECK: ret
  %k = lshr <4 x i64> %x, %y
  ret <4 x i64> %k
}
59
; Per-lane variable arithmetic right shift of <4 x i32> by a register operand.
; Expected to select the AVX2 variable-shift instruction vpsravd.
define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: variable_sra0:
; CHECK: vpsravd
; CHECK: ret
  %k = ashr <4 x i32> %x, %y
  ret <4 x i32> %k
}
; Per-lane variable arithmetic right shift of <8 x i32> by a register operand.
; Expected to select the AVX2 variable-shift instruction vpsravd.
define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: variable_sra1:
; CHECK: vpsravd
; CHECK: ret
  %k = ashr <8 x i32> %x, %y
  ret <8 x i32> %k
}
74
;;; Shift left

; 256-bit left shift of <8 x i32> by the splat constant 2.
; Expected to use the immediate form of vpslld.
define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
; CHECK-LABEL: vshift00:
; CHECK: vpslld $2
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
82
; 256-bit left shift of <16 x i16> by the splat constant 2.
; Expected to use the immediate form of vpsllw.
define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
; CHECK-LABEL: vshift01:
; CHECK: vpsllw $2
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2,
                           i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
88
; 256-bit left shift of <4 x i64> by the splat constant 2.
; Expected to use the immediate form of vpsllq.
define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
; CHECK-LABEL: vshift02:
; CHECK: vpsllq $2
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}
94
;;; Logical Shift right

; 256-bit logical right shift of <8 x i32> by the splat constant 2.
; Expected to use the immediate form of vpsrld.
define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
; CHECK-LABEL: vshift03:
; CHECK: vpsrld $2
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
102
; 256-bit logical right shift of <16 x i16> by the splat constant 2.
; Expected to use the immediate form of vpsrlw.
define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
; CHECK-LABEL: vshift04:
; CHECK: vpsrlw $2
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2,
                            i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
108
; 256-bit logical right shift of <4 x i64> by the splat constant 2.
; Expected to use the immediate form of vpsrlq.
define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
; CHECK-LABEL: vshift05:
; CHECK: vpsrlq $2
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}
114
;;; Arithmetic Shift right

; 256-bit arithmetic right shift of <8 x i32> by the splat constant 2.
; Expected to use the immediate form of vpsrad.
define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
; CHECK-LABEL: vshift06:
; CHECK: vpsrad $2
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
122
; 256-bit arithmetic right shift of <16 x i16> by the splat constant 2.
; Expected to use the immediate form of vpsraw.
define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
; CHECK-LABEL: vshift07:
; CHECK: vpsraw $2
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2,
                            i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
128
; Variable arithmetic right shift of <4 x i32> where the shift amounts are
; loaded from memory. The load is expected to fold into vpsravd as a memory
; operand (the pattern requires an opening "(%" right after the mnemonic).
define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
; CHECK-LABEL: variable_sra0_load:
; CHECK: vpsravd (%
; CHECK: ret
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = ashr <4 x i32> %x, %y1
  ret <4 x i32> %k
}
137
; Variable arithmetic right shift of <8 x i32> where the shift amounts are
; loaded from memory. The load is expected to fold into vpsravd as a memory
; operand (the pattern requires an opening "(%" right after the mnemonic).
define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
; CHECK-LABEL: variable_sra1_load:
; CHECK: vpsravd (%
; CHECK: ret
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = ashr <8 x i32> %x, %y1
  ret <8 x i32> %k
}
146
; Variable left shift of <4 x i32> where the shift amounts are loaded from
; memory. The load is expected to fold into vpsllvd as a memory operand.
define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
; CHECK-LABEL: variable_shl0_load:
; CHECK: vpsllvd (%
; CHECK: ret
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = shl <4 x i32> %x, %y1
  ret <4 x i32> %k
}
; Variable left shift of <8 x i32> where the shift amounts are loaded from
; memory. The load is expected to fold into vpsllvd as a memory operand.
define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
; CHECK-LABEL: variable_shl1_load:
; CHECK: vpsllvd (%
; CHECK: ret
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = shl <8 x i32> %x, %y1
  ret <8 x i32> %k
}
; Variable left shift of <2 x i64> where the shift amounts are loaded from
; memory. The load is expected to fold into vpsllvq as a memory operand.
define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
; CHECK-LABEL: variable_shl2_load:
; CHECK: vpsllvq (%
; CHECK: ret
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = shl <2 x i64> %x, %y1
  ret <2 x i64> %k
}
; Variable left shift of <4 x i64> where the shift amounts are loaded from
; memory. The load is expected to fold into vpsllvq as a memory operand.
define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
; CHECK-LABEL: variable_shl3_load:
; CHECK: vpsllvq (%
; CHECK: ret
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = shl <4 x i64> %x, %y1
  ret <4 x i64> %k
}
; Variable logical right shift of <4 x i32> where the shift amounts are loaded
; from memory. The load is expected to fold into vpsrlvd as a memory operand.
define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
; CHECK-LABEL: variable_srl0_load:
; CHECK: vpsrlvd (%
; CHECK: ret
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = lshr <4 x i32> %x, %y1
  ret <4 x i32> %k
}
; Variable logical right shift of <8 x i32> where the shift amounts are loaded
; from memory. The load is expected to fold into vpsrlvd as a memory operand.
define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
; CHECK-LABEL: variable_srl1_load:
; CHECK: vpsrlvd (%
; CHECK: ret
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = lshr <8 x i32> %x, %y1
  ret <8 x i32> %k
}
; Variable logical right shift of <2 x i64> where the shift amounts are loaded
; from memory. The load is expected to fold into vpsrlvq as a memory operand.
define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
; CHECK-LABEL: variable_srl2_load:
; CHECK: vpsrlvq (%
; CHECK: ret
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = lshr <2 x i64> %x, %y1
  ret <2 x i64> %k
}
; Variable logical right shift of <4 x i64> where the shift amounts are loaded
; from memory. The load is expected to fold into vpsrlvq as a memory operand.
define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
; CHECK-LABEL: variable_srl3_load:
; CHECK: vpsrlvq (%
; CHECK: ret
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = lshr <4 x i64> %x, %y1
  ret <4 x i64> %k
}
211
; Left shift of <32 x i8> by the splat constant 3. The expected sequence is a
; 16-bit-lane shift (vpsllw $3) followed by a vpand.
define <32 x i8> @shl9(<32 x i8> %A) nounwind {
; CHECK-LABEL: shl9:
; CHECK: vpsllw $3
; CHECK: vpand
; CHECK: ret
  %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                          i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                          i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                          i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}
220
; Logical right shift of <32 x i8> by the splat constant 3. The expected
; sequence is a 16-bit-lane shift (vpsrlw $3) followed by a vpand.
define <32 x i8> @shr9(<32 x i8> %A) nounwind {
; CHECK-LABEL: shr9:
; CHECK: vpsrlw $3
; CHECK: vpand
; CHECK: ret
  %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}
229
; Arithmetic right shift of <32 x i8> by the splat constant 7. The expected
; output contains a vpxor followed by a vpcmpgtb, i.e. a compare rather than
; a shift instruction.
define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
; CHECK-LABEL: sra_v32i8_7:
; CHECK: vpxor
; CHECK: vpcmpgtb
; CHECK: ret
  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
                           i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
                           i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
                           i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %B
}
238
; Arithmetic right shift of <32 x i8> by the splat constant 3. The expected
; sequence is vpsrlw $3, vpand, vpxor, vpsubb, in that order.
define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
; CHECK-LABEL: sra_v32i8:
; CHECK: vpsrlw $3
; CHECK: vpand
; CHECK: vpxor
; CHECK: vpsubb
; CHECK: ret
  %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}
249
; Truncate <16 x i16> to <16 x i8> and sign-extend back to <16 x i16>.
; The expected output is a vpsllw followed by a vpsraw, with no vinsertf128
; after them.
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
; CHECK-LABEL: sext_v16i16:
; CHECK: vpsllw
; CHECK: vpsraw
; CHECK-NOT: vinsertf128
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}
259
; Truncate <8 x i32> to <8 x i16> and sign-extend back to <8 x i32>.
; The expected output is a vpslld followed by a vpsrad, with no vinsertf128
; after them.
define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
; CHECK-LABEL: sext_v8i32:
; CHECK: vpslld
; CHECK: vpsrad
; CHECK-NOT: vinsertf128
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}
269
; Variable left shift of <8 x i16>. The expected output zero-extends both the
; value and the shift amounts to 32-bit lanes in ymm registers (vpmovzxwd),
; shifts with vpsllvd, and then uses vpshufb and vpermq.
define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: variable_shl16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsllvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
  %res = shl <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}
280
; Variable arithmetic right shift of <8 x i16>. The expected output
; sign-extends the value (vpmovsxwd) and zero-extends the shift amounts
; (vpmovzxwd) to 32-bit lanes, shifts with vpsravd, and then uses vpshufb
; and vpermq.
define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: variable_ashr16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovsxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsravd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
  %res = ashr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}
291
; Variable logical right shift of <8 x i16>. The expected output zero-extends
; both the value and the shift amounts to 32-bit lanes in ymm registers
; (vpmovzxwd), shifts with vpsrlvd, and then uses vpshufb and vpermq.
define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: variable_lshr16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsrlvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
  %res = lshr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}