; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
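
; Verify that InstCombine folds the X86 SSE2/AVX2 vector shift intrinsics
; (both the shift-by-immediate and the shift-by-vector-count forms) into
; generic IR shifts, constants, or no-ops.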

;
; ASHR - Immediate
;
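; A zero shift count is a no-op, an in-range count becomes a generic 'ashr'
; by a splat, and an out-of-range count is clamped to (bitwidth - 1) because
; an arithmetic shift never moves past the sign bit.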

define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_15
; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_64
; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_15
; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_64
; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_15
; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_64
; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_15
; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_64
; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

;
; LSHR - Immediate
;
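; A zero shift count is a no-op, an in-range count becomes a generic 'lshr'
; by a splat, and an out-of-range count folds to zero.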

define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_15
; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_64
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_15
; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_64
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_0
; CHECK-NEXT: ret <2 x i64> %v
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_15
; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_64
; CHECK-NEXT: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_15
; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_64
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_15
; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_64
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_0
; CHECK-NEXT: ret <4 x i64> %v
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_15
; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_64
; CHECK-NEXT: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

;
; SHL - Immediate
;
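; A zero shift count is a no-op, an in-range count becomes a generic 'shl'
; by a splat, and an out-of-range count folds to zero.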

define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_15
; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_64
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_15
; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_64
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_0
; CHECK-NEXT: ret <2 x i64> %v
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_15
; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_64
; CHECK-NEXT: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_15
; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_64
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_15
; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_64
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_0
; CHECK-NEXT: ret <4 x i64> %v
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_15
; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_64
; CHECK-NEXT: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

;
; ASHR - Constant Vector
;
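; For the shift-by-vector forms only the low 64 bits of the count operand are
; used, so the high elements (the 9999 values) must be ignored. Splat counts
; that read as an out-of-range 64-bit value clamp to (bitwidth - 1) for the
; arithmetic shifts.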

define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_15
; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_15_splat
; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_64
; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_15
; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_15_splat
; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_64
; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_15
; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_15_splat
; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_64
; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_15
; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_15_splat
; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_64
; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

;
; LSHR - Constant Vector
;
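; Same as above, except that logical shifts by bitwidth or more fold to zero
; instead of clamping, which is why the splat and out-of-range cases return
; zeroinitializer.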

define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_15
; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_15_splat
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_64
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_15
; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_15_splat
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_64
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_0
; CHECK-NEXT: ret <2 x i64> %v
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_15
; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_64
; CHECK-NEXT: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_15
; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_15_splat
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_64
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_15
; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_15_splat
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_64
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_0
; CHECK-NEXT: ret <4 x i64> %v
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_15
; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_64
; CHECK-NEXT: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %1
}

;
; SHL - Constant Vector
;
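; Left shifts behave like the logical right shifts above: only the low 64
; bits of the count are used, and a count of bitwidth or more folds to zero.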

define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_15
; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_15_splat
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_64
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_15
; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_15_splat
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_64
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psll_q_0
; CHECK-NEXT: ret <2 x i64> %v
  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psll_q_15
; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psll_q_64
; CHECK-NEXT: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_15
; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_15_splat
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_64
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_15
; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_15_splat
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_64
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psll_q_0
; CHECK-NEXT: ret <4 x i64> %v
  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psll_q_15
; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psll_q_64
; CHECK-NEXT: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %1
}

;
; Vector Demanded Bits
;
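; Only the low 64 bits of the count operand are demanded, so shuffles (and
; bitcasts of shuffles) that merely duplicate those bits into the upper lanes
; can be stripped, leaving the intrinsic to read the original operand.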

define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psra_w_var
; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
; CHECK-NEXT: ret <8 x i16> %1
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psra_w_var_bc
; CHECK-NEXT: %1 = bitcast <2 x i64> %a to <8 x i16>
; CHECK-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
; CHECK-NEXT: ret <8 x i16> %2
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = bitcast <2 x i64> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2)
  ret <8 x i16> %3
}

define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @sse2_psra_d_var
; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
; CHECK-NEXT: ret <4 x i32> %1
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psra_d_var_bc
; CHECK-NEXT: %1 = bitcast <8 x i16> %a to <4 x i32>
; CHECK-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
; CHECK-NEXT: ret <4 x i32> %2
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = bitcast <8 x i16> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2)
  ret <4 x i32> %3
}

define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psra_w_var
; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
; CHECK-NEXT: ret <16 x i16> %1
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}

define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psra_d_var
; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
; CHECK-NEXT: ret <8 x i32> %1
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}

define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psrl_w_var
; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
; CHECK-NEXT: ret <8 x i16> %1
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @sse2_psrl_d_var
; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
; CHECK-NEXT: ret <4 x i32> %1
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psrl_q_var
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}

define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psrl_w_var
; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
; CHECK-NEXT: ret <16 x i16> %1
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}

define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
; CHECK-LABEL: @avx2_psrl_w_var_bc
; CHECK-NEXT: %1 = bitcast <16 x i8> %a to <8 x i16>
; CHECK-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
; CHECK-NEXT: ret <16 x i16> %2
  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = bitcast <16 x i8> %1 to <8 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2)
  ret <16 x i16> %3
}

define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psrl_d_var
; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
; CHECK-NEXT: ret <8 x i32> %1
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}

define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psrl_d_var_bc
; CHECK-NEXT: %1 = bitcast <2 x i64> %a to <4 x i32>
; CHECK-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
; CHECK-NEXT: ret <8 x i32> %2
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2)
  ret <8 x i32> %3
}

define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psrl_q_var
; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
; CHECK-NEXT: ret <4 x i64> %1
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
  ret <4 x i64> %2
}

define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psll_w_var
; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
; CHECK-NEXT: ret <8 x i16> %1
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @sse2_psll_d_var
; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
; CHECK-NEXT: ret <4 x i32> %1
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psll_q_var
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}

define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psll_w_var
; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
; CHECK-NEXT: ret <16 x i16> %1
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}

define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psll_d_var
; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
; CHECK-NEXT: ret <8 x i32> %1
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}

define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psll_q_var
; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
; CHECK-NEXT: ret <4 x i64> %1
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
  ret <4 x i64> %2
}

;
; Constant Folding
;
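; Chains of shift intrinsics with constant operands should fold to a constant
; vector, or to the original argument when every shift count is zero.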

define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
; CHECK-LABEL: @test_sse2_psra_w_0
; CHECK-NEXT: ret <8 x i16> %A
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
  ret <8 x i16> %3
}

define <8 x i16> @test_sse2_psra_w_8() {
; CHECK-LABEL: @test_sse2_psra_w_8
; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
  ret <8 x i16> %4
}

define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
; CHECK-LABEL: @test_sse2_psra_d_0
; CHECK-NEXT: ret <4 x i32> %A
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 0)
  ret <4 x i32> %3
}

define <4 x i32> @sse2_psra_d_8() {
; CHECK-LABEL: @sse2_psra_d_8
; CHECK-NEXT: ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
  ret <4 x i32> %4
}

define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
; CHECK-LABEL: @test_avx2_psra_w_0
; CHECK-NEXT: ret <16 x i16> %A
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
  ret <16 x i16> %3
}

define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
; CHECK-LABEL: @test_avx2_psra_w_8
; CHECK-NEXT: ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
  ret <16 x i16> %4
}

define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
; CHECK-LABEL: @test_avx2_psra_d_0
; CHECK-NEXT: ret <8 x i32> %A
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
  ret <8 x i32> %3
}

define <8 x i32> @test_avx2_psra_d_8() {
; CHECK-LABEL: @test_avx2_psra_d_8
; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
  ret <8 x i32> %4
}

define <2 x i64> @test_sse2_1() {
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
; CHECK: test_sse2_1
; CHECK: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
}

define <4 x i64> @test_avx2_1() {
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
; CHECK: test_avx2_1
; CHECK: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
}

define <2 x i64> @test_sse2_0() {
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
; CHECK: test_sse2_0
; CHECK: ret <2 x i64> zeroinitializer
}

define <4 x i64> @test_avx2_0() {
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
; CHECK: test_avx2_0
; CHECK: ret <4 x i64> zeroinitializer
}

define <2 x i64> @test_sse2_psrl_1() {
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
; CHECK: test_sse2_psrl_1
; CHECK: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
}

define <4 x i64> @test_avx2_psrl_1() {
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
; CHECK: test_avx2_psrl_1
; CHECK: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
}

define <2 x i64> @test_sse2_psrl_0() {
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
; CHECK: test_sse2_psrl_0
; CHECK: ret <2 x i64> zeroinitializer
}

define <4 x i64> @test_avx2_psrl_0() {
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
; CHECK: test_avx2_psrl_0
; CHECK: ret <4 x i64> zeroinitializer
}

declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1

declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1

declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1

attributes #1 = { nounwind readnone }