1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
4
5; Splat patterns below
6
; Two splat left shifts (by 2 and by 1) of the same <4 x i32> input, XORed
; together. The shift by 1 is expected to be emitted as an add of the value
; to itself (paddd) rather than as pslld $1.
define <4 x i32> @shl4(<4 x i32> %A) nounwind {
; CHECK-LABEL: shl4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    pslld $2, %xmm1
; CHECK-NEXT:    paddd %xmm0, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shl.by2 = shl <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2>
  %shl.by1 = shl <4 x i32> %A, <i32 1, i32 1, i32 1, i32 1>
  %mixed = xor <4 x i32> %shl.by2, %shl.by1
  ret <4 x i32> %mixed
}
21
; Two splat logical right shifts (by 2 and by 1) of the same <4 x i32> input,
; XORed together. Both are expected as immediate-count psrld instructions.
define <4 x i32> @shr4(<4 x i32> %A) nounwind {
; CHECK-LABEL: shr4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psrld $2, %xmm1
; CHECK-NEXT:    psrld $1, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %lshr.by2 = lshr <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2>
  %lshr.by1 = lshr <4 x i32> %A, <i32 1, i32 1, i32 1, i32 1>
  %mixed = xor <4 x i32> %lshr.by2, %lshr.by1
  ret <4 x i32> %mixed
}
36
; Two splat arithmetic right shifts (by 2 and by 1) of the same <4 x i32>
; input, XORed together. Both are expected as immediate-count psrad
; instructions.
define <4 x i32> @sra4(<4 x i32> %A) nounwind {
; CHECK-LABEL: sra4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psrad $2, %xmm1
; CHECK-NEXT:    psrad $1, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %ashr.by2 = ashr <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2>
  %ashr.by1 = ashr <4 x i32> %A, <i32 1, i32 1, i32 1, i32 1>
  %mixed = xor <4 x i32> %ashr.by2, %ashr.by1
  ret <4 x i32> %mixed
}
51
; Two splat left shifts (by 2 and by 9) of the same <2 x i64> input, XORed
; together. Both are expected as immediate-count psllq instructions.
define <2 x i64> @shl2(<2 x i64> %A) nounwind {
; CHECK-LABEL: shl2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psllq $2, %xmm1
; CHECK-NEXT:    psllq $9, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shl.by2 = shl <2 x i64> %A, <i64 2, i64 2>
  %shl.by9 = shl <2 x i64> %A, <i64 9, i64 9>
  %mixed = xor <2 x i64> %shl.by2, %shl.by9
  ret <2 x i64> %mixed
}
66
; Two splat logical right shifts (by 8 and by 1) of the same <2 x i64> input,
; XORed together. Both are expected as immediate-count psrlq instructions.
define <2 x i64> @shr2(<2 x i64> %A) nounwind {
; CHECK-LABEL: shr2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psrlq $8, %xmm1
; CHECK-NEXT:    psrlq $1, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %lshr.by8 = lshr <2 x i64> %A, <i64 8, i64 8>
  %lshr.by1 = lshr <2 x i64> %A, <i64 1, i64 1>
  %mixed = xor <2 x i64> %lshr.by8, %lshr.by1
  ret <2 x i64> %mixed
}
81
; Two splat left shifts (by 2 and by 1) of the same <8 x i16> input, XORed
; together. The shift by 1 is expected to be emitted as an add of the value
; to itself (paddw) rather than as psllw $1.
define <8 x i16> @shl8(<8 x i16> %A) nounwind {
; CHECK-LABEL: shl8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psllw $2, %xmm1
; CHECK-NEXT:    paddw %xmm0, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shl.by2 = shl <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %shl.by1 = shl <8 x i16> %A, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %mixed = xor <8 x i16> %shl.by2, %shl.by1
  ret <8 x i16> %mixed
}
96
; Two splat logical right shifts (by 2 and by 1) of the same <8 x i16> input,
; XORed together. Both are expected as immediate-count psrlw instructions.
define <8 x i16> @shr8(<8 x i16> %A) nounwind {
; CHECK-LABEL: shr8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psrlw $2, %xmm1
; CHECK-NEXT:    psrlw $1, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %lshr.by2 = lshr <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %lshr.by1 = lshr <8 x i16> %A, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %mixed = xor <8 x i16> %lshr.by2, %lshr.by1
  ret <8 x i16> %mixed
}
111
; Two splat arithmetic right shifts (by 2 and by 1) of the same <8 x i16>
; input, XORed together. Both are expected as immediate-count psraw
; instructions.
define <8 x i16> @sra8(<8 x i16> %A) nounwind {
; CHECK-LABEL: sra8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psraw $2, %xmm1
; CHECK-NEXT:    psraw $1, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %ashr.by2 = ashr <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %ashr.by1 = ashr <8 x i16> %A, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %mixed = xor <8 x i16> %ashr.by2, %ashr.by1
  ret <8 x i16> %mixed
}
126
; Non-splat patterns below
128
; Left shifts of a <8 x i16> value by per-lane (non-splat) constant amounts.
; The checks expect each shift to become a pmullw by the matching powers of
; two; shift amounts 1,2,3,6,2,2,2,2 correspond to the multiplier vector
; [2,4,8,64,4,4,4,4], and the second multiplier is read from the constant pool.
define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind {
; X86-LABEL: sll8_nosplat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4]
; X86-NEXT:    pmullw %xmm0, %xmm1
; X86-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
; X86-NEXT:    pxor %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sll8_nosplat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4]
; X64-NEXT:    pmullw %xmm0, %xmm1
; X64-NEXT:    pmullw {{.*}}(%rip), %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %shl.first = shl <8 x i16> %A, <i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
  %shl.second = shl <8 x i16> %A, <i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
  %mixed = xor <8 x i16> %shl.first, %shl.second
  ret <8 x i16> %mixed
}
151
; Logical right shifts of a <2 x i64> value by per-lane (non-splat) amounts
; <8, 1> and <1, 0>. The checks expect one psrlq per distinct non-zero amount
; (8 and 1), with the lanes then blended by shufps; the lane shifted by 0 is
; taken from the unshifted input.
define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
; CHECK-LABEL: shr2_nosplat:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm2
; CHECK-NEXT:    psrlq $8, %xmm2
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psrlq $1, %xmm1
; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    xorps %xmm2, %xmm1
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %lshr.first = lshr <2 x i64> %A, <i64 8, i64 1>
  %lshr.second = lshr <2 x i64> %A, <i64 1, i64 0>
  %mixed = xor <2 x i64> %lshr.first, %lshr.second
  ret <2 x i64> %mixed
}
170
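; Other shifts: <2 x i32> vectors (narrower than an XMM register) and
; <16 x i8> element shifts, which the checks below expect to be lowered via
; word shifts plus masking, or via a byte compare.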
172
; Two splat left shifts (by 2 and by 9) of a <2 x i32> value, XORed together.
; The checks expect the same immediate-count pslld sequence on the XMM
; register that a full-width i32 vector would use.
define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
; CHECK-LABEL: shl2_other:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    pslld $2, %xmm1
; CHECK-NEXT:    pslld $9, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shl.by2 = shl <2 x i32> %A, <i32 2, i32 2>
  %shl.by9 = shl <2 x i32> %A, <i32 9, i32 9>
  %mixed = xor <2 x i32> %shl.by2, %shl.by9
  ret <2 x i32> %mixed
}
187
; Two splat logical right shifts (by 8 and by 1) of a <2 x i32> value, XORed
; together. The checks expect the same immediate-count psrld sequence on the
; XMM register that a full-width i32 vector would use.
define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
; CHECK-LABEL: shr2_other:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psrld $8, %xmm1
; CHECK-NEXT:    psrld $1, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %lshr.by8 = lshr <2 x i32> %A, <i32 8, i32 8>
  %lshr.by1 = lshr <2 x i32> %A, <i32 1, i32 1>
  %mixed = xor <2 x i32> %lshr.by8, %lshr.by1
  ret <2 x i32> %mixed
}
202
; Splat left shift of every i8 lane by 3. The checks expect a 16-bit word
; shift (psllw $3) followed by a pand with a constant-pool mask.
define <16 x i8> @shl9(<16 x i8> %A) nounwind {
; X86-LABEL: shl9:
; X86:       # %bb.0:
; X86-NEXT:    psllw $3, %xmm0
; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: shl9:
; X64:       # %bb.0:
; X64-NEXT:    psllw $3, %xmm0
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %shifted = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                                i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %shifted
}
218
; Splat logical right shift of every i8 lane by 3. The checks expect a 16-bit
; word shift (psrlw $3) followed by a pand with a constant-pool mask.
define <16 x i8> @shr9(<16 x i8> %A) nounwind {
; X86-LABEL: shr9:
; X86:       # %bb.0:
; X86-NEXT:    psrlw $3, %xmm0
; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: shr9:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $3, %xmm0
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %shifted = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                                 i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %shifted
}
234
; Splat arithmetic right shift of every i8 lane by 7, which leaves each lane
; filled with its sign bit. The checks expect this as a signed byte compare
; of zero against the input (pxor + pcmpgtb) instead of a shift.
define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
; CHECK-LABEL: sra_v16i8_7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pcmpgtb %xmm0, %xmm1
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %signfill = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
                                  i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %signfill
}
245
; Splat arithmetic right shift of every i8 lane by 3. The checks expect a
; logical word shift (psrlw $3) and a pand mask, followed by a pxor/psubb pair
; against a splat of 16 (the position of the sign bit after shifting by 3) to
; restore the sign extension.
define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
; X86-LABEL: sra_v16i8:
; X86:       # %bb.0:
; X86-NEXT:    psrlw $3, %xmm0
; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X86-NEXT:    pxor %xmm1, %xmm0
; X86-NEXT:    psubb %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sra_v16i8:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $3, %xmm0
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    psubb %xmm1, %xmm0
; X64-NEXT:    retq
  %shifted = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                                 i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %shifted
}
267