; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSE2
; RUN: llc < %s -mtriple=i686-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSSE3
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK-AVX

; test1: unary rotate of a <4 x i32> by one element (mask 1,2,3,0).
; The second shufflevector operand is undef and %B is unused, so only one
; source register is involved. The SSE2 and SSSE3 runs share the same expected
; output (a single pshufd); the AVX run expects a single vpermilps.
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-SSE-LABEL: test1:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
; CHECK-SSE-NEXT:    retl
;
; CHECK-AVX-LABEL: test1:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,0]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> < i32 1, i32 2, i32 3, i32 0 >
  ret <4 x i32> %C
}

; test2: two-input <4 x i32> shuffle with mask 1,2,3,4, i.e. elements 1..3 of
; %A followed by element 0 of %B (the %A:%B concatenation shifted by 4 bytes).
; Expected lowering per run: two shufps on SSE2, palignr plus a register copy
; on SSSE3, and a single three-operand vpalignr on AVX.
define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-SSE2-LABEL: test2:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
; CHECK-SSE2-NEXT:    retl
;
; CHECK-SSSE3-LABEL: test2:
; CHECK-SSSE3:       # %bb.0:
; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; CHECK-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSSE3-NEXT:    retl
;
; CHECK-AVX-LABEL: test2:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 3, i32 4 >
  ret <4 x i32> %C
}

; test3: same 4-byte shift as test2, but with lane 2 of the mask left undef
; (mask 1,2,undef,4). The SSSE3 and AVX runs still expect the palignr/vpalignr
; form used for the fully defined mask; the SSE2 run expects a single shufps.
define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-SSE2-LABEL: test3:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
; CHECK-SSE2-NEXT:    retl
;
; CHECK-SSSE3-LABEL: test3:
; CHECK-SSSE3:       # %bb.0:
; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; CHECK-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSSE3-NEXT:    retl
;
; CHECK-AVX-LABEL: test3:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 undef, i32 4 >
  ret <4 x i32> %C
}

; test4: <4 x i32> shuffle with mask 6,7,undef,1, i.e. elements 2..3 of %B,
; one undef lane, then element 1 of %A. With the undef lane treated as %A[0]
; this is an 8-byte alignment whose low half comes from %B, which is how the
; SSSE3 and AVX runs expect it to be lowered (palignr/vpalignr). The SSE2 run
; expects shufps followed by a register copy.
define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-SSE2-LABEL: test4:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retl
;
; CHECK-SSSE3-LABEL: test4:
; CHECK-SSSE3:       # %bb.0:
; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-SSSE3-NEXT:    retl
;
; CHECK-AVX-LABEL: test4:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
  ret <4 x i32> %C
}

; test5: floating-point counterpart of test4 (same mask 6,7,undef,1 applied to
; <4 x float>). No run expects palignr here: the SSE2 and SSSE3 runs share one
; expected sequence (shufps plus a register copy) and the AVX run expects a
; single vshufpd.
define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {
; CHECK-SSE-LABEL: test5:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; CHECK-SSE-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE-NEXT:    retl
;
; CHECK-AVX-LABEL: test5:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm1[1],xmm0[0]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
  ret <4 x float> %C
}

; test6: <8 x i16> shuffle with mask 3,4,undef,6,7,8,9,10, i.e. the %A:%B
; concatenation shifted by three i16 elements (6 bytes) with one undef lane.
; Expected lowering per run: psrldq + pslldq + por on SSE2, palignr plus a
; register copy on SSSE3, and a single vpalignr on AVX.
define <8 x i16> @test6(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-SSE2-LABEL: test6:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; CHECK-SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retl
;
; CHECK-SSSE3-LABEL: test6:
; CHECK-SSSE3:       # %bb.0:
; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; CHECK-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSSE3-NEXT:    retl
;
; CHECK-AVX-LABEL: test6:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 >
  ret <8 x i16> %C
}

; test7: <8 x i16> shuffle with mask undef,6,undef,8,9,10,11,12, i.e. the
; %A:%B concatenation shifted by five i16 elements (10 bytes) with two undef
; lanes, including the first one. Expected lowering per run: psrldq + pslldq +
; por on SSE2, palignr plus a register copy on SSSE3, and a single vpalignr on
; AVX.
define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-SSE2-LABEL: test7:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retl
;
; CHECK-SSSE3-LABEL: test7:
; CHECK-SSSE3:       # %bb.0:
; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; CHECK-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSSE3-NEXT:    retl
;
; CHECK-AVX-LABEL: test7:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 >
  ret <8 x i16> %C
}

; test8: <16 x i8> shuffle with mask 5,6,7,undef,9,...,20, i.e. the %A:%B
; concatenation shifted by 5 bytes with one undef lane. Expected lowering per
; run: psrldq + pslldq + por on SSE2, palignr plus a register copy on SSSE3,
; and a single vpalignr on AVX.
define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
; CHECK-SSE2-LABEL: test8:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; CHECK-SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retl
;
; CHECK-SSSE3-LABEL: test8:
; CHECK-SSSE3:       # %bb.0:
; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; CHECK-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSSE3-NEXT:    retl
;
; CHECK-AVX-LABEL: test8:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 >
  ret <16 x i8> %C
}

; Check that we don't do unary (circular on single operand) palignr incorrectly.
; (It is possible, but before this testcase was committed, it was being done
; incorrectly.  In particular, one of the operands of the palignr node
; was an UNDEF.)
;
; The shuffle takes %B as its first operand and %A as its second, but every
; defined mask index (2..7, then 0) selects from the first operand, so the
; result is a rotation of %B alone by one i16 element (2 bytes). The expected
; output therefore feeds the same value to both halves of the alignment: the
; SSSE3 run copies xmm1 into xmm0 before the palignr, and the AVX run lists
; xmm1 as the only source of the vpalignr.
define <8 x i16> @test9(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-SSE2-LABEL: test9:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; CHECK-SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retl
;
; CHECK-SSSE3-LABEL: test9:
; CHECK-SSSE3:       # %bb.0:
; CHECK-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; CHECK-SSSE3-NEXT:    retl
;
; CHECK-AVX-LABEL: test9:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; CHECK-AVX-NEXT:    retl
  %C = shufflevector <8 x i16> %B, <8 x i16> %A, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 >
  ret <8 x i16> %C
}
