; RUN: llc < %s -march=x86 -mcpu=core2 -mattr=+ssse3 | FileCheck %s
; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck --check-prefix=CHECK-YONAH %s

; test1: single-input shuffle of %A with mask <1,2,3,0>; %B is unused.
; Both runs expect exactly one pshufd followed directly by the return.
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: test1:
; CHECK:       # BB#0:
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
; CHECK-NEXT:    retl
;
; CHECK-YONAH-LABEL: test1:
; CHECK-YONAH:       # BB#0:
; CHECK-YONAH-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
; CHECK-YONAH-NEXT:    retl
  %C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  ret <4 x i32> %C
}

; test2: two-input shuffle with mask <1,2,3,4>, i.e. elements 1..3 of %A
; followed by element 0 of %B.
; The +ssse3 run expects a single palignr into xmm1 plus a movdqa to xmm0;
; the yonah run expects a pair of shufps instead.
define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: test2:
; CHECK:       # BB#0:
; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-YONAH-LABEL: test2:
; CHECK-YONAH:       # BB#0:
; CHECK-YONAH-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; CHECK-YONAH-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
; CHECK-YONAH-NEXT:    retl
  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %C
}

; test3: two-input shuffle with mask <1,2,undef,4>; lane 2 is undef, so any
; value is acceptable there.
; The +ssse3 run expects a palignr into xmm1 plus a movdqa to xmm0; the yonah
; run expects two pshufd feeding a punpcklqdq.
define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: test3:
; CHECK:       # BB#0:
; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-YONAH-LABEL: test3:
; CHECK-YONAH:       # BB#0:
; CHECK-YONAH-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; CHECK-YONAH-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
; CHECK-YONAH-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-YONAH-NEXT:    retl
  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 2, i32 undef, i32 4>
  ret <4 x i32> %C
}

; test4: two-input shuffle with mask <6,7,undef,1>, i.e. elements 2..3 of %B,
; an undef lane, then element 1 of %A.
; The +ssse3 run expects one palignr written straight into xmm0; the yonah run
; expects a shufpd into xmm1 plus a movapd to xmm0.
define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: test4:
; CHECK:       # BB#0:
; CHECK-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    retl
;
; CHECK-YONAH-LABEL: test4:
; CHECK-YONAH:       # BB#0:
; CHECK-YONAH-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; CHECK-YONAH-NEXT:    movapd %xmm1, %xmm0
; CHECK-YONAH-NEXT:    retl
  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 6, i32 7, i32 undef, i32 1>
  ret <4 x i32> %C
}

; test5: <4 x float> shuffle with mask <6,7,undef,1>.
; Both runs expect the same sequence: a shufpd into xmm1 plus a movapd to
; xmm0. No palignr is expected here, even in the +ssse3 run.
define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {
; CHECK-LABEL: test5:
; CHECK:       # BB#0:
; CHECK-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; CHECK-NEXT:    movapd %xmm1, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-YONAH-LABEL: test5:
; CHECK-YONAH:       # BB#0:
; CHECK-YONAH-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; CHECK-YONAH-NEXT:    movapd %xmm1, %xmm0
; CHECK-YONAH-NEXT:    retl
  %C = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> <i32 6, i32 7, i32 undef, i32 1>
  ret <4 x float> %C
}

; test6: <8 x i16> two-input shuffle with mask <3,4,undef,6,7,8,9,10>; the
; defined lanes take i16 elements 3..7 of %A followed by elements 0..2 of %B.
; The +ssse3 run expects a palignr starting at byte 6 of xmm0; the yonah run
; expects the psrldq / pslldq / por sequence.
define <8 x i16> @test6(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: test6:
; CHECK:       # BB#0:
; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-YONAH-LABEL: test6:
; CHECK-YONAH:       # BB#0:
; CHECK-YONAH-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; CHECK-YONAH-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
; CHECK-YONAH-NEXT:    por %xmm1, %xmm0
; CHECK-YONAH-NEXT:    retl
  %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
  ret <8 x i16> %C
}

; test7: <8 x i16> two-input shuffle with mask <undef,6,undef,8,9,10,11,12>;
; two lanes are undef, including the first one.
; The +ssse3 run expects a palignr starting at byte 10 of xmm0; the yonah run
; expects the psrldq / pslldq / por sequence.
define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: test7:
; CHECK:       # BB#0:
; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-YONAH-LABEL: test7:
; CHECK-YONAH:       # BB#0:
; CHECK-YONAH-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-YONAH-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
; CHECK-YONAH-NEXT:    por %xmm1, %xmm0
; CHECK-YONAH-NEXT:    retl
  %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12>
  ret <8 x i16> %C
}

; test8: <16 x i8> two-input shuffle whose mask runs 5,6,7,undef,9..20, i.e.
; bytes 5..15 of %A followed by bytes 0..4 of %B, with one undef lane.
; The +ssse3 run expects a palignr starting at byte 5 of xmm0; the yonah run
; expects the psrldq / pslldq / por sequence.
define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
; CHECK-LABEL: test8:
; CHECK:       # BB#0:
; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-YONAH-LABEL: test8:
; CHECK-YONAH:       # BB#0:
; CHECK-YONAH-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; CHECK-YONAH-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
; CHECK-YONAH-NEXT:    por %xmm1, %xmm0
; CHECK-YONAH-NEXT:    retl
  %C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> <i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  ret <16 x i8> %C
}

; Check that a unary palignr (a circular rotate of a single operand) is
; lowered correctly. Such a rotate is legal, but before this testcase was
; committed it was being done incorrectly: one of the operands of the
; palignr node was an UNDEF.
;
; Note the shuffle operands are swapped (%B first) and every defined mask
; index selects from %B, so the expected palignr reads xmm1 for both inputs.
define <8 x i16> @test9(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: test9:
; CHECK:       # BB#0:
; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-YONAH-LABEL: test9:
; CHECK-YONAH:       # BB#0:
; CHECK-YONAH-NEXT:    movdqa %xmm1, %xmm0
; CHECK-YONAH-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; CHECK-YONAH-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; CHECK-YONAH-NEXT:    por %xmm0, %xmm1
; CHECK-YONAH-NEXT:    movdqa %xmm1, %xmm0
; CHECK-YONAH-NEXT:    retl
  %C = shufflevector <8 x i16> %B, <8 x i16> %A, <8 x i32> <i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
  ret <8 x i16> %C
}