; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER2

;
; EXTRQI
;

; A length of zero is equivalent to a bit length of 64.
; Immediate-form EXTRQ with length 0 and index 0. The expected output keeps
; bytes 0-7 of the source and leaves the upper eight bytes undefined.
define <2 x i64> @extrqi_len0_idx0(<2 x i64> %a) {
; ALL-LABEL: extrqi_len0_idx0:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 0, i8 0)
  ret <2 x i64> %1
}

; Immediate-form EXTRQ with length 8 and index 16 (both in bits). The expected
; output places source byte 2 in byte 0 and zero-fills the rest of the low
; quadword; the upper eight bytes are undefined.
define <2 x i64> @extrqi_len8_idx16(<2 x i64> %a) {
; ALL-LABEL: extrqi_len8_idx16:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 8, i8 16)
  ret <2 x i64> %1
}

; If the length + index exceeds the bottom 64 bits, the result is undefined.
; Immediate-form EXTRQ with length 32 and index 48: 32 + 48 = 80 bits, which
; runs past bit 63, so every result byte is expected to be printed as undef.
define <2 x i64> @extrqi_len32_idx48(<2 x i64> %a) {
; ALL-LABEL: extrqi_len32_idx48:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 32, i8 48)
  ret <2 x i64> %1
}

; Byte shuffle <0,z,z,z,u...>: byte 0 followed by three zero bytes, the other
; twelve lanes undef. The SSSE3 run expects a single EXTRQ; the AVX run
; expects VPMOVZXBQ instead.
define <16 x i8> @shuf_0zzzuuuuuuuuuuuu(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_0zzzuuuuuuuuuuuu:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuf_0zzzuuuuuuuuuuuu:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Zero-extends bytes 0 and 1 to one quadword each. The SSSE3 run expects two
; EXTRQs (on a copy and on the original) merged with PUNPCKLQDQ; the AVX run
; expects a single VPMOVZXBQ.
define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_0zzzzzzz1zzzzzzz:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    movaps %xmm0, %xmm1
; BTVER1-NEXT:    extrq {{.*#+}} xmm1 = xmm1[1],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuf_0zzzzzzz1zzzzzzz:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %s
}

; Zero-extends bytes 2 and 3 to one quadword each. The SSSE3 run expects two
; EXTRQs merged with PUNPCKLQDQ; the AVX run expects a 16-bit right shift of
; each dword followed by VPMOVZXBQ.
define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_2zzzzzzz3zzzzzzz:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    movaps %xmm0, %xmm1
; BTVER1-NEXT:    extrq {{.*#+}} xmm1 = xmm1[3],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuf_2zzzzzzz3zzzzzzz:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpsrld $16, %xmm0, %xmm0
; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %s
}

; Byte shuffle <0,1,z,z,u...>: bytes 0-1 followed by two zero bytes, the other
; twelve lanes undef. The SSSE3 run expects a single EXTRQ; the AVX run
; expects VPMOVZXWQ.
define <16 x i8> @shuf_01zzuuuuuuuuuuuu(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_01zzuuuuuuuuuuuu:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuf_01zzuuuuuuuuuuuu:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Zero-extends byte pairs (0,1) and (2,3) to one quadword each. The SSSE3 run
; expects two EXTRQs merged with PUNPCKLQDQ; the AVX run expects a single
; VPMOVZXWQ.
define <16 x i8> @shuf_01zzzzzz23zzzzzz(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_01zzzzzz23zzzzzz:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    movaps %xmm0, %xmm1
; BTVER1-NEXT:    extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuf_01zzzzzz23zzzzzz:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %s
}

; Byte shuffle <1,z,z,z,u...>: byte 1 followed by three zero bytes, the other
; twelve lanes undef. Both runs expect a single EXTRQ that moves byte 1 down
; to byte 0.
define <16 x i8> @shuf_1zzzuuuuuuuuuuuu(<16 x i8> %a0) {
; ALL-LABEL: shuf_1zzzuuuuuuuuuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Word shuffle <1,z,z,z,u,u,u,u>: word 1 followed by three zero words. Both
; runs expect a single EXTRQ selecting bytes 2-3 and zero-filling the rest of
; the low quadword.
define <8 x i16> @shuf_1zzzuuuu(<8 x i16> %a0) {
; ALL-LABEL: shuf_1zzzuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Word shuffle <1,2,z,z,u,u,u,u>: words 1-2 followed by two zero words. Both
; runs expect a single EXTRQ selecting bytes 2-5 and zero-filling the rest of
; the low quadword.
define <8 x i16> @shuf_12zzuuuu(<8 x i16> %a0) {
; ALL-LABEL: shuf_12zzuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3,4,5],zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Word shuffle <0,1,2,z,u,u,u,u>: words 0-2 followed by one zero word. Both
; runs expect a single EXTRQ keeping bytes 0-5 and zeroing bytes 6-7.
define <8 x i16> @shuf_012zuuuu(<8 x i16> %a0) {
; ALL-LABEL: shuf_012zuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Zero-extends words 0 and 1 to one quadword each. The SSSE3 run expects two
; EXTRQs merged with PUNPCKLQDQ; the AVX run expects a single VPMOVZXWQ.
define <8 x i16> @shuf_0zzz1zzz(<8 x i16> %a0) {
; BTVER1-LABEL: shuf_0zzz1zzz:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    movaps %xmm0, %xmm1
; BTVER1-NEXT:    extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuf_0zzz1zzz:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8>
  ret <8 x i16> %s
}

; Zero-extends dwords 0 and 1 to one quadword each. Neither run is expected to
; use EXTRQ here: the SSSE3 run interleaves with a zeroed register via
; PUNPCKLDQ and the AVX run uses VPMOVZXDQ.
define <4 x i32> @shuf_0z1z(<4 x i32> %a0) {
; BTVER1-LABEL: shuf_0z1z:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    pxor %xmm1, %xmm1
; BTVER1-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuf_0z1z:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BTVER2-NEXT:    retq
  %s = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
  ret <4 x i32> %s
}

;
; INSERTQI
;

; A length of zero is equivalent to a bit length of 64.
; Immediate-form INSERTQ with length 0 and index 0. The expected output takes
; the whole low quadword from %b (xmm1); the upper eight bytes are undefined.
define <2 x i64> @insertqi_len0_idx0(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: insertqi_len0_idx0:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6,7],xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 0, i8 0)
  ret <2 x i64> %1
}

; Immediate-form INSERTQ with length 8 and index 16 (both in bits). The
; expected output overwrites byte 2 of %a (xmm0) with byte 0 of %b (xmm1) and
; keeps the other low-quadword bytes of %a.
define <2 x i64> @insertqi_len8_idx16(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: insertqi_len8_idx16:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 8, i8 16)
  ret <2 x i64> %1
}

; If the length + index exceeds the bottom 64 bits, the result is undefined.
; Immediate-form INSERTQ with length 32 and index 48: 32 + 48 = 80 bits, which
; runs past bit 63, so every result byte is expected to be printed as undef.
define <2 x i64> @insertqi_len32_idx48(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: insertqi_len32_idx48:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 32, i8 48)
  ret <2 x i64> %1
}

; Byte shuffle <0,0,2,3,u...>. The mask only references %a0, so %a1 is unused.
; Both runs expect one INSERTQ that copies byte 0 of xmm0 over its own byte 1.
define <16 x i8> @shuf_0_0_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_0_0_2_3_uuuu_uuuu_uuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 0, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Byte shuffle <0,16,2,3,u...>: byte 0 of %a1 replaces byte 1 of %a0. Both
; runs expect a single INSERTQ from xmm1 into xmm0.
define <16 x i8> @shuf_0_16_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_0_16_2_3_uuuu_uuuu_uuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Byte shuffle <16,1,2,3,u...>: byte 0 of %a1 replaces byte 0 of %a0. Both
; runs expect a single INSERTQ from xmm1 into xmm0.
define <16 x i8> @shuf_16_1_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_16_1_2_3_uuuu_uuuu_uuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Word shuffle <0,8,2,3,u,u,u,u>: word 0 of %a1 replaces word 1 of %a0. Both
; runs expect a single INSERTQ placing xmm1 bytes 0-1 at bytes 2-3 of xmm0.
define <8 x i16> @shuf_0823uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0823uuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1],xmm0[4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Word shuffle <0,1,8,3,u,u,u,u>: word 0 of %a1 replaces word 2 of %a0. Both
; runs expect a single INSERTQ placing xmm1 bytes 0-1 at bytes 4-5 of xmm0.
define <8 x i16> @shuf_0183uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0183uuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[0,1],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Word shuffle <0,1,2,8,u,u,u,u>: word 0 of %a1 replaces word 3 of %a0. Both
; runs expect a single INSERTQ placing xmm1 bytes 0-1 at bytes 6-7 of xmm0.
define <8 x i16> @shuf_0128uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0128uuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[0,1],xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Word shuffle <0,8,9,3,u,u,u,u>: words 0-1 of %a1 replace words 1-2 of %a0.
; Both runs expect a single INSERTQ placing xmm1 bytes 0-3 at bytes 2-5 of
; xmm0.
define <8 x i16> @shuf_0893uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0893uuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Word shuffle <0,8,9,10,u,u,u,u>: words 0-2 of %a1 replace words 1-3 of %a0.
; Both runs expect a single INSERTQ placing xmm1 bytes 0-5 at bytes 2-7 of
; xmm0.
define <8 x i16> @shuf_089Auuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_089Auuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3,4,5],xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Word shuffle <0,8,9,u,u,u,u,u>: words 0-1 of %a1 replace words 1-2 of %a0
; and word 3 is undef. Both runs expect a single INSERTQ placing xmm1 bytes
; 0-3 at bytes 2-5; the undef word simply keeps bytes 6-7 of xmm0.
define <8 x i16> @shuf_089uuuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_089uuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

;
; Special Cases
;

; Out of range: the mask references byte 8, beyond the low 64 bits that EXTRQ/INSERTQ operate on.
; Byte shuffle <8,18,u...>: byte 8 of %a and byte 2 of %b. Neither run is
; expected to emit EXTRQ or INSERTQ; the CHECK lines pin a dword shift, a
; PSHUFD and a PUNPCKLBW instead.
define <16 x i8> @shuffle_8_18_uuuuuuuuuuuuuu(<16 x i8> %a, <16 x i8> %b) {
; BTVER1-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    psrld $16, %xmm1
; BTVER1-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; BTVER1-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpsrld $16, %xmm1, %xmm1
; BTVER2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; BTVER2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; BTVER2-NEXT:    retq
  %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; Byte shuffle <u,0,5,u...> of a single source. Both runs are expected to use
; a byte shuffle (PSHUFB / VPSHUFB) rather than an SSE4A instruction.
define <16 x i8> @shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; BTVER1-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
; BTVER2-NEXT:    retq
  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 0, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; Byte shuffle <u,z,4,z,u...>: zero, byte 4, zero in positions 1-3, everything
; else undef. Both runs are expected to use a zeroing byte shuffle
; (PSHUFB / VPSHUFB) rather than an SSE4A instruction.
define <16 x i8> @shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; BTVER1-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
; BTVER2-NEXT:    retq
  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 16, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; Byte shuffle <u,u,4,z,u...>: byte 4 in position 2 followed by a zero, with
; positions 0-1 undef. Both runs expect one EXTRQ yielding bytes 2-4 of xmm0
; followed by zeros, which satisfies the mask because the leading lanes are
; undef.
define <16 x i8> @shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; ALL-LABEL: shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3,4],zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 undef, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; Declarations of the SSE4A immediate-form intrinsics called by the
; extrqi_* and insertqi_* tests in this file.
declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind