1; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
7
; Module-level target settings. The triple is the same one passed via -mtriple
; on the RUN lines above. The datalayout string declares little-endian ("e"),
; ELF-style name mangling ("m:e"), 64-bit alignment for i64, 128-bit alignment
; for f80, native integer widths of 8/16/32/64 bits, and a 128-bit natural
; stack alignment ("S128").
8target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
9target triple = "x86_64-unknown-unknown"
10
; v2i64 shuffle with mask <0,0>: both result lanes take element 0 of %a
; (the mask never selects from %b). The checks expect one pshufd/vpshufd that
; repeats dwords [0,1] of xmm0, or a vpbroadcastq under the AVX2 prefix.
11define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
12; SSE-LABEL: shuffle_v2i64_00:
13; SSE:       # BB#0:
14; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
15; SSE-NEXT:    retq
16;
17; AVX1-LABEL: shuffle_v2i64_00:
18; AVX1:       # BB#0:
19; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
20; AVX1-NEXT:    retq
21;
22; AVX2-LABEL: shuffle_v2i64_00:
23; AVX2:       # BB#0:
24; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
25; AVX2-NEXT:    retq
26  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
27  ret <2 x i64> %shuffle
28}
; v2i64 shuffle with mask <1,0>: swaps the two 64-bit elements of %a (%b is
; not selected). The checks expect one pshufd/vpshufd with dword order
; [2,3,0,1] on xmm0 for every SSE and AVX configuration.
29define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
30; SSE-LABEL: shuffle_v2i64_10:
31; SSE:       # BB#0:
32; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
33; SSE-NEXT:    retq
34;
35; AVX-LABEL: shuffle_v2i64_10:
36; AVX:       # BB#0:
37; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
38; AVX-NEXT:    retq
39  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
40  ret <2 x i64> %shuffle
41}
; v2i64 shuffle with mask <1,1>: both result lanes take element 1 of %a (%b
; is not selected). The checks expect one pshufd/vpshufd that repeats dwords
; [2,3] of xmm0.
42define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
43; SSE-LABEL: shuffle_v2i64_11:
44; SSE:       # BB#0:
45; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
46; SSE-NEXT:    retq
47;
48; AVX-LABEL: shuffle_v2i64_11:
49; AVX:       # BB#0:
50; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
51; AVX-NEXT:    retq
52  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
53  ret <2 x i64> %shuffle
54}
; v2i64 shuffle with mask <2,2>: mask indices 2 and 3 address the second
; operand, so both result lanes take element 0 of %b and %a is not selected.
; The checks expect pshufd/vpshufd reading xmm1 and writing xmm0, or a
; vpbroadcastq from xmm1 under the AVX2 prefix.
55define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
56; SSE-LABEL: shuffle_v2i64_22:
57; SSE:       # BB#0:
58; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
59; SSE-NEXT:    retq
60;
61; AVX1-LABEL: shuffle_v2i64_22:
62; AVX1:       # BB#0:
63; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
64; AVX1-NEXT:    retq
65;
66; AVX2-LABEL: shuffle_v2i64_22:
67; AVX2:       # BB#0:
68; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm0
69; AVX2-NEXT:    retq
70  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
71  ret <2 x i64> %shuffle
72}
; v2i64 shuffle with mask <3,2>: swaps the two 64-bit elements of %b (%a is
; not selected). The checks expect one pshufd/vpshufd with dword order
; [2,3,0,1] reading xmm1 and writing xmm0.
73define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
74; SSE-LABEL: shuffle_v2i64_32:
75; SSE:       # BB#0:
76; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
77; SSE-NEXT:    retq
78;
79; AVX-LABEL: shuffle_v2i64_32:
80; AVX:       # BB#0:
81; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
82; AVX-NEXT:    retq
83  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
84  ret <2 x i64> %shuffle
85}
; v2i64 shuffle with mask <3,3>: both result lanes take element 1 of %b (%a
; is not selected). The checks expect one pshufd/vpshufd that repeats dwords
; [2,3] of xmm1 into xmm0.
86define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
87; SSE-LABEL: shuffle_v2i64_33:
88; SSE:       # BB#0:
89; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
90; SSE-NEXT:    retq
91;
92; AVX-LABEL: shuffle_v2i64_33:
93; AVX:       # BB#0:
94; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
95; AVX-NEXT:    retq
96  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
97  ret <2 x i64> %shuffle
98}
99
; v2f64 shuffle with mask <0,0>: both result lanes take element 0 of %a (%b
; is not selected). The expected instruction depends on the feature level:
; movlhps under the SSE2 prefix, movddup under the SSE3, SSSE3 and SSE41
; prefixes, and vmovddup under the AVX prefix.
100define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
101; SSE2-LABEL: shuffle_v2f64_00:
102; SSE2:       # BB#0:
103; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
104; SSE2-NEXT:    retq
105;
106; SSE3-LABEL: shuffle_v2f64_00:
107; SSE3:       # BB#0:
108; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
109; SSE3-NEXT:    retq
110;
111; SSSE3-LABEL: shuffle_v2f64_00:
112; SSSE3:       # BB#0:
113; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
114; SSSE3-NEXT:    retq
115;
116; SSE41-LABEL: shuffle_v2f64_00:
117; SSE41:       # BB#0:
118; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
119; SSE41-NEXT:    retq
120;
121; AVX-LABEL: shuffle_v2f64_00:
122; AVX:       # BB#0:
123; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
124; AVX-NEXT:    retq
125  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
126  ret <2 x double> %shuffle
127}
; v2f64 shuffle with mask <1,0>: swaps the two doubles of %a (%b is not
; selected). The checks expect shufpd on xmm0 for the SSE configurations and
; vpermilpd for the AVX configurations.
128define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
129; SSE-LABEL: shuffle_v2f64_10:
130; SSE:       # BB#0:
131; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
132; SSE-NEXT:    retq
133;
134; AVX-LABEL: shuffle_v2f64_10:
135; AVX:       # BB#0:
136; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
137; AVX-NEXT:    retq
138  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
139  ret <2 x double> %shuffle
140}
; v2f64 shuffle with mask <1,1>: both result lanes take element 1 of %a (%b
; is not selected). The checks expect a single movhlps/vmovhlps on xmm0.
141define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
142; SSE-LABEL: shuffle_v2f64_11:
143; SSE:       # BB#0:
144; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
145; SSE-NEXT:    retq
146;
147; AVX-LABEL: shuffle_v2f64_11:
148; AVX:       # BB#0:
149; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
150; AVX-NEXT:    retq
151  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
152  ret <2 x double> %shuffle
153}
; v2f64 shuffle with mask <2,2>: both result lanes take element 0 of %b (%a
; is not selected). Under the SSE2 prefix the checks expect movlhps performed
; in place on xmm1 followed by a movaps copy into xmm0; the SSE3, SSSE3 and
; SSE41 prefixes expect a single movddup from xmm1 into xmm0, and the AVX
; prefix expects vmovddup.
154define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
155; SSE2-LABEL: shuffle_v2f64_22:
156; SSE2:       # BB#0:
157; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0,0]
158; SSE2-NEXT:    movaps %xmm1, %xmm0
159; SSE2-NEXT:    retq
160;
161; SSE3-LABEL: shuffle_v2f64_22:
162; SSE3:       # BB#0:
163; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
164; SSE3-NEXT:    retq
165;
166; SSSE3-LABEL: shuffle_v2f64_22:
167; SSSE3:       # BB#0:
168; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
169; SSSE3-NEXT:    retq
170;
171; SSE41-LABEL: shuffle_v2f64_22:
172; SSE41:       # BB#0:
173; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
174; SSE41-NEXT:    retq
175;
176; AVX-LABEL: shuffle_v2f64_22:
177; AVX:       # BB#0:
178; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
179; AVX-NEXT:    retq
180  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
181  ret <2 x double> %shuffle
182}
; v2f64 shuffle with mask <3,2>: swaps the two doubles of %b (%a is not
; selected). The SSE checks expect shufpd performed in place on xmm1 followed
; by a movapd copy into xmm0; the AVX checks expect a single vpermilpd from
; xmm1 into xmm0.
183define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
184; SSE-LABEL: shuffle_v2f64_32:
185; SSE:       # BB#0:
186; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
187; SSE-NEXT:    movapd %xmm1, %xmm0
188; SSE-NEXT:    retq
189;
190; AVX-LABEL: shuffle_v2f64_32:
191; AVX:       # BB#0:
192; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
193; AVX-NEXT:    retq
194  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
195  ret <2 x double> %shuffle
196}
; v2f64 shuffle with mask <3,3>: both result lanes take element 1 of %b (%a
; is not selected). The SSE checks expect movhlps performed in place on xmm1
; followed by a movaps copy into xmm0; the AVX checks expect a single
; vmovhlps from xmm1 into xmm0.
197define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
198; SSE-LABEL: shuffle_v2f64_33:
199; SSE:       # BB#0:
200; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
201; SSE-NEXT:    movaps %xmm1, %xmm0
202; SSE-NEXT:    retq
203;
204; AVX-LABEL: shuffle_v2f64_33:
205; AVX:       # BB#0:
206; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
207; AVX-NEXT:    retq
208  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
209  ret <2 x double> %shuffle
210}
; v2f64 blend with mask <0,3>: lane 0 comes from %a[0] and lane 1 from %b[1].
; Under the SSE2, SSE3 and SSSE3 prefixes the checks expect movsd merging the
; low element of xmm0 into xmm1, followed by a movapd copy into xmm0. The
; SSE41 prefix expects a single blendpd and the AVX prefix a single vblendpd.
211define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
212; SSE2-LABEL: shuffle_v2f64_03:
213; SSE2:       # BB#0:
214; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
215; SSE2-NEXT:    movapd %xmm1, %xmm0
216; SSE2-NEXT:    retq
217;
218; SSE3-LABEL: shuffle_v2f64_03:
219; SSE3:       # BB#0:
220; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
221; SSE3-NEXT:    movapd %xmm1, %xmm0
222; SSE3-NEXT:    retq
223;
224; SSSE3-LABEL: shuffle_v2f64_03:
225; SSSE3:       # BB#0:
226; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
227; SSSE3-NEXT:    movapd %xmm1, %xmm0
228; SSSE3-NEXT:    retq
229;
230; SSE41-LABEL: shuffle_v2f64_03:
231; SSE41:       # BB#0:
232; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
233; SSE41-NEXT:    retq
234;
235; AVX-LABEL: shuffle_v2f64_03:
236; AVX:       # BB#0:
237; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
238; AVX-NEXT:    retq
239  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
240  ret <2 x double> %shuffle
241}
; v2f64 blend with mask <2,1>: lane 0 comes from %b[0] and lane 1 from %a[1].
; Under the SSE2, SSE3 and SSSE3 prefixes the checks expect a single movsd
; merging the low element of xmm1 into xmm0 (no extra copy is expected). The
; SSE41 prefix expects blendpd and the AVX prefix vblendpd.
242define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
243; SSE2-LABEL: shuffle_v2f64_21:
244; SSE2:       # BB#0:
245; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
246; SSE2-NEXT:    retq
247;
248; SSE3-LABEL: shuffle_v2f64_21:
249; SSE3:       # BB#0:
250; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
251; SSE3-NEXT:    retq
252;
253; SSSE3-LABEL: shuffle_v2f64_21:
254; SSSE3:       # BB#0:
255; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
256; SSSE3-NEXT:    retq
257;
258; SSE41-LABEL: shuffle_v2f64_21:
259; SSE41:       # BB#0:
260; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
261; SSE41-NEXT:    retq
262;
263; AVX-LABEL: shuffle_v2f64_21:
264; AVX:       # BB#0:
265; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
266; AVX-NEXT:    retq
267  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
268  ret <2 x double> %shuffle
269}
270
271
; v2i64 shuffle with mask <0,2>: lane 0 is %a[0] and lane 1 is %b[0], i.e. the
; low elements of both inputs. The checks expect a single
; punpcklqdq/vpunpcklqdq of xmm0 with xmm1.
272define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
273; SSE-LABEL: shuffle_v2i64_02:
274; SSE:       # BB#0:
275; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
276; SSE-NEXT:    retq
277;
278; AVX-LABEL: shuffle_v2i64_02:
279; AVX:       # BB#0:
280; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
281; AVX-NEXT:    retq
282  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
283  ret <2 x i64> %shuffle
284}
; Mask <0,2> again (%a[0], %b[0]), but with an extra leading argument %nonce
; that the body never uses. The expected code therefore reads xmm1/xmm2 and
; must deliver the result in xmm0 (%nonce presumably occupies xmm0 on entry).
; The SSE checks expect punpcklqdq in place on xmm1 plus a movdqa copy into
; xmm0; the AVX checks expect one vpunpcklqdq writing xmm0 directly.
285define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
286; SSE-LABEL: shuffle_v2i64_02_copy:
287; SSE:       # BB#0:
288; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
289; SSE-NEXT:    movdqa %xmm1, %xmm0
290; SSE-NEXT:    retq
291;
292; AVX-LABEL: shuffle_v2i64_02_copy:
293; AVX:       # BB#0:
294; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
295; AVX-NEXT:    retq
296  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
297  ret <2 x i64> %shuffle
298}
; v2i64 blend with mask <0,3>: lane 0 comes from %a[0] and lane 1 from %b[1].
; Under the SSE2, SSE3 and SSSE3 prefixes the checks expect movsd merging into
; xmm1 followed by a movapd copy into xmm0. The SSE41 and AVX1 prefixes expect
; a 16-bit-granular pblendw/vpblendw, and the AVX2 prefix expects a
; 32-bit-granular vpblendd.
299define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
300; SSE2-LABEL: shuffle_v2i64_03:
301; SSE2:       # BB#0:
302; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
303; SSE2-NEXT:    movapd %xmm1, %xmm0
304; SSE2-NEXT:    retq
305;
306; SSE3-LABEL: shuffle_v2i64_03:
307; SSE3:       # BB#0:
308; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
309; SSE3-NEXT:    movapd %xmm1, %xmm0
310; SSE3-NEXT:    retq
311;
312; SSSE3-LABEL: shuffle_v2i64_03:
313; SSSE3:       # BB#0:
314; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
315; SSSE3-NEXT:    movapd %xmm1, %xmm0
316; SSSE3-NEXT:    retq
317;
318; SSE41-LABEL: shuffle_v2i64_03:
319; SSE41:       # BB#0:
320; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
321; SSE41-NEXT:    retq
322;
323; AVX1-LABEL: shuffle_v2i64_03:
324; AVX1:       # BB#0:
325; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
326; AVX1-NEXT:    retq
327;
328; AVX2-LABEL: shuffle_v2i64_03:
329; AVX2:       # BB#0:
330; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
331; AVX2-NEXT:    retq
332  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
333  ret <2 x i64> %shuffle
334}
; Mask <0,3> again (%a[0], %b[1]), but with an extra leading argument %nonce
; that the body never uses, so the expected code reads xmm1/xmm2 and must
; deliver the result in xmm0 (%nonce presumably occupies xmm0 on entry).
; Under the SSE2, SSE3 and SSSE3 prefixes the checks expect movsd into xmm2
; plus a movapd copy; the SSE41 prefix expects pblendw in place on xmm1 plus a
; movdqa copy; the AVX1/AVX2 prefixes expect a single vpblendw/vpblendd
; writing xmm0 directly.
335define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
336; SSE2-LABEL: shuffle_v2i64_03_copy:
337; SSE2:       # BB#0:
338; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
339; SSE2-NEXT:    movapd %xmm2, %xmm0
340; SSE2-NEXT:    retq
341;
342; SSE3-LABEL: shuffle_v2i64_03_copy:
343; SSE3:       # BB#0:
344; SSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
345; SSE3-NEXT:    movapd %xmm2, %xmm0
346; SSE3-NEXT:    retq
347;
348; SSSE3-LABEL: shuffle_v2i64_03_copy:
349; SSSE3:       # BB#0:
350; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
351; SSSE3-NEXT:    movapd %xmm2, %xmm0
352; SSSE3-NEXT:    retq
353;
354; SSE41-LABEL: shuffle_v2i64_03_copy:
355; SSE41:       # BB#0:
356; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
357; SSE41-NEXT:    movdqa %xmm1, %xmm0
358; SSE41-NEXT:    retq
359;
360; AVX1-LABEL: shuffle_v2i64_03_copy:
361; AVX1:       # BB#0:
362; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
363; AVX1-NEXT:    retq
364;
365; AVX2-LABEL: shuffle_v2i64_03_copy:
366; AVX2:       # BB#0:
367; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
368; AVX2-NEXT:    retq
369  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
370  ret <2 x i64> %shuffle
371}
; v2i64 shuffle with mask <1,2>: lane 0 is %a[1] and lane 1 is %b[0]. Under
; the SSE2 and SSE3 prefixes the checks expect a single shufpd on xmm0. Under
; the SSSE3 and SSE41 prefixes they expect palignr producing the result in
; xmm1 followed by a movdqa copy into xmm0. The AVX prefix expects a single
; vpalignr writing xmm0.
372define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
373; SSE2-LABEL: shuffle_v2i64_12:
374; SSE2:       # BB#0:
375; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
376; SSE2-NEXT:    retq
377;
378; SSE3-LABEL: shuffle_v2i64_12:
379; SSE3:       # BB#0:
380; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
381; SSE3-NEXT:    retq
382;
383; SSSE3-LABEL: shuffle_v2i64_12:
384; SSSE3:       # BB#0:
385; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
386; SSSE3-NEXT:    movdqa %xmm1, %xmm0
387; SSSE3-NEXT:    retq
388;
389; SSE41-LABEL: shuffle_v2i64_12:
390; SSE41:       # BB#0:
391; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
392; SSE41-NEXT:    movdqa %xmm1, %xmm0
393; SSE41-NEXT:    retq
394;
395; AVX-LABEL: shuffle_v2i64_12:
396; AVX:       # BB#0:
397; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
398; AVX-NEXT:    retq
399  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
400  ret <2 x i64> %shuffle
401}
; Mask <1,2> again (%a[1], %b[0]), but with an extra leading argument %nonce
; that the body never uses, so the expected code reads xmm1/xmm2 and must
; deliver the result in xmm0 (%nonce presumably occupies xmm0 on entry).
; Under the SSE2 and SSE3 prefixes the checks expect shufpd in place on xmm1
; plus a movapd copy; under the SSSE3 and SSE41 prefixes palignr into xmm2
; plus a movdqa copy; under the AVX prefix one vpalignr writing xmm0 directly.
402define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
403; SSE2-LABEL: shuffle_v2i64_12_copy:
404; SSE2:       # BB#0:
405; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
406; SSE2-NEXT:    movapd %xmm1, %xmm0
407; SSE2-NEXT:    retq
408;
409; SSE3-LABEL: shuffle_v2i64_12_copy:
410; SSE3:       # BB#0:
411; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
412; SSE3-NEXT:    movapd %xmm1, %xmm0
413; SSE3-NEXT:    retq
414;
415; SSSE3-LABEL: shuffle_v2i64_12_copy:
416; SSSE3:       # BB#0:
417; SSSE3-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
418; SSSE3-NEXT:    movdqa %xmm2, %xmm0
419; SSSE3-NEXT:    retq
420;
421; SSE41-LABEL: shuffle_v2i64_12_copy:
422; SSE41:       # BB#0:
423; SSE41-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
424; SSE41-NEXT:    movdqa %xmm2, %xmm0
425; SSE41-NEXT:    retq
426;
427; AVX-LABEL: shuffle_v2i64_12_copy:
428; AVX:       # BB#0:
429; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
430; AVX-NEXT:    retq
431  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
432  ret <2 x i64> %shuffle
433}
; v2i64 shuffle with mask <1,3>: lane 0 is %a[1] and lane 1 is %b[1], i.e. the
; high elements of both inputs. The checks expect a single
; punpckhqdq/vpunpckhqdq of xmm0 with xmm1.
434define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
435; SSE-LABEL: shuffle_v2i64_13:
436; SSE:       # BB#0:
437; SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
438; SSE-NEXT:    retq
439;
440; AVX-LABEL: shuffle_v2i64_13:
441; AVX:       # BB#0:
442; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
443; AVX-NEXT:    retq
444  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
445  ret <2 x i64> %shuffle
446}
; Mask <1,3> again (%a[1], %b[1]), but with an extra leading argument %nonce
; that the body never uses, so the expected code reads xmm1/xmm2 and must
; deliver the result in xmm0 (%nonce presumably occupies xmm0 on entry).
; The SSE checks expect punpckhqdq in place on xmm1 plus a movdqa copy into
; xmm0; the AVX checks expect one vpunpckhqdq writing xmm0 directly.
447define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
448; SSE-LABEL: shuffle_v2i64_13_copy:
449; SSE:       # BB#0:
450; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
451; SSE-NEXT:    movdqa %xmm1, %xmm0
452; SSE-NEXT:    retq
453;
454; AVX-LABEL: shuffle_v2i64_13_copy:
455; AVX:       # BB#0:
456; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
457; AVX-NEXT:    retq
458  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
459  ret <2 x i64> %shuffle
460}
; v2i64 shuffle with mask <2,0>: lane 0 is %b[0] and lane 1 is %a[0], the low
; elements of both inputs in swapped operand order. The SSE checks expect
; punpcklqdq performed in place on xmm1 followed by a movdqa copy into xmm0;
; the AVX checks expect one vpunpcklqdq with xmm1 as the first source.
461define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
462; SSE-LABEL: shuffle_v2i64_20:
463; SSE:       # BB#0:
464; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
465; SSE-NEXT:    movdqa %xmm1, %xmm0
466; SSE-NEXT:    retq
467;
468; AVX-LABEL: shuffle_v2i64_20:
469; AVX:       # BB#0:
470; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
471; AVX-NEXT:    retq
472  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
473  ret <2 x i64> %shuffle
474}
; Mask <2,0> again (%b[0], %a[0]), but with an extra leading argument %nonce
; that the body never uses, so the expected code reads xmm1/xmm2 and must
; deliver the result in xmm0 (%nonce presumably occupies xmm0 on entry).
; The SSE checks expect punpcklqdq in place on xmm2 plus a movdqa copy into
; xmm0; the AVX checks expect one vpunpcklqdq writing xmm0 directly.
475define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
476; SSE-LABEL: shuffle_v2i64_20_copy:
477; SSE:       # BB#0:
478; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
479; SSE-NEXT:    movdqa %xmm2, %xmm0
480; SSE-NEXT:    retq
481;
482; AVX-LABEL: shuffle_v2i64_20_copy:
483; AVX:       # BB#0:
484; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
485; AVX-NEXT:    retq
486  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
487  ret <2 x i64> %shuffle
488}
; v2i64 blend with mask <2,1>: lane 0 comes from %b[0] and lane 1 from %a[1].
; Under the SSE2, SSE3 and SSSE3 prefixes the checks expect a single movsd
; merging the low element of xmm1 into xmm0. The SSE41 and AVX1 prefixes
; expect pblendw/vpblendw, and the AVX2 prefix expects vpblendd.
489define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
490; SSE2-LABEL: shuffle_v2i64_21:
491; SSE2:       # BB#0:
492; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
493; SSE2-NEXT:    retq
494;
495; SSE3-LABEL: shuffle_v2i64_21:
496; SSE3:       # BB#0:
497; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
498; SSE3-NEXT:    retq
499;
500; SSSE3-LABEL: shuffle_v2i64_21:
501; SSSE3:       # BB#0:
502; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
503; SSSE3-NEXT:    retq
504;
505; SSE41-LABEL: shuffle_v2i64_21:
506; SSE41:       # BB#0:
507; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
508; SSE41-NEXT:    retq
509;
510; AVX1-LABEL: shuffle_v2i64_21:
511; AVX1:       # BB#0:
512; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
513; AVX1-NEXT:    retq
514;
515; AVX2-LABEL: shuffle_v2i64_21:
516; AVX2:       # BB#0:
517; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
518; AVX2-NEXT:    retq
519  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
520  ret <2 x i64> %shuffle
521}
; Mask <2,1> again (%b[0], %a[1]), but with an extra leading argument %nonce
; that the body never uses, so the expected code reads xmm1/xmm2 and must
; deliver the result in xmm0 (%nonce presumably occupies xmm0 on entry).
; Under the SSE2, SSE3 and SSSE3 prefixes the checks expect movsd into xmm1
; plus a movapd copy; the SSE41 prefix expects pblendw in place on xmm1 plus a
; movdqa copy; the AVX1/AVX2 prefixes expect a single vpblendw/vpblendd
; writing xmm0 directly.
522define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
523; SSE2-LABEL: shuffle_v2i64_21_copy:
524; SSE2:       # BB#0:
525; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
526; SSE2-NEXT:    movapd %xmm1, %xmm0
527; SSE2-NEXT:    retq
528;
529; SSE3-LABEL: shuffle_v2i64_21_copy:
530; SSE3:       # BB#0:
531; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
532; SSE3-NEXT:    movapd %xmm1, %xmm0
533; SSE3-NEXT:    retq
534;
535; SSSE3-LABEL: shuffle_v2i64_21_copy:
536; SSSE3:       # BB#0:
537; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
538; SSSE3-NEXT:    movapd %xmm1, %xmm0
539; SSSE3-NEXT:    retq
540;
541; SSE41-LABEL: shuffle_v2i64_21_copy:
542; SSE41:       # BB#0:
543; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
544; SSE41-NEXT:    movdqa %xmm1, %xmm0
545; SSE41-NEXT:    retq
546;
547; AVX1-LABEL: shuffle_v2i64_21_copy:
548; AVX1:       # BB#0:
549; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
550; AVX1-NEXT:    retq
551;
552; AVX2-LABEL: shuffle_v2i64_21_copy:
553; AVX2:       # BB#0:
554; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
555; AVX2-NEXT:    retq
556  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
557  ret <2 x i64> %shuffle
558}
; v2i64 shuffle with mask <3,0>: lane 0 is %b[1] and lane 1 is %a[0]. Under
; the SSE2 and SSE3 prefixes the checks expect shufpd performed in place on
; xmm1 followed by a movapd copy into xmm0. Under the SSSE3 and SSE41 prefixes
; they expect a single palignr writing xmm0, and under the AVX prefix a single
; vpalignr.
559define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
560; SSE2-LABEL: shuffle_v2i64_30:
561; SSE2:       # BB#0:
562; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
563; SSE2-NEXT:    movapd %xmm1, %xmm0
564; SSE2-NEXT:    retq
565;
566; SSE3-LABEL: shuffle_v2i64_30:
567; SSE3:       # BB#0:
568; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
569; SSE3-NEXT:    movapd %xmm1, %xmm0
570; SSE3-NEXT:    retq
571;
572; SSSE3-LABEL: shuffle_v2i64_30:
573; SSSE3:       # BB#0:
574; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
575; SSSE3-NEXT:    retq
576;
577; SSE41-LABEL: shuffle_v2i64_30:
578; SSE41:       # BB#0:
579; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
580; SSE41-NEXT:    retq
581;
582; AVX-LABEL: shuffle_v2i64_30:
583; AVX:       # BB#0:
584; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
585; AVX-NEXT:    retq
586  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
587  ret <2 x i64> %shuffle
588}
; Mask <3,0> again (%b[1], %a[0]), but with an extra leading argument %nonce
; that the body never uses, so the expected code reads xmm1/xmm2 and must
; deliver the result in xmm0 (%nonce presumably occupies xmm0 on entry).
; Under the SSE2 and SSE3 prefixes the checks expect shufpd in place on xmm2
; plus a movapd copy; under the SSSE3 and SSE41 prefixes palignr into xmm1
; plus a movdqa copy; under the AVX prefix one vpalignr writing xmm0 directly.
589define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
590; SSE2-LABEL: shuffle_v2i64_30_copy:
591; SSE2:       # BB#0:
592; SSE2-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
593; SSE2-NEXT:    movapd %xmm2, %xmm0
594; SSE2-NEXT:    retq
595;
596; SSE3-LABEL: shuffle_v2i64_30_copy:
597; SSE3:       # BB#0:
598; SSE3-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
599; SSE3-NEXT:    movapd %xmm2, %xmm0
600; SSE3-NEXT:    retq
601;
602; SSSE3-LABEL: shuffle_v2i64_30_copy:
603; SSSE3:       # BB#0:
604; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
605; SSSE3-NEXT:    movdqa %xmm1, %xmm0
606; SSSE3-NEXT:    retq
607;
608; SSE41-LABEL: shuffle_v2i64_30_copy:
609; SSE41:       # BB#0:
610; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
611; SSE41-NEXT:    movdqa %xmm1, %xmm0
612; SSE41-NEXT:    retq
613;
614; AVX-LABEL: shuffle_v2i64_30_copy:
615; AVX:       # BB#0:
616; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
617; AVX-NEXT:    retq
618  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
619  ret <2 x i64> %shuffle
620}
; v2i64 shuffle with mask <3,1>: lane 0 is %b[1] and lane 1 is %a[1], the high
; elements of both inputs in swapped operand order. The SSE checks expect
; punpckhqdq performed in place on xmm1 followed by a movdqa copy into xmm0;
; the AVX checks expect one vpunpckhqdq with xmm1 as the first source.
621define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
622; SSE-LABEL: shuffle_v2i64_31:
623; SSE:       # BB#0:
624; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
625; SSE-NEXT:    movdqa %xmm1, %xmm0
626; SSE-NEXT:    retq
627;
628; AVX-LABEL: shuffle_v2i64_31:
629; AVX:       # BB#0:
630; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
631; AVX-NEXT:    retq
632  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
633  ret <2 x i64> %shuffle
634}
; Mask <3,1> again (%b[1], %a[1]), but with an extra leading argument %nonce
; that the body never uses, so the expected code reads xmm1/xmm2 and must
; deliver the result in xmm0 (%nonce presumably occupies xmm0 on entry).
; The SSE checks expect punpckhqdq in place on xmm2 plus a movdqa copy into
; xmm0; the AVX checks expect one vpunpckhqdq writing xmm0 directly.
635define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
636; SSE-LABEL: shuffle_v2i64_31_copy:
637; SSE:       # BB#0:
638; SSE-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
639; SSE-NEXT:    movdqa %xmm2, %xmm0
640; SSE-NEXT:    retq
641;
642; AVX-LABEL: shuffle_v2i64_31_copy:
643; AVX:       # BB#0:
644; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
645; AVX-NEXT:    retq
646  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
647  ret <2 x i64> %shuffle
648}
649
; v2i64 shuffle of %a with an all-zero vector, mask <0,3>: lane 0 keeps %a[0]
; and lane 1 is taken from the zero vector. The checks expect a single
; movq/vmovq that keeps the low element and zeroes the high one, with no
; separate zero register being materialised.
650define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
651; SSE-LABEL: shuffle_v2i64_0z:
652; SSE:       # BB#0:
653; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
654; SSE-NEXT:    retq
655;
656; AVX-LABEL: shuffle_v2i64_0z:
657; AVX:       # BB#0:
658; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
659; AVX-NEXT:    retq
660  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
661  ret <2 x i64> %shuffle
662}
663
; v2i64 shuffle of %a with an all-zero vector, mask <1,3>: lane 0 takes %a[1]
; and lane 1 is taken from the zero vector. The checks expect a single
; psrldq/vpsrldq byte shift that moves bytes 8-15 of xmm0 down and fills the
; upper eight bytes with zeros.
664define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
665; SSE-LABEL: shuffle_v2i64_1z:
666; SSE:       # BB#0:
667; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
668; SSE-NEXT:    retq
669;
670; AVX-LABEL: shuffle_v2i64_1z:
671; AVX:       # BB#0:
672; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
673; AVX-NEXT:    retq
674  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
675  ret <2 x i64> %shuffle
676}
677
; v2i64 shuffle of %a with an all-zero vector, mask <2,0>: lane 0 is taken
; from the zero vector and lane 1 takes %a[0]. The checks expect a single
; pslldq/vpslldq byte shift that moves bytes 0-7 of xmm0 up and fills the
; lower eight bytes with zeros.
678define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
679; SSE-LABEL: shuffle_v2i64_z0:
680; SSE:       # BB#0:
681; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
682; SSE-NEXT:    retq
683;
684; AVX-LABEL: shuffle_v2i64_z0:
685; AVX:       # BB#0:
686; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
687; AVX-NEXT:    retq
688  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
689  ret <2 x i64> %shuffle
690}
691
; v2i64 shuffle of %a with an all-zero vector, mask <2,1>: lane 0 is taken
; from the zero vector and lane 1 keeps %a[1]. Every configuration is expected
; to zero xmm1 first and then merge it into xmm0. The SSE2, SSE3 and SSSE3
; prefixes expect xorpd + movsd, SSE41 expects pxor + pblendw, AVX1 expects
; vpxor + vpblendw, and AVX2 expects vpxor + vpblendd.
692define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
693; SSE2-LABEL: shuffle_v2i64_z1:
694; SSE2:       # BB#0:
695; SSE2-NEXT:    xorpd %xmm1, %xmm1
696; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
697; SSE2-NEXT:    retq
698;
699; SSE3-LABEL: shuffle_v2i64_z1:
700; SSE3:       # BB#0:
701; SSE3-NEXT:    xorpd %xmm1, %xmm1
702; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
703; SSE3-NEXT:    retq
704;
705; SSSE3-LABEL: shuffle_v2i64_z1:
706; SSSE3:       # BB#0:
707; SSSE3-NEXT:    xorpd %xmm1, %xmm1
708; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
709; SSSE3-NEXT:    retq
710;
711; SSE41-LABEL: shuffle_v2i64_z1:
712; SSE41:       # BB#0:
713; SSE41-NEXT:    pxor %xmm1, %xmm1
714; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
715; SSE41-NEXT:    retq
716;
717; AVX1-LABEL: shuffle_v2i64_z1:
718; AVX1:       # BB#0:
719; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
720; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
721; AVX1-NEXT:    retq
722;
723; AVX2-LABEL: shuffle_v2i64_z1:
724; AVX2:       # BB#0:
725; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
726; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
727; AVX2-NEXT:    retq
728  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
729  ret <2 x i64> %shuffle
730}
731
; v2f64 shuffle of %a with an all-zero vector, mask <0,3>: lane 0 keeps %a[0]
; and lane 1 is taken from the zero vector. The checks expect a single
; movq/vmovq that keeps the low element and zeroes the high one.
732define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
733; SSE-LABEL: shuffle_v2f64_0z:
734; SSE:       # BB#0:
735; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
736; SSE-NEXT:    retq
737;
738; AVX-LABEL: shuffle_v2f64_0z:
739; AVX:       # BB#0:
740; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
741; AVX-NEXT:    retq
742  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
743  ret <2 x double> %shuffle
744}
745
; v2f64 shuffle of %a with an all-zero vector, mask <1,3>: lane 0 takes %a[1]
; and lane 1 is taken from the zero vector. The checks expect xmm1 to be
; zeroed with xorpd/vxorpd and then combined with xmm0 by unpckhpd/vunpckhpd.
746define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
747; SSE-LABEL: shuffle_v2f64_1z:
748; SSE:       # BB#0:
749; SSE-NEXT:    xorpd %xmm1, %xmm1
750; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
751; SSE-NEXT:    retq
752;
753; AVX-LABEL: shuffle_v2f64_1z:
754; AVX:       # BB#0:
755; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
756; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
757; AVX-NEXT:    retq
758  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
759  ret <2 x double> %shuffle
760}
761
; v2f64 shuffle of %a with an all-zero vector, mask <2,0>: lane 0 is taken
; from the zero vector and lane 1 takes %a[0]. The SSE checks expect xorpd to
; zero xmm1, unpcklpd performed in place on xmm1, and a movapd copy into xmm0;
; the AVX checks expect vxorpd followed by one vunpcklpd writing xmm0.
762define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
763; SSE-LABEL: shuffle_v2f64_z0:
764; SSE:       # BB#0:
765; SSE-NEXT:    xorpd %xmm1, %xmm1
766; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
767; SSE-NEXT:    movapd %xmm1, %xmm0
768; SSE-NEXT:    retq
769;
770; AVX-LABEL: shuffle_v2f64_z0:
771; AVX:       # BB#0:
772; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
773; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
774; AVX-NEXT:    retq
775  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
776  ret <2 x double> %shuffle
777}
778
; v2f64 shuffle of %a with an all-zero vector, mask <2,1>: lane 0 is taken
; from the zero vector and lane 1 keeps %a[1]. Every configuration is expected
; to zero xmm1 with xorpd/vxorpd and then merge it into xmm0. The SSE2, SSE3
; and SSSE3 prefixes expect movsd, SSE41 expects blendpd, and the AVX prefix
; expects vblendpd.
779define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
780; SSE2-LABEL: shuffle_v2f64_z1:
781; SSE2:       # BB#0:
782; SSE2-NEXT:    xorpd %xmm1, %xmm1
783; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
784; SSE2-NEXT:    retq
785;
786; SSE3-LABEL: shuffle_v2f64_z1:
787; SSE3:       # BB#0:
788; SSE3-NEXT:    xorpd %xmm1, %xmm1
789; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
790; SSE3-NEXT:    retq
791;
792; SSSE3-LABEL: shuffle_v2f64_z1:
793; SSSE3:       # BB#0:
794; SSSE3-NEXT:    xorpd %xmm1, %xmm1
795; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
796; SSSE3-NEXT:    retq
797;
798; SSE41-LABEL: shuffle_v2f64_z1:
799; SSE41:       # BB#0:
800; SSE41-NEXT:    xorpd %xmm1, %xmm1
801; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
802; SSE41-NEXT:    retq
803;
804; AVX-LABEL: shuffle_v2f64_z1:
805; AVX:       # BB#0:
806; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
807; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
808; AVX-NEXT:    retq
809  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
810  ret <2 x double> %shuffle
811}
812
; Two chained shuffles separated by bitcasts. The first (v2f64, mask <2,1>)
; builds [zero, %a[1]]. The result is bitcast to <4 x float> and shuffled with
; mask <2,3,0,1>, which swaps the two 64-bit halves, then bitcast back to
; v2f64, giving [%a[1], zero] overall. The checks expect the pair to be folded
; into one shuffle: xorpd/vxorpd to zero xmm1, then a single shufpd/vshufpd
; selecting xmm0[1] and xmm1[0].
813define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
814; SSE-LABEL: shuffle_v2f64_bitcast_1z:
815; SSE:       # BB#0:
816; SSE-NEXT:    xorpd %xmm1, %xmm1
817; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
818; SSE-NEXT:    retq
819;
820; AVX-LABEL: shuffle_v2f64_bitcast_1z:
821; AVX:       # BB#0:
822; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
823; AVX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
824; AVX-NEXT:    retq
825  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
826  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
827  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
828  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
829  ret <2 x double> %bitcast64
830}
831
; Inserts the scalar i64 %a into lane 0 of an undef vector, then shuffles with
; an all-zero vector using mask <0,3>, giving [%a, 0]. The checks expect the
; whole sequence to become one GPR-to-XMM move from %rdi, printed as movd in
; the SSE checks and as vmovq in the AVX checks.
832define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
833; SSE-LABEL: insert_reg_and_zero_v2i64:
834; SSE:       # BB#0:
835; SSE-NEXT:    movd %rdi, %xmm0
836; SSE-NEXT:    retq
837;
838; AVX-LABEL: insert_reg_and_zero_v2i64:
839; AVX:       # BB#0:
840; AVX-NEXT:    vmovq %rdi, %xmm0
841; AVX-NEXT:    retq
842  %v = insertelement <2 x i64> undef, i64 %a, i32 0
843  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
844  ret <2 x i64> %shuffle
845}
846
; Loads an i64 from %ptr, inserts it into lane 0 of an undef vector, then
; shuffles with an all-zero vector using mask <0,3>, giving [*%ptr, 0]. The
; checks expect the load, insert and zeroing to fold into a single movq/vmovq
; from memory.
847define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
848; SSE-LABEL: insert_mem_and_zero_v2i64:
849; SSE:       # BB#0:
850; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
851; SSE-NEXT:    retq
852;
853; AVX-LABEL: insert_mem_and_zero_v2i64:
854; AVX:       # BB#0:
855; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
856; AVX-NEXT:    retq
857  %a = load i64, i64* %ptr
858  %v = insertelement <2 x i64> undef, i64 %a, i32 0
859  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
860  ret <2 x i64> %shuffle
861}
862
; Inserts the scalar double %a into lane 0 of an undef vector, then shuffles
; with an all-zero vector using mask <0,3>, giving [%a, 0.0]. The checks
; expect a single movq/vmovq on xmm0 that keeps the low element and zeroes the
; high one.
863define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
864; SSE-LABEL: insert_reg_and_zero_v2f64:
865; SSE:       # BB#0:
866; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
867; SSE-NEXT:    retq
868;
869; AVX-LABEL: insert_reg_and_zero_v2f64:
870; AVX:       # BB#0:
871; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
872; AVX-NEXT:    retq
873  %v = insertelement <2 x double> undef, double %a, i32 0
874  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
875  ret <2 x double> %shuffle
876}
877
; Loads a double from %ptr, inserts it into lane 0 of an undef vector, then
; shuffles with an all-zero vector using mask <0,3>, giving [*%ptr, 0.0]. The
; checks expect the load, insert and zeroing to fold into a single
; movsd/vmovsd from memory.
878define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
879; SSE-LABEL: insert_mem_and_zero_v2f64:
880; SSE:       # BB#0:
881; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
882; SSE-NEXT:    retq
883;
884; AVX-LABEL: insert_mem_and_zero_v2f64:
885; AVX:       # BB#0:
886; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
887; AVX-NEXT:    retq
888  %a = load double, double* %ptr
889  %v = insertelement <2 x double> undef, double %a, i32 0
890  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
891  ret <2 x double> %shuffle
892}
893
894define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
; Replaces the low lane of %b with the scalar %a: mask <0, 3> picks lane 0 of
; the inserted vector and lane 1 of %b. All runs first move %rdi into xmm1;
; the merge instruction then differs per feature level: movsd for the SSE2,
; SSE3 and SSSE3 runs, pblendw / vpblendw for the SSE4.1 and AVX1 runs, and
; vpblendd for the AVX2 run.
;
895; SSE2-LABEL: insert_reg_lo_v2i64:
896; SSE2:       # BB#0:
897; SSE2-NEXT:    movd %rdi, %xmm1
898; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
899; SSE2-NEXT:    retq
900;
901; SSE3-LABEL: insert_reg_lo_v2i64:
902; SSE3:       # BB#0:
903; SSE3-NEXT:    movd %rdi, %xmm1
904; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
905; SSE3-NEXT:    retq
906;
907; SSSE3-LABEL: insert_reg_lo_v2i64:
908; SSSE3:       # BB#0:
909; SSSE3-NEXT:    movd %rdi, %xmm1
910; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
911; SSSE3-NEXT:    retq
912;
913; SSE41-LABEL: insert_reg_lo_v2i64:
914; SSE41:       # BB#0:
915; SSE41-NEXT:    movd %rdi, %xmm1
916; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
917; SSE41-NEXT:    retq
918;
919; AVX1-LABEL: insert_reg_lo_v2i64:
920; AVX1:       # BB#0:
921; AVX1-NEXT:    vmovq %rdi, %xmm1
922; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
923; AVX1-NEXT:    retq
924;
925; AVX2-LABEL: insert_reg_lo_v2i64:
926; AVX2:       # BB#0:
927; AVX2-NEXT:    vmovq %rdi, %xmm1
928; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
929; AVX2-NEXT:    retq
930  %v = insertelement <2 x i64> undef, i64 %a, i32 0
931  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
932  ret <2 x i64> %shuffle
933}
934
935define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
; Replaces the low lane of %b with an i64 loaded from %ptr (mask <0, 3>).
; The SSE2, SSE3 and SSSE3 runs expect the load to be folded into a single
; movlpd from (%rdi) into xmm0. The SSE4.1, AVX1 and AVX2 runs instead expect
; a movq / vmovq load into xmm1 followed by a blend (pblendw, vpblendw or
; vpblendd respectively).
;
936; SSE2-LABEL: insert_mem_lo_v2i64:
937; SSE2:       # BB#0:
938; SSE2-NEXT:    movlpd (%rdi), %xmm0
939; SSE2-NEXT:    retq
940;
941; SSE3-LABEL: insert_mem_lo_v2i64:
942; SSE3:       # BB#0:
943; SSE3-NEXT:    movlpd (%rdi), %xmm0
944; SSE3-NEXT:    retq
945;
946; SSSE3-LABEL: insert_mem_lo_v2i64:
947; SSSE3:       # BB#0:
948; SSSE3-NEXT:    movlpd (%rdi), %xmm0
949; SSSE3-NEXT:    retq
950;
951; SSE41-LABEL: insert_mem_lo_v2i64:
952; SSE41:       # BB#0:
953; SSE41-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
954; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
955; SSE41-NEXT:    retq
956;
957; AVX1-LABEL: insert_mem_lo_v2i64:
958; AVX1:       # BB#0:
959; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
960; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
961; AVX1-NEXT:    retq
962;
963; AVX2-LABEL: insert_mem_lo_v2i64:
964; AVX2:       # BB#0:
965; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
966; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
967; AVX2-NEXT:    retq
968  %a = load i64, i64* %ptr
969  %v = insertelement <2 x i64> undef, i64 %a, i32 0
970  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
971  ret <2 x i64> %shuffle
972}
973
974define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
; Replaces the high lane of %b with the scalar %a: mask <2, 0> takes lane 0
; of %b for result lane 0 and lane 0 of the inserted vector for result
; lane 1. The checks expect %rdi to be moved into xmm1 and then combined
; with xmm0 by punpcklqdq / vpunpcklqdq.
;
975; SSE-LABEL: insert_reg_hi_v2i64:
976; SSE:       # BB#0:
977; SSE-NEXT:    movd %rdi, %xmm1
978; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
979; SSE-NEXT:    retq
980;
981; AVX-LABEL: insert_reg_hi_v2i64:
982; AVX:       # BB#0:
983; AVX-NEXT:    vmovq %rdi, %xmm1
984; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
985; AVX-NEXT:    retq
986  %v = insertelement <2 x i64> undef, i64 %a, i32 0
987  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
988  ret <2 x i64> %shuffle
989}
990
991define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
; Replaces the high lane of %b with an i64 loaded from %ptr (mask <2, 0>).
; The checks expect the load to stay a separate movq / vmovq into xmm1,
; followed by punpcklqdq / vpunpcklqdq to form the result in xmm0.
;
992; SSE-LABEL: insert_mem_hi_v2i64:
993; SSE:       # BB#0:
994; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
995; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
996; SSE-NEXT:    retq
997;
998; AVX-LABEL: insert_mem_hi_v2i64:
999; AVX:       # BB#0:
1000; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
1001; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1002; AVX-NEXT:    retq
1003  %a = load i64, i64* %ptr
1004  %v = insertelement <2 x i64> undef, i64 %a, i32 0
1005  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1006  ret <2 x i64> %shuffle
1007}
1008
1009define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; Replaces the low lane of %b with the scalar double %a (mask <0, 3>). In
; the checks %a is in xmm0 and %b in xmm1. The SSE runs expect movsd to
; merge into xmm1 and a movapd to copy the result back to xmm0; the AVX runs
; expect a single vmovsd that writes xmm0 directly.
;
1010; SSE-LABEL: insert_reg_lo_v2f64:
1011; SSE:       # BB#0:
1012; SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1013; SSE-NEXT:    movapd %xmm1, %xmm0
1014; SSE-NEXT:    retq
1015;
1016; AVX-LABEL: insert_reg_lo_v2f64:
1017; AVX:       # BB#0:
1018; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1019; AVX-NEXT:    retq
1020  %v = insertelement <2 x double> undef, double %a, i32 0
1021  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1022  ret <2 x double> %shuffle
1023}
1024
1025define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
; Replaces the low lane of %b with a double loaded from %ptr (mask <0, 3>).
; The checks expect the load to be folded into one movlpd / vmovlpd from
; (%rdi) that updates xmm0.
;
1026; SSE-LABEL: insert_mem_lo_v2f64:
1027; SSE:       # BB#0:
1028; SSE-NEXT:    movlpd (%rdi), %xmm0
1029; SSE-NEXT:    retq
1030;
1031; AVX-LABEL: insert_mem_lo_v2f64:
1032; AVX:       # BB#0:
1033; AVX-NEXT:    vmovlpd (%rdi), %xmm0, %xmm0
1034; AVX-NEXT:    retq
1035  %a = load double, double* %ptr
1036  %v = insertelement <2 x double> undef, double %a, i32 0
1037  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1038  ret <2 x double> %shuffle
1039}
1040
1041define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
; Replaces the high lane of %b with the scalar double %a: mask <2, 0> takes
; lane 0 of %b for result lane 0 and %a for result lane 1. In the checks %a
; is in xmm0 and %b in xmm1. The SSE runs expect unpcklpd into xmm1 plus a
; movapd copy to xmm0; the AVX runs expect a single vunpcklpd writing xmm0.
;
1042; SSE-LABEL: insert_reg_hi_v2f64:
1043; SSE:       # BB#0:
1044; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1045; SSE-NEXT:    movapd %xmm1, %xmm0
1046; SSE-NEXT:    retq
1047;
1048; AVX-LABEL: insert_reg_hi_v2f64:
1049; AVX:       # BB#0:
1050; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1051; AVX-NEXT:    retq
1052  %v = insertelement <2 x double> undef, double %a, i32 0
1053  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1054  ret <2 x double> %shuffle
1055}
1056
1057define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
; Replaces the high lane of %b with a double loaded from %ptr (mask <2, 0>).
; The checks expect the load to be folded into one movhpd / vmovhpd from
; (%rdi) that updates xmm0.
;
1058; SSE-LABEL: insert_mem_hi_v2f64:
1059; SSE:       # BB#0:
1060; SSE-NEXT:    movhpd (%rdi), %xmm0
1061; SSE-NEXT:    retq
1062;
1063; AVX-LABEL: insert_mem_hi_v2f64:
1064; AVX:       # BB#0:
1065; AVX-NEXT:    vmovhpd (%rdi), %xmm0, %xmm0
1066; AVX-NEXT:    retq
1067  %a = load double, double* %ptr
1068  %v = insertelement <2 x double> undef, double %a, i32 0
1069  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1070  ret <2 x double> %shuffle
1071}
1072
1073define <2 x double> @insert_dup_reg_v2f64(double %a) {
1074; Splats the scalar double argument into both lanes (shuffle mask <0, 0>).
; The SSE2 run is expected to use movlhps, while the SSE3, SSSE3, SSE4.1 and
; AVX runs below all expect movddup / vmovddup on xmm0.
1075;
1076; SSE2-LABEL: insert_dup_reg_v2f64:
1077; SSE2:       # BB#0:
1078; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
1079; SSE2-NEXT:    retq
1080;
1081; SSE3-LABEL: insert_dup_reg_v2f64:
1082; SSE3:       # BB#0:
1083; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
1084; SSE3-NEXT:    retq
1085;
1086; SSSE3-LABEL: insert_dup_reg_v2f64:
1087; SSSE3:       # BB#0:
1088; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
1089; SSSE3-NEXT:    retq
1090;
1091; SSE41-LABEL: insert_dup_reg_v2f64:
1092; SSE41:       # BB#0:
1093; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
1094; SSE41-NEXT:    retq
1095;
1096; AVX-LABEL: insert_dup_reg_v2f64:
1097; AVX:       # BB#0:
1098; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1099; AVX-NEXT:    retq
1100  %v = insertelement <2 x double> undef, double %a, i32 0
1101  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1102  ret <2 x double> %shuffle
1103}
1104define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
; Splats a double loaded from %ptr into both lanes (shuffle mask <0, 0>).
; The SSE2 run expects a movsd load followed by movlhps. The SSE3, SSSE3,
; SSE4.1 and AVX runs expect the load to be folded into a single
; movddup / vmovddup with a memory operand.
;
1105; SSE2-LABEL: insert_dup_mem_v2f64:
1106; SSE2:       # BB#0:
1107; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1108; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
1109; SSE2-NEXT:    retq
1110;
1111; SSE3-LABEL: insert_dup_mem_v2f64:
1112; SSE3:       # BB#0:
1113; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1114; SSE3-NEXT:    retq
1115;
1116; SSSE3-LABEL: insert_dup_mem_v2f64:
1117; SSSE3:       # BB#0:
1118; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1119; SSSE3-NEXT:    retq
1120;
1121; SSE41-LABEL: insert_dup_mem_v2f64:
1122; SSE41:       # BB#0:
1123; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
1124; SSE41-NEXT:    retq
1125;
1126; AVX-LABEL: insert_dup_mem_v2f64:
1127; AVX:       # BB#0:
1128; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
1129; AVX-NEXT:    retq
1130  %a = load double, double* %ptr
1131  %v = insertelement <2 x double> undef, double %a, i32 0
1132  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1133  ret <2 x double> %shuffle
1134}
1135
1136define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
; Loads a whole <2 x double> from %ptr and swaps its two lanes (mask <1, 0>).
; The SSE runs expect a separate movapd load followed by shufpd on xmm0; the
; AVX runs expect the load to be folded into vpermilpd as a memory operand.
;
1137; SSE-LABEL: shuffle_mem_v2f64_10:
1138; SSE:       # BB#0:
1139; SSE-NEXT:    movapd (%rdi), %xmm0
1140; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
1141; SSE-NEXT:    retq
1142;
1143; AVX-LABEL: shuffle_mem_v2f64_10:
1144; AVX:       # BB#0:
1145; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = mem[1,0]
1146; AVX-NEXT:    retq
1147  %a = load <2 x double>, <2 x double>* %ptr
1148  %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1149  ret <2 x double> %shuffle
1150}
1151