1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX1
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX2
4
5;
6; Unary shuffle indices from registers
7;
8
; Builds a <4 x double> whose lanes 0..3 are the elements of %x selected by
; the run-time indices %i0..%i3 (variable-index extractelement followed by
; constant-index insertelement).
; Expected code, shared by both RUN configurations: set up a 32-byte-aligned
; frame, mask each of the four index registers with 3, store %x to the stack,
; reload the selected elements pairwise (vmovsd + vmovhps) and join the two
; 128-bit halves with vinsertf128.
9define <4 x double> @var_shuffle_v4f64_v4f64_xxxx_i64(<4 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
10; ALL-LABEL: var_shuffle_v4f64_v4f64_xxxx_i64:
11; ALL:       # %bb.0:
12; ALL-NEXT:    pushq %rbp
13; ALL-NEXT:    movq %rsp, %rbp
14; ALL-NEXT:    andq $-32, %rsp
15; ALL-NEXT:    subq $64, %rsp
16; ALL-NEXT:    andl $3, %esi
17; ALL-NEXT:    andl $3, %edi
18; ALL-NEXT:    andl $3, %ecx
19; ALL-NEXT:    andl $3, %edx
20; ALL-NEXT:    vmovaps %ymm0, (%rsp)
21; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
22; ALL-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
23; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
24; ALL-NEXT:    vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
25; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
26; ALL-NEXT:    movq %rbp, %rsp
27; ALL-NEXT:    popq %rbp
28; ALL-NEXT:    retq
29  %x0 = extractelement <4 x double> %x, i64 %i0
30  %x1 = extractelement <4 x double> %x, i64 %i1
31  %x2 = extractelement <4 x double> %x, i64 %i2
32  %x3 = extractelement <4 x double> %x, i64 %i3
33  %r0 = insertelement <4 x double> undef, double %x0, i32 0
34  %r1 = insertelement <4 x double>   %r0, double %x1, i32 1
35  %r2 = insertelement <4 x double>   %r1, double %x2, i32 2
36  %r3 = insertelement <4 x double>   %r2, double %x3, i32 3
37  ret <4 x double> %r3
38}
39
; Variant with a partially constant result: lane 0 is undef, lanes 1 and 2 are
; elements of %x selected by %i1 and %i2, and lane 3 is the constant 0.0.
; %x0 and %x3 are still extracted in the IR but are never inserted into the
; result, so only two index registers (edx, esi) are expected to be masked.
; Expected code: the low half is produced by a broadcasting load (vmovddup)
; and the high half by a vmovsd load whose upper element is zero, then the
; halves are joined with vinsertf128.
40define <4 x double> @var_shuffle_v4f64_v4f64_uxx0_i64(<4 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
41; ALL-LABEL: var_shuffle_v4f64_v4f64_uxx0_i64:
42; ALL:       # %bb.0:
43; ALL-NEXT:    pushq %rbp
44; ALL-NEXT:    movq %rsp, %rbp
45; ALL-NEXT:    andq $-32, %rsp
46; ALL-NEXT:    subq $64, %rsp
47; ALL-NEXT:    andl $3, %edx
48; ALL-NEXT:    andl $3, %esi
49; ALL-NEXT:    vmovaps %ymm0, (%rsp)
50; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
51; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
52; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
53; ALL-NEXT:    movq %rbp, %rsp
54; ALL-NEXT:    popq %rbp
55; ALL-NEXT:    retq
56  %x0 = extractelement <4 x double> %x, i64 %i0
57  %x1 = extractelement <4 x double> %x, i64 %i1
58  %x2 = extractelement <4 x double> %x, i64 %i2
59  %x3 = extractelement <4 x double> %x, i64 %i3
60  %r0 = insertelement <4 x double> undef, double undef, i32 0
61  %r1 = insertelement <4 x double>   %r0, double   %x1, i32 1
62  %r2 = insertelement <4 x double>   %r1, double   %x2, i32 2
63  %r3 = insertelement <4 x double>   %r2, double   0.0, i32 3
64  ret <4 x double> %r3
65}
66
; Widening variant: the source is a <2 x double> and the result a
; <4 x double>, each result lane being the element of %x selected by the
; matching run-time index %i0..%i3.
; Expected code: the four index registers are masked with 1 (two source
; elements), the 128-bit source is stored at a negative offset from %rsp with
; no frame setup, and the result is assembled with vmovsd + vmovhps pairs and
; a final vinsertf128.
67define <4 x double> @var_shuffle_v4f64_v2f64_xxxx_i64(<2 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
68; ALL-LABEL: var_shuffle_v4f64_v2f64_xxxx_i64:
69; ALL:       # %bb.0:
70; ALL-NEXT:    andl $1, %esi
71; ALL-NEXT:    andl $1, %edi
72; ALL-NEXT:    andl $1, %ecx
73; ALL-NEXT:    andl $1, %edx
74; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
75; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
76; ALL-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
77; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
78; ALL-NEXT:    vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
79; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
80; ALL-NEXT:    retq
81  %x0 = extractelement <2 x double> %x, i64 %i0
82  %x1 = extractelement <2 x double> %x, i64 %i1
83  %x2 = extractelement <2 x double> %x, i64 %i2
84  %x3 = extractelement <2 x double> %x, i64 %i3
85  %r0 = insertelement <4 x double> undef, double %x0, i32 0
86  %r1 = insertelement <4 x double>   %r0, double %x1, i32 1
87  %r2 = insertelement <4 x double>   %r1, double %x2, i32 2
88  %r3 = insertelement <4 x double>   %r2, double %x3, i32 3
89  ret <4 x double> %r3
90}
91
; Integer counterpart of the <4 x double> test: every lane of the <4 x i64>
; result is the element of %x selected by the matching run-time index
; %i0..%i3.
; Expected code: 32-byte-aligned frame, each index register masked with 3,
; %x stored to the stack, and each 128-bit half formed from two vmovsd loads
; combined by vmovlhps before the final vinsertf128.
92define <4 x i64> @var_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
93; ALL-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64:
94; ALL:       # %bb.0:
95; ALL-NEXT:    pushq %rbp
96; ALL-NEXT:    movq %rsp, %rbp
97; ALL-NEXT:    andq $-32, %rsp
98; ALL-NEXT:    subq $64, %rsp
99; ALL-NEXT:    andl $3, %edi
100; ALL-NEXT:    andl $3, %esi
101; ALL-NEXT:    andl $3, %edx
102; ALL-NEXT:    andl $3, %ecx
103; ALL-NEXT:    vmovaps %ymm0, (%rsp)
104; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
105; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
106; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
107; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
108; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
109; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
110; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
111; ALL-NEXT:    movq %rbp, %rsp
112; ALL-NEXT:    popq %rbp
113; ALL-NEXT:    retq
114  %x0 = extractelement <4 x i64> %x, i64 %i0
115  %x1 = extractelement <4 x i64> %x, i64 %i1
116  %x2 = extractelement <4 x i64> %x, i64 %i2
117  %x3 = extractelement <4 x i64> %x, i64 %i3
118  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
119  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
120  %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
121  %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
122  ret <4 x i64> %r3
123}
124
; Variant whose upper half is constant: lanes 0 and 1 are elements of %x
; selected by %i0 and %i1, lanes 2 and 3 are the constant 0.
; %x2 and %x3 are extracted in the IR but never used, so only edi and esi are
; expected to be masked.
; Expected code builds just the low 128 bits in xmm0 (two vmovsd loads joined
; by vmovlhps); no 128-bit insert is expected.
; NOTE(review): presumably the zero upper half comes from the VEX-encoded
; 128-bit write clearing the upper ymm bits - confirm.
125define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
126; ALL-LABEL: var_shuffle_v4i64_v4i64_xx00_i64:
127; ALL:       # %bb.0:
128; ALL-NEXT:    pushq %rbp
129; ALL-NEXT:    movq %rsp, %rbp
130; ALL-NEXT:    andq $-32, %rsp
131; ALL-NEXT:    subq $64, %rsp
132; ALL-NEXT:    andl $3, %edi
133; ALL-NEXT:    andl $3, %esi
134; ALL-NEXT:    vmovaps %ymm0, (%rsp)
135; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
136; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
137; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
138; ALL-NEXT:    movq %rbp, %rsp
139; ALL-NEXT:    popq %rbp
140; ALL-NEXT:    retq
141  %x0 = extractelement <4 x i64> %x, i64 %i0
142  %x1 = extractelement <4 x i64> %x, i64 %i1
143  %x2 = extractelement <4 x i64> %x, i64 %i2
144  %x3 = extractelement <4 x i64> %x, i64 %i3
145  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
146  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
147  %r2 = insertelement <4 x i64>   %r1, i64   0, i32 2
148  %r3 = insertelement <4 x i64>   %r2, i64   0, i32 3
149  ret <4 x i64> %r3
150}
151
; Widening integer variant: the source is a <2 x i64> and the result a
; <4 x i64>, each result lane being the element of %x selected by the
; matching run-time index %i0..%i3.
; Expected code: indices masked with 1, the 128-bit source stored at a
; negative offset from %rsp with no frame setup, halves formed from vmovsd
; loads joined by vmovlhps, then vinsertf128.
152define <4 x i64> @var_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
153; ALL-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
154; ALL:       # %bb.0:
155; ALL-NEXT:    andl $1, %edi
156; ALL-NEXT:    andl $1, %esi
157; ALL-NEXT:    andl $1, %edx
158; ALL-NEXT:    andl $1, %ecx
159; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
160; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
161; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
162; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
163; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
164; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
165; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
166; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
167; ALL-NEXT:    retq
168  %x0 = extractelement <2 x i64> %x, i64 %i0
169  %x1 = extractelement <2 x i64> %x, i64 %i1
170  %x2 = extractelement <2 x i64> %x, i64 %i2
171  %x3 = extractelement <2 x i64> %x, i64 %i3
172  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
173  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
174  %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
175  %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
176  ret <4 x i64> %r3
177}
178
; Builds an <8 x float> whose lanes 0..7 are the elements of %x selected by
; the eight run-time i32 indices %i0..%i7.
; Expected code: 32-byte-aligned frame; six indices are taken from registers
; (edi, esi, edx, ecx, r8d, r9d) and two more are loaded from 16(%rbp) and
; 24(%rbp); every index is masked with 7; %x is stored to the stack; each
; 128-bit half is built from one vmovss load followed by three vinsertps
; loads, and the halves are joined with vinsertf128.
179define <8 x float> @var_shuffle_v8f32_v8f32_xxxxxxxx_i32(<8 x float> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7) nounwind {
180; ALL-LABEL: var_shuffle_v8f32_v8f32_xxxxxxxx_i32:
181; ALL:       # %bb.0:
182; ALL-NEXT:    pushq %rbp
183; ALL-NEXT:    movq %rsp, %rbp
184; ALL-NEXT:    andq $-32, %rsp
185; ALL-NEXT:    subq $64, %rsp
186; ALL-NEXT:    # kill: def $r9d killed $r9d def $r9
187; ALL-NEXT:    # kill: def $r8d killed $r8d def $r8
188; ALL-NEXT:    # kill: def $ecx killed $ecx def $rcx
189; ALL-NEXT:    # kill: def $edx killed $edx def $rdx
190; ALL-NEXT:    # kill: def $esi killed $esi def $rsi
191; ALL-NEXT:    # kill: def $edi killed $edi def $rdi
192; ALL-NEXT:    movl 24(%rbp), %r10d
193; ALL-NEXT:    andl $7, %r10d
194; ALL-NEXT:    movl 16(%rbp), %eax
195; ALL-NEXT:    andl $7, %eax
196; ALL-NEXT:    andl $7, %edi
197; ALL-NEXT:    andl $7, %esi
198; ALL-NEXT:    andl $7, %edx
199; ALL-NEXT:    andl $7, %ecx
200; ALL-NEXT:    andl $7, %r8d
201; ALL-NEXT:    vmovaps %ymm0, (%rsp)
202; ALL-NEXT:    andl $7, %r9d
203; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
204; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
205; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
206; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
207; ALL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
208; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
209; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
210; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
211; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
212; ALL-NEXT:    movq %rbp, %rsp
213; ALL-NEXT:    popq %rbp
214; ALL-NEXT:    retq
215  %x0 = extractelement <8 x float> %x, i32 %i0
216  %x1 = extractelement <8 x float> %x, i32 %i1
217  %x2 = extractelement <8 x float> %x, i32 %i2
218  %x3 = extractelement <8 x float> %x, i32 %i3
219  %x4 = extractelement <8 x float> %x, i32 %i4
220  %x5 = extractelement <8 x float> %x, i32 %i5
221  %x6 = extractelement <8 x float> %x, i32 %i6
222  %x7 = extractelement <8 x float> %x, i32 %i7
223  %r0 = insertelement <8 x float> undef, float %x0, i32 0
224  %r1 = insertelement <8 x float>   %r0, float %x1, i32 1
225  %r2 = insertelement <8 x float>   %r1, float %x2, i32 2
226  %r3 = insertelement <8 x float>   %r2, float %x3, i32 3
227  %r4 = insertelement <8 x float>   %r3, float %x4, i32 4
228  %r5 = insertelement <8 x float>   %r4, float %x5, i32 5
229  %r6 = insertelement <8 x float>   %r5, float %x6, i32 6
230  %r7 = insertelement <8 x float>   %r6, float %x7, i32 7
231  ret <8 x float> %r7
232}
233
; Widening float variant: the source is a <4 x float> and the result an
; <8 x float>, each result lane being the element of %x selected by the
; matching run-time i32 index %i0..%i7.
; Expected code: no frame setup; two indices are loaded from positive offsets
; off %rsp, the other six come from registers; every index is masked with 3;
; the 128-bit source is stored at a negative offset from %rsp; each half is
; built from vmovss plus three vinsertps loads and joined with vinsertf128.
234define <8 x float> @var_shuffle_v8f32_v4f32_xxxxxxxx_i32(<4 x float> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7) nounwind {
235; ALL-LABEL: var_shuffle_v8f32_v4f32_xxxxxxxx_i32:
236; ALL:       # %bb.0:
237; ALL-NEXT:    # kill: def $r9d killed $r9d def $r9
238; ALL-NEXT:    # kill: def $r8d killed $r8d def $r8
239; ALL-NEXT:    # kill: def $ecx killed $ecx def $rcx
240; ALL-NEXT:    # kill: def $edx killed $edx def $rdx
241; ALL-NEXT:    # kill: def $esi killed $esi def $rsi
242; ALL-NEXT:    # kill: def $edi killed $edi def $rdi
243; ALL-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
244; ALL-NEXT:    andl $3, %r10d
245; ALL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
246; ALL-NEXT:    andl $3, %eax
247; ALL-NEXT:    andl $3, %edi
248; ALL-NEXT:    andl $3, %esi
249; ALL-NEXT:    andl $3, %edx
250; ALL-NEXT:    andl $3, %ecx
251; ALL-NEXT:    andl $3, %r8d
252; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
253; ALL-NEXT:    andl $3, %r9d
254; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
255; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
256; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
257; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
258; ALL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
259; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
260; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
261; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
262; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
263; ALL-NEXT:    retq
264  %x0 = extractelement <4 x float> %x, i32 %i0
265  %x1 = extractelement <4 x float> %x, i32 %i1
266  %x2 = extractelement <4 x float> %x, i32 %i2
267  %x3 = extractelement <4 x float> %x, i32 %i3
268  %x4 = extractelement <4 x float> %x, i32 %i4
269  %x5 = extractelement <4 x float> %x, i32 %i5
270  %x6 = extractelement <4 x float> %x, i32 %i6
271  %x7 = extractelement <4 x float> %x, i32 %i7
272  %r0 = insertelement <8 x float> undef, float %x0, i32 0
273  %r1 = insertelement <8 x float>   %r0, float %x1, i32 1
274  %r2 = insertelement <8 x float>   %r1, float %x2, i32 2
275  %r3 = insertelement <8 x float>   %r2, float %x3, i32 3
276  %r4 = insertelement <8 x float>   %r3, float %x4, i32 4
277  %r5 = insertelement <8 x float>   %r4, float %x5, i32 5
278  %r6 = insertelement <8 x float>   %r5, float %x6, i32 6
279  %r7 = insertelement <8 x float>   %r6, float %x7, i32 7
280  ret <8 x float> %r7
281}
282
; Builds a <16 x i16> whose lanes 0..15 are the elements of %x selected by
; sixteen run-time indices %i0..%i15. Note that the indices are declared as
; i32 even though the function name ends in _i16.
; Expected code: 32-byte-aligned frame with %x stored at (%rsp); the first six
; indices come from registers and the remaining ten are loaded from 16(%rbp)
; through 88(%rbp); every index is masked with 15; lane 0 of each 128-bit half
; is loaded with movzwl + vmovd and the other seven lanes with vpinsrw from
; memory.
; The AVX1 and AVX2 check groups are listed separately; the only visible
; difference is the final half-join, vinsertf128 for AVX1 versus vinserti128
; for AVX2.
283define <16 x i16> @var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16(<16 x i16> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, i32 %i11, i32 %i12, i32 %i13, i32 %i14, i32 %i15) nounwind {
284; AVX1-LABEL: var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16:
285; AVX1:       # %bb.0:
286; AVX1-NEXT:    pushq %rbp
287; AVX1-NEXT:    movq %rsp, %rbp
288; AVX1-NEXT:    andq $-32, %rsp
289; AVX1-NEXT:    subq $64, %rsp
290; AVX1-NEXT:    # kill: def $r9d killed $r9d def $r9
291; AVX1-NEXT:    # kill: def $r8d killed $r8d def $r8
292; AVX1-NEXT:    # kill: def $ecx killed $ecx def $rcx
293; AVX1-NEXT:    # kill: def $edx killed $edx def $rdx
294; AVX1-NEXT:    # kill: def $esi killed $esi def $rsi
295; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
296; AVX1-NEXT:    andl $15, %edi
297; AVX1-NEXT:    vmovaps %ymm0, (%rsp)
298; AVX1-NEXT:    movzwl (%rsp,%rdi,2), %eax
299; AVX1-NEXT:    vmovd %eax, %xmm0
300; AVX1-NEXT:    andl $15, %esi
301; AVX1-NEXT:    vpinsrw $1, (%rsp,%rsi,2), %xmm0, %xmm0
302; AVX1-NEXT:    andl $15, %edx
303; AVX1-NEXT:    vpinsrw $2, (%rsp,%rdx,2), %xmm0, %xmm0
304; AVX1-NEXT:    andl $15, %ecx
305; AVX1-NEXT:    vpinsrw $3, (%rsp,%rcx,2), %xmm0, %xmm0
306; AVX1-NEXT:    andl $15, %r8d
307; AVX1-NEXT:    vpinsrw $4, (%rsp,%r8,2), %xmm0, %xmm0
308; AVX1-NEXT:    andl $15, %r9d
309; AVX1-NEXT:    vpinsrw $5, (%rsp,%r9,2), %xmm0, %xmm0
310; AVX1-NEXT:    movl 16(%rbp), %eax
311; AVX1-NEXT:    andl $15, %eax
312; AVX1-NEXT:    vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0
313; AVX1-NEXT:    movl 24(%rbp), %eax
314; AVX1-NEXT:    andl $15, %eax
315; AVX1-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
316; AVX1-NEXT:    movl 32(%rbp), %eax
317; AVX1-NEXT:    andl $15, %eax
318; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
319; AVX1-NEXT:    vmovd %eax, %xmm1
320; AVX1-NEXT:    movl 40(%rbp), %eax
321; AVX1-NEXT:    andl $15, %eax
322; AVX1-NEXT:    vpinsrw $1, (%rsp,%rax,2), %xmm1, %xmm1
323; AVX1-NEXT:    movl 48(%rbp), %eax
324; AVX1-NEXT:    andl $15, %eax
325; AVX1-NEXT:    vpinsrw $2, (%rsp,%rax,2), %xmm1, %xmm1
326; AVX1-NEXT:    movl 56(%rbp), %eax
327; AVX1-NEXT:    andl $15, %eax
328; AVX1-NEXT:    vpinsrw $3, (%rsp,%rax,2), %xmm1, %xmm1
329; AVX1-NEXT:    movl 64(%rbp), %eax
330; AVX1-NEXT:    andl $15, %eax
331; AVX1-NEXT:    vpinsrw $4, (%rsp,%rax,2), %xmm1, %xmm1
332; AVX1-NEXT:    movl 72(%rbp), %eax
333; AVX1-NEXT:    andl $15, %eax
334; AVX1-NEXT:    vpinsrw $5, (%rsp,%rax,2), %xmm1, %xmm1
335; AVX1-NEXT:    movl 80(%rbp), %eax
336; AVX1-NEXT:    andl $15, %eax
337; AVX1-NEXT:    vpinsrw $6, (%rsp,%rax,2), %xmm1, %xmm1
338; AVX1-NEXT:    movl 88(%rbp), %eax
339; AVX1-NEXT:    andl $15, %eax
340; AVX1-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm1, %xmm1
341; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
342; AVX1-NEXT:    movq %rbp, %rsp
343; AVX1-NEXT:    popq %rbp
344; AVX1-NEXT:    retq
345;
346; AVX2-LABEL: var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16:
347; AVX2:       # %bb.0:
348; AVX2-NEXT:    pushq %rbp
349; AVX2-NEXT:    movq %rsp, %rbp
350; AVX2-NEXT:    andq $-32, %rsp
351; AVX2-NEXT:    subq $64, %rsp
352; AVX2-NEXT:    # kill: def $r9d killed $r9d def $r9
353; AVX2-NEXT:    # kill: def $r8d killed $r8d def $r8
354; AVX2-NEXT:    # kill: def $ecx killed $ecx def $rcx
355; AVX2-NEXT:    # kill: def $edx killed $edx def $rdx
356; AVX2-NEXT:    # kill: def $esi killed $esi def $rsi
357; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
358; AVX2-NEXT:    andl $15, %edi
359; AVX2-NEXT:    vmovaps %ymm0, (%rsp)
360; AVX2-NEXT:    movzwl (%rsp,%rdi,2), %eax
361; AVX2-NEXT:    vmovd %eax, %xmm0
362; AVX2-NEXT:    andl $15, %esi
363; AVX2-NEXT:    vpinsrw $1, (%rsp,%rsi,2), %xmm0, %xmm0
364; AVX2-NEXT:    andl $15, %edx
365; AVX2-NEXT:    vpinsrw $2, (%rsp,%rdx,2), %xmm0, %xmm0
366; AVX2-NEXT:    andl $15, %ecx
367; AVX2-NEXT:    vpinsrw $3, (%rsp,%rcx,2), %xmm0, %xmm0
368; AVX2-NEXT:    andl $15, %r8d
369; AVX2-NEXT:    vpinsrw $4, (%rsp,%r8,2), %xmm0, %xmm0
370; AVX2-NEXT:    andl $15, %r9d
371; AVX2-NEXT:    vpinsrw $5, (%rsp,%r9,2), %xmm0, %xmm0
372; AVX2-NEXT:    movl 16(%rbp), %eax
373; AVX2-NEXT:    andl $15, %eax
374; AVX2-NEXT:    vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0
375; AVX2-NEXT:    movl 24(%rbp), %eax
376; AVX2-NEXT:    andl $15, %eax
377; AVX2-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
378; AVX2-NEXT:    movl 32(%rbp), %eax
379; AVX2-NEXT:    andl $15, %eax
380; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
381; AVX2-NEXT:    vmovd %eax, %xmm1
382; AVX2-NEXT:    movl 40(%rbp), %eax
383; AVX2-NEXT:    andl $15, %eax
384; AVX2-NEXT:    vpinsrw $1, (%rsp,%rax,2), %xmm1, %xmm1
385; AVX2-NEXT:    movl 48(%rbp), %eax
386; AVX2-NEXT:    andl $15, %eax
387; AVX2-NEXT:    vpinsrw $2, (%rsp,%rax,2), %xmm1, %xmm1
388; AVX2-NEXT:    movl 56(%rbp), %eax
389; AVX2-NEXT:    andl $15, %eax
390; AVX2-NEXT:    vpinsrw $3, (%rsp,%rax,2), %xmm1, %xmm1
391; AVX2-NEXT:    movl 64(%rbp), %eax
392; AVX2-NEXT:    andl $15, %eax
393; AVX2-NEXT:    vpinsrw $4, (%rsp,%rax,2), %xmm1, %xmm1
394; AVX2-NEXT:    movl 72(%rbp), %eax
395; AVX2-NEXT:    andl $15, %eax
396; AVX2-NEXT:    vpinsrw $5, (%rsp,%rax,2), %xmm1, %xmm1
397; AVX2-NEXT:    movl 80(%rbp), %eax
398; AVX2-NEXT:    andl $15, %eax
399; AVX2-NEXT:    vpinsrw $6, (%rsp,%rax,2), %xmm1, %xmm1
400; AVX2-NEXT:    movl 88(%rbp), %eax
401; AVX2-NEXT:    andl $15, %eax
402; AVX2-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm1, %xmm1
403; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
404; AVX2-NEXT:    movq %rbp, %rsp
405; AVX2-NEXT:    popq %rbp
406; AVX2-NEXT:    retq
407  %x0  = extractelement <16 x i16> %x, i32 %i0
408  %x1  = extractelement <16 x i16> %x, i32 %i1
409  %x2  = extractelement <16 x i16> %x, i32 %i2
410  %x3  = extractelement <16 x i16> %x, i32 %i3
411  %x4  = extractelement <16 x i16> %x, i32 %i4
412  %x5  = extractelement <16 x i16> %x, i32 %i5
413  %x6  = extractelement <16 x i16> %x, i32 %i6
414  %x7  = extractelement <16 x i16> %x, i32 %i7
415  %x8  = extractelement <16 x i16> %x, i32 %i8
416  %x9  = extractelement <16 x i16> %x, i32 %i9
417  %x10 = extractelement <16 x i16> %x, i32 %i10
418  %x11 = extractelement <16 x i16> %x, i32 %i11
419  %x12 = extractelement <16 x i16> %x, i32 %i12
420  %x13 = extractelement <16 x i16> %x, i32 %i13
421  %x14 = extractelement <16 x i16> %x, i32 %i14
422  %x15 = extractelement <16 x i16> %x, i32 %i15
423  %r0  = insertelement <16 x i16> undef, i16 %x0 , i32 0
424  %r1  = insertelement <16 x i16>  %r0 , i16 %x1 , i32 1
425  %r2  = insertelement <16 x i16>  %r1 , i16 %x2 , i32 2
426  %r3  = insertelement <16 x i16>  %r2 , i16 %x3 , i32 3
427  %r4  = insertelement <16 x i16>  %r3 , i16 %x4 , i32 4
428  %r5  = insertelement <16 x i16>  %r4 , i16 %x5 , i32 5
429  %r6  = insertelement <16 x i16>  %r5 , i16 %x6 , i32 6
430  %r7  = insertelement <16 x i16>  %r6 , i16 %x7 , i32 7
431  %r8  = insertelement <16 x i16>  %r7 , i16 %x8 , i32 8
432  %r9  = insertelement <16 x i16>  %r8 , i16 %x9 , i32 9
433  %r10 = insertelement <16 x i16>  %r9 , i16 %x10, i32 10
434  %r11 = insertelement <16 x i16>  %r10, i16 %x11, i32 11
435  %r12 = insertelement <16 x i16>  %r11, i16 %x12, i32 12
436  %r13 = insertelement <16 x i16>  %r12, i16 %x13, i32 13
437  %r14 = insertelement <16 x i16>  %r13, i16 %x14, i32 14
438  %r15 = insertelement <16 x i16>  %r14, i16 %x15, i32 15
439  ret <16 x i16> %r15
440}
441
; Widening i16 variant: the source is an <8 x i16> and the result a
; <16 x i16>, each result lane being the element of %x selected by the
; matching run-time index %i0..%i15. As in the test above, the indices are
; declared as i32 although the function name ends in _i16.
; Expected code: no frame setup; the 128-bit source is stored at a negative
; offset from %rsp and then addressed as -24(%rsp,index,2); the first six
; indices come from registers and the rest are loaded from positive offsets
; off %rsp; every index is masked with 7; lane 0 of each half uses
; movzwl + vmovd and the other lanes use vpinsrw from memory.
; The AVX1 and AVX2 check groups differ only in the final half-join,
; vinsertf128 for AVX1 versus vinserti128 for AVX2.
442define <16 x i16> @var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16(<8 x i16> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, i32 %i11, i32 %i12, i32 %i13, i32 %i14, i32 %i15) nounwind {
443; AVX1-LABEL: var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16:
444; AVX1:       # %bb.0:
445; AVX1-NEXT:    # kill: def $r9d killed $r9d def $r9
446; AVX1-NEXT:    # kill: def $r8d killed $r8d def $r8
447; AVX1-NEXT:    # kill: def $ecx killed $ecx def $rcx
448; AVX1-NEXT:    # kill: def $edx killed $edx def $rdx
449; AVX1-NEXT:    # kill: def $esi killed $esi def $rsi
450; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
451; AVX1-NEXT:    andl $7, %edi
452; AVX1-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
453; AVX1-NEXT:    movzwl -24(%rsp,%rdi,2), %eax
454; AVX1-NEXT:    vmovd %eax, %xmm0
455; AVX1-NEXT:    andl $7, %esi
456; AVX1-NEXT:    vpinsrw $1, -24(%rsp,%rsi,2), %xmm0, %xmm0
457; AVX1-NEXT:    andl $7, %edx
458; AVX1-NEXT:    vpinsrw $2, -24(%rsp,%rdx,2), %xmm0, %xmm0
459; AVX1-NEXT:    andl $7, %ecx
460; AVX1-NEXT:    vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
461; AVX1-NEXT:    andl $7, %r8d
462; AVX1-NEXT:    vpinsrw $4, -24(%rsp,%r8,2), %xmm0, %xmm0
463; AVX1-NEXT:    andl $7, %r9d
464; AVX1-NEXT:    vpinsrw $5, -24(%rsp,%r9,2), %xmm0, %xmm0
465; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
466; AVX1-NEXT:    andl $7, %eax
467; AVX1-NEXT:    vpinsrw $6, -24(%rsp,%rax,2), %xmm0, %xmm0
468; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
469; AVX1-NEXT:    andl $7, %eax
470; AVX1-NEXT:    vpinsrw $7, -24(%rsp,%rax,2), %xmm0, %xmm0
471; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
472; AVX1-NEXT:    andl $7, %eax
473; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
474; AVX1-NEXT:    vmovd %eax, %xmm1
475; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
476; AVX1-NEXT:    andl $7, %eax
477; AVX1-NEXT:    vpinsrw $1, -24(%rsp,%rax,2), %xmm1, %xmm1
478; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
479; AVX1-NEXT:    andl $7, %eax
480; AVX1-NEXT:    vpinsrw $2, -24(%rsp,%rax,2), %xmm1, %xmm1
481; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
482; AVX1-NEXT:    andl $7, %eax
483; AVX1-NEXT:    vpinsrw $3, -24(%rsp,%rax,2), %xmm1, %xmm1
484; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
485; AVX1-NEXT:    andl $7, %eax
486; AVX1-NEXT:    vpinsrw $4, -24(%rsp,%rax,2), %xmm1, %xmm1
487; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
488; AVX1-NEXT:    andl $7, %eax
489; AVX1-NEXT:    vpinsrw $5, -24(%rsp,%rax,2), %xmm1, %xmm1
490; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
491; AVX1-NEXT:    andl $7, %eax
492; AVX1-NEXT:    vpinsrw $6, -24(%rsp,%rax,2), %xmm1, %xmm1
493; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
494; AVX1-NEXT:    andl $7, %eax
495; AVX1-NEXT:    vpinsrw $7, -24(%rsp,%rax,2), %xmm1, %xmm1
496; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
497; AVX1-NEXT:    retq
498;
499; AVX2-LABEL: var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16:
500; AVX2:       # %bb.0:
501; AVX2-NEXT:    # kill: def $r9d killed $r9d def $r9
502; AVX2-NEXT:    # kill: def $r8d killed $r8d def $r8
503; AVX2-NEXT:    # kill: def $ecx killed $ecx def $rcx
504; AVX2-NEXT:    # kill: def $edx killed $edx def $rdx
505; AVX2-NEXT:    # kill: def $esi killed $esi def $rsi
506; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
507; AVX2-NEXT:    andl $7, %edi
508; AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
509; AVX2-NEXT:    movzwl -24(%rsp,%rdi,2), %eax
510; AVX2-NEXT:    vmovd %eax, %xmm0
511; AVX2-NEXT:    andl $7, %esi
512; AVX2-NEXT:    vpinsrw $1, -24(%rsp,%rsi,2), %xmm0, %xmm0
513; AVX2-NEXT:    andl $7, %edx
514; AVX2-NEXT:    vpinsrw $2, -24(%rsp,%rdx,2), %xmm0, %xmm0
515; AVX2-NEXT:    andl $7, %ecx
516; AVX2-NEXT:    vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
517; AVX2-NEXT:    andl $7, %r8d
518; AVX2-NEXT:    vpinsrw $4, -24(%rsp,%r8,2), %xmm0, %xmm0
519; AVX2-NEXT:    andl $7, %r9d
520; AVX2-NEXT:    vpinsrw $5, -24(%rsp,%r9,2), %xmm0, %xmm0
521; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
522; AVX2-NEXT:    andl $7, %eax
523; AVX2-NEXT:    vpinsrw $6, -24(%rsp,%rax,2), %xmm0, %xmm0
524; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
525; AVX2-NEXT:    andl $7, %eax
526; AVX2-NEXT:    vpinsrw $7, -24(%rsp,%rax,2), %xmm0, %xmm0
527; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
528; AVX2-NEXT:    andl $7, %eax
529; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
530; AVX2-NEXT:    vmovd %eax, %xmm1
531; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
532; AVX2-NEXT:    andl $7, %eax
533; AVX2-NEXT:    vpinsrw $1, -24(%rsp,%rax,2), %xmm1, %xmm1
534; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
535; AVX2-NEXT:    andl $7, %eax
536; AVX2-NEXT:    vpinsrw $2, -24(%rsp,%rax,2), %xmm1, %xmm1
537; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
538; AVX2-NEXT:    andl $7, %eax
539; AVX2-NEXT:    vpinsrw $3, -24(%rsp,%rax,2), %xmm1, %xmm1
540; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
541; AVX2-NEXT:    andl $7, %eax
542; AVX2-NEXT:    vpinsrw $4, -24(%rsp,%rax,2), %xmm1, %xmm1
543; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
544; AVX2-NEXT:    andl $7, %eax
545; AVX2-NEXT:    vpinsrw $5, -24(%rsp,%rax,2), %xmm1, %xmm1
546; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
547; AVX2-NEXT:    andl $7, %eax
548; AVX2-NEXT:    vpinsrw $6, -24(%rsp,%rax,2), %xmm1, %xmm1
549; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
550; AVX2-NEXT:    andl $7, %eax
551; AVX2-NEXT:    vpinsrw $7, -24(%rsp,%rax,2), %xmm1, %xmm1
552; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
553; AVX2-NEXT:    retq
554  %x0  = extractelement <8 x i16> %x, i32 %i0
555  %x1  = extractelement <8 x i16> %x, i32 %i1
556  %x2  = extractelement <8 x i16> %x, i32 %i2
557  %x3  = extractelement <8 x i16> %x, i32 %i3
558  %x4  = extractelement <8 x i16> %x, i32 %i4
559  %x5  = extractelement <8 x i16> %x, i32 %i5
560  %x6  = extractelement <8 x i16> %x, i32 %i6
561  %x7  = extractelement <8 x i16> %x, i32 %i7
562  %x8  = extractelement <8 x i16> %x, i32 %i8
563  %x9  = extractelement <8 x i16> %x, i32 %i9
564  %x10 = extractelement <8 x i16> %x, i32 %i10
565  %x11 = extractelement <8 x i16> %x, i32 %i11
566  %x12 = extractelement <8 x i16> %x, i32 %i12
567  %x13 = extractelement <8 x i16> %x, i32 %i13
568  %x14 = extractelement <8 x i16> %x, i32 %i14
569  %x15 = extractelement <8 x i16> %x, i32 %i15
570  %r0  = insertelement <16 x i16> undef, i16 %x0 , i32 0
571  %r1  = insertelement <16 x i16>  %r0 , i16 %x1 , i32 1
572  %r2  = insertelement <16 x i16>  %r1 , i16 %x2 , i32 2
573  %r3  = insertelement <16 x i16>  %r2 , i16 %x3 , i32 3
574  %r4  = insertelement <16 x i16>  %r3 , i16 %x4 , i32 4
575  %r5  = insertelement <16 x i16>  %r4 , i16 %x5 , i32 5
576  %r6  = insertelement <16 x i16>  %r5 , i16 %x6 , i32 6
577  %r7  = insertelement <16 x i16>  %r6 , i16 %x7 , i32 7
578  %r8  = insertelement <16 x i16>  %r7 , i16 %x8 , i32 8
579  %r9  = insertelement <16 x i16>  %r8 , i16 %x9 , i32 9
580  %r10 = insertelement <16 x i16>  %r9 , i16 %x10, i32 10
581  %r11 = insertelement <16 x i16>  %r10, i16 %x11, i32 11
582  %r12 = insertelement <16 x i16>  %r11, i16 %x12, i32 12
583  %r13 = insertelement <16 x i16>  %r12, i16 %x13, i32 13
584  %r14 = insertelement <16 x i16>  %r13, i16 %x14, i32 14
585  %r15 = insertelement <16 x i16>  %r14, i16 %x15, i32 15
586  ret <16 x i16> %r15
587}
588
589;
590; Unary shuffle indices from memory
591;
592
; Same <4 x i64> element selection as the register-index test, but the four
; i64 indices are loaded from consecutive elements 0..3 of the array pointed
; to by %i. The loads are declared with align 4, i.e. below the natural i64
; alignment.
; Expected code: 32-byte-aligned frame; indices loaded from (%rdi), 8(%rdi),
; 16(%rdi) and 24(%rdi) and each masked with 3; %x stored to the stack; halves
; formed from vmovsd loads joined by vmovlhps, then vinsertf128.
593define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwind {
594; ALL-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64:
595; ALL:       # %bb.0:
596; ALL-NEXT:    pushq %rbp
597; ALL-NEXT:    movq %rsp, %rbp
598; ALL-NEXT:    andq $-32, %rsp
599; ALL-NEXT:    subq $64, %rsp
600; ALL-NEXT:    movq (%rdi), %rax
601; ALL-NEXT:    movq 8(%rdi), %rcx
602; ALL-NEXT:    andl $3, %eax
603; ALL-NEXT:    andl $3, %ecx
604; ALL-NEXT:    movq 16(%rdi), %rdx
605; ALL-NEXT:    andl $3, %edx
606; ALL-NEXT:    movq 24(%rdi), %rsi
607; ALL-NEXT:    andl $3, %esi
608; ALL-NEXT:    vmovaps %ymm0, (%rsp)
609; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
610; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
611; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
612; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
613; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
614; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
615; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
616; ALL-NEXT:    movq %rbp, %rsp
617; ALL-NEXT:    popq %rbp
618; ALL-NEXT:    retq
619  %p0  = getelementptr inbounds i64, i64* %i, i32 0
620  %p1  = getelementptr inbounds i64, i64* %i, i32 1
621  %p2  = getelementptr inbounds i64, i64* %i, i32 2
622  %p3  = getelementptr inbounds i64, i64* %i, i32 3
623  %i0  = load i64, i64* %p0, align 4
624  %i1  = load i64, i64* %p1, align 4
625  %i2  = load i64, i64* %p2, align 4
626  %i3  = load i64, i64* %p3, align 4
627  %x0 = extractelement <4 x i64> %x, i64 %i0
628  %x1 = extractelement <4 x i64> %x, i64 %i1
629  %x2 = extractelement <4 x i64> %x, i64 %i2
630  %x3 = extractelement <4 x i64> %x, i64 %i3
631  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
632  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
633  %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
634  %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
635  ret <4 x i64> %r3
636}
637
; Widening counterpart of the memory-index test: the source is a <2 x i64>,
; the result a <4 x i64>, and the four i64 indices are loaded (align 4) from
; consecutive elements 0..3 of the array pointed to by %i.
; Expected code: no frame setup; indices loaded from (%rdi) through 24(%rdi)
; and each masked with 1; the 128-bit source stored at a negative offset from
; %rsp; halves formed from vmovsd loads joined by vmovlhps, then vinsertf128.
638define <4 x i64> @mem_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64* %i) nounwind {
639; ALL-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64:
640; ALL:       # %bb.0:
641; ALL-NEXT:    movq (%rdi), %rax
642; ALL-NEXT:    movq 8(%rdi), %rcx
643; ALL-NEXT:    andl $1, %eax
644; ALL-NEXT:    andl $1, %ecx
645; ALL-NEXT:    movq 16(%rdi), %rdx
646; ALL-NEXT:    andl $1, %edx
647; ALL-NEXT:    movq 24(%rdi), %rsi
648; ALL-NEXT:    andl $1, %esi
649; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
650; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
651; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
652; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
653; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
654; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
655; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
656; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
657; ALL-NEXT:    retq
658  %p0  = getelementptr inbounds i64, i64* %i, i32 0
659  %p1  = getelementptr inbounds i64, i64* %i, i32 1
660  %p2  = getelementptr inbounds i64, i64* %i, i32 2
661  %p3  = getelementptr inbounds i64, i64* %i, i32 3
662  %i0  = load i64, i64* %p0, align 4
663  %i1  = load i64, i64* %p1, align 4
664  %i2  = load i64, i64* %p2, align 4
665  %i3  = load i64, i64* %p3, align 4
666  %x0 = extractelement <2 x i64> %x, i64 %i0
667  %x1 = extractelement <2 x i64> %x, i64 %i1
668  %x2 = extractelement <2 x i64> %x, i64 %i2
669  %x3 = extractelement <2 x i64> %x, i64 %i3
670  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
671  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
672  %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
673  %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
674  ret <4 x i64> %r3
675}
676