1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL %s
3; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
4
; Insert a float loaded from %br into element 1 and the scalar %y into
; element 14 of a <16 x float>.
define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
; KNL-LABEL: test1:
; KNL:       ## BB#0:
; KNL-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; KNL-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; KNL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test1:
; SKX:       ## BB#0:
; SKX-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
; SKX-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; SKX-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
; SKX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; SKX-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %ld = load float, float* %br
  %ins1 = insertelement <16 x float> %x, float %ld, i32 1
  %ins14 = insertelement <16 x float> %ins1, float %y, i32 14
  ret <16 x float> %ins14
}
28
; Insert a double loaded from %br into element 1 and the scalar %y into
; element 6 of an <8 x double>.
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
; KNL-LABEL: test2:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; KNL-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
; KNL-NEXT:    vmovsd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
; KNL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test2:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
; SKX-NEXT:    vinsertf64x2 $0, %xmm2, %zmm0, %zmm0
; SKX-NEXT:    vextractf64x2 $3, %zmm0, %xmm2
; SKX-NEXT:    vmovsd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
; SKX-NEXT:    vinsertf64x2 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %ld = load double, double* %br
  %ins1 = insertelement <8 x double> %x, double %ld, i32 1
  %ins6 = insertelement <8 x double> %ins1, double %y, i32 6
  ret <8 x double> %ins6
}
52
; Copy element 4 of a <16 x float> into element 1 of the same vector.
define <16 x float> @test3(<16 x float> %x) nounwind {
; KNL-LABEL: test3:
; KNL:       ## BB#0:
; KNL-NEXT:    vextractf32x4 $1, %zmm0, %xmm1
; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test3:
; SKX:       ## BB#0:
; SKX-NEXT:    vextractf32x4 $1, %zmm0, %xmm1
; SKX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; SKX-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %elt4 = extractelement <16 x float> %x, i32 4
  %res = insertelement <16 x float> %x, float %elt4, i32 1
  ret <16 x float> %res
}
71
; Copy element 4 of an <8 x i64> into element 1 of the same vector.
define <8 x i64> @test4(<8 x i64> %x) nounwind {
; KNL-LABEL: test4:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
; KNL-NEXT:    vmovq %xmm1, %rax
; KNL-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test4:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti64x2 $2, %zmm0, %xmm1
; SKX-NEXT:    vmovq %xmm1, %rax
; SKX-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %elt4 = extractelement <8 x i64> %x, i32 4
  %res = insertelement <8 x i64> %x, i64 %elt4, i32 1
  ret <8 x i64> %res
}
92
; Extract element 3 of a <4 x float> and return its bits as an i32.
define i32 @test5(<4 x float> %x) nounwind {
; KNL-LABEL: test5:
; KNL:       ## BB#0:
; KNL-NEXT:    vextractps $3, %xmm0, %eax
; KNL-NEXT:    retq
;
; SKX-LABEL: test5:
; SKX:       ## BB#0:
; SKX-NEXT:    vextractps $3, %xmm0, %eax
; SKX-NEXT:    retq
  %elt3 = extractelement <4 x float> %x, i32 3
  %bits = bitcast float %elt3 to i32
  ret i32 %bits
}
107
; Extract element 3 of a <4 x float> and store it to %out.
define void @test6(<4 x float> %x, float* %out) nounwind {
; KNL-LABEL: test6:
; KNL:       ## BB#0:
; KNL-NEXT:    vextractps $3, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: test6:
; SKX:       ## BB#0:
; SKX-NEXT:    vextractps $3, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %elt3 = extractelement <4 x float> %x, i32 3
  store float %elt3, float* %out, align 4
  ret void
}
122
; Extract a float from a <16 x float> at the run-time index %ind.
define float @test7(<16 x float> %x, i32 %ind) nounwind {
; KNL-LABEL: test7:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovd %edi, %xmm1
; KNL-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test7:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovd %edi, %xmm1
; SKX-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; SKX-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; SKX-NEXT:    retq
  %elt = extractelement <16 x float> %x, i32 %ind
  ret float %elt
}
140
; Extract a double from an <8 x double> at the run-time index %ind.
define double @test8(<8 x double> %x, i32 %ind) nounwind {
; KNL-LABEL: test8:
; KNL:       ## BB#0:
; KNL-NEXT:    movslq %edi, %rax
; KNL-NEXT:    vmovq %rax, %xmm1
; KNL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test8:
; SKX:       ## BB#0:
; SKX-NEXT:    movslq %edi, %rax
; SKX-NEXT:    vmovq %rax, %xmm1
; SKX-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; SKX-NEXT:    retq
  %elt = extractelement <8 x double> %x, i32 %ind
  ret double %elt
}
160
; Extract a float from an <8 x float> at the run-time index %ind.
define float @test9(<8 x float> %x, i32 %ind) nounwind {
; KNL-LABEL: test9:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovd %edi, %xmm1
; KNL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test9:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovd %edi, %xmm1
; SKX-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; SKX-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; SKX-NEXT:    retq
  %elt = extractelement <8 x float> %x, i32 %ind
  ret float %elt
}
178
; Extract an i32 from a <16 x i32> at the run-time index %ind.
define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
; KNL-LABEL: test10:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovd %edi, %xmm1
; KNL-NEXT:    vpermd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    vmovd %xmm0, %eax
; KNL-NEXT:    retq
;
; SKX-LABEL: test10:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovd %edi, %xmm1
; SKX-NEXT:    vpermd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    vmovd %xmm0, %eax
; SKX-NEXT:    retq
  %elt = extractelement <16 x i32> %x, i32 %ind
  ret i32 %elt
}
196
; Branch on element 4 of a <16 x i1> unsigned-less-than compare result:
; return %b when the bit is set, otherwise return %b + %a.
; The block labels A and B are kept because they appear in the asm comments
; matched below.
define <16 x i32> @test11(<16 x i32> %a, <16 x i32> %b) {
; KNL-LABEL: test11:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
; KNL-NEXT:    kshiftlw $11, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    je LBB10_2
; KNL-NEXT:  ## BB#1: ## %A
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
; KNL-NEXT:  LBB10_2: ## %B
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test11:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
; SKX-NEXT:    kshiftlw $11, %k0, %k0
; SKX-NEXT:    kshiftrw $15, %k0, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    testb %al, %al
; SKX-NEXT:    je LBB10_2
; SKX-NEXT:  ## BB#1: ## %A
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
; SKX-NEXT:  LBB10_2: ## %B
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %lt = icmp ult <16 x i32> %a, %b
  %bit4 = extractelement <16 x i1> %lt, i32 4
  br i1 %bit4, label %A, label %B

A:
  ret <16 x i32> %b

B:
  %sum = add <16 x i32> %b, %a
  ret <16 x i32> %sum
}
236
; Select between %a1 and %b1 using element 0 of a <16 x i1> signed-less-than
; compare of two <16 x i64> vectors.
define i64 @test12(<16 x i64> %a, <16 x i64> %b, i64 %a1, i64 %b1) {
; KNL-LABEL: test12:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtq %zmm0, %zmm2, %k0
; KNL-NEXT:    vpcmpgtq %zmm1, %zmm3, %k1
; KNL-NEXT:    kunpckbw %k0, %k1, %k0
; KNL-NEXT:    kshiftlw $15, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    cmoveq %rsi, %rdi
; KNL-NEXT:    movq %rdi, %rax
; KNL-NEXT:    retq
;
; SKX-LABEL: test12:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpgtq %zmm0, %zmm2, %k0
; SKX-NEXT:    vpcmpgtq %zmm1, %zmm3, %k1
; SKX-NEXT:    kunpckbw %k0, %k1, %k0
; SKX-NEXT:    kshiftlw $15, %k0, %k0
; SKX-NEXT:    kshiftrw $15, %k0, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    testb %al, %al
; SKX-NEXT:    cmoveq %rsi, %rdi
; SKX-NEXT:    movq %rdi, %rax
; SKX-NEXT:    retq
  %lt = icmp slt <16 x i64> %a, %b
  %bit0 = extractelement <16 x i1> %lt, i32 0
  %sel = select i1 %bit0, i64 %a1, i64 %b1
  ret i64 %sel
}
268
; Insert a scalar unsigned-less-than compare result into element 0 of a
; constant <16 x i1> mask and return the mask as an i16.
define i16 @test13(i32 %a, i32 %b) {
; KNL-LABEL: test13:
; KNL:       ## BB#0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    setb %al
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    movw $-4, %ax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    korw %k0, %k1, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test13:
; SKX:       ## BB#0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    setb %al
; SKX-NEXT:    kmovw %eax, %k0
; SKX-NEXT:    movw $-4, %ax
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    korw %k0, %k1, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %lt = icmp ult i32 %a, %b
  %mask = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %lt, i32 0
  %bits = bitcast <16 x i1> %mask to i16
  ret i16 %bits
}
298
; Select between %a1 and %b1 using element 4 of an <8 x i1> signed-less-than
; compare of two <8 x i64> vectors.
define i64 @test14(<8 x i64> %a, <8 x i64> %b, i64 %a1, i64 %b1) {
; KNL-LABEL: test14:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; KNL-NEXT:    kshiftlw $11, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    cmoveq %rsi, %rdi
; KNL-NEXT:    movq %rdi, %rax
; KNL-NEXT:    retq
;
; SKX-LABEL: test14:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; SKX-NEXT:    kshiftlb $3, %k0, %k0
; SKX-NEXT:    kshiftrb $7, %k0, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    testb %al, %al
; SKX-NEXT:    cmoveq %rsi, %rdi
; SKX-NEXT:    movq %rdi, %rax
; SKX-NEXT:    retq
  %lt = icmp slt <8 x i64> %a, %b
  %bit4 = extractelement <8 x i1> %lt, i32 4
  %sel = select i1 %bit4, i64 %a1, i64 %b1
  ret i64 %sel
}
326
; Load an i1, insert it at element 10 of an undef <16 x i1>, and return the
; vector as an i16.
define i16 @test15(i1* %addr) {
; KNL-LABEL: test15:
; KNL:       ## BB#0:
; KNL-NEXT:    movb (%rdi), %al
; KNL-NEXT:    xorl %ecx, %ecx
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    movw $-1, %ax
; KNL-NEXT:    cmovew %cx, %ax
; KNL-NEXT:    retq
;
; SKX-LABEL: test15:
; SKX:       ## BB#0:
; SKX-NEXT:    movb (%rdi), %al
; SKX-NEXT:    xorl %ecx, %ecx
; SKX-NEXT:    testb %al, %al
; SKX-NEXT:    movw $-1, %ax
; SKX-NEXT:    cmovew %cx, %ax
; SKX-NEXT:    retq
  %bit = load i1, i1* %addr, align 1
  %mask = insertelement <16 x i1> undef, i1 %bit, i32 10
  %bits = bitcast <16 x i1> %mask to i16
  ret i16 %bits
}
350
; Reinterpret %a as a <16 x i1> mask, insert a loaded i1 at element 10, and
; return the mask as an i16.
define i16 @test16(i1* %addr, i16 %a) {
; KNL-LABEL: test16:
; KNL:       ## BB#0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    kmovw %esi, %k1
; KNL-NEXT:    kshiftlw $10, %k0, %k0
; KNL-NEXT:    korw %k0, %k1, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test16:
; SKX:       ## BB#0:
; SKX-NEXT:    movzbl (%rdi), %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    kmovd %eax, %k0
; SKX-NEXT:    kmovw %esi, %k1
; SKX-NEXT:    kshiftlw $10, %k0, %k0
; SKX-NEXT:    korw %k0, %k1, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %bit = load i1, i1* %addr, align 128
  %mask.in = bitcast i16 %a to <16 x i1>
  %mask.out = insertelement <16 x i1> %mask.in, i1 %bit, i32 10
  %bits = bitcast <16 x i1> %mask.out to i16
  ret i16 %bits
}
381
; Reinterpret %a as an <8 x i1> mask, insert a loaded i1 at element 4, and
; return the mask as an i8.
define i8 @test17(i1* %addr, i8 %a) {
; KNL-LABEL: test17:
; KNL:       ## BB#0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    kmovw %esi, %k1
; KNL-NEXT:    kshiftlw $4, %k0, %k0
; KNL-NEXT:    korw %k0, %k1, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test17:
; SKX:       ## BB#0:
; SKX-NEXT:    movzbl (%rdi), %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    kmovd %eax, %k0
; SKX-NEXT:    kmovb %esi, %k1
; SKX-NEXT:    kshiftlb $4, %k0, %k0
; SKX-NEXT:    korb %k0, %k1, %k0
; SKX-NEXT:    kmovb %k0, %eax
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %bit = load i1, i1* %addr, align 128
  %mask.in = bitcast i8 %a to <8 x i1>
  %mask.out = insertelement <8 x i1> %mask.in, i1 %bit, i32 4
  %bits = bitcast <8 x i1> %mask.out to i8
  ret i8 %bits
}
412
; Return element 1 of an <8 x i64> and store element 3 to %dst.
define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) {
; KNL-LABEL: extract_v8i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrq $1, %xmm0, %rax
; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; KNL-NEXT:    vpextrq $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrq $1, %xmm0, %rax
; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %ret.elt = extractelement <8 x i64> %x, i32 1
  %st.elt = extractelement <8 x i64> %x, i32 3
  store i64 %st.elt, i64* %dst, align 1
  ret i64 %ret.elt
}
432
; Return element 1 of a <4 x i64> and store element 3 to %dst.
define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) {
; KNL-LABEL: extract_v4i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrq $1, %xmm0, %rax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrq $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v4i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrq $1, %xmm0, %rax
; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %ret.elt = extractelement <4 x i64> %x, i32 1
  %st.elt = extractelement <4 x i64> %x, i32 3
  store i64 %st.elt, i64* %dst, align 1
  ret i64 %ret.elt
}
452
; Return element 0 of a <2 x i64> and store element 1 to %dst.
define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
; KNL-LABEL: extract_v2i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovq %xmm0, %rax
; KNL-NEXT:    vpextrq $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v2i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovq %xmm0, %rax
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %ret.elt = extractelement <2 x i64> %x, i32 0
  %st.elt = extractelement <2 x i64> %x, i32 1
  store i64 %st.elt, i64* %dst, align 1
  ret i64 %ret.elt
}
470
; Return element 1 of a <16 x i32> and store element 5 to %dst.
define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
; KNL-LABEL: extract_v16i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrd $1, %xmm0, %eax
; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; KNL-NEXT:    vpextrd $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %ret.elt = extractelement <16 x i32> %x, i32 1
  %st.elt = extractelement <16 x i32> %x, i32 5
  store i32 %st.elt, i32* %dst, align 1
  ret i32 %ret.elt
}
490
; Return element 1 of an <8 x i32> and store element 5 to %dst.
define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
; KNL-LABEL: extract_v8i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrd $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrd $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v8i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %ret.elt = extractelement <8 x i32> %x, i32 1
  %st.elt = extractelement <8 x i32> %x, i32 5
  store i32 %st.elt, i32* %dst, align 1
  ret i32 %ret.elt
}
510
; Return element 1 of a <4 x i32> and store element 3 to %dst.
define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
; KNL-LABEL: extract_v4i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrd $1, %xmm0, %eax
; KNL-NEXT:    vpextrd $3, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v4i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vpextrd $3, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %ret.elt = extractelement <4 x i32> %x, i32 1
  %st.elt = extractelement <4 x i32> %x, i32 3
  store i32 %st.elt, i32* %dst, align 1
  ret i32 %ret.elt
}
528
; Return element 1 of a <32 x i16> and store element 9 to %dst.
define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) {
; KNL-LABEL: extract_v32i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrw $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrw $1, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %ret.elt = extractelement <32 x i16> %x, i32 1
  %st.elt = extractelement <32 x i16> %x, i32 9
  store i16 %st.elt, i16* %dst, align 1
  ret i16 %ret.elt
}
550
; Return element 1 of a <16 x i16> and store element 9 to %dst.
define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) {
; KNL-LABEL: extract_v16i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrw $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrw $1, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %ret.elt = extractelement <16 x i16> %x, i32 1
  %st.elt = extractelement <16 x i16> %x, i32 9
  store i16 %st.elt, i16* %dst, align 1
  ret i16 %ret.elt
}
572
; Return element 1 of an <8 x i16> and store element 3 to %dst.
define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
; KNL-LABEL: extract_v8i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrw $1, %xmm0, %eax
; KNL-NEXT:    vpextrw $3, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v8i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vpextrw $3, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %ret.elt = extractelement <8 x i16> %x, i32 1
  %st.elt = extractelement <8 x i16> %x, i32 3
  store i16 %st.elt, i16* %dst, align 1
  ret i16 %ret.elt
}
592
; Return element 1 of a <64 x i8> and store element 17 to %dst.
define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) {
; KNL-LABEL: extract_v64i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrb $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrb $1, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %ret.elt = extractelement <64 x i8> %x, i32 1
  %st.elt = extractelement <64 x i8> %x, i32 17
  store i8 %st.elt, i8* %dst, align 1
  ret i8 %ret.elt
}
614
; Return element 1 of a <32 x i8> and store element 17 to %dst.
define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) {
; KNL-LABEL: extract_v32i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrb $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrb $1, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %ret.elt = extractelement <32 x i8> %x, i32 1
  %st.elt = extractelement <32 x i8> %x, i32 17
  store i8 %st.elt, i8* %dst, align 1
  ret i8 %ret.elt
}
636
; Return element 1 of a <16 x i8> and store element 3 to %dst.
define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
; KNL-LABEL: extract_v16i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrb $1, %xmm0, %eax
; KNL-NEXT:    vpextrb $3, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v16i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vpextrb $3, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %ret.elt = extractelement <16 x i8> %x, i32 1
  %st.elt = extractelement <16 x i8> %x, i32 3
  store i8 %st.elt, i8* %dst, align 1
  ret i8 %ret.elt
}
656
; Insert an i64 loaded from %ptr into element 1 and the scalar %y into
; element 3 of an <8 x i64>.
define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y, i64* %ptr) {
; KNL-LABEL: insert_v8i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; KNL-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; KNL-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; SKX-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %ld = load i64, i64* %ptr
  %ins.mem = insertelement <8 x i64> %x, i64 %ld, i32 1
  %ins.reg = insertelement <8 x i64> %ins.mem, i64 %y, i32 3
  ret <8 x i64> %ins.reg
}
680
; Insert an i64 loaded from %ptr into element 1 and the scalar %y into
; element 3 of a <4 x i64>.
define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y, i64* %ptr) {
; KNL-LABEL: insert_v4i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v4i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; SKX-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %ld = load i64, i64* %ptr
  %ins.mem = insertelement <4 x i64> %x, i64 %ld, i32 1
  %ins.reg = insertelement <4 x i64> %ins.mem, i64 %y, i32 3
  ret <4 x i64> %ins.reg
}
704
; Insert an i64 loaded from %ptr into element 1 and the scalar %y into
; element 0 of a <2 x i64>.
;
; Both indices must be in bounds: an insertelement whose index is not less
; than the vector length does not produce a defined value, so asserting on its
; codegen pins nothing useful. The register insert previously used index 3.
;
; The assertions below were adjusted by hand for the corrected index. The two
; inserts touch different lanes, so their relative order is matched with -DAG;
; regenerate with utils/update_llc_test_checks.py to restore strict ordering.
define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
; KNL-LABEL: insert_v2i64:
; KNL:       ## BB#0:
; KNL-DAG:     vpinsrq $0, %rdi, %xmm0, %xmm0
; KNL-DAG:     vpinsrq $1, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v2i64:
; SKX:       ## BB#0:
; SKX-DAG:     vpinsrq $0, %rdi, %xmm0, %xmm0
; SKX-DAG:     vpinsrq $1, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0
  ret <2 x i64> %r2
}
722
; Insert an i32 loaded from %ptr into element 1 and the scalar %y into
; element 5 of a <16 x i32>.
define <16 x i32> @insert_v16i32(<16 x i32> %x, i32 %y, i32* %ptr) {
; KNL-LABEL: insert_v16i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; KNL-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %ld = load i32, i32* %ptr
  %ins.mem = insertelement <16 x i32> %x, i32 %ld, i32 1
  %ins.reg = insertelement <16 x i32> %ins.mem, i32 %y, i32 5
  ret <16 x i32> %ins.reg
}
746
; Insert an i32 loaded from %ptr into element 1 and the scalar %y into
; element 5 of an <8 x i32>.
define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, i32* %ptr) {
; KNL-LABEL: insert_v8i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %ld = load i32, i32* %ptr
  %ins.mem = insertelement <8 x i32> %x, i32 %ld, i32 1
  %ins.reg = insertelement <8 x i32> %ins.mem, i32 %y, i32 5
  ret <8 x i32> %ins.reg
}
770
; Insert an i32 loaded from %ptr into element 1 and the scalar %y into
; element 3 of a <4 x i32>.
define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
; KNL-LABEL: insert_v4i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v4i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %ld = load i32, i32* %ptr
  %ins.mem = insertelement <4 x i32> %x, i32 %ld, i32 1
  %ins.reg = insertelement <4 x i32> %ins.mem, i32 %y, i32 3
  ret <4 x i32> %ins.reg
}
788
; Insert an i16 loaded from %ptr into element 1 and the scalar %y into
; element 9 of a <32 x i16>.
define <32 x i16> @insert_v32i16(<32 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v32i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm2
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
; KNL-NEXT:    vpinsrw $1, %edi, %xmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %ld = load i16, i16* %ptr
  %ins.mem = insertelement <32 x i16> %x, i16 %ld, i32 1
  %ins.reg = insertelement <32 x i16> %ins.mem, i16 %y, i32 9
  ret <32 x i16> %ins.reg
}
812
; Insert an i16 loaded from %ptr into element 1 and the scalar %y into
; element 9 of a <16 x i16>.
define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v16i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %ld = load i16, i16* %ptr
  %ins.mem = insertelement <16 x i16> %x, i16 %ld, i32 1
  %ins.reg = insertelement <16 x i16> %ins.mem, i16 %y, i32 9
  ret <16 x i16> %ins.reg
}
836
; Insert an i16 loaded from %ptr into element 1 and the scalar %y into
; element 5 of an <8 x i16>.
define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v8i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %ld = load i16, i16* %ptr
  %ins.mem = insertelement <8 x i16> %x, i16 %ld, i32 1
  %ins.reg = insertelement <8 x i16> %ins.mem, i16 %y, i32 5
  ret <8 x i16> %ins.reg
}
854
; Insert an i8 loaded from %ptr into element 1 and the scalar %y into
; element 50 of a <64 x i8>.
define <64 x i8> @insert_v64i8(<64 x i8> %x, i8 %y, i8* %ptr) {
; KNL-LABEL: insert_v64i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm2
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
; KNL-NEXT:    vpinsrb $2, %edi, %xmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
; SKX-NEXT:    vpinsrb $2, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %ld = load i8, i8* %ptr
  %ins.mem = insertelement <64 x i8> %x, i8 %ld, i32 1
  %ins.reg = insertelement <64 x i8> %ins.mem, i8 %y, i32 50
  ret <64 x i8> %ins.reg
}
878
; Insert an i8 loaded from %ptr into element 1 and the scalar %y into
; element 17 of a <32 x i8>.
define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, i8* %ptr) {
; KNL-LABEL: insert_v32i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %ld = load i8, i8* %ptr
  %ins.mem = insertelement <32 x i8> %x, i8 %ld, i32 1
  %ins.reg = insertelement <32 x i8> %ins.mem, i8 %y, i32 17
  ret <32 x i8> %ins.reg
}
902
; Insert an i8 loaded from %ptr into element 3 and the scalar %y into
; element 10 of a <16 x i8>.
define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
; KNL-LABEL: insert_v16i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %ld = load i8, i8* %ptr
  %ins.mem = insertelement <16 x i8> %x, i8 %ld, i32 3
  %ins.reg = insertelement <16 x i8> %ins.mem, i8 %y, i32 10
  ret <16 x i8> %ins.reg
}
920
; Insert the scalar %y into element 1 of an <8 x i64>.
define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) {
; KNL-LABEL: test_insert_128_v8i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = insertelement <8 x i64> %x, i64 %y, i32 1
  ret <8 x i64> %res
}
936
; Insert the scalar %y into element 1 of a <16 x i32>.
define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) {
; KNL-LABEL: test_insert_128_v16i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = insertelement <16 x i32> %x, i32 %y, i32 1
  ret <16 x i32> %res
}
952
; Insert the scalar %y into element 1 of an <8 x double>.
define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
; KNL-LABEL: test_insert_128_v8f64:
; KNL:       ## BB#0:
; KNL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v8f64:
; SKX:       ## BB#0:
; SKX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
; SKX-NEXT:    vinsertf64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = insertelement <8 x double> %x, double %y, i32 1
  ret <8 x double> %res
}
968
; Insert the scalar %y into element 1 of a <16 x float>.
define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) {
; KNL-LABEL: test_insert_128_v16f32:
; KNL:       ## BB#0:
; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16f32:
; SKX:       ## BB#0:
; SKX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; SKX-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = insertelement <16 x float> %x, float %y, i32 1
  ret <16 x float> %res
}
984
; Insert the scalar %y into element 10 of a <16 x i16>.
define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) {
; KNL-LABEL: test_insert_128_v16i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %res = insertelement <16 x i16> %x, i16 %y, i32 10
  ret <16 x i16> %res
}
1002
; Insert the scalar %y into element 20 of a <32 x i8>.
define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) {
; KNL-LABEL: test_insert_128_v32i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %res = insertelement <32 x i8> %x, i8 %y, i32 20
  ret <32 x i8> %res
}
1020