; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

; test1: ordered less-or-equal compare (fcmp ole) of two <16 x float> values
; whose result selects between the same two operands. Both runs expect a
; vcmpleps into a mask register followed by a masked vblendmps.
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
; CHECK-LABEL: test1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcmpleps %zmm1, %zmm0, %k1
; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = fcmp ole <16 x float> %x, %y
  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
  ret <16 x float> %max
}

; test2: same pattern as the <16 x float> case but on <8 x double>: fcmp ole
; feeding a select of the compared operands. Both runs expect vcmplepd into a
; mask register followed by a masked vblendmpd.
define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
; CHECK-LABEL: test2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcmplepd %zmm1, %zmm0, %k1
; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = fcmp ole <8 x double> %x, %y
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
  ret <8 x double> %max
}

; test3: integer equality compare of %x against a <16 x i32> loaded from %yp
; (align 4), then a select between %x and %x1. The expected code uses the
; load directly as the memory operand of vpcmpeqd.
define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
; CHECK-LABEL: test3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpeqd (%rdi), %zmm0, %k1
; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %y = load <16 x i32>, <16 x i32>* %yp, align 4
  %mask = icmp eq <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; test4_unsigned: unsigned greater-or-equal compare (icmp uge) on <16 x i32>,
; selecting %x1 where true and %y otherwise. The expected compare is
; vpcmpnltud (unsigned "not less than").
define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
; CHECK-LABEL: test4_unsigned:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpnltud %zmm1, %zmm0, %k1
; CHECK-NEXT:    vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp uge <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
  ret <16 x i32> %max
}

; test5: integer equality compare on <8 x i64> feeding a select of the
; compared operands. Both runs expect vpcmpeqq into a mask register followed
; by a masked vpblendmq.
define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
; CHECK-LABEL: test5:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp eq <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
  ret <8 x i64> %max
}

; test6_unsigned: unsigned greater-than compare (icmp ugt) on <8 x i64>,
; selecting %x1 where true and %y otherwise. The expected compare is
; vpcmpnleuq (unsigned "not less or equal").
define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
; CHECK-LABEL: test6_unsigned:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1
; CHECK-NEXT:    vpblendmq %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ugt <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
  ret <8 x i64> %max
}

; test7: fcmp olt of a 128-bit <4 x float> against zero, then a select between
; %a and %b. The two runs differ: the knl run expects the compare result in an
; xmm register consumed by vblendvps, while the skx run expects a mask-register
; compare followed by a masked vblendmps.
define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
; KNL-LABEL: test7:
; KNL:       ## BB#0:
; KNL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; KNL-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
; KNL-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test7:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vcmpltps %xmm2, %xmm0, %k1
; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq

  %mask = fcmp olt <4 x float> %a, zeroinitializer
  %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
  ret <4 x float>%c
}

; test8: fcmp olt of a 128-bit <2 x double> against zero, then a select between
; %a and %b. The knl run expects an xmm compare result consumed by vblendvpd;
; the skx run expects a mask-register compare followed by a masked vblendmpd.
define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
; KNL-LABEL: test8:
; KNL:       ## BB#0:
; KNL-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; KNL-NEXT:    vcmpltpd %xmm2, %xmm0, %xmm2
; KNL-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vcmpltpd %xmm2, %xmm0, %k1
; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
  %mask = fcmp olt <2 x double> %a, zeroinitializer
  %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
  ret <2 x double>%c
}

; test9: integer equality compare on 256-bit <8 x i32> feeding a select of the
; compared operands. The knl run expects the operation to be carried out on
; the containing zmm registers; the skx run expects it directly on ymm.
define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; KNL-LABEL: test9:
; KNL:       ## BB#0:
; KNL-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test9:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1
; SKX-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT:    retq
  %mask = icmp eq <8 x i32> %x, %y
  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %max
}

; test10: ordered-equal compare (fcmp oeq) on 256-bit <8 x float> feeding a
; select of the compared operands. The knl run expects the operation on the
; containing zmm registers; the skx run expects it directly on ymm.
define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; KNL-LABEL: test10:
; KNL:       ## BB#0:
; KNL-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
; KNL-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test10:
; SKX:       ## BB#0:
; SKX-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT:    retq

  %mask = fcmp oeq <8 x float> %x, %y
  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
  ret <8 x float> %max
}

; test11_unsigned: icmp ugt on <8 x i32> selecting the larger of the two
; compared operands, i.e. an unsigned-max idiom. Both runs expect the whole
; pattern to collapse into a single vpmaxud on ymm registers.
define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
; CHECK-LABEL: test11_unsigned:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %mask = icmp ugt <8 x i32> %x, %y
  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %max
}

; test12: equality compare of two <16 x i64> vectors with the <16 x i1> result
; bitcast to i16. Both runs expect two 8-lane vpcmpeqq compares whose masks are
; joined with kunpckbw and moved to a general-purpose register with kmovw.
define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; CHECK-LABEL: test12:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0
; CHECK-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
; CHECK-NEXT:    kunpckbw %k0, %k1, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = icmp eq <16 x i64> %a, %b
  %res1 = bitcast <16 x i1> %res to i16
  ret i16 %res1
}

; test12_v32i32: equality compare of two <32 x i32> vectors with the <32 x i1>
; result bitcast to i32. The skx run expects two vpcmpeqd compares joined with
; kunpckwd and read out with kmovd. The knl run expects a much longer sequence:
; each 16-lane compare mask is taken apart bit by bit with kshiftlw/kshiftrw
; pairs, rebuilt as a byte vector with vpinsrb, turned back into a mask with
; vptestmd, and stored to the stack; the i32 result is then loaded from (%rsp).
define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; KNL-LABEL: test12_v32i32:
; KNL:       ## BB#0:
; KNL-NEXT:    pushq %rbp
; KNL-NEXT:    movq %rsp, %rbp
; KNL-NEXT:    andq $-32, %rsp
; KNL-NEXT:    subq $32, %rsp
; KNL-NEXT:    vpcmpeqd %zmm3, %zmm1, %k0
; KNL-NEXT:    kshiftlw $14, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    kshiftlw $15, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %ecx
; KNL-NEXT:    vmovd %ecx, %xmm1
; KNL-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $13, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $12, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $11, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $10, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $9, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $8, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $7, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $6, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $9, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $5, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $10, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $4, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $11, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $3, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $2, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $13, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $1, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $0, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0
; KNL-NEXT:    kshiftlw $14, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    kshiftlw $15, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %ecx
; KNL-NEXT:    vmovd %ecx, %xmm0
; KNL-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $13, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $12, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $11, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $10, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $9, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $8, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $7, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $6, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $5, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $4, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $3, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $2, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $1, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $0, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, (%rsp)
; KNL-NEXT:    movl (%rsp), %eax
; KNL-NEXT:    movq %rbp, %rsp
; KNL-NEXT:    popq %rbp
; KNL-NEXT:    retq
;
; SKX-LABEL: test12_v32i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0
; SKX-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
; SKX-NEXT:    kunpckwd %k0, %k1, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    retq
  %res = icmp eq <32 x i32> %a, %b
  %res1 = bitcast <32 x i1> %res to i32
  ret i32 %res1
}

; test12_v64i16: equality compare of two <64 x i16> vectors with the <64 x i1>
; result bitcast to i64. The skx run expects two vpcmpeqw compares into mask
; registers joined with kunpckdq and read out with kmovq. The knl run expects
; four 256-bit vpcmpeqw compares; each result is widened to a 16-bit mask,
; taken apart bit by bit with kshiftlw/kshiftrw pairs, rebuilt with vpinsrb,
; and stored to the stack. The final i64 is assembled from two 32-bit stack
; loads with shlq/orq.
define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; KNL-LABEL: test12_v64i16:
; KNL:       ## BB#0:
; KNL-NEXT:    pushq %rbp
; KNL-NEXT:    movq %rsp, %rbp
; KNL-NEXT:    andq $-32, %rsp
; KNL-NEXT:    subq $64, %rsp
; KNL-NEXT:    vpcmpeqw %ymm5, %ymm1, %ymm1
; KNL-NEXT:    vpmovsxwd %ymm1, %zmm1
; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT:    kshiftlw $14, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    kshiftlw $15, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %ecx
; KNL-NEXT:    vmovd %ecx, %xmm1
; KNL-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $13, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $12, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $11, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $10, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $9, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $8, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $7, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $6, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $9, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $5, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $10, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $4, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $11, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $3, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $2, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $13, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $1, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
; KNL-NEXT:    kshiftlw $0, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kshiftlw $14, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    kshiftlw $15, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %ecx
; KNL-NEXT:    vmovd %ecx, %xmm0
; KNL-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $13, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $12, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $11, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $10, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $9, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $8, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $7, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $6, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $5, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $4, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $3, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $2, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $1, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $0, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, (%rsp)
; KNL-NEXT:    vpcmpeqw %ymm7, %ymm3, %ymm0
; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kshiftlw $14, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    kshiftlw $15, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %ecx
; KNL-NEXT:    vmovd %ecx, %xmm0
; KNL-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $13, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $12, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $11, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $10, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $9, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $8, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $7, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $6, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $5, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $4, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $3, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $2, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $1, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $0, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT:    vpcmpeqw %ymm6, %ymm2, %ymm0
; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kshiftlw $14, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    kshiftlw $15, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %ecx
; KNL-NEXT:    vmovd %ecx, %xmm0
; KNL-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $13, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $12, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $11, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $10, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $9, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $8, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $7, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $6, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $5, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $4, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $3, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $2, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $1, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT:    kshiftlw $0, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl (%rsp), %ecx
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    shlq $32, %rax
; KNL-NEXT:    orq %rcx, %rax
; KNL-NEXT:    movq %rbp, %rsp
; KNL-NEXT:    popq %rbp
; KNL-NEXT:    retq
;
; SKX-LABEL: test12_v64i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpeqw %zmm2, %zmm0, %k0
; SKX-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
; SKX-NEXT:    kunpckdq %k0, %k1, %k0
; SKX-NEXT:    kmovq %k0, %rax
; SKX-NEXT:    retq
  %res = icmp eq <64 x i16> %a, %b
  %res1 = bitcast <64 x i1> %res to i64
  ret i64 %res1
}

; test13: fcmp oeq on <16 x float> whose <16 x i1> result is zero-extended to
; <16 x i32>. Both runs expect the compare mask to drive a zero-masked
; vpbroadcastd of a RIP-relative constant.
define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
; CHECK-LABEL: test13:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
{
  %cmpvector_i = fcmp oeq <16 x float> %a, %b
  %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
  ret <16 x i32> %conv
}

; test14: computes %a - %b, compares the difference against %a with icmp sgt,
; sign-extends that mask, tests the extension for zero, and selects zero or the
; difference accordingly. Both runs expect the sext/compare-with-zero pair to
; disappear, leaving vpsubd, vpcmpgtd, and a zero-masked vmovdqa32.
define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
; CHECK-LABEL: test14:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm1
; CHECK-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
; CHECK-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %sub_r = sub <16 x i32> %a, %b
  %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
  %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
  %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
  %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
  ret <16 x i32>%res
}

; test15: <8 x i64> variant of the subtract / sgt / sext / compare-with-zero /
; select pattern. Both runs expect vpsubq, vpcmpgtq, and a zero-masked
; vmovdqa64.
define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
; CHECK-LABEL: test15:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm1
; CHECK-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %sub_r = sub <8 x i64> %a, %b
  %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
  %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
  %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
  %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
  ret <8 x i64>%res
}

; test16: signed greater-or-equal compare (icmp sge) on <16 x i32>, selecting
; %x1 where true and %y otherwise. Both runs expect the compare to be emitted
; as vpcmpled with the two register operands swapped.
define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
; CHECK-LABEL: test16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpled %zmm0, %zmm1, %k1
; CHECK-NEXT:    vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp sge <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
  ret <16 x i32> %max
}

; test17: signed greater-than compare of %x against a <16 x i32> loaded from
; %y.ptr (align 4), then a select between %x and %x1. Both runs expect the
; load to be used directly as the memory operand of vpcmpgtd.
define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test17:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpgtd (%rdi), %zmm0, %k1
; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp sgt <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; test18: signed less-or-equal compare of %x against a <16 x i32> loaded from
; %y.ptr (align 4), then a select between %x and %x1. Both runs expect
; vpcmpled with the load as its memory operand.
define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test18:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpled (%rdi), %zmm0, %k1
; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp sle <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; test19: unsigned less-or-equal compare of %x against a <16 x i32> loaded from
; %y.ptr (align 4), then a select between %x and %x1. Both runs expect
; vpcmpleud with the load as its memory operand.
define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test19:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpleud (%rdi), %zmm0, %k1
; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp ule <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; test20: two <16 x i32> equality compares combined by a select whose false
; arm is zeroinitializer (a lane-wise AND of the two masks); the combined mask
; then selects between %x and %y. Both runs expect the second vpcmpeqd to be
; write-masked by the result of the first instead of a separate mask AND.
define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; CHECK-LABEL: test20:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
; CHECK-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask1 = icmp eq <16 x i32> %x1, %y1
  %mask0 = icmp eq <16 x i32> %x, %y
  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
  ret <16 x i32> %max
}

; test21: an icmp sle on (%x, %y) and an icmp sge on (%x1, %y1), both on
; <8 x i64>, combined by a select with a zeroinitializer false arm; the result
; selects between %x and %x1. Both runs expect two vpcmpleq instructions, the
; second one write-masked by the first.
define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; CHECK-LABEL: test21:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
; CHECK-NEXT:    vpcmpleq %zmm2, %zmm3, %k1 {%k1}
; CHECK-NEXT:    vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask1 = icmp sge <8 x i64> %x1, %y1
  %mask0 = icmp sle <8 x i64> %x, %y
  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

; test22: an icmp sgt on registers (%x1, %y1) combined with an icmp sgt of %x
; against a <8 x i64> loaded from %y.ptr (align 4); the combined mask selects
; between %x and %x1. Both runs expect the register compare first, then a
; write-masked vpcmpgtq that takes the load as its memory operand.
define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; CHECK-LABEL: test22:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask1 = icmp sgt <8 x i64> %x1, %y1
  %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
  %mask0 = icmp sgt <8 x i64> %x, %y
  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

; test23: an icmp sge on registers (%x1, %y1) combined with an icmp ule of %x
; against a <16 x i32> loaded from %y.ptr (align 4); the combined mask selects
; between %x and %x1. Both runs expect vpcmpled on registers followed by a
; write-masked vpcmpleud with the load as its memory operand.
define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; CHECK-LABEL: test23:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpled %zmm1, %zmm2, %k1
; CHECK-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask1 = icmp sge <16 x i32> %x1, %y1
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask0 = icmp ule <16 x i32> %x, %y
  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; test24: a scalar i64 loaded from %yb.ptr is splatted to <8 x i64> with
; insertelement + shufflevector and compared for equality against %x; the mask
; selects between %x and %x1. Both runs expect the splat to fold into the
; compare as an embedded {1to8} broadcast memory operand.
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
; CHECK-LABEL: test24:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k1
; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %yb = load i64, i64* %yb.ptr, align 4
  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
  %mask = icmp eq <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

; test25: a scalar i32 loaded from %yb.ptr is splatted to <16 x i32> and used
; as the right-hand side of an icmp sle against %x; the mask selects between
; %x and %x1. Both runs expect vpcmpled with an embedded {1to16} broadcast
; memory operand.
define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
; CHECK-LABEL: test25:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpled (%rdi){1to16}, %zmm0, %k1
; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %yb = load i32, i32* %yb.ptr, align 4
  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
  %mask = icmp sle <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; Signed >= between registers combined with a signed > against a splatted
; scalar loaded from %yb.ptr (<16 x i32>). The expected output uses the first
; compare as write-mask for a vpcmpgtd with an embedded-broadcast operand.
define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; CHECK-LABEL: test26:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpled %zmm1, %zmm2, %k1
; CHECK-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  ; register/register compare
  %reg.cmp = icmp sge <16 x i32> %x1, %y1
  ; load the scalar and splat it across all sixteen lanes
  %scalar = load i32, i32* %yb.ptr, align 4
  %lane0 = insertelement <16 x i32> undef, i32 %scalar, i32 0
  %splat = shufflevector <16 x i32> %lane0, <16 x i32> undef, <16 x i32> zeroinitializer
  %bcast.cmp = icmp sgt <16 x i32> %x, %splat
  ; lanes where both compares hold
  %both = select <16 x i1> %bcast.cmp, <16 x i1> %reg.cmp, <16 x i1> zeroinitializer
  %res = select <16 x i1> %both, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %res
}
844
; Signed >= between registers combined with a signed <= against a splatted
; scalar loaded from %yb.ptr (<8 x i64>). The expected output uses the first
; compare as write-mask for a vpcmpleq with an embedded-broadcast operand.
define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; CHECK-LABEL: test27:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpleq %zmm1, %zmm2, %k1
; CHECK-NEXT:    vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  ; register/register compare
  %reg.cmp = icmp sge <8 x i64> %x1, %y1
  ; load the scalar and splat it across all eight lanes
  %scalar = load i64, i64* %yb.ptr, align 4
  %lane0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %splat = shufflevector <8 x i64> %lane0, <8 x i64> undef, <8 x i32> zeroinitializer
  %bcast.cmp = icmp sle <8 x i64> %x, %splat
  ; lanes where both compares hold
  %both = select <8 x i1> %bcast.cmp, <8 x i1> %reg.cmp, <8 x i1> zeroinitializer
  %res = select <8 x i1> %both, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %res
}
861
; Compares two <8 x i1> masks (each from a signed > on <8 x i64>) for equality
; and sign-extends the result to <8 x i32>. The expected output combines the
; two k-registers with kxnor (kxnorw for the knl run, kxnorb for the skx run).
define <8 x i32> @test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
; KNL-LABEL: test28:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; KNL-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1
; KNL-NEXT:    kxnorw %k1, %k0, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovqd %zmm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test28:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; SKX-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1
; SKX-NEXT:    kxnorb %k1, %k0, %k0
; SKX-NEXT:    vpmovm2d %k0, %ymm0
; SKX-NEXT:    retq
  %gt.a = icmp sgt <8 x i64> %x, %y
  %gt.b = icmp sgt <8 x i64> %x1, %y1
  ; lanes where the two masks agree
  %same = icmp eq <8 x i1> %gt.a, %gt.b
  %wide = sext <8 x i1> %same to <8 x i32>
  ret <8 x i32> %wide
}
886
; Compares two <16 x i1> masks (each from a signed > on <16 x i32>) for
; inequality and sign-extends the result to <16 x i8>. The expected output
; combines the two k-registers with kxorw in both run configurations.
define <16 x i8> @test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
; KNL-LABEL: test29:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; KNL-NEXT:    vpcmpgtd %zmm3, %zmm2, %k1
; KNL-NEXT:    kxorw %k1, %k0, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test29:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; SKX-NEXT:    vpcmpgtd %zmm3, %zmm2, %k1
; SKX-NEXT:    kxorw %k1, %k0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
  %gt.a = icmp sgt <16 x i32> %x, %y
  %gt.b = icmp sgt <16 x i32> %x1, %y1
  ; lanes where the two masks differ
  %differ = icmp ne <16 x i1> %gt.a, %gt.b
  %wide = sext <16 x i1> %differ to <16 x i8>
  ret <16 x i8> %wide
}
911
; Ordered-equal compare and select on <4 x double>. For the knl run the
; expected output is a ymm vcmpeqpd feeding vblendvpd; for the skx run the
; compare writes a k-register that masks vblendmpd.
define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
; KNL-LABEL: test30:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm2
; KNL-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test30:
; SKX:       ## BB#0:
; SKX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT:    retq
  %eq = fcmp oeq <4 x double> %x, %y
  %res = select <4 x i1> %eq, <4 x double> %x, <4 x double> %y
  ret <4 x double> %res
}
929
; Ordered less-than compare of <2 x double> against a vector loaded from %yp.
; Both run configurations are expected to fold the load into vcmpltpd. The knl
; run blends with vblendvpd, the skx run with a k-masked vblendmpd.
define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
; KNL-LABEL: test31:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpltpd (%rdi), %xmm0, %xmm2
; KNL-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test31:
; SKX:       ## BB#0:
; SKX-NEXT:    vcmpltpd (%rdi), %xmm0, %k1
; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
  %y.vec = load <2 x double>, <2 x double>* %yp, align 4
  %lt = fcmp olt <2 x double> %x, %y.vec
  %res = select <2 x i1> %lt, <2 x double> %x, <2 x double> %x1
  ret <2 x double> %res
}
948
; Ordered greater-than compare on <4 x double> with the loaded vector as the
; left operand (%y > %x). The expected output commutes it into a vcmpltpd with
; the load folded as the memory operand.
define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
; KNL-LABEL: test32:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpltpd (%rdi), %ymm0, %ymm2
; KNL-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test32:
; SKX:       ## BB#0:
; SKX-NEXT:    vcmpltpd (%rdi), %ymm0, %k1
; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT:    retq
  %y.vec = load <4 x double>, <4 x double>* %yp, align 4
  ; loaded value is the left-hand operand here
  %gt = fcmp ogt <4 x double> %y.vec, %x
  %res = select <4 x i1> %gt, <4 x double> %x, <4 x double> %x1
  ret <4 x double> %res
}
967
; Ordered less-than compare of <8 x double> against a vector loaded from %yp.
; The expected output folds the load into a zmm vcmpltpd that writes the
; k-register used by vblendmpd.
define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
; CHECK-LABEL: test33:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcmpltpd (%rdi), %zmm0, %k1
; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %y.vec = load <8 x double>, <8 x double>* %yp, align 4
  %lt = fcmp olt <8 x double> %x, %y.vec
  %res = select <8 x i1> %lt, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %res
}
979
; Ordered less-than compare of <4 x float> against a vector loaded from %yp.
; Both run configurations are expected to fold the load into vcmpltps. The knl
; run blends with vblendvps, the skx run with a k-masked vblendmps.
define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
; KNL-LABEL: test34:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpltps (%rdi), %xmm0, %xmm2
; KNL-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test34:
; SKX:       ## BB#0:
; SKX-NEXT:    vcmpltps (%rdi), %xmm0, %k1
; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
  %y.vec = load <4 x float>, <4 x float>* %yp, align 4
  %lt = fcmp olt <4 x float> %x, %y.vec
  %res = select <4 x i1> %lt, <4 x float> %x, <4 x float> %x1
  ret <4 x float> %res
}
997
; Ordered greater-than compare on <8 x float> with the loaded vector as the
; left operand (%y > %x). For the knl run the expected output loads into ymm2
; and performs the compare and blend on zmm registers; for the skx run the
; load is folded into a ymm vcmpltps.
define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
; KNL-LABEL: test35:
; KNL:       ## BB#0:
; KNL-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT:    vmovups (%rdi), %ymm2
; KNL-NEXT:    vcmpltps %zmm2, %zmm0, %k1
; KNL-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test35:
; SKX:       ## BB#0:
; SKX-NEXT:    vcmpltps (%rdi), %ymm0, %k1
; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT:    retq
  %y.vec = load <8 x float>, <8 x float>* %yp, align 4
  ; loaded value is the left-hand operand here
  %gt = fcmp ogt <8 x float> %y.vec, %x
  %res = select <8 x i1> %gt, <8 x float> %x, <8 x float> %x1
  ret <8 x float> %res
}
1020
; Ordered less-than compare of <16 x float> against a vector loaded from %yp.
; The expected output folds the load into a zmm vcmpltps that writes the
; k-register used by vblendmps.
define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
; CHECK-LABEL: test36:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcmpltps (%rdi), %zmm0, %k1
; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %y.vec = load <16 x float>, <16 x float>* %yp, align 4
  %lt = fcmp olt <16 x float> %x, %y.vec
  %res = select <16 x i1> %lt, <16 x float> %x, <16 x float> %x1
  ret <16 x float> %res
}
1032
; Ordered greater-than compare of a splatted scalar double (loaded from %ptr)
; against %x on <8 x double>. The expected output commutes it into a vcmpltpd
; with an embedded-broadcast memory operand, (%rdi){1to8}.
define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
; CHECK-LABEL: test37:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1
; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  ; load the scalar and splat it across all eight lanes
  %scalar = load double, double* %ptr
  %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
  %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
  ; splat is the left-hand operand
  %gt = fcmp ogt <8 x double> %splat, %x
  %res = select <8 x i1> %gt, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %res
}
1048
; Ordered greater-than compare of a splatted scalar double (loaded from %ptr)
; against %x on <4 x double>. For the knl run the expected output materialises
; the splat with vbroadcastsd before a ymm compare; for the skx run it is an
; embedded-broadcast operand, (%rdi){1to4}.
define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
; KNL-LABEL: test38:
; KNL:       ## BB#0:
; KNL-NEXT:    vbroadcastsd (%rdi), %ymm2
; KNL-NEXT:    vcmpltpd %ymm2, %ymm0, %ymm2
; KNL-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test38:
; SKX:       ## BB#0:
; SKX-NEXT:    vcmpltpd (%rdi){1to4}, %ymm0, %k1
; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT:    retq
  ; load the scalar and splat it across all four lanes
  %scalar = load double, double* %ptr
  %lane0 = insertelement <4 x double> undef, double %scalar, i32 0
  %splat = shufflevector <4 x double> %lane0, <4 x double> undef, <4 x i32> zeroinitializer
  ; splat is the left-hand operand
  %gt = fcmp ogt <4 x double> %splat, %x
  %res = select <4 x i1> %gt, <4 x double> %x, <4 x double> %x1
  ret <4 x double> %res
}
1071
; Ordered greater-than compare of a splatted scalar double (loaded from %ptr)
; against %x on <2 x double>. For the knl run the expected output materialises
; the splat with vmovddup before an xmm compare; for the skx run it is an
; embedded-broadcast operand, (%rdi){1to2}.
define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
; KNL-LABEL: test39:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
; KNL-NEXT:    vcmpltpd %xmm2, %xmm0, %xmm2
; KNL-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test39:
; SKX:       ## BB#0:
; SKX-NEXT:    vcmpltpd (%rdi){1to2}, %xmm0, %k1
; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
  ; load the scalar and splat it across both lanes (all-zero shuffle mask)
  %scalar = load double, double* %ptr
  %lane0 = insertelement <2 x double> undef, double %scalar, i32 0
  %splat = shufflevector <2 x double> %lane0, <2 x double> undef, <2 x i32> zeroinitializer
  ; splat is the left-hand operand
  %gt = fcmp ogt <2 x double> %splat, %x
  %res = select <2 x i1> %gt, <2 x double> %x, <2 x double> %x1
  ret <2 x double> %res
}
1094
1095
; Ordered greater-than compare of a splatted scalar float (loaded from %ptr)
; against %x on <16 x float>. The expected output commutes it into a vcmpltps
; with an embedded-broadcast memory operand, (%rdi){1to16}.
define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
; CHECK-LABEL: test40:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcmpltps (%rdi){1to16}, %zmm0, %k1
; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  ; load the scalar and splat it across all sixteen lanes (all-zero shuffle mask)
  %scalar = load float, float* %ptr
  %lane0 = insertelement <16 x float> undef, float %scalar, i32 0
  %splat = shufflevector <16 x float> %lane0, <16 x float> undef, <16 x i32> zeroinitializer
  ; splat is the left-hand operand
  %gt = fcmp ogt <16 x float> %splat, %x
  %res = select <16 x i1> %gt, <16 x float> %x, <16 x float> %x1
  ret <16 x float> %res
}
1111
; Ordered greater-than compare of a splatted scalar float (loaded from %ptr)
; against %x on <8 x float>. For the knl run the expected output broadcasts
; into ymm2 and performs the compare and blend on zmm registers; for the skx
; run the splat is an embedded-broadcast operand, (%rdi){1to8}.
define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
; KNL-LABEL: test41:
; KNL:       ## BB#0:
; KNL-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT:    vbroadcastss (%rdi), %ymm2
; KNL-NEXT:    vcmpltps %zmm2, %zmm0, %k1
; KNL-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test41:
; SKX:       ## BB#0:
; SKX-NEXT:    vcmpltps (%rdi){1to8}, %ymm0, %k1
; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT:    retq
  ; load the scalar and splat it across all eight lanes (all-zero shuffle mask)
  %scalar = load float, float* %ptr
  %lane0 = insertelement <8 x float> undef, float %scalar, i32 0
  %splat = shufflevector <8 x float> %lane0, <8 x float> undef, <8 x i32> zeroinitializer
  ; splat is the left-hand operand
  %gt = fcmp ogt <8 x float> %splat, %x
  %res = select <8 x i1> %gt, <8 x float> %x, <8 x float> %x1
  ret <8 x float> %res
}
1137
; Ordered greater-than compare of a splatted scalar float (loaded from %ptr)
; against %x on <4 x float>. For the knl run the expected output materialises
; the splat with vbroadcastss before an xmm compare; for the skx run it is an
; embedded-broadcast operand, (%rdi){1to4}.
define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
; KNL-LABEL: test42:
; KNL:       ## BB#0:
; KNL-NEXT:    vbroadcastss (%rdi), %xmm2
; KNL-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
; KNL-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test42:
; SKX:       ## BB#0:
; SKX-NEXT:    vcmpltps (%rdi){1to4}, %xmm0, %k1
; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
  ; load the scalar and splat it across all four lanes (all-zero shuffle mask)
  %scalar = load float, float* %ptr
  %lane0 = insertelement <4 x float> undef, float %scalar, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ; splat is the left-hand operand
  %gt = fcmp ogt <4 x float> %splat, %x
  %res = select <4 x i1> %gt, <4 x float> %x, <4 x float> %x1
  ret <4 x float> %res
}
1160
; Broadcast compare on <8 x double> ANDed with an incoming <8 x i1> mask
; argument. The expected output first moves %mask_in into k1 (vpmovsxwq +
; vpsllq + vptestmq for the knl run, vpsllw + vpmovw2m for the skx run). It
; then uses k1 as the write-mask of a vcmpltpd with an embedded-broadcast
; operand.
define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
; KNL-LABEL: test43:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm2, %zmm2
; KNL-NEXT:    vpsllq $63, %zmm2, %zmm2
; KNL-NEXT:    vptestmq %zmm2, %zmm2, %k1
; KNL-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
; KNL-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test43:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm2, %xmm2
; SKX-NEXT:    vpmovw2m %xmm2, %k1
; SKX-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
; SKX-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
  ; load the scalar and splat it across all eight lanes
  %scalar = load double, double* %ptr
  %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
  %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
  ; splat is the left-hand operand; the result is gated by the caller's mask
  %gt = fcmp ogt <8 x double> %splat, %x
  %gated = and <8 x i1> %gt, %mask_in
  %res = select <8 x i1> %gated, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %res
}
1188
; Equality compare on <4 x i16> with the i1 result sign-extended to <4 x i32>.
; The expected output blends both inputs with a zeroed register (vpblendw)
; before a dword vpcmpeqd. The skx run compares into k0 and expands it with
; vpmovm2d.
define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
; KNL-LABEL: test44:
; KNL:       ## BB#0:
; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; KNL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; KNL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test44:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SKX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    retq
  %eq = icmp eq <4 x i16> %x, %y
  %wide = sext <4 x i1> %eq to <4 x i32>
  ret <4 x i32> %wide
}
1210
; Equality compare on <2 x i16> with the i1 result zero-extended to <2 x i64>.
; The expected output blends both inputs with a zeroed register (vpblendw)
; before a qword vpcmpeqq. The knl run then shifts right by 63; the skx run
; does a zero-masked load of a constant from a rip-relative address.
define <2 x i64> @test45(<2 x i16> %x, <2 x i16> %y) #0 {
; KNL-LABEL: test45:
; KNL:       ## BB#0:
; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; KNL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; KNL-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vpsrlq $63, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test45:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; SKX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k1
; SKX-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %eq = icmp eq <2 x i16> %x, %y
  %wide = zext <2 x i1> %eq to <2 x i64>
  ret <2 x i64> %wide
}
1233
; Ordered-equal compare on <2 x float> with the i1 result zero-extended to
; <2 x i64>. For the knl run the expected output widens the xmm compare result
; through a shift/shuffle/blend sequence and masks it with a constant; for the
; skx run it is a k-register compare plus a zero-masked constant load.
define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
; KNL-LABEL: test46:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; KNL-NEXT:    vpsllq $32, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm1
; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; KNL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; KNL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test46:
; SKX:       ## BB#0:
; SKX-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
; SKX-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %eq = fcmp oeq <2 x float> %x, %y
  %wide = zext <2 x i1> %eq to <2 x i64>
  ret <2 x i64> %wide
}
1255