; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
; CHECK-LABEL: test1:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = fcmp ole <16 x float> %x, %y
  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
  ret <16 x float> %max
}

define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
; CHECK-LABEL: test2:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = fcmp ole <8 x double> %x, %y
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
  ret <8 x double> %max
}

define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
; CHECK-LABEL: test3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <16 x i32>, <16 x i32>* %yp, align 4
  %mask = icmp eq <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
; CHECK-LABEL: test4_unsigned:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp uge <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
  ret <16 x i32> %max
}

define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
; CHECK-LABEL: test5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
  ret <8 x i64> %max
}

define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
; CHECK-LABEL: test6_unsigned:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ugt <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
  ret <8 x i64> %max
}

define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
; KNL-LABEL: test7:
; KNL: ## BB#0:
; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2
; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test7:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vcmpltps %xmm2, %xmm0, %k1
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq

  %mask = fcmp olt <4 x float> %a, zeroinitializer
  %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
  ret <4 x float>%c
}

define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
; KNL-LABEL: test8:
; KNL: ## BB#0:
; KNL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test8:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vcmpltpd %xmm2, %xmm0, %k1
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
  %mask = fcmp olt <2 x double> %a, zeroinitializer
  %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
  ret <2 x double>%c
}

define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; KNL-LABEL: test9:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test9:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq
  %mask = icmp eq <8 x i32> %x, %y
  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %max
}

define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; KNL-LABEL: test10:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test10:
; SKX: ## BB#0:
; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq

  %mask = fcmp oeq <8 x float> %x, %y
  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
  ret <8 x float> %max
}

define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
; CHECK-LABEL: test11_unsigned:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %mask = icmp ugt <8 x i32> %x, %y
  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %max
}

define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; CHECK-LABEL: test12:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
; CHECK-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
; CHECK-NEXT: kunpckbw %k0, %k1, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
  %res = icmp eq <16 x i64> %a, %b
  %res1 = bitcast <16 x i1> %res to i16
  ret i16 %res1
}

define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; KNL-LABEL: test12_v32i32:
; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $32, %rsp
; KNL-NEXT: vpcmpeqd %zmm3, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm1
; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rsp)
; KNL-NEXT: movl (%rsp), %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
;
; SKX-LABEL: test12_v32i32:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
; SKX-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
; SKX-NEXT: kunpckwd %k0, %k1, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: retq
  %res = icmp eq <32 x i32> %a, %b
  %res1 = bitcast <32 x i1> %res to i32
  ret i32 %res1
}

define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; KNL-LABEL: test12_v64i16:
; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $64, %rsp
; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm1
; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rsp)
; KNL-NEXT: vpcmpeqw %ymm7, %ymm3, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: movl (%rsp), %ecx
; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: shlq $32, %rax
; KNL-NEXT: orq %rcx, %rax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
;
; SKX-LABEL: test12_v64i16:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpeqw %zmm2, %zmm0, %k0
; SKX-NEXT: vpcmpeqw %zmm3, %zmm1, %k1
; SKX-NEXT: kunpckdq %k0, %k1, %k0
; SKX-NEXT: kmovq %k0, %rax
; SKX-NEXT: retq
  %res = icmp eq <64 x i16> %a, %b
  %res1 = bitcast <64 x i1> %res to i64
  ret i64 %res1
}

define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
; CHECK-LABEL: test13:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
{
  %cmpvector_i = fcmp oeq <16 x float> %a, %b
  %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
  ret <16 x i32> %conv
}

define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
; CHECK-LABEL: test14:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1
; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %sub_r = sub <16 x i32> %a, %b
  %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
  %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
  %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
  %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
  ret <16 x i32>%res
}

define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
; CHECK-LABEL: test15:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1
; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k1
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %sub_r = sub <8 x i64> %a, %b
  %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
  %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
  %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
  %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
  ret <8 x i64>%res
}

define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
; CHECK-LABEL: test16:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1
; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp sge <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
  ret <16 x i32> %max
}

define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test17:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp sgt <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test18:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp sle <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test19:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp ule <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; CHECK-LABEL: test20:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp eq <16 x i32> %x1, %y1
  %mask0 = icmp eq <16 x i32> %x, %y
  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
  ret <16 x i32> %max
}

define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; CHECK-LABEL: test21:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sge <8 x i64> %x1, %y1
  %mask0 = icmp sle <8 x i64> %x, %y
  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; CHECK-LABEL: test22:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sgt <8 x i64> %x1, %y1
  %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
  %mask0 = icmp sgt <8 x i64> %x, %y
  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; CHECK-LABEL: test23:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sge <16 x i32> %x1, %y1
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask0 = icmp ule <16 x i32> %x, %y
  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
; CHECK-LABEL: test24:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %yb = load i64, i64* %yb.ptr, align 4
  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
  %mask = icmp eq <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
; CHECK-LABEL: test25:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %yb = load i32, i32* %yb.ptr, align 4
  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
  %mask = icmp sle <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; CHECK-LABEL: test26:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sge <16 x i32> %x1, %y1
  %yb = load i32, i32* %yb.ptr, align 4
  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
  %mask0 = icmp sgt <16 x i32> %x, %y
  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; CHECK-LABEL: test27:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sge <8 x i64> %x1, %y1
  %yb = load i64, i64* %yb.ptr, align 4
  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
  %mask0 = icmp sle <8 x i64> %x, %y
  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
; KNL-LABEL: test28:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
; KNL-NEXT: kxnorw %k1, %k0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: test28:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; SKX-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
; SKX-NEXT: kxnorb %k1, %k0, %k0
; SKX-NEXT: vpmovm2d %k0, %ymm0
; SKX-NEXT: retq
  %x_gt_y = icmp sgt <8 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
  %res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <8 x i1>%res to <8 x i32>
  ret <8 x i32> %resse
}

define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
; KNL-LABEL: test29:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; KNL-NEXT: vpcmpgtd %zmm3, %zmm2, %k1
; KNL-NEXT: kxorw %k1, %k0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test29:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; SKX-NEXT: vpcmpgtd %zmm3, %zmm2, %k1
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
  %x_gt_y = icmp sgt <16 x i32> %x, %y
  %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
  %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <16 x i1>%res to <16 x i8>
  ret <16 x i8> %resse
}

define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
; KNL-LABEL: test30:
; KNL: ## BB#0:
; KNL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm2
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: test30:
; SKX: ## BB#0:
; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq

  %mask = fcmp oeq <4 x double> %x, %y
  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
  ret <4 x double> %max
}

define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
; KNL-LABEL: test31:
; KNL: ## BB#0:
; KNL-NEXT: vcmpltpd (%rdi), %xmm0, %xmm2
; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test31:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq

  %y = load <2 x double>, <2 x double>* %yp, align 4
  %mask = fcmp olt <2 x double> %x, %y
  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
  ret <2 x double> %max
}

define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
; KNL-LABEL: test32:
; KNL: ## BB#0:
; KNL-NEXT: vcmpltpd (%rdi), %ymm0, %ymm2
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: test32:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq

  %y = load <4 x double>, <4 x double>* %yp, align 4
  %mask = fcmp ogt <4 x double> %y, %x
  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
  ret <4 x double> %max
}

define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
; CHECK-LABEL: test33:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <8 x double>, <8 x double>* %yp, align 4
  %mask = fcmp olt <8 x double> %x, %y
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %max
}

define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
; KNL-LABEL: test34:
; KNL: ## BB#0:
; KNL-NEXT: vcmpltps (%rdi), %xmm0, %xmm2
; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test34:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
  %y = load <4 x float>, <4 x float>* %yp, align 4
  %mask = fcmp olt <4 x float> %x, %y
  %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
  ret <4 x float> %max
}

define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
; KNL-LABEL: test35:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vmovups (%rdi), %ymm2
; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test35:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq

  %y = load <8 x float>, <8 x float>* %yp, align 4
  %mask = fcmp ogt <8 x float> %y, %x
  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
  ret <8 x float> %max
}

define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
; CHECK-LABEL: test36:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <16 x float>, <16 x float>* %yp, align 4
  %mask = fcmp olt <16 x float> %x, %y
  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
  ret <16 x float> %max
}

define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
; CHECK-LABEL: test37:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq

  %a = load double, double* %ptr
  %v = insertelement <8 x double> undef, double %a, i32 0
  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer

  %mask = fcmp ogt <8 x double> %shuffle, %x
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %max
}

define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
; KNL-LABEL: test38:
; KNL: ## BB#0:
; KNL-NEXT: vbroadcastsd (%rdi), %ymm2
; KNL-NEXT: vcmpltpd %ymm2, %ymm0, %ymm2
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: test38:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq

  %a = load double, double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer

  %mask = fcmp ogt <4 x double> %shuffle, %x
  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
  ret <4 x double> %max
}

define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
; KNL-LABEL: test39:
; KNL: ## BB#0:
; KNL-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test39:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq

  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>

  %mask = fcmp ogt <2 x double> %shuffle, %x
  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
  ret <2 x double> %max
}


define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
; CHECK-LABEL: test40:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq

  %a = load float, float* %ptr
  %v = insertelement <16 x float> undef, float %a, i32 0
  %shuffle = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>

  %mask = fcmp ogt <16 x float> %shuffle, %x
  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
  ret <16 x float> %max
}

define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
; KNL-LABEL: test41:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vbroadcastss (%rdi), %ymm2
; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test41:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq

  %a = load float, float* %ptr
  %v = insertelement <8 x float> undef, float %a, i32 0
  %shuffle = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>

  %mask = fcmp ogt <8 x float> %shuffle, %x
  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
  ret <8 x float> %max
}

define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
; KNL-LABEL: test42:
; KNL: ## BB#0:
; KNL-NEXT: vbroadcastss (%rdi), %xmm2
; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test42:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq

  %a = load float, float* %ptr
  %v = insertelement <4 x float> undef, float %a, i32 0
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>

  %mask = fcmp ogt <4 x float> %shuffle, %x
  %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
  ret <4 x float> %max
}

define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
; KNL-LABEL: test43:
; KNL: ## BB#0:
; KNL-NEXT: vpmovsxwq %xmm2, %zmm2
; KNL-NEXT: vpsllq $63, %zmm2, %zmm2
; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test43:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $15, %xmm2, %xmm2
; SKX-NEXT: vpmovw2m %xmm2, %k1
; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq

  %a = load double, double* %ptr
  %v = insertelement <8 x double> undef, double %a, i32 0
  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer

  %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
  %mask = and <8 x i1> %mask_cmp, %mask_in
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %max
}

define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
; KNL-LABEL: test44:
; KNL: ## BB#0:
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; KNL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; KNL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test44:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq
  %mask = icmp eq <4 x i16> %x, %y
  %1 = sext <4 x i1> %mask to <4 x i32>
  ret <4 x i32> %1
}

define <2 x i64> @test45(<2 x i16> %x, <2 x i16> %y) #0 {
; KNL-LABEL: test45:
; KNL: ## BB#0:
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; KNL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; KNL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpsrlq $63, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test45:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
; SKX-NEXT: retq
  %mask = icmp eq <2 x i16> %x, %y
  %1 = zext <2 x i1> %mask to <2 x i64>
  ret <2 x i64> %1
}

define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
; KNL-LABEL: test46:
; KNL: ## BB#0:
; KNL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; KNL-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL-NEXT: vpsrad $31, %xmm0, %xmm1
; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; KNL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test46:
; SKX: ## BB#0:
; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
; SKX-NEXT: retq
  %mask = fcmp oeq <2 x float> %x, %y
  %1 = zext <2 x i1> %mask to <2 x i64>
  ret <2 x i64> %1
}