; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2,-avx | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2 | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512

; Each function below bitcasts an integer vector (128, 256 or 512 bits wide)
; to a single wide scalar integer and compares it against zero. The expected
; assembly shows a PTEST/VPTEST based lowering when SSE4.1 or AVX is enabled,
; and a PCMPEQB + PMOVMSKB + compare-with-0xFFFF sequence when only SSE2 is.
; Inputs wider than the tested register are first OR-reduced.
;
; Three consumers of the comparison result are covered:
;   veccond*  - conditional branch
;   vectest*  - zero-extension to i32 (SETNE)
;   vecsel*   - scalar select (CMOVE)

; Branch on "128-bit vector is non-zero".
define i32 @veccond128(<4 x i32> %input) {
; SSE2-LABEL: veccond128:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    je .LBB0_2
; SSE2-NEXT:  # %bb.1: # %if-true-block
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    retq
; SSE2-NEXT:  .LBB0_2: # %endif-block
; SSE2-NEXT:    movl $1, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: veccond128:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    je .LBB0_2
; SSE41-NEXT:  # %bb.1: # %if-true-block
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    retq
; SSE41-NEXT:  .LBB0_2: # %endif-block
; SSE41-NEXT:    movl $1, %eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: veccond128:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vptest %xmm0, %xmm0
; AVX-NEXT:    je .LBB0_2
; AVX-NEXT:  # %bb.1: # %if-true-block
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    retq
; AVX-NEXT:  .LBB0_2: # %endif-block
; AVX-NEXT:    movl $1, %eax
; AVX-NEXT:    retq
entry:
  %0 = bitcast <4 x i32> %input to i128
  %1 = icmp ne i128 %0, 0
  br i1 %1, label %if-true-block, label %endif-block
if-true-block:
  ret i32 0
endif-block:
  ret i32 1
}

; Branch on "256-bit vector is non-zero". The SSE configurations OR the two
; 128-bit halves together before testing; AVX tests the full ymm register.
define i32 @veccond256(<8 x i32> %input) {
; SSE2-LABEL: veccond256:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    je .LBB1_2
; SSE2-NEXT:  # %bb.1: # %if-true-block
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    retq
; SSE2-NEXT:  .LBB1_2: # %endif-block
; SSE2-NEXT:    movl $1, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: veccond256:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    je .LBB1_2
; SSE41-NEXT:  # %bb.1: # %if-true-block
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    retq
; SSE41-NEXT:  .LBB1_2: # %endif-block
; SSE41-NEXT:    movl $1, %eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: veccond256:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vptest %ymm0, %ymm0
; AVX-NEXT:    je .LBB1_2
; AVX-NEXT:  # %bb.1: # %if-true-block
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
; AVX-NEXT:  .LBB1_2: # %endif-block
; AVX-NEXT:    movl $1, %eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
entry:
  %0 = bitcast <8 x i32> %input to i256
  %1 = icmp ne i256 %0, 0
  br i1 %1, label %if-true-block, label %endif-block
if-true-block:
  ret i32 0
endif-block:
  ret i32 1
}

; Branch on "512-bit vector is non-zero". The input is OR-reduced down to one
; xmm register (SSE) or one ymm register (AVX1 / AVX512) before the test.
define i32 @veccond512(<16 x i32> %input) {
; SSE2-LABEL: veccond512:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    je .LBB2_2
; SSE2-NEXT:  # %bb.1: # %if-true-block
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    retq
; SSE2-NEXT:  .LBB2_2: # %endif-block
; SSE2-NEXT:    movl $1, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: veccond512:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    por %xmm3, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    ptest %xmm1, %xmm1
; SSE41-NEXT:    je .LBB2_2
; SSE41-NEXT:  # %bb.1: # %if-true-block
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    retq
; SSE41-NEXT:  .LBB2_2: # %endif-block
; SSE41-NEXT:    movl $1, %eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: veccond512:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    je .LBB2_2
; AVX1-NEXT:  # %bb.1: # %if-true-block
; AVX1-NEXT:    xorl %eax, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
; AVX1-NEXT:  .LBB2_2: # %endif-block
; AVX1-NEXT:    movl $1, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX512-LABEL: veccond512:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vptest %ymm0, %ymm0
; AVX512-NEXT:    je .LBB2_2
; AVX512-NEXT:  # %bb.1: # %if-true-block
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
; AVX512-NEXT:  .LBB2_2: # %endif-block
; AVX512-NEXT:    movl $1, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = bitcast <16 x i32> %input to i512
  %1 = icmp ne i512 %0, 0
  br i1 %1, label %if-true-block, label %endif-block
if-true-block:
  ret i32 0
endif-block:
  ret i32 1
}

; Return 1 if the 128-bit vector is non-zero, 0 otherwise (zext of the i1).
define i32 @vectest128(<4 x i32> %input) {
; SSE2-LABEL: vectest128:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
; SSE2-NEXT:    setne %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: vectest128:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX-LABEL: vectest128:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vptest %xmm0, %xmm0
; AVX-NEXT:    setne %al
; AVX-NEXT:    retq
  %t0 = bitcast <4 x i32> %input to i128
  %t1 = icmp ne i128 %t0, 0
  %t2 = zext i1 %t1 to i32
  ret i32 %t2
}

; Return 1 if the 256-bit vector is non-zero, 0 otherwise.
define i32 @vectest256(<8 x i32> %input) {
; SSE2-LABEL: vectest256:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
; SSE2-NEXT:    setne %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: vectest256:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX-LABEL: vectest256:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vptest %ymm0, %ymm0
; AVX-NEXT:    setne %al
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %t0 = bitcast <8 x i32> %input to i256
  %t1 = icmp ne i256 %t0, 0
  %t2 = zext i1 %t1 to i32
  ret i32 %t2
}

; Return 1 if the 512-bit vector is non-zero, 0 otherwise.
define i32 @vectest512(<16 x i32> %input) {
; SSE2-LABEL: vectest512:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %ecx
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
; SSE2-NEXT:    setne %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: vectest512:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm3, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    ptest %xmm1, %xmm1
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: vectest512:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    xorl %eax, %eax
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    setne %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX512-LABEL: vectest512:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    vptest %ymm0, %ymm0
; AVX512-NEXT:    setne %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %t0 = bitcast <16 x i32> %input to i512
  %t1 = icmp ne i512 %t0, 0
  %t2 = zext i1 %t1 to i32
  ret i32 %t2
}

; Select %a if the 128-bit vector is non-zero, otherwise %b.
define i32 @vecsel128(<4 x i32> %input, i32 %a, i32 %b) {
; SSE2-LABEL: vecsel128:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl %edi, %eax
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
; SSE2-NEXT:    cmovel %esi, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: vecsel128:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl %edi, %eax
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    cmovel %esi, %eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: vecsel128:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vptest %xmm0, %xmm0
; AVX-NEXT:    cmovel %esi, %eax
; AVX-NEXT:    retq
  %t0 = bitcast <4 x i32> %input to i128
  %t1 = icmp ne i128 %t0, 0
  %t2 = select i1 %t1, i32 %a, i32 %b
  ret i32 %t2
}

; Select %a if the 256-bit vector is non-zero, otherwise %b.
define i32 @vecsel256(<8 x i32> %input, i32 %a, i32 %b) {
; SSE2-LABEL: vecsel256:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl %edi, %eax
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
; SSE2-NEXT:    cmovel %esi, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: vecsel256:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl %edi, %eax
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    cmovel %esi, %eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: vecsel256:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vptest %ymm0, %ymm0
; AVX-NEXT:    cmovel %esi, %eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %t0 = bitcast <8 x i32> %input to i256
  %t1 = icmp ne i256 %t0, 0
  %t2 = select i1 %t1, i32 %a, i32 %b
  ret i32 %t2
}

; Select %a if the 512-bit vector is non-zero, otherwise %b.
define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) {
; SSE2-LABEL: vecsel512:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl %edi, %eax
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %ecx
; SSE2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
; SSE2-NEXT:    cmovel %esi, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: vecsel512:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl %edi, %eax
; SSE41-NEXT:    por %xmm3, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    ptest %xmm1, %xmm1
; SSE41-NEXT:    cmovel %esi, %eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: vecsel512:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl %edi, %eax
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    cmovel %esi, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX512-LABEL: vecsel512:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl %edi, %eax
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vptest %ymm0, %ymm0
; AVX512-NEXT:    cmovel %esi, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %t0 = bitcast <16 x i32> %input to i512
  %t1 = icmp ne i512 %t0, 0
  %t2 = select i1 %t1, i32 %a, i32 %b
  ret i32 %t2
}