; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2

; Lower common integer comparisons such as 'isPositive' efficiently:
; https://llvm.org/bugs/show_bug.cgi?id=26701
;
; NOTE(review): all check lines in this file are generated; rerun
; utils/update_llc_test_checks.py after any codegen change instead of
; editing them by hand.

; (x >> (BW-1)) ^ -1 is the "x is not negative" mask, i.e. x > -1.
; It should lower to a single pcmpgt against an all-ones register.
define <16 x i8> @test_pcmpgtb(<16 x i8> %x) {
; SSE-LABEL: test_pcmpgtb:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtb:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <16 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %not
}

define <8 x i16> @test_pcmpgtw(<8 x i16> %x) {
; SSE-LABEL: test_pcmpgtw:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtw:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <8 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %not
}

define <4 x i32> @test_pcmpgtd(<4 x i32> %x) {
; SSE-LABEL: test_pcmpgtd:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %not
}

; i64 elements have no pcmpgtq before SSE4.2, so plain SSE2 emulates the
; 64-bit compare with a 32-bit compare plus an odd-lane splat shuffle.
define <2 x i64> @test_pcmpgtq(<2 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <2 x i64> %x, <i64 63, i64 63>
  %not = xor <2 x i64> %sign, <i64 -1, i64 -1>
  ret <2 x i64> %not
}

; Non-power-of-two / scalar-in-vector type; lowered via scalar GPR code.
define <1 x i128> @test_strange_type(<1 x i128> %x) {
; CHECK-LABEL: test_strange_type:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    notq %rax
; CHECK-NEXT:    movq %rax, %rdx
; CHECK-NEXT:    retq
  %sign = ashr <1 x i128> %x, <i128 127>
  %not = xor <1 x i128> %sign, <i128 -1>
  ret <1 x i128> %not
}

; 256-bit versions of the patterns above. Pre-AVX2 targets split the
; vector into two 128-bit halves.
define <32 x i8> @test_pcmpgtb_256(<32 x i8> %x) {
; SSE-LABEL: test_pcmpgtb_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtb_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtb_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <32 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <32 x i8> %not
}

define <16 x i16> @test_pcmpgtw_256(<16 x i16> %x) {
; SSE-LABEL: test_pcmpgtw_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE-NEXT:    pcmpgtw %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtw_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtw_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <16 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <16 x i16> %not
}

define <8 x i32> @test_pcmpgtd_256(<8 x i32> %x) {
; SSE-LABEL: test_pcmpgtd_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtd_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtd_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %not = xor <8 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  ret <8 x i32> %not
}

define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq_256:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq_256:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtq_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtq_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63>
  %not = xor <4 x i64> %sign, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %not
}

; zext-of-setcc patterns: the all-ones compare mask must be reduced to a
; 0/1 value, via an AND with 1 or a logical right shift of the sign bit.
define <16 x i8> @cmpeq_zext_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: cmpeq_zext_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp eq <16 x i8> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %zext
}

define <16 x i16> @cmpeq_zext_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: cmpeq_zext_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
; SSE-NEXT:    psrlw $15, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp eq <16 x i16> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i16>
  ret <16 x i16> %zext
}

define <4 x i32> @cmpeq_zext_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: cmpeq_zext_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp eq <4 x i32> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %zext
}

define <4 x i64> @cmpeq_zext_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: cmpeq_zext_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1,1]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpeq_zext_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    pcmpeqq %xmm3, %xmm1
; SSE42-NEXT:    psrlq $63, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp eq <4 x i64> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i64>
  ret <4 x i64> %zext
}

define <32 x i8> @cmpgt_zext_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: cmpgt_zext_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp sgt <32 x i8> %a, %b
  %zext = zext <32 x i1> %cmp to <32 x i8>
  ret <32 x i8> %zext
}

define <8 x i16> @cmpgt_zext_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: cmpgt_zext_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <8 x i16> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %zext
}

define <8 x i32> @cmpgt_zext_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: cmpgt_zext_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp sgt <8 x i32> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %zext
}

define <2 x i64> @cmpgt_zext_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: cmpgt_zext_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpgt_zext_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <2 x i64> %a, %b
  %zext = zext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %zext
}

; Test that we optimize a zext of a vector setcc ne zero where all bits but the
; lsb are known to be zero.
define <8 x i32> @cmpne_knownzeros_zext_v8i16_v8i32(<8 x i16> %x) {
; SSE2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $15, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    psrlw $15, %xmm1
; SSE42-NEXT:    pxor %xmm2, %xmm2
; SSE42-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE42-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %a = lshr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %b = icmp ne <8 x i16> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i32>
  ret <8 x i32> %c
}

define <8 x i32> @cmpne_knownzeros_zext_v8i32_v8i32(<8 x i32> %x) {
; SSE-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %b = icmp ne <8 x i32> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i32>
  ret <8 x i32> %c
}

define <8 x i16> @cmpne_knownzeros_zext_v8i32_v8i16(<8 x i32> %x) {
; SSE2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $31, %xmm1
; SSE2-NEXT:    psrld $31, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    psrld $31, %xmm1
; SSE42-NEXT:    psrld $31, %xmm0
; SSE42-NEXT:    packusdw %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %b = icmp ne <8 x i32> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i16>
  ret <8 x i16> %c
}

; PR26697
; Testing a sign-bit-only mask compared ne 0, then sign-extended; this is
; just an arithmetic shift of the sign bit.
define <4 x i32> @cmpeq_one_mask_bit(<4 x i32> %mask) {
; SSE-LABEL: cmpeq_one_mask_bit:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_one_mask_bit:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    retq
  %mask_signbit = and <4 x i32> %mask, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %mask_bool = icmp ne <4 x i32> %mask_signbit, zeroinitializer
  %mask_bool_ext = sext <4 x i1> %mask_bool to <4 x i32>
  ret <4 x i32> %mask_bool_ext
}