; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)

; Use widest possible vector for movmsk comparisons (PR37087)
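; The sext of each <2 x i1> lane fills a whole 64-bit element, so a bitcast
; <4 x float> mask carries each sign bit twice: MOVMSKPS == 0 (noneof) or 15
; (allof) is equivalent to MOVMSKPD == 0 or 3 on the original 64-bit lanes,
; which is what the checks below expect.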

define i1 @movmskps_noneof_bitcast_v2f64(<2 x double> %a0) {
; SSE-LABEL: movmskps_noneof_bitcast_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    cmpeqpd %xmm0, %xmm1
; SSE-NEXT:    movmskpd %xmm1, %eax
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_noneof_bitcast_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

define i1 @movmskps_allof_bitcast_v2f64(<2 x double> %a0) {
; SSE-LABEL: movmskps_allof_bitcast_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    cmpeqpd %xmm0, %xmm1
; SSE-NEXT:    movmskpd %xmm1, %eax
; SSE-NEXT:    cmpl $3, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_allof_bitcast_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    cmpl $3, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 15
  ret i1 %5
}

define i1 @pmovmskb_noneof_bitcast_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: pmovmskb_noneof_bitcast_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    testl %eax, %eax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE42-LABEL: pmovmskb_noneof_bitcast_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movmskpd %xmm0, %eax
; SSE42-NEXT:    testl %eax, %eax
; SSE42-NEXT:    sete %al
; SSE42-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_bitcast_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <2 x i64> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

define i1 @pmovmskb_allof_bitcast_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: pmovmskb_allof_bitcast_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    cmpl $15, %eax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE42-LABEL: pmovmskb_allof_bitcast_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movmskpd %xmm0, %eax
; SSE42-NEXT:    cmpl $3, %eax
; SSE42-NEXT:    sete %al
; SSE42-NEXT:    retq
;
; AVX-LABEL: pmovmskb_allof_bitcast_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    cmpl $3, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <2 x i64> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 65535
  ret i1 %5
}

define i1 @pmovmskb_noneof_bitcast_v4f32(<4 x float> %a0) {
; SSE-LABEL: pmovmskb_noneof_bitcast_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqps %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %eax
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_bitcast_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <4 x float> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

define i1 @pmovmskb_allof_bitcast_v4f32(<4 x float> %a0) {
; SSE-LABEL: pmovmskb_allof_bitcast_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqps %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %eax
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_allof_bitcast_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    cmpl $15, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <4 x float> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 65535
  ret i1 %5
}

; MOVMSK(ICMP_SGT(X,-1)) -> NOT(MOVMSK(X))
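; icmp sgt X, -1 selects exactly the lanes whose sign bit is clear, i.e. the
; complement of the sign mask, so rather than materializing the compare the
; backend can take MOVMSK of X directly and invert the mask bits (the
; xorl $15 / xorl $65535 in the checks below).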
define i1 @movmskps_allof_v4i32_positive(<4 x i32> %a0) {
; SSE-LABEL: movmskps_allof_v4i32_positive:
; SSE:       # %bb.0:
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    xorl $15, %eax
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_allof_v4i32_positive:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    xorl $15, %eax
; AVX-NEXT:    cmpl $15, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <4 x i32> %a0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 15
  ret i1 %5
}

define i1 @pmovmskb_noneof_v16i8_positive(<16 x i8> %a0) {
; SSE-LABEL: pmovmskb_noneof_v16i8_positive:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_v16i8_positive:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <16 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  %3 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %2)
  %4 = icmp eq i32 %3, 0
  ret i1 %4
}

; AND(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(AND(X,Y))
; XOR(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(XOR(X,Y))
; OR(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(OR(X,Y))
; if the elements are the same width.

define i32 @and_movmskpd_movmskpd(<2 x double> %a0, <2 x i64> %a1) {
; SSE-LABEL: and_movmskpd_movmskpd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm2, %xmm2
; SSE-NEXT:    cmpeqpd %xmm0, %xmm2
; SSE-NEXT:    andpd %xmm1, %xmm2
; SSE-NEXT:    movmskpd %xmm2, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: and_movmskpd_movmskpd:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <2 x double>
  %4 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %3)
  %5 = icmp sgt <2 x i64> zeroinitializer, %a1
  %6 = bitcast <2 x i1> %5 to i2
  %7 = zext i2 %6 to i32
  %8 = and i32 %4, %7
  ret i32 %8
}

define i32 @xor_movmskps_movmskps(<4 x float> %a0, <4 x i32> %a1) {
; SSE-LABEL: xor_movmskps_movmskps:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm2, %xmm2
; SSE-NEXT:    cmpeqps %xmm0, %xmm2
; SSE-NEXT:    xorps %xmm1, %xmm2
; SSE-NEXT:    movmskps %xmm2, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_movmskps_movmskps:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqps %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    retq
  %1 = fcmp oeq <4 x float> zeroinitializer, %a0
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = ashr <4 x i32> %a1, <i32 31, i32 31, i32 31, i32 31>
  %6 = bitcast <4 x i32> %5 to <4 x float>
  %7 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %6)
  %8 = xor i32 %4, %7
  ret i32 %8
}

define i32 @or_pmovmskb_pmovmskb(<16 x i8> %a0, <8 x i16> %a1) {
; SSE-LABEL: or_pmovmskb_pmovmskb:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpeqb %xmm0, %xmm2
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    por %xmm2, %xmm1
; SSE-NEXT:    pmovmskb %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: or_pmovmskb_pmovmskb:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $15, %xmm1, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    retq
  %1 = icmp eq <16 x i8> zeroinitializer, %a0
  %2 = sext <16 x i1> %1 to <16 x i8>
  %3 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %2)
  %4 = ashr <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %5 = bitcast <8 x i16> %4 to <16 x i8>
  %6 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %5)
  %7 = or i32 %3, %6
  ret i32 %7
}