; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Test patterns which generate lzcnt instructions.
; E.g.: zext(or(setcc(cmp), setcc(cmp))) -> shr(or(lzcnt, lzcnt))
; Each CPU (btver2, znver1) is run twice: once as-is and once with
; -mattr=-fast-lzcnt, so the expected assembly is checked both with and
; without the fast-lzcnt feature.
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s

; Test one 32-bit input, output is 32-bit, no transformations expected.
; There is a single compare and no 'or', so both configurations expect the
; same xor/test/sete sequence.
define i32 @test_zext_cmp0(i32 %a) {
; ALL-LABEL: test_zext_cmp0:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    xorl %eax, %eax
; ALL-NEXT:    testl %edi, %edi
; ALL-NEXT:    sete %al
; ALL-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %conv = zext i1 %cmp to i32
  ret i32 %conv
}

; Test two 32-bit inputs, output is 32-bit.
; With fast-lzcnt the expected code is lzcntl on each input, orl, then
; shrl $5 (lzcnt of a 32-bit value is 32 only when the value is zero, so
; bit 5 of the or is the result). Without it, test/sete/orb/movzbl is expected.
define i32 @test_zext_cmp1(i32 %a, i32 %b) {
; FASTLZCNT-LABEL: test_zext_cmp1:
; FASTLZCNT:       # %bb.0:
; FASTLZCNT-NEXT:    lzcntl %edi, %ecx
; FASTLZCNT-NEXT:    lzcntl %esi, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp1:
; NOFASTLZCNT:       # %bb.0:
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %or = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %or to i32
  ret i32 %lor.ext
}

; Test two 64-bit inputs, output is 64-bit.
; Same pattern as test_zext_cmp1, using lzcntq and a shift by 6 when
; fast-lzcnt is available.
define i64 @test_zext_cmp2(i64 %a, i64 %b) {
; FASTLZCNT-LABEL: test_zext_cmp2:
; FASTLZCNT:       # %bb.0:
; FASTLZCNT-NEXT:    lzcntq %rdi, %rcx
; FASTLZCNT-NEXT:    lzcntq %rsi, %rax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $6, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp2:
; NOFASTLZCNT:       # %bb.0:
; NOFASTLZCNT-NEXT:    testq %rdi, %rdi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testq %rsi, %rsi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
  %cmp = icmp eq i64 %a, 0
  %cmp1 = icmp eq i64 %b, 0
  %or = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %or to i64
  ret i64 %lor.ext
}

; Test two 16-bit inputs, output is 16-bit.
; The transform is disabled for the 16-bit case, as we still have to clear the
; upper 16 bits, adding one more instruction. Both configurations therefore
; expect the same test/sete/orb/movzbl sequence.
define i16 @test_zext_cmp3(i16 %a, i16 %b) {
; ALL-LABEL: test_zext_cmp3:
; ALL:       # %bb.0:
; ALL-NEXT:    testw %di, %di
; ALL-NEXT:    sete %al
; ALL-NEXT:    testw %si, %si
; ALL-NEXT:    sete %cl
; ALL-NEXT:    orb %al, %cl
; ALL-NEXT:    movzbl %cl, %eax
; ALL-NEXT:    # kill: def $ax killed $ax killed $eax
; ALL-NEXT:    retq
  %cmp = icmp eq i16 %a, 0
  %cmp1 = icmp eq i16 %b, 0
  %or = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %or to i16
  ret i16 %lor.ext
}

; Test two 32-bit inputs, output is 64-bit.
; With fast-lzcnt the expected sequence is lzcntl/lzcntl/orl/shrl $5 with no
; further extension instruction.
define i64 @test_zext_cmp4(i32 %a, i32 %b) {
; FASTLZCNT-LABEL: test_zext_cmp4:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntl %edi, %ecx
; FASTLZCNT-NEXT:    lzcntl %esi, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp4:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %0 = or i1 %cmp, %cmp1
  %conv = zext i1 %0 to i64
  ret i64 %conv
}

; Test two 64-bit inputs, output is 32-bit.
; With fast-lzcnt the expected code is lzcntq on each input followed by a
; 32-bit or and a shift by 6.
define i32 @test_zext_cmp5(i64 %a, i64 %b) {
; FASTLZCNT-LABEL: test_zext_cmp5:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntq %rdi, %rcx
; FASTLZCNT-NEXT:    lzcntq %rsi, %rax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $6, %eax
; FASTLZCNT-NEXT:    # kill: def $eax killed $eax killed $rax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp5:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testq %rdi, %rdi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testq %rsi, %rsi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i64 %a, 0
  %cmp1 = icmp eq i64 %b, 0
  %0 = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %0 to i32
  ret i32 %lor.ext
}

; Test three 32-bit inputs, output is 32-bit.
; The or-chain is ((a == 0) | (b == 0)) | (c == 0).
define i32 @test_zext_cmp6(i32 %a, i32 %b, i32 %c) {
; FASTLZCNT-LABEL: test_zext_cmp6:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntl %edi, %eax
; FASTLZCNT-NEXT:    lzcntl %esi, %ecx
; FASTLZCNT-NEXT:    orl %eax, %ecx
; FASTLZCNT-NEXT:    lzcntl %edx, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp6:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    testl %edx, %edx
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    orb %cl, %al
; NOFASTLZCNT-NEXT:    movzbl %al, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %or.cond = or i1 %cmp, %cmp1
  %cmp2 = icmp eq i32 %c, 0
  %.cmp2 = or i1 %or.cond, %cmp2
  %lor.ext = zext i1 %.cmp2 to i32
  ret i32 %lor.ext
}

; Test three 32-bit inputs, output is 32-bit. Compared to test_zext_cmp6, the
; operands of %.cmp2 are swapped; the expected assembly is the same.
define i32 @test_zext_cmp7(i32 %a, i32 %b, i32 %c) {
; FASTLZCNT-LABEL: test_zext_cmp7:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntl %edi, %eax
; FASTLZCNT-NEXT:    lzcntl %esi, %ecx
; FASTLZCNT-NEXT:    orl %eax, %ecx
; FASTLZCNT-NEXT:    lzcntl %edx, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp7:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    testl %edx, %edx
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    orb %cl, %al
; NOFASTLZCNT-NEXT:    movzbl %al, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %or.cond = or i1 %cmp, %cmp1
  %cmp2 = icmp eq i32 %c, 0
  %.cmp2 = or i1 %cmp2, %or.cond
  %lor.ext = zext i1 %.cmp2 to i32
  ret i32 %lor.ext
}

; Test four 32-bit inputs, output is 32-bit.
; The IR chains the compares linearly: (((a == 0) | (b == 0)) | (c == 0)) | (d == 0).
define i32 @test_zext_cmp8(i32 %a, i32 %b, i32 %c, i32 %d) {
; FASTLZCNT-LABEL: test_zext_cmp8:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntl %edi, %eax
; FASTLZCNT-NEXT:    lzcntl %esi, %esi
; FASTLZCNT-NEXT:    lzcntl %edx, %edx
; FASTLZCNT-NEXT:    orl %eax, %esi
; FASTLZCNT-NEXT:    lzcntl %ecx, %eax
; FASTLZCNT-NEXT:    orl %edx, %eax
; FASTLZCNT-NEXT:    orl %esi, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp8:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %dil
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    orb %dil, %al
; NOFASTLZCNT-NEXT:    testl %edx, %edx
; NOFASTLZCNT-NEXT:    sete %dl
; NOFASTLZCNT-NEXT:    testl %ecx, %ecx
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %dl, %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %or.cond = or i1 %cmp, %cmp1
  %cmp3 = icmp eq i32 %c, 0
  %or.cond5 = or i1 %or.cond, %cmp3
  %cmp4 = icmp eq i32 %d, 0
  %.cmp4 = or i1 %or.cond5, %cmp4
  %lor.ext = zext i1 %.cmp4 to i32
  ret i32 %lor.ext
}

; Test one 32-bit input, one 64-bit input, output is 32-bit.
; With fast-lzcnt the expected code shifts each lzcnt result separately
; (by 5 and by 6) before or-ing them.
define i32 @test_zext_cmp9(i32 %a, i64 %b) {
; FASTLZCNT-LABEL: test_zext_cmp9:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntq %rsi, %rax
; FASTLZCNT-NEXT:    lzcntl %edi, %ecx
; FASTLZCNT-NEXT:    shrl $5, %ecx
; FASTLZCNT-NEXT:    shrl $6, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    # kill: def $eax killed $eax killed $rax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp9:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testq %rsi, %rsi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i64 %b, 0
  %0 = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %0 to i32
  ret i32 %lor.ext
}

; Test two 128-bit inputs, output is 32-bit, no transformations expected.
; Each i128 is assembled in the IR from two i64 halves; both configurations
; expect an orq/sete pair per input.
define i32 @test_zext_cmp10(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) {
; ALL-LABEL: test_zext_cmp10:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    orq %rsi, %rdi
; ALL-NEXT:    sete %al
; ALL-NEXT:    orq %rcx, %rdx
; ALL-NEXT:    sete %cl
; ALL-NEXT:    orb %al, %cl
; ALL-NEXT:    movzbl %cl, %eax
; ALL-NEXT:    retq
entry:
  %a.sroa.2.0.insert.ext = zext i64 %a.coerce1 to i128
  %a.sroa.2.0.insert.shift = shl nuw i128 %a.sroa.2.0.insert.ext, 64
  %a.sroa.0.0.insert.ext = zext i64 %a.coerce0 to i128
  %a.sroa.0.0.insert.insert = or i128 %a.sroa.2.0.insert.shift, %a.sroa.0.0.insert.ext
  %b.sroa.2.0.insert.ext = zext i64 %b.coerce1 to i128
  %b.sroa.2.0.insert.shift = shl nuw i128 %b.sroa.2.0.insert.ext, 64
  %b.sroa.0.0.insert.ext = zext i64 %b.coerce0 to i128
  %b.sroa.0.0.insert.insert = or i128 %b.sroa.2.0.insert.shift, %b.sroa.0.0.insert.ext
  %cmp = icmp eq i128 %a.sroa.0.0.insert.insert, 0
  %cmp3 = icmp eq i128 %b.sroa.0.0.insert.insert, 0
  %0 = or i1 %cmp, %cmp3
  %lor.ext = zext i1 %0 to i32
  ret i32 %lor.ext
}

; PR31902: Fix a crash in combineOrCmpEqZeroToCtlzSrl under fast math.
; The compares here are floating-point (fcmp fast oeq against 0.0), and the
; expected code uses vucomisd/sete in both configurations (no lzcnt).
define i32 @test_zext_cmp11(double %a, double %b) "no-nans-fp-math"="true" {
;
; ALL-LABEL: test_zext_cmp11:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; ALL-NEXT:    vucomisd %xmm2, %xmm0
; ALL-NEXT:    sete %al
; ALL-NEXT:    vucomisd %xmm2, %xmm1
; ALL-NEXT:    sete %cl
; ALL-NEXT:    orb %al, %cl
; ALL-NEXT:    movzbl %cl, %eax
; ALL-NEXT:    retq
entry:
  %cmp = fcmp fast oeq double %a, 0.000000e+00
  %cmp1 = fcmp fast oeq double %b, 0.000000e+00
  %0 = or i1 %cmp, %cmp1
  %conv = zext i1 %0 to i32
  ret i32 %conv
}