; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-NOCMOV
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov | FileCheck %s --check-prefixes=X86,X86-CMOV
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X86-CLZ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X64-CLZ

declare i8 @llvm.cttz.i8(i8, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)

declare i8 @llvm.ctlz.i8(i8, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)

define i8 @cttz_i8(i8 %x) {
; X86-LABEL: cttz_i8:
; X86: # %bb.0:
; X86-NEXT: bsfl {{[0-9]+}}(%esp), %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: cttz_i8:
; X64: # %bb.0:
; X64-NEXT: bsfl %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i8:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i8:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: tzcntl %edi, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
  %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
  ret i8 %tmp
}

define i16 @cttz_i16(i16 %x) {
; X86-LABEL: cttz_i16:
; X86: # %bb.0:
; X86-NEXT: bsfw {{[0-9]+}}(%esp), %ax
; X86-NEXT: retl
;
; X64-LABEL: cttz_i16:
; X64: # %bb.0:
; X64-NEXT: bsfw %di, %ax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i16:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: tzcntw {{[0-9]+}}(%esp), %ax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i16:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: tzcntw %di, %ax
; X64-CLZ-NEXT: retq
  %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
  ret i16 %tmp
}

define i32 @cttz_i32(i32 %x) {
; X86-LABEL: cttz_i32:
; X86: # %bb.0:
; X86-NEXT: bsfl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: cttz_i32:
; X64: # %bb.0:
; X64-NEXT: bsfl %edi, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i32:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i32:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: tzcntl %edi, %eax
; X64-CLZ-NEXT: retq
  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
  ret i32 %tmp
}

define i64 @cttz_i64(i64 %x) {
; X86-NOCMOV-LABEL: cttz_i64:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: testl %eax, %eax
; X86-NOCMOV-NEXT: jne .LBB3_1
; X86-NOCMOV-NEXT: # %bb.2:
; X86-NOCMOV-NEXT: bsfl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: addl $32, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB3_1:
; X86-NOCMOV-NEXT: bsfl %eax, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: cttz_i64:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT: bsfl %ecx, %edx
; X86-CMOV-NEXT: bsfl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: addl $32, %eax
; X86-CMOV-NEXT: testl %ecx, %ecx
; X86-CMOV-NEXT: cmovnel %edx, %eax
; X86-CMOV-NEXT: xorl %edx, %edx
; X86-CMOV-NEXT: retl
;
; X64-LABEL: cttz_i64:
; X64: # %bb.0:
; X64-NEXT: bsfq %rdi, %rax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i64:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: testl %eax, %eax
; X86-CLZ-NEXT: jne .LBB3_1
; X86-CLZ-NEXT: # %bb.2:
; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: addl $32, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
; X86-CLZ-NEXT: .LBB3_1:
; X86-CLZ-NEXT: tzcntl %eax, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i64:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: tzcntq %rdi, %rax
; X64-CLZ-NEXT: retq
  %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
  ret i64 %tmp
}

define i8 @ctlz_i8(i8 %x) {
; X86-LABEL: ctlz_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsrl %eax, %eax
; X86-NEXT: xorl $7, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_i8:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsrl %eax, %eax
; X64-NEXT: xorl $7, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i8:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: addl $-24, %eax
; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i8:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: lzcntl %eax, %eax
; X64-CLZ-NEXT: addl $-24, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
  %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
  ret i8 %tmp2
}

define i16 @ctlz_i16(i16 %x) {
; X86-LABEL: ctlz_i16:
; X86: # %bb.0:
; X86-NEXT: bsrw {{[0-9]+}}(%esp), %ax
; X86-NEXT: xorl $15, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_i16:
; X64: # %bb.0:
; X64-NEXT: bsrw %di, %ax
; X64-NEXT: xorl $15, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i16:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i16:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntw %di, %ax
; X64-CLZ-NEXT: retq
  %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
  ret i16 %tmp2
}

define i32 @ctlz_i32(i32 %x) {
; X86-LABEL: ctlz_i32:
; X86: # %bb.0:
; X86-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_i32:
; X64: # %bb.0:
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i32:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i32:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: retq
  %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
  ret i32 %tmp
}

define i64 @ctlz_i64(i64 %x) {
; X86-NOCMOV-LABEL: ctlz_i64:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: testl %eax, %eax
; X86-NOCMOV-NEXT: jne .LBB7_1
; X86-NOCMOV-NEXT: # %bb.2:
; X86-NOCMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: addl $32, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB7_1:
; X86-NOCMOV-NEXT: bsrl %eax, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: ctlz_i64:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT: bsrl %ecx, %edx
; X86-CMOV-NEXT: xorl $31, %edx
; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: xorl $31, %eax
; X86-CMOV-NEXT: addl $32, %eax
; X86-CMOV-NEXT: testl %ecx, %ecx
; X86-CMOV-NEXT: cmovnel %edx, %eax
; X86-CMOV-NEXT: xorl %edx, %edx
; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_i64:
; X64: # %bb.0:
; X64-NEXT: bsrq %rdi, %rax
; X64-NEXT: xorq $63, %rax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i64:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: testl %eax, %eax
; X86-CLZ-NEXT: jne .LBB7_1
; X86-CLZ-NEXT: # %bb.2:
; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: addl $32, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
; X86-CLZ-NEXT: .LBB7_1:
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i64:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntq %rdi, %rax
; X64-CLZ-NEXT: retq
  %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
  ret i64 %tmp
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i8 @ctlz_i8_zero_test(i8 %n) {
; X86-LABEL: ctlz_i8_zero_test:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: testb %al, %al
; X86-NEXT: je .LBB8_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: bsrl %eax, %eax
; X86-NEXT: xorl $7, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
; X86-NEXT: .LBB8_1:
; X86-NEXT: movb $8, %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_i8_zero_test:
; X64: # %bb.0:
; X64-NEXT: testb %dil, %dil
; X64-NEXT: je .LBB8_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsrl %eax, %eax
; X64-NEXT: xorl $7, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
; X64-NEXT: .LBB8_1:
; X64-NEXT: movb $8, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i8_zero_test:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: addl $-24, %eax
; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i8_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: lzcntl %eax, %eax
; X64-CLZ-NEXT: addl $-24, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
  %tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
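; Note: lzcnt (unlike bsr) is fully defined on a zero input and returns the
; operand width, so the CLZ runs below need no test and branch at all.
; A rough scalar model of the two lowerings (illustrative sketch only, not
; part of the checked output):
;   ctlz16(n) = (n == 0) ? 16 : (15 - bsr(n))   ; bsr path, guarded by a test
;   ctlz16(n) = lzcnt16(n)                      ; lzcnt path, branch-free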
define i16 @ctlz_i16_zero_test(i16 %n) {
; X86-LABEL: ctlz_i16_zero_test:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testw %ax, %ax
; X86-NEXT: je .LBB9_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: bsrw %ax, %ax
; X86-NEXT: xorl $15, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
; X86-NEXT: .LBB9_1:
; X86-NEXT: movw $16, %ax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_i16_zero_test:
; X64: # %bb.0:
; X64-NEXT: testw %di, %di
; X64-NEXT: je .LBB9_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsrw %di, %ax
; X64-NEXT: xorl $15, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
; X64-NEXT: .LBB9_1:
; X64-NEXT: movw $16, %ax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i16_zero_test:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i16_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntw %di, %ax
; X64-CLZ-NEXT: retq
  %tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i32 @ctlz_i32_zero_test(i32 %n) {
; X86-LABEL: ctlz_i32_zero_test:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: je .LBB10_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: bsrl %eax, %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: retl
; X86-NEXT: .LBB10_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_i32_zero_test:
; X64: # %bb.0:
; X64-NEXT: testl %edi, %edi
; X64-NEXT: je .LBB10_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB10_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i32_zero_test:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i32_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: retq
  %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
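; For i64 on i686 the count is assembled from two 32-bit halves. Conceptually
; (an illustrative sketch, not checked output):
;   ctlz64(n) = (hi(n) != 0) ? ctlz32(hi(n)) : 32 + ctlz32(lo(n))
; The CMOV run computes both arms and selects; the NOCMOV run branches instead.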
define i64 @ctlz_i64_zero_test(i64 %n) {
; X86-NOCMOV-LABEL: ctlz_i64_zero_test:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOCMOV-NEXT: bsrl {{[0-9]+}}(%esp), %edx
; X86-NOCMOV-NEXT: movl $63, %eax
; X86-NOCMOV-NEXT: je .LBB11_2
; X86-NOCMOV-NEXT: # %bb.1:
; X86-NOCMOV-NEXT: movl %edx, %eax
; X86-NOCMOV-NEXT: .LBB11_2:
; X86-NOCMOV-NEXT: testl %ecx, %ecx
; X86-NOCMOV-NEXT: jne .LBB11_3
; X86-NOCMOV-NEXT: # %bb.4:
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: addl $32, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB11_3:
; X86-NOCMOV-NEXT: bsrl %ecx, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: ctlz_i64_zero_test:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl $63, %edx
; X86-CMOV-NEXT: cmovnel %eax, %edx
; X86-CMOV-NEXT: xorl $31, %edx
; X86-CMOV-NEXT: addl $32, %edx
; X86-CMOV-NEXT: bsrl %ecx, %eax
; X86-CMOV-NEXT: xorl $31, %eax
; X86-CMOV-NEXT: testl %ecx, %ecx
; X86-CMOV-NEXT: cmovel %edx, %eax
; X86-CMOV-NEXT: xorl %edx, %edx
; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_i64_zero_test:
; X64: # %bb.0:
; X64-NEXT: testq %rdi, %rdi
; X64-NEXT: je .LBB11_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsrq %rdi, %rax
; X64-NEXT: xorq $63, %rax
; X64-NEXT: retq
; X64-NEXT: .LBB11_1:
; X64-NEXT: movl $64, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i64_zero_test:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: testl %eax, %eax
; X86-CLZ-NEXT: jne .LBB11_1
; X86-CLZ-NEXT: # %bb.2:
; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: addl $32, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
; X86-CLZ-NEXT: .LBB11_1:
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i64_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntq %rdi, %rax
; X64-CLZ-NEXT: retq
  %tmp1 = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
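; For i8 the tzcnt runs below avoid the branch entirely by ORing in bit 8:
; that leaves bits 0-7 untouched but guarantees a non-zero 32-bit input, so
; tzcnt yields exactly 8 when %n == 0. Illustrative sketch (not checked
; output): cttz8(n) = tzcnt32(zext(n) | 0x100)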
define i8 @cttz_i8_zero_test(i8 %n) {
; X86-LABEL: cttz_i8_zero_test:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: testb %al, %al
; X86-NEXT: je .LBB12_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: bsfl %eax, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
; X86-NEXT: .LBB12_1:
; X86-NEXT: movb $8, %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: cttz_i8_zero_test:
; X64: # %bb.0:
; X64-NEXT: testb %dil, %dil
; X64-NEXT: je .LBB12_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsfl %eax, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
; X64-NEXT: .LBB12_1:
; X64-NEXT: movb $8, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i8_zero_test:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movl $256, %eax # imm = 0x100
; X86-CLZ-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: tzcntl %eax, %eax
; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i8_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: orl $256, %edi # imm = 0x100
; X64-CLZ-NEXT: tzcntl %edi, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
  %tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i16 @cttz_i16_zero_test(i16 %n) {
; X86-LABEL: cttz_i16_zero_test:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testw %ax, %ax
; X86-NEXT: je .LBB13_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: bsfw %ax, %ax
; X86-NEXT: retl
; X86-NEXT: .LBB13_1:
; X86-NEXT: movw $16, %ax
; X86-NEXT: retl
;
; X64-LABEL: cttz_i16_zero_test:
; X64: # %bb.0:
; X64-NEXT: testw %di, %di
; X64-NEXT: je .LBB13_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsfw %di, %ax
; X64-NEXT: retq
; X64-NEXT: .LBB13_1:
; X64-NEXT: movw $16, %ax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i16_zero_test:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: tzcntw {{[0-9]+}}(%esp), %ax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i16_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: tzcntw %di, %ax
; X64-CLZ-NEXT: retq
  %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
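; As with lzcnt, tzcnt is defined on a zero input (it returns the operand
; width), so the CLZ runs below lower to a bare tzcnt that already yields 32
; for %n == 0.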
define i32 @cttz_i32_zero_test(i32 %n) {
; X86-LABEL: cttz_i32_zero_test:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: je .LBB14_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: bsfl %eax, %eax
; X86-NEXT: retl
; X86-NEXT: .LBB14_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
; X64-LABEL: cttz_i32_zero_test:
; X64: # %bb.0:
; X64-NEXT: testl %edi, %edi
; X64-NEXT: je .LBB14_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsfl %edi, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB14_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i32_zero_test:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i32_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: tzcntl %edi, %eax
; X64-CLZ-NEXT: retq
  %tmp1 = call i32 @llvm.cttz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i64 @cttz_i64_zero_test(i64 %n) {
; X86-NOCMOV-LABEL: cttz_i64_zero_test:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOCMOV-NEXT: bsfl {{[0-9]+}}(%esp), %edx
; X86-NOCMOV-NEXT: movl $32, %eax
; X86-NOCMOV-NEXT: je .LBB15_2
; X86-NOCMOV-NEXT: # %bb.1:
; X86-NOCMOV-NEXT: movl %edx, %eax
; X86-NOCMOV-NEXT: .LBB15_2:
; X86-NOCMOV-NEXT: testl %ecx, %ecx
; X86-NOCMOV-NEXT: jne .LBB15_3
; X86-NOCMOV-NEXT: # %bb.4:
; X86-NOCMOV-NEXT: addl $32, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB15_3:
; X86-NOCMOV-NEXT: bsfl %ecx, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: cttz_i64_zero_test:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: bsfl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT: movl $32, %edx
; X86-CMOV-NEXT: cmovnel %ecx, %edx
; X86-CMOV-NEXT: addl $32, %edx
; X86-CMOV-NEXT: bsfl %eax, %eax
; X86-CMOV-NEXT: cmovel %edx, %eax
; X86-CMOV-NEXT: xorl %edx, %edx
; X86-CMOV-NEXT: retl
;
; X64-LABEL: cttz_i64_zero_test:
; X64: # %bb.0:
; X64-NEXT: testq %rdi, %rdi
; X64-NEXT: je .LBB15_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsfq %rdi, %rax
; X64-NEXT: retq
; X64-NEXT: .LBB15_1:
; X64-NEXT: movl $64, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i64_zero_test:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: testl %eax, %eax
; X86-CLZ-NEXT: jne .LBB15_1
; X86-CLZ-NEXT: # %bb.2:
; X86-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: addl $32, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
; X86-CLZ-NEXT: .LBB15_1:
; X86-CLZ-NEXT: tzcntl %eax, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i64_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: tzcntq %rdi, %rax
; X64-CLZ-NEXT: retq
  %tmp1 = call i64 @llvm.cttz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

; Don't generate the cmovne when the source is known non-zero (and bsr would
; not set ZF).
; rdar://9490949
; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
; codegen doesn't know how to delete the movl and je.
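; Because "or $1" always produces a result with bit 0 set, ZF is always clear
; afterwards and the "je" below can never be taken; the $32 block is dead code
; that the FIXME above would like to see deleted.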
define i32 @ctlz_i32_fold_cmov(i32 %n) {
; X86-LABEL: ctlz_i32_fold_cmov:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: je .LBB16_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: bsrl %eax, %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: retl
; X86-NEXT: .LBB16_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_i32_fold_cmov:
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
; X64-NEXT: je .LBB16_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB16_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i32_fold_cmov:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: orl $1, %eax
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i32_fold_cmov:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: orl $1, %edi
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: retq
  %or = or i32 %n, 1
  %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
  ret i32 %tmp1
}

; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
; the most significant bit, which is what 'bsr' does natively.
; FIXME: We should probably select BSR instead of LZCNT in these circumstances.
define i32 @ctlz_bsr(i32 %n) {
; X86-LABEL: ctlz_bsr:
; X86: # %bb.0:
; X86-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_bsr:
; X64: # %bb.0:
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_bsr:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: xorl $31, %eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_bsr:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: xorl $31, %eax
; X64-CLZ-NEXT: retq
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
; codegen doesn't know how to combine the $32 and $31 into $63.
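; The fold is sound because xor with 31 is an involution: on the non-zero path
; the two "xorl $31" cancel outright, and on the zero path movl $32 + xorl $31
; is just the constant 63 (32 ^ 31 = 63).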
define i32 @ctlz_bsr_zero_test(i32 %n) {
; X86-LABEL: ctlz_bsr_zero_test:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: je .LBB18_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: bsrl %eax, %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: retl
; X86-NEXT: .LBB18_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_bsr_zero_test:
; X64: # %bb.0:
; X64-NEXT: testl %edi, %edi
; X64-NEXT: je .LBB18_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB18_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_bsr_zero_test:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: xorl $31, %eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_bsr_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: xorl $31, %eax
; X64-CLZ-NEXT: retq
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}

define i8 @cttz_i8_knownbits(i8 %x) {
; X86-LABEL: cttz_i8_knownbits:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: orb $2, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: bsfl %eax, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: cttz_i8_knownbits:
; X64: # %bb.0:
; X64-NEXT: orb $2, %dil
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsfl %eax, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i8_knownbits:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-CLZ-NEXT: orb $2, %al
; X86-CLZ-NEXT: movzbl %al, %eax
; X86-CLZ-NEXT: tzcntl %eax, %eax
; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i8_knownbits:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: orb $2, %dil
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: tzcntl %eax, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
  %x2 = or i8 %x, 2
  %tmp = call i8 @llvm.cttz.i8(i8 %x2, i1 true )
  %tmp2 = and i8 %tmp, 1
  ret i8 %tmp2
}

define i8 @ctlz_i8_knownbits(i8 %x) {
; X86-LABEL: ctlz_i8_knownbits:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: orb $64, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: bsrl %eax, %eax
; X86-NEXT: xorl $7, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_i8_knownbits:
; X64: # %bb.0:
; X64-NEXT: orb $64, %dil
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsrl %eax, %eax
; X64-NEXT: xorl $7, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i8_knownbits:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-CLZ-NEXT: orb $64, %al
; X86-CLZ-NEXT: movzbl %al, %eax
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: addl $-24, %eax
; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i8_knownbits:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: orb $64, %dil
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: lzcntl %eax, %eax
; X64-CLZ-NEXT: addl $-24, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
  %x2 = or i8 %x, 64
  %tmp = call i8 @llvm.ctlz.i8(i8 %x2, i1 true )
  %tmp2 = and i8 %tmp, 1
  ret i8 %tmp2
}

; Make sure we can detect that the input is non-zero and avoid cmov after BSR
; This is relevant for 32-bit mode without lzcnt
define i64 @ctlz_i64_zero_test_knownneverzero(i64 %n) {
; X86-NOCMOV-LABEL: ctlz_i64_zero_test_knownneverzero:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: testl %eax, %eax
; X86-NOCMOV-NEXT: jne .LBB21_1
; X86-NOCMOV-NEXT: # %bb.2:
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: orl $1, %eax
; X86-NOCMOV-NEXT: bsrl %eax, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: orl $32, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB21_1:
; X86-NOCMOV-NEXT: bsrl %eax, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: ctlz_i64_zero_test_knownneverzero:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT: orl $1, %eax
; X86-CMOV-NEXT: bsrl %ecx, %edx
; X86-CMOV-NEXT: xorl $31, %edx
; X86-CMOV-NEXT: bsrl %eax, %eax
; X86-CMOV-NEXT: xorl $31, %eax
; X86-CMOV-NEXT: orl $32, %eax
; X86-CMOV-NEXT: testl %ecx, %ecx
; X86-CMOV-NEXT: cmovnel %edx, %eax
; X86-CMOV-NEXT: xorl %edx, %edx
; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_i64_zero_test_knownneverzero:
; X64: # %bb.0:
; X64-NEXT: orq $1, %rdi
; X64-NEXT: je .LBB21_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsrq %rdi, %rax
; X64-NEXT: xorq $63, %rax
; X64-NEXT: retq
; X64-NEXT: .LBB21_1:
; X64-NEXT: movl $64, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: testl %eax, %eax
; X86-CLZ-NEXT: jne .LBB21_1
; X86-CLZ-NEXT: # %bb.2:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: orl $1, %eax
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: orl $32, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
; X86-CLZ-NEXT: .LBB21_1:
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: orq $1, %rdi
; X64-CLZ-NEXT: lzcntq %rdi, %rax
; X64-CLZ-NEXT: retq
  %o = or i64 %n, 1
  %tmp1 = call i64 @llvm.ctlz.i64(i64 %o, i1 false)
  ret i64 %tmp1
}

; Make sure we can detect that the input is non-zero and avoid cmov after BSF
; This is relevant for 32-bit mode without tzcnt
define i64 @cttz_i64_zero_test_knownneverzero(i64 %n) {
; X86-NOCMOV-LABEL: cttz_i64_zero_test_knownneverzero:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: testl %eax, %eax
; X86-NOCMOV-NEXT: jne .LBB22_1
; X86-NOCMOV-NEXT: # %bb.2:
; X86-NOCMOV-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-NOCMOV-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: bsfl %eax, %eax
; X86-NOCMOV-NEXT: orl $32, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB22_1:
; X86-NOCMOV-NEXT: bsfl %eax, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: cttz_i64_zero_test_knownneverzero:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-CMOV-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: bsfl %ecx, %edx
; X86-CMOV-NEXT: bsfl %eax, %eax
; X86-CMOV-NEXT: orl $32, %eax
; X86-CMOV-NEXT: testl %ecx, %ecx
; X86-CMOV-NEXT: cmovnel %edx, %eax
; X86-CMOV-NEXT: xorl %edx, %edx
; X86-CMOV-NEXT: retl
;
; X64-LABEL: cttz_i64_zero_test_knownneverzero:
; X64: # %bb.0:
; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-NEXT: orq %rdi, %rax
; X64-NEXT: je .LBB22_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: bsfq %rax, %rax
; X64-NEXT: retq
; X64-NEXT: .LBB22_1:
; X64-NEXT: movl $64, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: testl %eax, %eax
; X86-CLZ-NEXT: jne .LBB22_1
; X86-CLZ-NEXT: # %bb.2:
; X86-CLZ-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-CLZ-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: tzcntl %eax, %eax
; X86-CLZ-NEXT: orl $32, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
; X86-CLZ-NEXT: .LBB22_1:
; X86-CLZ-NEXT: tzcntl %eax, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-CLZ-NEXT: orq %rdi, %rax
; X64-CLZ-NEXT: tzcntq %rax, %rax
; X64-CLZ-NEXT: retq
  %o = or i64 %n, -9223372036854775808 ; 0x8000000000000000
  %tmp1 = call i64 @llvm.cttz.i64(i64 %o, i1 false)
  ret i64 %tmp1
}