; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s --check-prefix=X86
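
; These tests cover the bit-manipulation idioms
;   x & ~(1 << n)  ->  btr
;   x |  (1 << n)  ->  bts
;   x ^  (1 << n)  ->  btc
; at i16/i32/i64, in register, load, and load+store (RMW) forms, with and
; without explicit masking of the bit index %n.
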
; FIXME: We don't match this properly due to the different sizes of the 'rotate' and 'and'.
define i16 @btr_16(i16 %x, i16 %n) {
; X64-LABEL: btr_16:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movw $-2, %ax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rolw %cl, %ax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: btr_16:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    btrw %cx, %ax
; X86-NEXT:    retl
  %1 = shl i16 1, %n
  %2 = xor i16 %1, -1
  %3 = and i16 %x, %2
  ret i16 %3
}

define i16 @bts_16(i16 %x, i16 %n) {
; X64-LABEL: bts_16:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    btsl %esi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: bts_16:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    orw {{[0-9]+}}(%esp), %ax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %1 = shl i16 1, %n
  %2 = or i16 %x, %1
  ret i16 %2
}

define i16 @btc_16(i16 %x, i16 %n) {
; X64-LABEL: btc_16:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    btcl %esi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: btc_16:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    xorw {{[0-9]+}}(%esp), %ax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %1 = shl i16 1, %n
  %2 = xor i16 %x, %1
  ret i16 %2
}

define i32 @btr_32(i32 %x, i32 %n) {
; X64-LABEL: btr_32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    btrl %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: btr_32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    btrl %ecx, %eax
; X86-NEXT:    retl
  %1 = shl i32 1, %n
  %2 = xor i32 %1, -1
  %3 = and i32 %x, %2
  ret i32 %3
}

define i32 @bts_32(i32 %x, i32 %n) {
; X64-LABEL: bts_32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    btsl %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: bts_32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    btsl %ecx, %eax
; X86-NEXT:    retl
  %1 = shl i32 1, %n
  %2 = or i32 %x, %1
  ret i32 %2
}

define i32 @btc_32(i32 %x, i32 %n) {
; X64-LABEL: btc_32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    btcl %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: btc_32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    btcl %ecx, %eax
; X86-NEXT:    retl
  %1 = shl i32 1, %n
  %2 = xor i32 %x, %1
  ret i32 %2
}

define i64 @btr_64(i64 %x, i64 %n) {
; X64-LABEL: btr_64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    btrq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: btr_64:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    shldl %cl, %eax, %edx
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB6_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:  .LBB6_2:
; X86-NEXT:    notl %edx
; X86-NEXT:    notl %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %1 = shl i64 1, %n
  %2 = xor i64 %1, -1
  %3 = and i64 %x, %2
  ret i64 %3
}

define i64 @bts_64(i64 %x, i64 %n) {
; X64-LABEL: bts_64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    btsq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: bts_64:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    shldl %cl, %eax, %edx
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB7_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:  .LBB7_2:
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %1 = shl i64 1, %n
  %2 = or i64 %x, %1
  ret i64 %2
}

define i64 @btc_64(i64 %x, i64 %n) {
; X64-LABEL: btc_64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    btcq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: btc_64:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    shldl %cl, %eax, %edx
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB8_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:  .LBB8_2:
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %1 = shl i64 1, %n
  %2 = xor i64 %x, %1
  ret i64 %2
}

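; In the '_mask' tests the bit index is pre-masked with 'width - 1'. The
; register forms of bt/btr/bts/btc interpret the index modulo the operand
; size, so the mask should fold away whenever the selected instruction is as
; wide as the type (the i16 cases lowered via 32-bit btsl/btcl keep it).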
define i16 @btr_16_mask(i16 %x, i16 %n) {
; X64-LABEL: btr_16_mask:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movw $-2, %ax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rolw %cl, %ax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: btr_16_mask:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    btrw %cx, %ax
; X86-NEXT:    retl
  %1 = and i16 %n, 15
  %2 = shl i16 1, %1
  %3 = xor i16 %2, -1
  %4 = and i16 %x, %3
  ret i16 %4
}

define i16 @bts_16_mask(i16 %x, i16 %n) {
; X64-LABEL: bts_16_mask:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andb $15, %sil
; X64-NEXT:    btsl %esi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: bts_16_mask:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    andb $15, %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    orw {{[0-9]+}}(%esp), %ax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %1 = and i16 %n, 15
  %2 = shl i16 1, %1
  %3 = or i16 %x, %2
  ret i16 %3
}

define i16 @btc_16_mask(i16 %x, i16 %n) {
; X64-LABEL: btc_16_mask:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andb $15, %sil
; X64-NEXT:    btcl %esi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: btc_16_mask:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    andb $15, %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    xorw {{[0-9]+}}(%esp), %ax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %1 = and i16 %n, 15
  %2 = shl i16 1, %1
  %3 = xor i16 %x, %2
  ret i16 %3
}

define i32 @btr_32_mask(i32 %x, i32 %n) {
; X64-LABEL: btr_32_mask:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    btrl %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: btr_32_mask:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    btrl %ecx, %eax
; X86-NEXT:    retl
  %1 = and i32 %n, 31
  %2 = shl i32 1, %1
  %3 = xor i32 %2, -1
  %4 = and i32 %x, %3
  ret i32 %4
}

define i32 @bts_32_mask(i32 %x, i32 %n) {
; X64-LABEL: bts_32_mask:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    btsl %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: bts_32_mask:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    btsl %ecx, %eax
; X86-NEXT:    retl
  %1 = and i32 %n, 31
  %2 = shl i32 1, %1
  %3 = or i32 %x, %2
  ret i32 %3
}

define i32 @btc_32_mask(i32 %x, i32 %n) {
; X64-LABEL: btc_32_mask:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    btcl %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: btc_32_mask:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    btcl %ecx, %eax
; X86-NEXT:    retl
  %1 = and i32 %n, 31
  %2 = shl i32 1, %1
  %3 = xor i32 %x, %2
  ret i32 %3
}

define i64 @btr_64_mask(i64 %x, i64 %n) {
; X64-LABEL: btr_64_mask:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    btrq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: btr_64_mask:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    shldl %cl, %eax, %edx
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB15_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:  .LBB15_2:
; X86-NEXT:    notl %edx
; X86-NEXT:    notl %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %1 = and i64 %n, 63
  %2 = shl i64 1, %1
  %3 = xor i64 %2, -1
  %4 = and i64 %x, %3
  ret i64 %4
}

define i64 @bts_64_mask(i64 %x, i64 %n) {
; X64-LABEL: bts_64_mask:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    btsq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: bts_64_mask:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    shldl %cl, %eax, %edx
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB16_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:  .LBB16_2:
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %1 = and i64 %n, 63
  %2 = shl i64 1, %1
  %3 = or i64 %x, %2
  ret i64 %3
}

define i64 @btc_64_mask(i64 %x, i64 %n) {
; X64-LABEL: btc_64_mask:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    btcq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: btc_64_mask:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    shldl %cl, %eax, %edx
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB17_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:  .LBB17_2:
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %1 = and i64 %n, 63
  %2 = shl i64 1, %1
  %3 = xor i64 %x, %2
  ret i64 %3
}

; Tests below use loads, and we favor folding those over matching btc/btr/bts.
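; For i16 this shows up as the load staying folded into the 'or'/'xor'
; (e.g. 'orw (%rdi), %ax' in bts_16_load) rather than being materialized
; for a btsw.
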
define i16 @btr_16_load(i16* %x, i16 %n) {
; X64-LABEL: btr_16_load:
; X64:       # %bb.0:
; X64-NEXT:    movzwl (%rdi), %eax
; X64-NEXT:    btrw %si, %ax
; X64-NEXT:    retq
;
; X86-LABEL: btr_16_load:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl (%eax), %eax
; X86-NEXT:    btrw %cx, %ax
; X86-NEXT:    retl
  %1 = load i16, i16* %x
  %2 = shl i16 1, %n
  %3 = xor i16 %2, -1
  %4 = and i16 %1, %3
  ret i16 %4
}

define i16 @bts_16_load(i16* %x, i16 %n) {
; X64-LABEL: bts_16_load:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    orw (%rdi), %ax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: bts_16_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    orw (%edx), %ax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %1 = load i16, i16* %x
  %2 = shl i16 1, %n
  %3 = or i16 %1, %2
  ret i16 %3
}

define i16 @btc_16_load(i16* %x, i16 %n) {
; X64-LABEL: btc_16_load:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    xorw (%rdi), %ax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: btc_16_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    xorw (%edx), %ax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %1 = load i16, i16* %x
  %2 = shl i16 1, %n
  %3 = xor i16 %1, %2
  ret i16 %3
}

define i32 @btr_32_load(i32* %x, i32 %n) {
; X64-LABEL: btr_32_load:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    btrl %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: btr_32_load:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %eax
; X86-NEXT:    btrl %ecx, %eax
; X86-NEXT:    retl
  %1 = load i32, i32* %x
  %2 = shl i32 1, %n
  %3 = xor i32 %2, -1
  %4 = and i32 %1, %3
  ret i32 %4
}

define i32 @bts_32_load(i32* %x, i32 %n) {
; X64-LABEL: bts_32_load:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    btsl %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: bts_32_load:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %eax
; X86-NEXT:    btsl %ecx, %eax
; X86-NEXT:    retl
  %1 = load i32, i32* %x
  %2 = shl i32 1, %n
  %3 = or i32 %1, %2
  ret i32 %3
}

define i32 @btc_32_load(i32* %x, i32 %n) {
; X64-LABEL: btc_32_load:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    btcl %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: btc_32_load:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %eax
; X86-NEXT:    btcl %ecx, %eax
; X86-NEXT:    retl
  %1 = load i32, i32* %x
  %2 = shl i32 1, %n
  %3 = xor i32 %1, %2
  ret i32 %3
}

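; As in the plain i64 tests above, i386 has no 64-bit bt*: the '1 << n' is
; legalized to a shldl/shll pair, with 'testb $32, %cl' selecting which
; 32-bit half receives the bit.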
define i64 @btr_64_load(i64* %x, i64 %n) {
; X64-LABEL: btr_64_load:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    btrq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: btr_64_load:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    shldl %cl, %eax, %edx
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB24_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:  .LBB24_2:
; X86-NEXT:    notl %edx
; X86-NEXT:    notl %eax
; X86-NEXT:    andl 4(%esi), %edx
; X86-NEXT:    andl (%esi), %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %1 = load i64, i64* %x
  %2 = shl i64 1, %n
  %3 = xor i64 %2, -1
  %4 = and i64 %1, %3
  ret i64 %4
}

define i64 @bts_64_load(i64* %x, i64 %n) {
; X64-LABEL: bts_64_load:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    btsq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: bts_64_load:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    shldl %cl, %eax, %edx
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB25_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:  .LBB25_2:
; X86-NEXT:    orl 4(%esi), %edx
; X86-NEXT:    orl (%esi), %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %1 = load i64, i64* %x
  %2 = shl i64 1, %n
  %3 = or i64 %1, %2
  ret i64 %3
}

define i64 @btc_64_load(i64* %x, i64 %n) {
; X64-LABEL: btc_64_load:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    btcq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: btc_64_load:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    shldl %cl, %eax, %edx
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB26_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:  .LBB26_2:
; X86-NEXT:    xorl 4(%esi), %edx
; X86-NEXT:    xorl (%esi), %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %1 = load i64, i64* %x
  %2 = shl i64 1, %n
  %3 = xor i64 %1, %2
  ret i64 %3
}

; For the tests below, we definitely shouldn't fold to the memory forms of
; BTR/BTS/BTC, as they have very different semantics from their register
; counterparts.
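; (With a register bit offset, the memory forms treat the operand as the base
; of an arbitrarily long bit string, so they can read and write bytes beyond
; the word at the given address.)
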
define void @btr_16_dont_fold(i16* %x, i16 %n) {
; X64-LABEL: btr_16_dont_fold:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movw $-2, %ax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rolw %cl, %ax
; X64-NEXT:    andw %ax, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: btr_16_dont_fold:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movw $-2, %dx
; X86-NEXT:    rolw %cl, %dx
; X86-NEXT:    andw %dx, (%eax)
; X86-NEXT:    retl
  %1 = load i16, i16* %x
  %2 = shl i16 1, %n
  %3 = xor i16 %2, -1
  %4 = and i16 %1, %3
  store i16 %4, i16* %x
  ret void
}

define void @bts_16_dont_fold(i16* %x, i16 %n) {
; X64-LABEL: bts_16_dont_fold:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    orw %ax, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: bts_16_dont_fold:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %edx
; X86-NEXT:    shll %cl, %edx
; X86-NEXT:    orw %dx, (%eax)
; X86-NEXT:    retl
  %1 = load i16, i16* %x
  %2 = shl i16 1, %n
  %3 = or i16 %1, %2
  store i16 %3, i16* %x
  ret void
}

define void @btc_16_dont_fold(i16* %x, i16 %n) {
; X64-LABEL: btc_16_dont_fold:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    xorw %ax, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: btc_16_dont_fold:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %edx
; X86-NEXT:    shll %cl, %edx
; X86-NEXT:    xorw %dx, (%eax)
; X86-NEXT:    retl
  %1 = load i16, i16* %x
  %2 = shl i16 1, %n
  %3 = xor i16 %1, %2
  store i16 %3, i16* %x
  ret void
}

define void @btr_32_dont_fold(i32* %x, i32 %n) {
; X64-LABEL: btr_32_dont_fold:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl $-2, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    roll %cl, %eax
; X64-NEXT:    andl %eax, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: btr_32_dont_fold:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $-2, %edx
; X86-NEXT:    roll %cl, %edx
; X86-NEXT:    andl %edx, (%eax)
; X86-NEXT:    retl
  %1 = load i32, i32* %x
  %2 = shl i32 1, %n
  %3 = xor i32 %2, -1
  %4 = and i32 %1, %3
  store i32 %4, i32* %x
  ret void
}

define void @bts_32_dont_fold(i32* %x, i32 %n) {
; X64-LABEL: bts_32_dont_fold:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    orl %eax, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: bts_32_dont_fold:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %edx
; X86-NEXT:    shll %cl, %edx
; X86-NEXT:    orl %edx, (%eax)
; X86-NEXT:    retl
  %1 = load i32, i32* %x
  %2 = shl i32 1, %n
  %3 = or i32 %1, %2
  store i32 %3, i32* %x
  ret void
}

define void @btc_32_dont_fold(i32* %x, i32 %n) {
; X64-LABEL: btc_32_dont_fold:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    xorl %eax, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: btc_32_dont_fold:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %edx
; X86-NEXT:    shll %cl, %edx
; X86-NEXT:    xorl %edx, (%eax)
; X86-NEXT:    retl
  %1 = load i32, i32* %x
  %2 = shl i32 1, %n
  %3 = xor i32 %1, %2
  store i32 %3, i32* %x
  ret void
}

define void @btr_64_dont_fold(i64* %x, i64 %n) {
; X64-LABEL: btr_64_dont_fold:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rcx
; X64-NEXT:    movq $-2, %rax
; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-NEXT:    rolq %cl, %rax
; X64-NEXT:    andq %rax, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: btr_64_dont_fold:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %edx
; X86-NEXT:    xorl %esi, %esi
; X86-NEXT:    shldl %cl, %edx, %esi
; X86-NEXT:    shll %cl, %edx
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB33_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %edx, %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:  .LBB33_2:
; X86-NEXT:    notl %esi
; X86-NEXT:    notl %edx
; X86-NEXT:    andl %edx, (%eax)
; X86-NEXT:    andl %esi, 4(%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %1 = load i64, i64* %x
  %2 = shl i64 1, %n
  %3 = xor i64 %2, -1
  %4 = and i64 %1, %3
  store i64 %4, i64* %x
  ret void
}

define void @bts_64_dont_fold(i64* %x, i64 %n) {
; X64-LABEL: bts_64_dont_fold:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rcx
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-NEXT:    shlq %cl, %rax
; X64-NEXT:    orq %rax, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: bts_64_dont_fold:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %edx
; X86-NEXT:    xorl %esi, %esi
; X86-NEXT:    shldl %cl, %edx, %esi
; X86-NEXT:    shll %cl, %edx
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB34_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %edx, %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:  .LBB34_2:
; X86-NEXT:    orl %edx, (%eax)
; X86-NEXT:    orl %esi, 4(%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %1 = load i64, i64* %x
  %2 = shl i64 1, %n
  %3 = or i64 %1, %2
  store i64 %3, i64* %x
  ret void
}

define void @btc_64_dont_fold(i64* %x, i64 %n) {
; X64-LABEL: btc_64_dont_fold:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rcx
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-NEXT:    shlq %cl, %rax
; X64-NEXT:    xorq %rax, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: btc_64_dont_fold:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %edx
; X86-NEXT:    xorl %esi, %esi
; X86-NEXT:    shldl %cl, %edx, %esi
; X86-NEXT:    shll %cl, %edx
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB35_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %edx, %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:  .LBB35_2:
; X86-NEXT:    xorl %edx, (%eax)
; X86-NEXT:    xorl %esi, 4(%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %1 = load i64, i64* %x
  %2 = shl i64 1, %n
  %3 = xor i64 %1, %2
  store i64 %3, i64* %x
  ret void
}

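; In the '_mask_zeros' tests the index is shifted left by 2 before being
; masked, so its low two bits are known zero; the 'and' is still redundant
; for the register bt* forms and should fold into the instruction.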
define i32 @btr_32_mask_zeros(i32 %x, i32 %n) {
; X64-LABEL: btr_32_mask_zeros:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $2, %esi
; X64-NEXT:    btrl %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: btr_32_mask_zeros:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    shlb $2, %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    btrl %ecx, %eax
; X86-NEXT:    retl
  %1 = shl i32 %n, 2
  %2 = and i32 %1, 31
  %3 = shl i32 1, %2
  %4 = xor i32 %3, -1
  %5 = and i32 %x, %4
  ret i32 %5
}

define i32 @bts_32_mask_zeros(i32 %x, i32 %n) {
; X64-LABEL: bts_32_mask_zeros:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $2, %esi
; X64-NEXT:    btsl %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: bts_32_mask_zeros:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    shlb $2, %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    btsl %ecx, %eax
; X86-NEXT:    retl
  %1 = shl i32 %n, 2
  %2 = and i32 %1, 31
  %3 = shl i32 1, %2
  %4 = or i32 %x, %3
  ret i32 %4
}

define i32 @btc_32_mask_zeros(i32 %x, i32 %n) {
; X64-LABEL: btc_32_mask_zeros:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $2, %esi
; X64-NEXT:    btcl %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: btc_32_mask_zeros:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    shlb $2, %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    btcl %ecx, %eax
; X86-NEXT:    retl
  %1 = shl i32 %n, 2
  %2 = and i32 %1, 31
  %3 = shl i32 1, %2
  %4 = xor i32 %x, %3
  ret i32 %4
}

define i64 @btr_64_mask_zeros(i64 %x, i64 %n) {
; X64-LABEL: btr_64_mask_zeros:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shll $2, %esi
; X64-NEXT:    btrq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: btr_64_mask_zeros:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $2, %ecx
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    shldl %cl, %eax, %edx
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB39_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:  .LBB39_2:
; X86-NEXT:    notl %edx
; X86-NEXT:    notl %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %1 = shl i64 %n, 2
  %2 = and i64 %1, 63
  %3 = shl i64 1, %2
  %4 = xor i64 %3, -1
  %5 = and i64 %x, %4
  ret i64 %5
}

define i64 @bts_64_mask_zeros(i64 %x, i64 %n) {
; X64-LABEL: bts_64_mask_zeros:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shll $2, %esi
; X64-NEXT:    btsq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: bts_64_mask_zeros:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $2, %ecx
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    shldl %cl, %eax, %edx
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB40_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:  .LBB40_2:
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %1 = shl i64 %n, 2
  %2 = and i64 %1, 63
  %3 = shl i64 1, %2
  %4 = or i64 %x, %3
  ret i64 %4
}

define i64 @btc_64_mask_zeros(i64 %x, i64 %n) {
; X64-LABEL: btc_64_mask_zeros:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shll $2, %esi
; X64-NEXT:    btcq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: btc_64_mask_zeros:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $2, %ecx
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    shldl %cl, %eax, %edx
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB41_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:  .LBB41_2:
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %1 = shl i64 %n, 2
  %2 = and i64 %1, 63
  %3 = shl i64 1, %2
  %4 = xor i64 %x, %3
  ret i64 %4
}