; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X64 --check-prefix FAST_INC
; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix X64 --check-prefix SLOW_INC

; This file checks that atomic (non-seq_cst) stores of immediate values are
; done in one mov instruction and not two. More precisely, it makes sure that
; the immediate is not first copied uselessly into a register.

; Similarly, it checks that a binary operation of an immediate with an atomic
; variable that is stored back into that variable is done as a single
; instruction. For example:
;   x.store(42 + x.load(memory_order_acquire), memory_order_release)
; should be just an add instruction, instead of loading x into a register,
; doing an add and storing the result back.
; (A commented-out C++ sketch of this pattern appears at the end of this file.)
; The binary operations supported are currently add, and, or, xor.
; sub is not supported because it is translated into an addition of the
; negated immediate.
;
; We also check the same patterns:
; - For inc/dec.
; - For register instead of immediate operands.
; - For floating point operations.

; seq_cst stores are left as (lock) xchgl, but we try to check every other
; attribute at least once.

; Please note that these operations do not require the lock prefix: only
; sequentially consistent stores require this kind of protection on X86.
; And even for seq_cst operations, LLVM uses the xchg instruction, which has
; an implicit lock prefix, so making it explicit is not required.

define void @store_atomic_imm_8(i8* %p) {
; X64-LABEL: store_atomic_imm_8:
; X64:       # %bb.0:
; X64-NEXT:    movb $42, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: store_atomic_imm_8:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movb $42, (%eax)
; X32-NEXT:    retl
  store atomic i8 42, i8* %p release, align 1
  ret void
}

define void @store_atomic_imm_16(i16* %p) {
; X64-LABEL: store_atomic_imm_16:
; X64:       # %bb.0:
; X64-NEXT:    movw $42, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: store_atomic_imm_16:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movw $42, (%eax)
; X32-NEXT:    retl
  store atomic i16 42, i16* %p monotonic, align 2
  ret void
}

define void @store_atomic_imm_32(i32* %p) {
; X64-LABEL: store_atomic_imm_32:
; X64:       # %bb.0:
; X64-NEXT:    movl $42, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: store_atomic_imm_32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl $42, (%eax)
; X32-NEXT:    retl
; On 32-bit targets, there is an extra movl for each of those functions
; (probably for alignment reasons).
  store atomic i32 42, i32* %p release, align 4
  ret void
}

define void @store_atomic_imm_64(i64* %p) {
; X64-LABEL: store_atomic_imm_64:
; X64:       # %bb.0:
; X64-NEXT:    movq $42, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: store_atomic_imm_64:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $8, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X32-NEXT:    movl $42, (%esp)
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
; On 32-bit targets, these need a full 64-bit atomic store (done here with an
; fildll/fistpll pair rather than a plain mov), and thus cannot be optimized
; in the same way as the others.
  store atomic i64 42, i64* %p release, align 8
  ret void
}

; If an immediate is too big to fit in 32 bits, it cannot be stored in one
; mov; even on X64 one must use movabsq, which can only target a register.
define void @store_atomic_imm_64_big(i64* %p) {
; X64-LABEL: store_atomic_imm_64_big:
; X64:       # %bb.0:
; X64-NEXT:    movabsq $100000000000, %rax # imm = 0x174876E800
; X64-NEXT:    movq %rax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: store_atomic_imm_64_big:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $8, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    movl $23, {{[0-9]+}}(%esp)
; X32-NEXT:    movl $1215752192, (%esp) # imm = 0x4876E800
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
  store atomic i64 100000000000, i64* %p monotonic, align 8
  ret void
}

; It would be incorrect to replace a lock xchgl with a plain movl.
define void @store_atomic_imm_32_seq_cst(i32* %p) {
; X64-LABEL: store_atomic_imm_32_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    movl $42, %eax
; X64-NEXT:    xchgl %eax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: store_atomic_imm_32_seq_cst:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl $42, %ecx
; X32-NEXT:    xchgl %ecx, (%eax)
; X32-NEXT:    retl
  store atomic i32 42, i32* %p seq_cst, align 4
  ret void
}

; ----- ADD -----

define void @add_8i(i8* %p) {
; X64-LABEL: add_8i:
; X64:       # %bb.0:
; X64-NEXT:    addb $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: add_8i:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    addb $2, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = add i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @add_8r(i8* %p, i8 %v) {
; X64-LABEL: add_8r:
; X64:       # %bb.0:
; X64-NEXT:    addb %sil, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: add_8r:
; X32:       # %bb.0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addb %al, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = add i8 %1, %v
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @add_16i(i16* %p) {
; X64-LABEL: add_16i:
; X64:       # %bb.0:
; X64-NEXT:    addw $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: add_16i:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    addw $2, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = add i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @add_16r(i16* %p, i16 %v) {
; X64-LABEL: add_16r:
; X64:       # %bb.0:
; X64-NEXT:    addw %si, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: add_16r:
; X32:       # %bb.0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addw %ax, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = add i16 %1, %v
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @add_32i(i32* %p) {
; X64-LABEL: add_32i:
; X64:       # %bb.0:
; X64-NEXT:    addl $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: add_32i:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    addl $2, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, 2
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @add_32r(i32* %p, i32 %v) {
; X64-LABEL: add_32r:
; X64:       # %bb.0:
; X64-NEXT:    addl %esi, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: add_32r:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl %eax, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, %v
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

; The following is a corner case where the load is added to itself. The pattern
; matching should not fold this. We only test with 32-bit add, but the same
; applies to other sizes and operations.
define void @add_32r_self(i32* %p) {
; X64-LABEL: add_32r_self:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    addl %eax, %eax
; X64-NEXT:    movl %eax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: add_32r_self:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    addl %ecx, %ecx
; X32-NEXT:    movl %ecx, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, %1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

; The following is a corner case where the load's result is returned. The
; optimizer isn't allowed to duplicate the load because it's atomic.
define i32 @add_32r_ret_load(i32* %p, i32 %v) {
; X64-LABEL: add_32r_ret_load:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    addl %eax, %esi
; X64-NEXT:    movl %esi, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: add_32r_ret_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl (%ecx), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    addl %eax, %edx
; X32-NEXT:    movl %edx, (%ecx)
; X32-NEXT:    retl
; There is more code here; we just don't want it to load from %p again.
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, %v
  store atomic i32 %2, i32* %p monotonic, align 4
  ret i32 %1
}

define void @add_64i(i64* %p) {
; X64-LABEL: add_64i:
; X64:       # %bb.0:
; X64-NEXT:    addq $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: add_64i:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    fildll (%eax)
; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    addl $2, %ecx
; X32-NEXT:    adcl $0, %edx
; X32-NEXT:    movl %ecx, (%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
; We do not check X86-32 as it cannot do 'addq'.
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = add i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @add_64r(i64* %p, i64 %v) {
; X64-LABEL: add_64r:
; X64:       # %bb.0:
; X64-NEXT:    addq %rsi, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: add_64r:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    fildll (%eax)
; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    addl 12(%ebp), %ecx
; X32-NEXT:    adcl 16(%ebp), %edx
; X32-NEXT:    movl %ecx, (%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
; We do not check X86-32 as it cannot do 'addq'.
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = add i64 %1, %v
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @add_32i_seq_cst(i32* %p) {
; X64-LABEL: add_32i_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    addl $2, %eax
; X64-NEXT:    xchgl %eax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: add_32i_seq_cst:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    addl $2, %ecx
; X32-NEXT:    xchgl %ecx, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = add i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

define void @add_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: add_32r_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    addl %esi, %eax
; X64-NEXT:    xchgl %eax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: add_32r_seq_cst:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    xchgl %ecx, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = add i32 %1, %v
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- SUB -----

define void @sub_8r(i8* %p, i8 %v) {
; X64-LABEL: sub_8r:
; X64:       # %bb.0:
; X64-NEXT:    subb %sil, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: sub_8r:
; X32:       # %bb.0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    subb %al, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = sub i8 %1, %v
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @sub_16r(i16* %p, i16 %v) {
; X64-LABEL: sub_16r:
; X64:       # %bb.0:
; X64-NEXT:    subw %si, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: sub_16r:
; X32:       # %bb.0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    subw %ax, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = sub i16 %1, %v
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @sub_32r(i32* %p, i32 %v) {
; X64-LABEL: sub_32r:
; X64:       # %bb.0:
; X64-NEXT:    subl %esi, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: sub_32r:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    subl %eax, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = sub i32 %1, %v
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

; The following is a corner case where the load is subtracted from itself. The
; pattern matching should not fold this. We only test with 32-bit sub, but the
; same applies to other sizes and operations.
define void @sub_32r_self(i32* %p) {
; X64-LABEL: sub_32r_self:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    movl $0, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: sub_32r_self:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl $0, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = sub i32 %1, %1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

; The following is a corner case where the load's result is returned. The
; optimizer isn't allowed to duplicate the load because it's atomic.
define i32 @sub_32r_ret_load(i32* %p, i32 %v) {
; X64-LABEL: sub_32r_ret_load:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    movl %eax, %ecx
; X64-NEXT:    subl %esi, %ecx
; X64-NEXT:    movl %ecx, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: sub_32r_ret_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl (%ecx), %eax
; X32-NEXT:    movl %eax, %edx
; X32-NEXT:    subl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl %edx, (%ecx)
; X32-NEXT:    retl
; There is more code here; we just don't want it to load from %p again.
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = sub i32 %1, %v
  store atomic i32 %2, i32* %p monotonic, align 4
  ret i32 %1
}

define void @sub_64r(i64* %p, i64 %v) {
; X64-LABEL: sub_64r:
; X64:       # %bb.0:
; X64-NEXT:    subq %rsi, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: sub_64r:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    fildll (%eax)
; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    subl 12(%ebp), %ecx
; X32-NEXT:    sbbl 16(%ebp), %edx
; X32-NEXT:    movl %ecx, (%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
; We do not check X86-32 as it cannot do 'subq'.
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = sub i64 %1, %v
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @sub_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: sub_32r_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    xchgl %eax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: sub_32r_seq_cst:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    xchgl %ecx, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = sub i32 %1, %v
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- AND -----

define void @and_8i(i8* %p) {
; X64-LABEL: and_8i:
; X64:       # %bb.0:
; X64-NEXT:    andb $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: and_8i:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    andb $2, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i8, i8* %p monotonic, align 1
  %2 = and i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @and_8r(i8* %p, i8 %v) {
; X64-LABEL: and_8r:
; X64:       # %bb.0:
; X64-NEXT:    andb %sil, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: and_8r:
; X32:       # %bb.0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    andb %al, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i8, i8* %p monotonic, align 1
  %2 = and i8 %1, %v
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @and_16i(i16* %p) {
; X64-LABEL: and_16i:
; X64:       # %bb.0:
; X64-NEXT:    andw $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: and_16i:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    andw $2, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = and i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @and_16r(i16* %p, i16 %v) {
; X64-LABEL: and_16r:
; X64:       # %bb.0:
; X64-NEXT:    andw %si, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: and_16r:
; X32:       # %bb.0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    andw %ax, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = and i16 %1, %v
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @and_32i(i32* %p) {
; X64-LABEL: and_32i:
; X64:       # %bb.0:
; X64-NEXT:    andl $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: and_32i:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    andl $2, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = and i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @and_32r(i32* %p, i32 %v) {
; X64-LABEL: and_32r:
; X64:       # %bb.0:
; X64-NEXT:    andl %esi, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: and_32r:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    andl %eax, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = and i32 %1, %v
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @and_64i(i64* %p) {
; X64-LABEL: and_64i:
; X64:       # %bb.0:
; X64-NEXT:    andq $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: and_64i:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    fildll (%eax)
; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    andl $2, %ecx
; X32-NEXT:    movl %ecx, (%esp)
; X32-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
; We do not check X86-32 as it cannot do 'andq'.
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = and i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @and_64r(i64* %p, i64 %v) {
; X64-LABEL: and_64r:
; X64:       # %bb.0:
; X64-NEXT:    andq %rsi, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: and_64r:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    fildll (%eax)
; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    andl 16(%ebp), %edx
; X32-NEXT:    andl 12(%ebp), %ecx
; X32-NEXT:    movl %ecx, (%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
; We do not check X86-32 as it cannot do 'andq'.
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = and i64 %1, %v
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @and_32i_seq_cst(i32* %p) {
; X64-LABEL: and_32i_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    andl $2, %eax
; X64-NEXT:    xchgl %eax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: and_32i_seq_cst:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    andl $2, %ecx
; X32-NEXT:    xchgl %ecx, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = and i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

define void @and_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: and_32r_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    andl %esi, %eax
; X64-NEXT:    xchgl %eax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: and_32r_seq_cst:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    andl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    xchgl %ecx, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = and i32 %1, %v
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- OR -----

define void @or_8i(i8* %p) {
; X64-LABEL: or_8i:
; X64:       # %bb.0:
; X64-NEXT:    orb $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: or_8i:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    orb $2, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i8, i8* %p acquire, align 1
  %2 = or i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @or_8r(i8* %p, i8 %v) {
; X64-LABEL: or_8r:
; X64:       # %bb.0:
; X64-NEXT:    orb %sil, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: or_8r:
; X32:       # %bb.0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    orb %al, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i8, i8* %p acquire, align 1
  %2 = or i8 %1, %v
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @or_16i(i16* %p) {
; X64-LABEL: or_16i:
; X64:       # %bb.0:
; X64-NEXT:    orw $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: or_16i:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    orw $2, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = or i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @or_16r(i16* %p, i16 %v) {
; X64-LABEL: or_16r:
; X64:       # %bb.0:
; X64-NEXT:    orw %si, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: or_16r:
; X32:       # %bb.0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    orw %ax, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = or i16 %1, %v
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @or_32i(i32* %p) {
; X64-LABEL: or_32i:
; X64:       # %bb.0:
; X64-NEXT:    orl $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: or_32i:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    orl $2, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = or i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @or_32r(i32* %p, i32 %v) {
; X64-LABEL: or_32r:
; X64:       # %bb.0:
; X64-NEXT:    orl %esi, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: or_32r:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    orl %eax, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = or i32 %1, %v
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @or_64i(i64* %p) {
; X64-LABEL: or_64i:
; X64:       # %bb.0:
; X64-NEXT:    orq $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: or_64i:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    fildll (%eax)
; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    orl $2, %ecx
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ecx, (%esp)
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
; We do not check X86-32 as it cannot do 'orq'.
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = or i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @or_64r(i64* %p, i64 %v) {
; X64-LABEL: or_64r:
; X64:       # %bb.0:
; X64-NEXT:    orq %rsi, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: or_64r:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    fildll (%eax)
; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    orl 16(%ebp), %edx
; X32-NEXT:    orl 12(%ebp), %ecx
; X32-NEXT:    movl %ecx, (%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
; We do not check X86-32 as it cannot do 'orq'.
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = or i64 %1, %v
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @or_32i_seq_cst(i32* %p) {
; X64-LABEL: or_32i_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    orl $2, %eax
; X64-NEXT:    xchgl %eax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: or_32i_seq_cst:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    orl $2, %ecx
; X32-NEXT:    xchgl %ecx, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = or i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

define void @or_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: or_32r_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    orl %esi, %eax
; X64-NEXT:    xchgl %eax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: or_32r_seq_cst:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    xchgl %ecx, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = or i32 %1, %v
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- XOR -----

define void @xor_8i(i8* %p) {
; X64-LABEL: xor_8i:
; X64:       # %bb.0:
; X64-NEXT:    xorb $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: xor_8i:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorb $2, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i8, i8* %p acquire, align 1
  %2 = xor i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @xor_8r(i8* %p, i8 %v) {
; X64-LABEL: xor_8r:
; X64:       # %bb.0:
; X64-NEXT:    xorb %sil, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: xor_8r:
; X32:       # %bb.0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    xorb %al, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i8, i8* %p acquire, align 1
  %2 = xor i8 %1, %v
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @xor_16i(i16* %p) {
; X64-LABEL: xor_16i:
; X64:       # %bb.0:
; X64-NEXT:    xorw $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: xor_16i:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorw $2, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = xor i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @xor_16r(i16* %p, i16 %v) {
; X64-LABEL: xor_16r:
; X64:       # %bb.0:
; X64-NEXT:    xorw %si, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: xor_16r:
; X32:       # %bb.0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    xorw %ax, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = xor i16 %1, %v
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @xor_32i(i32* %p) {
; X64-LABEL: xor_32i:
; X64:       # %bb.0:
; X64-NEXT:    xorl $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: xor_32i:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl $2, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = xor i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @xor_32r(i32* %p, i32 %v) {
; X64-LABEL: xor_32r:
; X64:       # %bb.0:
; X64-NEXT:    xorl %esi, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: xor_32r:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    xorl %eax, (%ecx)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = xor i32 %1, %v
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @xor_64i(i64* %p) {
; X64-LABEL: xor_64i:
; X64:       # %bb.0:
; X64-NEXT:    xorq $2, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: xor_64i:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    fildll (%eax)
; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    xorl $2, %ecx
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ecx, (%esp)
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
; We do not check X86-32 as it cannot do 'xorq'.
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = xor i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @xor_64r(i64* %p, i64 %v) {
; X64-LABEL: xor_64r:
; X64:       # %bb.0:
; X64-NEXT:    xorq %rsi, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: xor_64r:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    fildll (%eax)
; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    xorl 16(%ebp), %edx
; X32-NEXT:    xorl 12(%ebp), %ecx
; X32-NEXT:    movl %ecx, (%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
; We do not check X86-32 as it cannot do 'xorq'.
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = xor i64 %1, %v
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @xor_32i_seq_cst(i32* %p) {
; X64-LABEL: xor_32i_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    xorl $2, %eax
; X64-NEXT:    xchgl %eax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: xor_32i_seq_cst:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    xorl $2, %ecx
; X32-NEXT:    xchgl %ecx, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = xor i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

define void @xor_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: xor_32r_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    xorl %esi, %eax
; X64-NEXT:    xchgl %eax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: xor_32r_seq_cst:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    xchgl %ecx, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = xor i32 %1, %v
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- INC -----

define void @inc_8(i8* %p) {
; FAST_INC-LABEL: inc_8:
; FAST_INC:       # %bb.0:
; FAST_INC-NEXT:    incb (%rdi)
; FAST_INC-NEXT:    retq
;
; X32-LABEL: inc_8:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incb (%eax)
; X32-NEXT:    retl
;
; SLOW_INC-LABEL: inc_8:
; SLOW_INC:       # %bb.0:
; SLOW_INC-NEXT:    addb $1, (%rdi)
; SLOW_INC-NEXT:    retq
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = add i8 %1, 1
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @inc_16(i16* %p) {
; FAST_INC-LABEL: inc_16:
; FAST_INC:       # %bb.0:
; FAST_INC-NEXT:    incw (%rdi)
; FAST_INC-NEXT:    retq
;
; X32-LABEL: inc_16:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incw (%eax)
; X32-NEXT:    retl
;
; SLOW_INC-LABEL: inc_16:
; SLOW_INC:       # %bb.0:
; SLOW_INC-NEXT:    addw $1, (%rdi)
; SLOW_INC-NEXT:    retq
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = add i16 %1, 1
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @inc_32(i32* %p) {
; FAST_INC-LABEL: inc_32:
; FAST_INC:       # %bb.0:
; FAST_INC-NEXT:    incl (%rdi)
; FAST_INC-NEXT:    retq
;
; X32-LABEL: inc_32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    retl
;
; SLOW_INC-LABEL: inc_32:
; SLOW_INC:       # %bb.0:
; SLOW_INC-NEXT:    addl $1, (%rdi)
; SLOW_INC-NEXT:    retq
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, 1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @inc_64(i64* %p) {
; FAST_INC-LABEL: inc_64:
; FAST_INC:       # %bb.0:
; FAST_INC-NEXT:    incq (%rdi)
; FAST_INC-NEXT:    retq
;
; X32-LABEL: inc_64:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    fildll (%eax)
; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    addl $1, %ecx
; X32-NEXT:    adcl $0, %edx
; X32-NEXT:    movl %ecx, (%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
;
; SLOW_INC-LABEL: inc_64:
; SLOW_INC:       # %bb.0:
; SLOW_INC-NEXT:    addq $1, (%rdi)
; SLOW_INC-NEXT:    retq
; We do not check X86-32 as it cannot do 'incq'.
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = add i64 %1, 1
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @inc_32_seq_cst(i32* %p) {
; FAST_INC-LABEL: inc_32_seq_cst:
; FAST_INC:       # %bb.0:
; FAST_INC-NEXT:    movl (%rdi), %eax
; FAST_INC-NEXT:    incl %eax
; FAST_INC-NEXT:    xchgl %eax, (%rdi)
; FAST_INC-NEXT:    retq
;
; X32-LABEL: inc_32_seq_cst:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    incl %ecx
; X32-NEXT:    xchgl %ecx, (%eax)
; X32-NEXT:    retl
;
; SLOW_INC-LABEL: inc_32_seq_cst:
; SLOW_INC:       # %bb.0:
; SLOW_INC-NEXT:    movl (%rdi), %eax
; SLOW_INC-NEXT:    addl $1, %eax
; SLOW_INC-NEXT:    xchgl %eax, (%rdi)
; SLOW_INC-NEXT:    retq
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = add i32 %1, 1
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- DEC -----

define void @dec_8(i8* %p) {
; FAST_INC-LABEL: dec_8:
; FAST_INC:       # %bb.0:
; FAST_INC-NEXT:    decb (%rdi)
; FAST_INC-NEXT:    retq
;
; X32-LABEL: dec_8:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    decb (%eax)
; X32-NEXT:    retl
;
; SLOW_INC-LABEL: dec_8:
; SLOW_INC:       # %bb.0:
; SLOW_INC-NEXT:    addb $-1, (%rdi)
; SLOW_INC-NEXT:    retq
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = sub i8 %1, 1
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @dec_16(i16* %p) {
; FAST_INC-LABEL: dec_16:
; FAST_INC:       # %bb.0:
; FAST_INC-NEXT:    decw (%rdi)
; FAST_INC-NEXT:    retq
;
; X32-LABEL: dec_16:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    decw (%eax)
; X32-NEXT:    retl
;
; SLOW_INC-LABEL: dec_16:
; SLOW_INC:       # %bb.0:
; SLOW_INC-NEXT:    addw $-1, (%rdi)
; SLOW_INC-NEXT:    retq
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = sub i16 %1, 1
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @dec_32(i32* %p) {
; FAST_INC-LABEL: dec_32:
; FAST_INC:       # %bb.0:
; FAST_INC-NEXT:    decl (%rdi)
; FAST_INC-NEXT:    retq
;
; X32-LABEL: dec_32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    decl (%eax)
; X32-NEXT:    retl
;
; SLOW_INC-LABEL: dec_32:
; SLOW_INC:       # %bb.0:
; SLOW_INC-NEXT:    addl $-1, (%rdi)
; SLOW_INC-NEXT:    retq
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = sub i32 %1, 1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @dec_64(i64* %p) {
; FAST_INC-LABEL: dec_64:
; FAST_INC:       # %bb.0:
; FAST_INC-NEXT:    decq (%rdi)
; FAST_INC-NEXT:    retq
;
; X32-LABEL: dec_64:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    fildll (%eax)
; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    addl $-1, %ecx
; X32-NEXT:    adcl $-1, %edx
; X32-NEXT:    movl %ecx, (%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
;
; SLOW_INC-LABEL: dec_64:
; SLOW_INC:       # %bb.0:
; SLOW_INC-NEXT:    addq $-1, (%rdi)
; SLOW_INC-NEXT:    retq
; We do not check X86-32 as it cannot do 'decq'.
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = sub i64 %1, 1
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @dec_32_seq_cst(i32* %p) {
; FAST_INC-LABEL: dec_32_seq_cst:
; FAST_INC:       # %bb.0:
; FAST_INC-NEXT:    movl (%rdi), %eax
; FAST_INC-NEXT:    decl %eax
; FAST_INC-NEXT:    xchgl %eax, (%rdi)
; FAST_INC-NEXT:    retq
;
; X32-LABEL: dec_32_seq_cst:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    decl %ecx
; X32-NEXT:    xchgl %ecx, (%eax)
; X32-NEXT:    retl
;
; SLOW_INC-LABEL: dec_32_seq_cst:
; SLOW_INC:       # %bb.0:
; SLOW_INC-NEXT:    movl (%rdi), %eax
; SLOW_INC-NEXT:    addl $-1, %eax
; SLOW_INC-NEXT:    xchgl %eax, (%rdi)
; SLOW_INC-NEXT:    retq
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = sub i32 %1, 1
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- NOT -----

define void @not_8(i8* %p) {
; X64-LABEL: not_8:
; X64:       # %bb.0:
; X64-NEXT:    notb (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: not_8:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    notb (%eax)
; X32-NEXT:    retl
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = xor i8 %1, -1
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @not_16(i16* %p) {
; X64-LABEL: not_16:
; X64:       # %bb.0:
; X64-NEXT:    notw (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: not_16:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    notw (%eax)
; X32-NEXT:    retl
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = xor i16 %1, -1
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @not_32(i32* %p) {
; X64-LABEL: not_32:
; X64:       # %bb.0:
; X64-NEXT:    notl (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: not_32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    notl (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = xor i32 %1, -1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @not_64(i64* %p) {
; X64-LABEL: not_64:
; X64:       # %bb.0:
; X64-NEXT:    notq (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: not_64:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    fildll (%eax)
; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    notl %edx
; X32-NEXT:    notl %ecx
; X32-NEXT:    movl %ecx, (%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
; We do not check X86-32 as it cannot do 'notq'.
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = xor i64 %1, -1
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @not_32_seq_cst(i32* %p) {
; X64-LABEL: not_32_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    notl %eax
; X64-NEXT:    xchgl %eax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: not_32_seq_cst:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    notl %ecx
; X32-NEXT:    xchgl %ecx, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = xor i32 %1, -1
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- NEG -----

define void @neg_8(i8* %p) {
; X64-LABEL: neg_8:
; X64:       # %bb.0:
; X64-NEXT:    negb (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: neg_8:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    negb (%eax)
; X32-NEXT:    retl
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = sub i8 0, %1
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @neg_16(i16* %p) {
; X64-LABEL: neg_16:
; X64:       # %bb.0:
; X64-NEXT:    movzwl (%rdi), %eax
; X64-NEXT:    negl %eax
; X64-NEXT:    movw %ax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: neg_16:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movzwl (%eax), %ecx
; X32-NEXT:    negl %ecx
; X32-NEXT:    movw %cx, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = sub i16 0, %1
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @neg_32(i32* %p) {
; X64-LABEL: neg_32:
; X64:       # %bb.0:
; X64-NEXT:    negl (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: neg_32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    negl (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = sub i32 0, %1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @neg_64(i64* %p) {
; X64-LABEL: neg_64:
; X64:       # %bb.0:
; X64-NEXT:    negq (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: neg_64:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    fildll (%eax)
; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
; X32-NEXT:    xorl %ecx, %ecx
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    subl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl %edx, (%esp)
; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X32-NEXT:    fildll (%esp)
; X32-NEXT:    fistpll (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl
; We do not check X86-32 as it cannot do 'negq'.
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = sub i64 0, %1
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @neg_32_seq_cst(i32* %p) {
; X64-LABEL: neg_32_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    negl %eax
; X64-NEXT:    xchgl %eax, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: neg_32_seq_cst:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    negl %ecx
; X32-NEXT:    xchgl %ecx, (%eax)
; X32-NEXT:    retl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = sub i32 0, %1
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}
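
; For readers, here is a minimal C++-level sketch of the source pattern that
; the RMW tests above correspond to, as described in the header comment. It is
; kept entirely in comments so it is not part of the test input, and the
; function and variable names (add2, x) are hypothetical, not taken from this
; test:
;
;   #include <atomic>
;
;   std::atomic<int> x;
;
;   void add2() {
;     // An acquire (or weaker) load followed by a release (or weaker) store
;     // of "old value + 2". With the optimization this file checks, x86 can
;     // emit a single memory-destination add instead of a separate
;     // load/add/store sequence.
;     x.store(x.load(std::memory_order_acquire) + 2, std::memory_order_release);
;   }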