; This tests each of the supported NaCl atomic instructions for every
; size allowed.

; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \
; RUN:   -allow-externally-defined-symbols | FileCheck %s
; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \
; RUN:   -allow-externally-defined-symbols | FileCheck --check-prefix=O2 %s
; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 \
; RUN:   -allow-externally-defined-symbols | FileCheck %s

; RUN: %if --need=allow_dump --need=target_ARM32 --command %p2i --filetype=asm \
; RUN:   --target arm32 -i %s --args -O2 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=allow_dump --need=target_ARM32 --command FileCheck %s \
; RUN:   --check-prefix=ARM32

; RUN: %if --need=allow_dump --need=target_ARM32 --command %p2i --filetype=asm \
; RUN:   --target arm32 -i %s --args -O2 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=allow_dump --need=target_ARM32 --command FileCheck %s \
; RUN:   --check-prefix=ARM32O2

; RUN: %if --need=allow_dump --need=target_ARM32 --command %p2i --filetype=asm \
; RUN:   --target arm32 -i %s --args -Om1 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=allow_dump --need=target_ARM32 --command FileCheck %s \
; RUN:   --check-prefix=ARM32

; RUN: %if --need=allow_dump --need=target_MIPS32 --command %p2i --filetype=asm\
; RUN:   --target mips32 -i %s --args -O2 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=allow_dump --need=target_MIPS32 --command FileCheck %s \
; RUN:   --check-prefix=MIPS32O2 --check-prefix=MIPS32

; RUN: %if --need=allow_dump --need=target_MIPS32 --command %p2i --filetype=asm\
; RUN:   --target mips32 -i %s --args -Om1 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=allow_dump --need=target_MIPS32 --command FileCheck %s \
; RUN:   --check-prefix=MIPS32OM1 --check-prefix=MIPS32

declare i8 @llvm.nacl.atomic.load.i8(i8*, i32)
declare i16 @llvm.nacl.atomic.load.i16(i16*, i32)
declare i32 @llvm.nacl.atomic.load.i32(i32*, i32)
declare i64 @llvm.nacl.atomic.load.i64(i64*, i32)
declare void @llvm.nacl.atomic.store.i8(i8, i8*, i32)
declare void @llvm.nacl.atomic.store.i16(i16, i16*, i32)
declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)
declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32)
declare i8 @llvm.nacl.atomic.rmw.i8(i32, i8*, i8, i32)
declare i16 @llvm.nacl.atomic.rmw.i16(i32, i16*, i16, i32)
declare i32 @llvm.nacl.atomic.rmw.i32(i32, i32*, i32, i32)
declare i64 @llvm.nacl.atomic.rmw.i64(i32, i64*, i64, i32)
declare i8 @llvm.nacl.atomic.cmpxchg.i8(i8*, i8, i8, i32, i32)
declare i16 @llvm.nacl.atomic.cmpxchg.i16(i16*, i16, i16, i32, i32)
declare i32 @llvm.nacl.atomic.cmpxchg.i32(i32*, i32, i32, i32, i32)
declare i64 @llvm.nacl.atomic.cmpxchg.i64(i64*, i64, i64, i32, i32)
declare void @llvm.nacl.atomic.fence(i32)
declare void @llvm.nacl.atomic.fence.all()
declare i1 @llvm.nacl.atomic.is.lock.free(i32, i8*)

; NOTE(review): In the calls below, the trailing i32 argument of every atomic
; intrinsic is the memory-order constant (6 = sequential consistency, per the
; comment in test_atomic_load_8), and the leading i32 argument of the rmw
; intrinsics selects the operation (the call sites in this file use 1=add,
; 2=sub, 3=or, 4=and, 5=xor, per the comment in test_atomic_rmw_add_8).

@SzGlobal8 = internal global [1 x i8] zeroinitializer, align 1
@SzGlobal16 = internal global [2 x i8] zeroinitializer, align 2
@SzGlobal32 = internal global [4 x i8] zeroinitializer, align 4
@SzGlobal64 = internal global [8 x i8] zeroinitializer, align 8

; NOTE: The LLC equivalent for 16-bit atomic operations are expanded
; as 32-bit operations. For Subzero, assume that real 16-bit operations
; will be usable (the validator will be fixed):
; https://code.google.com/p/nativeclient/issues/detail?id=2981

;;; Load

; x86 guarantees load/store to be atomic if naturally aligned.
; The PNaCl IR requires all atomic accesses to be naturally aligned.

define internal i32 @test_atomic_load_8(i32 %iptr) {
entry:
  %ptr = inttoptr i32 %iptr to i8*
  ; parameter value "6" is for the sequential consistency memory order.
  %i = call i8 @llvm.nacl.atomic.load.i8(i8* %ptr, i32 6)
  %i2 = sub i8 %i, 0
  %r = zext i8 %i2 to i32
  ret i32 %r
}
; CHECK-LABEL: test_atomic_load_8
; CHECK: mov {{.*}},DWORD
; CHECK: mov {{.*}},BYTE
; ARM32-LABEL: test_atomic_load_8
; ARM32: ldrb r{{[0-9]+}}, [r{{[0-9]+}}
; ARM32: dmb
; MIPS32-LABEL: test_atomic_load_8
; MIPS32: sync
; MIPS32: ll
; MIPS32: sc
; MIPS32: sync

define internal i32 @test_atomic_load_16(i32 %iptr) {
entry:
  %ptr = inttoptr i32 %iptr to i16*
  %i = call i16 @llvm.nacl.atomic.load.i16(i16* %ptr, i32 6)
  %i2 = sub i16 %i, 0
  %r = zext i16 %i2 to i32
  ret i32 %r
}
; CHECK-LABEL: test_atomic_load_16
; CHECK: mov {{.*}},DWORD
; CHECK: mov {{.*}},WORD
; ARM32-LABEL: test_atomic_load_16
; ARM32: ldrh r{{[0-9]+}}, [r{{[0-9]+}}
; ARM32: dmb
; MIPS32-LABEL: test_atomic_load_16
; MIPS32: sync
; MIPS32: ll
; MIPS32: sc
; MIPS32: sync

define internal i32 @test_atomic_load_32(i32 %iptr) {
entry:
  %ptr = inttoptr i32 %iptr to i32*
  %r = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
  ret i32 %r
}
; CHECK-LABEL: test_atomic_load_32
; CHECK: mov {{.*}},DWORD
; CHECK: mov {{.*}},DWORD
; ARM32-LABEL: test_atomic_load_32
; ARM32: ldr r{{[0-9]+}}, [r{{[0-9]+}}
; ARM32: dmb
; MIPS32-LABEL: test_atomic_load_32
; MIPS32: sync
; MIPS32: ll
; MIPS32: sc
; MIPS32: sync

define internal i64 @test_atomic_load_64(i32 %iptr) {
entry:
  %ptr = inttoptr i32 %iptr to i64*
  %r = call i64 @llvm.nacl.atomic.load.i64(i64* %ptr, i32 6)
  ret i64 %r
}
; CHECK-LABEL: test_atomic_load_64
; CHECK: movq x{{.*}},QWORD
; CHECK: movq QWORD {{.*}},x{{.*}}
; ARM32-LABEL: test_atomic_load_64
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}
; ARM32: dmb
; MIPS32-LABEL: test_atomic_load_64
; MIPS32: jal __sync_val_compare_and_swap_8
; MIPS32: sync

define internal i32 @test_atomic_load_32_with_arith(i32 %iptr) {
entry:
  br label %next

next:
  %ptr = inttoptr i32 %iptr to i32*
  %r = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
  %r2 = sub i32 32, %r
  ret i32 %r2
}
; CHECK-LABEL: test_atomic_load_32_with_arith
; CHECK: mov {{.*}},DWORD
; The next instruction may be a separate load or folded into an add.
;
; In O2 mode, we know that the load and sub are going to be fused.
; O2-LABEL: test_atomic_load_32_with_arith
; O2: mov {{.*}},DWORD
; O2: sub {{.*}},DWORD
; ARM32-LABEL: test_atomic_load_32_with_arith
; ARM32: ldr r{{[0-9]+}}, [r{{[0-9]+}}
; ARM32: dmb
; MIPS32-LABEL: test_atomic_load_32_with_arith
; MIPS32: sync
; MIPS32: ll
; MIPS32: sc
; MIPS32: sync
; MIPS32: subu

define internal i32 @test_atomic_load_32_ignored(i32 %iptr) {
entry:
  %ptr = inttoptr i32 %iptr to i32*
  %ignored = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
  ret i32 0
}
; CHECK-LABEL: test_atomic_load_32_ignored
; CHECK: mov {{.*}},DWORD
; CHECK: mov {{.*}},DWORD
; O2-LABEL: test_atomic_load_32_ignored
; O2: mov {{.*}},DWORD
; O2: mov {{.*}},DWORD
; ARM32-LABEL: test_atomic_load_32_ignored
; ARM32: ldr r{{[0-9]+}}, [r{{[0-9]+}}
; ARM32: dmb
; MIPS32-LABEL: test_atomic_load_32_ignored
; MIPS32: sync
; MIPS32: ll
; MIPS32: sc
; MIPS32: sync

define internal i64 @test_atomic_load_64_ignored(i32 %iptr) {
entry:
  %ptr = inttoptr i32 %iptr to i64*
  %ignored = call i64 @llvm.nacl.atomic.load.i64(i64* %ptr, i32 6)
  ret i64 0
}
; CHECK-LABEL: test_atomic_load_64_ignored
; CHECK: movq x{{.*}},QWORD
; CHECK: movq QWORD {{.*}},x{{.*}}
; ARM32-LABEL: test_atomic_load_64_ignored
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}
; ARM32: dmb
; MIPS32-LABEL: test_atomic_load_64_ignored
; MIPS32: jal __sync_val_compare_and_swap_8
; MIPS32: sync

;;; Store

define internal void @test_atomic_store_8(i32 %iptr, i32 %v) {
entry:
  %truncv = trunc i32 %v to i8
  %ptr = inttoptr i32 %iptr to i8*
  call void @llvm.nacl.atomic.store.i8(i8 %truncv, i8* %ptr, i32 6)
  ret void
}
; CHECK-LABEL: test_atomic_store_8
; CHECK: mov BYTE
; CHECK: mfence
; ARM32-LABEL: test_atomic_store_8
; ARM32: dmb
; ARM32: strb r{{[0-9]+}}, [r{{[0-9]+}}
; ARM32: dmb
; MIPS32-LABEL: test_atomic_store_8
; MIPS32: sync
; MIPS32: ll
; MIPS32: sc
; MIPS32: sync

define internal void @test_atomic_store_16(i32 %iptr, i32 %v) {
entry:
  %truncv = trunc i32 %v to i16
  %ptr = inttoptr i32 %iptr to i16*
  call void @llvm.nacl.atomic.store.i16(i16 %truncv, i16* %ptr, i32 6)
  ret void
}
; CHECK-LABEL: test_atomic_store_16
; CHECK: mov WORD
; CHECK: mfence
; ARM32-LABEL: test_atomic_store_16
; ARM32: dmb
; ARM32: strh r{{[0-9]+}}, [r{{[0-9]+}}
; ARM32: dmb
; MIPS32-LABEL: test_atomic_store_16
; MIPS32: sync
; MIPS32: ll
; MIPS32: sc
; MIPS32: sync

define internal void @test_atomic_store_32(i32 %iptr, i32 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i32*
  call void @llvm.nacl.atomic.store.i32(i32 %v, i32* %ptr, i32 6)
  ret void
}
; CHECK-LABEL: test_atomic_store_32
; CHECK: mov DWORD
; CHECK: mfence
; ARM32-LABEL: test_atomic_store_32
; ARM32: dmb
; ARM32: str r{{[0-9]+}}, [r{{[0-9]+}}
; ARM32: dmb
; MIPS32-LABEL: test_atomic_store_32
; MIPS32: sync
; MIPS32: ll
; MIPS32: sc
; MIPS32: sync

define internal void @test_atomic_store_64(i32 %iptr, i64 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i64*
  call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr, i32 6)
  ret void
}
; CHECK-LABEL: test_atomic_store_64
; CHECK: movq x{{.*}},QWORD
; CHECK: movq QWORD {{.*}},x{{.*}}
; CHECK: mfence
; ARM32-LABEL: test_atomic_store_64
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [[MEM:.*]]
; ARM32: strexd [[S:r[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}, [[MEM]]
; ARM32: cmp [[S]], #0
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_store_64
; MIPS32: sync
; MIPS32: jal __sync_lock_test_and_set_8
; MIPS32: sync

define internal void @test_atomic_store_64_const(i32 %iptr) {
entry:
  %ptr = inttoptr i32 %iptr to i64*
  call void @llvm.nacl.atomic.store.i64(i64 12345678901234, i64* %ptr, i32 6)
  ret void
}
; CHECK-LABEL: test_atomic_store_64_const
; CHECK: mov {{.*}},0x73ce2ff2
; CHECK: mov {{.*}},0xb3a
; CHECK: movq x{{.*}},QWORD
; CHECK: movq QWORD {{.*}},x{{.*}}
; CHECK: mfence
; ARM32-LABEL: test_atomic_store_64_const
; ARM32: movw [[T0:r[0-9]+]], #12274
; ARM32: movt [[T0]], #29646
; ARM32: movw r{{[0-9]+}}, #2874
; ARM32: dmb
; ARM32: .L[[RETRY:.*]]:
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [[MEM:.*]]
; ARM32: strexd [[S:r[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}, [[MEM]]
; ARM32: cmp [[S]], #0
; ARM32: bne .L[[RETRY]]
; ARM32: dmb
; MIPS32-LABEL: test_atomic_store_64_const
; MIPS32: sync
; MIPS32: lui {{.*}}, 29646
; MIPS32: ori {{.*}},{{.*}}, 12274
; MIPS32: addiu {{.*}}, $zero, 2874
; MIPS32: jal __sync_lock_test_and_set_8
; MIPS32: sync

;;; RMW

;; add

define internal i32 @test_atomic_rmw_add_8(i32 %iptr, i32 %v) {
entry:
  %trunc = trunc i32 %v to i8
  %ptr = inttoptr i32 %iptr to i8*
  ; "1" is an atomic add, and "6" is sequential consistency.
  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 1, i8* %ptr, i8 %trunc, i32 6)
  %a_ext = zext i8 %a to i32
  ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_add_8
; CHECK: lock xadd BYTE {{.*}},[[REG:.*]]
; CHECK: {{mov|movzx}} {{.*}},[[REG]]
; ARM32-LABEL: test_atomic_rmw_add_8
; ARM32: dmb
; ARM32: ldrexb
; ARM32: add
; ARM32: strexb
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_add_8
; MIPS32: sync
; MIPS32: addiu {{.*}}, $zero, -4
; MIPS32: and
; MIPS32: andi {{.*}}, {{.*}}, 3
; MIPS32: sll {{.*}}, {{.*}}, 3
; MIPS32: ori {{.*}}, $zero, 255
; MIPS32: sllv
; MIPS32: nor
; MIPS32: sllv
; MIPS32: ll
; MIPS32: addu
; MIPS32: and
; MIPS32: and
; MIPS32: or
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: and
; MIPS32: srlv
; MIPS32: sll {{.*}}, {{.*}}, 24
; MIPS32: sra {{.*}}, {{.*}}, 24
; MIPS32: sync

define internal i32 @test_atomic_rmw_add_16(i32 %iptr, i32 %v) {
entry:
  %trunc = trunc i32 %v to i16
  %ptr = inttoptr i32 %iptr to i16*
  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 1, i16* %ptr, i16 %trunc, i32 6)
  %a_ext = zext i16 %a to i32
  ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_add_16
; CHECK: lock xadd WORD {{.*}},[[REG:.*]]
; CHECK: {{mov|movzx}} {{.*}},[[REG]]
; ARM32-LABEL: test_atomic_rmw_add_16
; ARM32: dmb
; ARM32: ldrexh
; ARM32: add
; ARM32: strexh
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_add_16
; MIPS32: sync
; MIPS32: addiu {{.*}}, $zero, -4
; MIPS32: and
; MIPS32: andi {{.*}}, {{.*}}, 3
; MIPS32: sll {{.*}}, {{.*}}, 3
; MIPS32: ori {{.*}}, {{.*}}, 65535
; MIPS32: sllv
; MIPS32: nor
; MIPS32: sllv
; MIPS32: ll
; MIPS32: addu
; MIPS32: and
; MIPS32: and
; MIPS32: or
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: and
; MIPS32: srlv
; MIPS32: sll {{.*}}, {{.*}}, 16
; MIPS32: sra {{.*}}, {{.*}}, 16
; MIPS32: sync

define internal i32 @test_atomic_rmw_add_32(i32 %iptr, i32 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i32*
  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %v, i32 6)
  ret i32 %a
}
; CHECK-LABEL: test_atomic_rmw_add_32
; CHECK: lock xadd DWORD {{.*}},[[REG:.*]]
; CHECK: mov {{.*}},[[REG]]
; ARM32-LABEL: test_atomic_rmw_add_32
; ARM32: dmb
; ARM32: ldrex
; ARM32: add
; ARM32: strex
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_add_32
; MIPS32: sync
; MIPS32: ll
; MIPS32: addu
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: sync

define internal i64 @test_atomic_rmw_add_64(i32 %iptr, i64 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i64*
  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6)
  ret i64 %a
}
; CHECK-LABEL: test_atomic_rmw_add_64
; CHECK: push ebx
; CHECK: mov eax,DWORD PTR [{{.*}}]
; CHECK: mov edx,DWORD PTR [{{.*}}+0x4]
; CHECK: [[LABEL:[^ ]*]]: {{.*}} mov ebx,eax
; RHS of add cannot be any of the e[abcd]x regs because they are
; clobbered in the loop, and the RHS needs to remain live.
; CHECK: add ebx,{{.*e.[^x]}}
; CHECK: mov ecx,edx
; CHECK: adc ecx,{{.*e.[^x]}}
; Ptr cannot be eax, ebx, ecx, or edx (used up for the expected and desired).
; It can be esi, edi, or ebp though, for example (so we need to be careful
; about rejecting eb* and ed*.)
; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}
; CHECK: jne [[LABEL]]
; ARM32-LABEL: test_atomic_rmw_add_64
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: adds
; ARM32: adc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_add_64
; MIPS32: sync
; MIPS32: jal __sync_fetch_and_add_8
; MIPS32: sync

; Same test as above, but with a global address to test FakeUse issues.
define internal i64 @test_atomic_rmw_add_64_global(i64 %v) {
entry:
  %ptr = bitcast [8 x i8]* @SzGlobal64 to i64*
  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6)
  ret i64 %a
}
; CHECK-LABEL: test_atomic_rmw_add_64_global
; ARM32-LABEL: test_atomic_rmw_add_64_global
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: adds
; ARM32: adc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_add_64_global
; MIPS32: sync
; MIPS32: jal __sync_fetch_and_add_8
; MIPS32: sync

; Test with some more register pressure. When we have an alloca, ebp is
; used to manage the stack frame, so it cannot be used as a register either.
declare void @use_ptr(i32 %iptr)

define internal i64 @test_atomic_rmw_add_64_alloca(i32 %iptr, i64 %v) {
entry:
  br label %eblock ; Disable alloca optimization
eblock:
  %alloca_ptr = alloca i8, i32 16, align 16
  %ptr = inttoptr i32 %iptr to i64*
  %old = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6)
  store i8 0, i8* %alloca_ptr, align 1
  store i8 1, i8* %alloca_ptr, align 1
  store i8 2, i8* %alloca_ptr, align 1
  store i8 3, i8* %alloca_ptr, align 1
  %__5 = ptrtoint i8* %alloca_ptr to i32
  call void @use_ptr(i32 %__5)
  ret i64 %old
}
; CHECK-LABEL: test_atomic_rmw_add_64_alloca
; CHECK: push ebx
; CHECK-DAG: mov edx
; CHECK-DAG: mov eax
; CHECK-DAG: mov ecx
; CHECK-DAG: mov ebx
; Ptr cannot be eax, ebx, ecx, or edx (used up for the expected and desired).
; It also cannot be ebp since we use that for alloca. Also make sure it's
; not esp, since that's the stack pointer and mucking with it will break
; the later use_ptr function call.
; That pretty much leaves esi, or edi as the only viable registers.
; CHECK: lock cmpxchg8b QWORD PTR [e{{[ds]}}i]
; CHECK: call {{.*}} R_{{.*}} use_ptr
; ARM32-LABEL: test_atomic_rmw_add_64_alloca
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: adds
; ARM32: adc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_add_64_alloca
; MIPS32: sync
; MIPS32: jal __sync_fetch_and_add_8
; MIPS32: sync

define internal i32 @test_atomic_rmw_add_32_ignored(i32 %iptr, i32 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i32*
  %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %v, i32 6)
  ret i32 %v
}
; Technically this could use "lock add" instead of "lock xadd", if liveness
; tells us that the destination variable is dead.
; CHECK-LABEL: test_atomic_rmw_add_32_ignored
; CHECK: lock xadd DWORD {{.*}},[[REG:.*]]
; ARM32-LABEL: test_atomic_rmw_add_32_ignored
; ARM32: dmb
; ARM32: ldrex
; ARM32: add
; ARM32: strex
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_add_32_ignored
; MIPS32: sync
; MIPS32: ll
; MIPS32: addu
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: sync

; Atomic RMW 64 needs to be expanded into its own loop.
; Make sure that works w/ non-trivial function bodies.
define internal i64 @test_atomic_rmw_add_64_loop(i32 %iptr, i64 %v) {
entry:
  %x = icmp ult i64 %v, 100
  br i1 %x, label %err, label %loop

loop:
  %v_next = phi i64 [ %v, %entry ], [ %next, %loop ]
  %ptr = inttoptr i32 %iptr to i64*
  %next = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v_next, i32 6)
  %success = icmp eq i64 %next, 100
  br i1 %success, label %done, label %loop

done:
  ret i64 %next

err:
  ret i64 0
}
; CHECK-LABEL: test_atomic_rmw_add_64_loop
; CHECK: push ebx
; CHECK: mov eax,DWORD PTR [{{.*}}]
; CHECK: mov edx,DWORD PTR [{{.*}}+0x4]
; CHECK: [[LABEL:[^ ]*]]: {{.*}} mov ebx,eax
; CHECK: add ebx,{{.*e.[^x]}}
; CHECK: mov ecx,edx
; CHECK: adc ecx,{{.*e.[^x]}}
; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}+0x0]
; CHECK: jne [[LABEL]]
; ARM32-LABEL: test_atomic_rmw_add_64_loop
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: adds
; ARM32: adc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
; ARM32: b
; MIPS32-LABEL: test_atomic_rmw_add_64_loop
; MIPS32: sync
; MIPS32: jal __sync_fetch_and_add_8
; MIPS32: sync

;; sub

define internal i32 @test_atomic_rmw_sub_8(i32 %iptr, i32 %v) {
entry:
  %trunc = trunc i32 %v to i8
  %ptr = inttoptr i32 %iptr to i8*
  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 2, i8* %ptr, i8 %trunc, i32 6)
  %a_ext = zext i8 %a to i32
  ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_sub_8
; CHECK: neg [[REG:.*]]
; CHECK: lock xadd BYTE {{.*}},[[REG]]
; CHECK: {{mov|movzx}} {{.*}},[[REG]]
; ARM32-LABEL: test_atomic_rmw_sub_8
; ARM32: dmb
; ARM32: ldrexb
; ARM32: sub
; ARM32: strexb
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_sub_8
; MIPS32: sync
; MIPS32: addiu {{.*}}, $zero, -4
; MIPS32: and
; MIPS32: andi {{.*}}, {{.*}}, 3
; MIPS32: sll {{.*}}, {{.*}}, 3
; MIPS32: ori {{.*}}, $zero, 255
; MIPS32: sllv
; MIPS32: nor
; MIPS32: sllv
; MIPS32: ll
; MIPS32: subu
; MIPS32: and
; MIPS32: and
; MIPS32: or
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: and
; MIPS32: srlv
; MIPS32: sll {{.*}}, {{.*}}, 24
; MIPS32: sra {{.*}}, {{.*}}, 24
; MIPS32: sync

define internal i32 @test_atomic_rmw_sub_16(i32 %iptr, i32 %v) {
entry:
  %trunc = trunc i32 %v to i16
  %ptr = inttoptr i32 %iptr to i16*
  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 2, i16* %ptr, i16 %trunc, i32 6)
  %a_ext = zext i16 %a to i32
  ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_sub_16
; CHECK: neg [[REG:.*]]
; CHECK: lock xadd WORD {{.*}},[[REG]]
; CHECK: {{mov|movzx}} {{.*}},[[REG]]
; ARM32-LABEL: test_atomic_rmw_sub_16
; ARM32: dmb
; ARM32: ldrexh
; ARM32: sub
; ARM32: strexh
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_sub_16
; MIPS32: sync
; MIPS32: addiu {{.*}}, $zero, -4
; MIPS32: and
; MIPS32: andi {{.*}}, {{.*}}, 3
; MIPS32: sll {{.*}}, {{.*}}, 3
; MIPS32: ori {{.*}}, {{.*}}, 65535
; MIPS32: sllv
; MIPS32: nor
; MIPS32: sllv
; MIPS32: ll
; MIPS32: subu
; MIPS32: and
; MIPS32: and
; MIPS32: or
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: and
; MIPS32: srlv
; MIPS32: sll {{.*}}, {{.*}}, 16
; MIPS32: sra {{.*}}, {{.*}}, 16
; MIPS32: sync

define internal i32 @test_atomic_rmw_sub_32(i32 %iptr, i32 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i32*
  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %v, i32 6)
  ret i32 %a
}
; CHECK-LABEL: test_atomic_rmw_sub_32
; CHECK: neg [[REG:.*]]
; CHECK: lock xadd DWORD {{.*}},[[REG]]
; CHECK: mov {{.*}},[[REG]]
; ARM32-LABEL: test_atomic_rmw_sub_32
; ARM32: dmb
; ARM32: ldrex
; ARM32: sub
; ARM32: strex
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_sub_32
; MIPS32: sync
; MIPS32: ll
; MIPS32: subu
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: sync

define internal i64 @test_atomic_rmw_sub_64(i32 %iptr, i64 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i64*
  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 2, i64* %ptr, i64 %v, i32 6)
  ret i64 %a
}
; CHECK-LABEL: test_atomic_rmw_sub_64
; CHECK: push ebx
; CHECK: mov eax,DWORD PTR [{{.*}}]
; CHECK: mov edx,DWORD PTR [{{.*}}+0x4]
; CHECK: [[LABEL:[^ ]*]]: {{.*}} mov ebx,eax
; CHECK: sub ebx,{{.*e.[^x]}}
; CHECK: mov ecx,edx
; CHECK: sbb ecx,{{.*e.[^x]}}
; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}
; CHECK: jne [[LABEL]]
; ARM32-LABEL: test_atomic_rmw_sub_64
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: subs
; ARM32: sbc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_sub_64
; MIPS32: sync
; MIPS32: jal __sync_fetch_and_sub_8
; MIPS32: sync

define internal i32 @test_atomic_rmw_sub_32_ignored(i32 %iptr, i32 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i32*
  %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %v, i32 6)
  ret i32 %v
}
; Could use "lock sub" instead of "neg; lock xadd"
; CHECK-LABEL: test_atomic_rmw_sub_32_ignored
; CHECK: neg [[REG:.*]]
; CHECK: lock xadd DWORD {{.*}},[[REG]]
; ARM32-LABEL: test_atomic_rmw_sub_32_ignored
; ARM32: dmb
; ARM32: ldrex
; ARM32: sub
; ARM32: strex
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_sub_32_ignored
; MIPS32: sync
; MIPS32: ll
; MIPS32: subu
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: sync

;; or

define internal i32 @test_atomic_rmw_or_8(i32 %iptr, i32 %v) {
entry:
  %trunc = trunc i32 %v to i8
  %ptr = inttoptr i32 %iptr to i8*
  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 3, i8* %ptr, i8 %trunc, i32 6)
  %a_ext = zext i8 %a to i32
  ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_or_8
; CHECK: mov al,BYTE PTR
; Dest cannot be eax here, because eax is used for the old value. Also want
; to make sure that cmpxchg's source is the same register.
; CHECK: or [[REG:[^a].]]
; CHECK: lock cmpxchg BYTE PTR [e{{[^a].}}],[[REG]]
; CHECK: jne
; ARM32-LABEL: test_atomic_rmw_or_8
; ARM32: dmb
; ARM32: ldrexb
; ARM32: orr
; ARM32: strexb
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_or_8
; MIPS32: sync
; MIPS32: addiu {{.*}}, $zero, -4
; MIPS32: and
; MIPS32: andi {{.*}}, {{.*}}, 3
; MIPS32: sll {{.*}}, {{.*}}, 3
; MIPS32: ori {{.*}}, $zero, 255
; MIPS32: sllv
; MIPS32: nor
; MIPS32: sllv
; MIPS32: ll
; MIPS32: or
; MIPS32: and
; MIPS32: and
; MIPS32: or
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: and
; MIPS32: srlv
; MIPS32: sll {{.*}}, {{.*}}, 24
; MIPS32: sra {{.*}}, {{.*}}, 24
; MIPS32: sync

; Same test as above, but with a global address to test FakeUse issues.
define internal i32 @test_atomic_rmw_or_8_global(i32 %v) {
entry:
  %trunc = trunc i32 %v to i8
  %ptr = bitcast [1 x i8]* @SzGlobal8 to i8*
  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 3, i8* %ptr, i8 %trunc, i32 6)
  %a_ext = zext i8 %a to i32
  ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_or_8_global
; ARM32-LABEL: test_atomic_rmw_or_8_global
; ARM32: dmb
; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal8
; ARM32: movt [[PTR]], #:upper16:SzGlobal8
; ARM32: ldrexb r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
; ARM32: orr
; ARM32: strexb
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_or_8_global
; MIPS32: sync
; MIPS32: addiu {{.*}}, $zero, -4
; MIPS32: and
; MIPS32: andi {{.*}}, {{.*}}, 3
; MIPS32: sll {{.*}}, {{.*}}, 3
; MIPS32: ori {{.*}}, $zero, 255
; MIPS32: sllv
; MIPS32: nor
; MIPS32: sllv
; MIPS32: ll
; MIPS32: or
; MIPS32: and
; MIPS32: and
; MIPS32: or
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: and
; MIPS32: srlv
; MIPS32: sll {{.*}}, {{.*}}, 24
; MIPS32: sra {{.*}}, {{.*}}, 24
; MIPS32: sync

define internal i32 @test_atomic_rmw_or_16(i32 %iptr, i32 %v) {
entry:
  %trunc = trunc i32 %v to i16
  %ptr = inttoptr i32 %iptr to i16*
  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %trunc, i32 6)
  %a_ext = zext i16 %a to i32
  ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_or_16
; CHECK: mov ax,WORD PTR
; CHECK: or [[REG:[^a].]]
; CHECK: lock cmpxchg WORD PTR [e{{[^a].}}],[[REG]]
; CHECK: jne
; ARM32-LABEL: test_atomic_rmw_or_16
; ARM32: dmb
; ARM32: ldrexh
; ARM32: orr
; ARM32: strexh
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_or_16
; MIPS32: sync
; MIPS32: addiu {{.*}}, $zero, -4
; MIPS32: and
; MIPS32: andi {{.*}}, {{.*}}, 3
; MIPS32: sll {{.*}}, {{.*}}, 3
; MIPS32: ori {{.*}}, {{.*}}, 65535
; MIPS32: sllv
; MIPS32: nor
; MIPS32: sllv
; MIPS32: ll
; MIPS32: or
; MIPS32: and
; MIPS32: and
; MIPS32: or
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: and
; MIPS32: srlv
; MIPS32: sll {{.*}}, {{.*}}, 16
; MIPS32: sra {{.*}}, {{.*}}, 16
; MIPS32: sync

; Same test as above, but with a global address to test FakeUse issues.
define internal i32 @test_atomic_rmw_or_16_global(i32 %v) {
entry:
  %trunc = trunc i32 %v to i16
  %ptr = bitcast [2 x i8]* @SzGlobal16 to i16*
  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %trunc, i32 6)
  %a_ext = zext i16 %a to i32
  ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_or_16_global
; ARM32-LABEL: test_atomic_rmw_or_16_global
; ARM32: dmb
; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal16
; ARM32: movt [[PTR]], #:upper16:SzGlobal16
; ARM32: ldrexh r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
; ARM32: orr
; ARM32: strexh
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_or_16_global
; MIPS32: sync
; MIPS32: addiu {{.*}}, $zero, -4
; MIPS32: and
; MIPS32: andi {{.*}}, {{.*}}, 3
; MIPS32: sll {{.*}}, {{.*}}, 3
; MIPS32: ori {{.*}}, {{.*}}, 65535
; MIPS32: sllv
; MIPS32: nor
; MIPS32: sllv
; MIPS32: ll
; MIPS32: or
; MIPS32: and
; MIPS32: and
; MIPS32: or
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: and
; MIPS32: srlv
; MIPS32: sll {{.*}}, {{.*}}, 16
; MIPS32: sra {{.*}}, {{.*}}, 16
; MIPS32: sync

define internal i32 @test_atomic_rmw_or_32(i32 %iptr, i32 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i32*
  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6)
  ret i32 %a
}
; CHECK-LABEL: test_atomic_rmw_or_32
; CHECK: mov eax,DWORD PTR
; CHECK: or [[REG:e[^a].]]
; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}],[[REG]]
; CHECK: jne
; ARM32-LABEL: test_atomic_rmw_or_32
; ARM32: dmb
; ARM32: ldrex
; ARM32: orr
; ARM32: strex
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_or_32
; MIPS32: sync
; MIPS32: ll
; MIPS32: or
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: sync

; Same test as above, but with a global address to test FakeUse issues.
define internal i32 @test_atomic_rmw_or_32_global(i32 %v) {
entry:
  %ptr = bitcast [4 x i8]* @SzGlobal32 to i32*
  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6)
  ret i32 %a
}
; CHECK-LABEL: test_atomic_rmw_or_32_global
; ARM32-LABEL: test_atomic_rmw_or_32_global
; ARM32: dmb
; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal32
; ARM32: movt [[PTR]], #:upper16:SzGlobal32
; ARM32: ldrex r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
; ARM32: orr
; ARM32: strex
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_or_32_global
; MIPS32: sync
; MIPS32: ll
; MIPS32: or
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: sync

define internal i64 @test_atomic_rmw_or_64(i32 %iptr, i64 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i64*
  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 3, i64* %ptr, i64 %v, i32 6)
  ret i64 %a
}
; CHECK-LABEL: test_atomic_rmw_or_64
; CHECK: push ebx
; CHECK: mov eax,DWORD PTR [{{.*}}]
; CHECK: mov edx,DWORD PTR [{{.*}}+0x4]
; CHECK: [[LABEL:[^ ]*]]: {{.*}} mov ebx,eax
; CHECK: or ebx,{{.*e.[^x]}}
; CHECK: mov ecx,edx
; CHECK: or ecx,{{.*e.[^x]}}
; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}
; CHECK: jne [[LABEL]]
; ARM32-LABEL: test_atomic_rmw_or_64
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: orr
; ARM32: orr
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_or_64
; MIPS32: sync
; MIPS32: jal __sync_fetch_and_or_8
; MIPS32: sync

define internal i32 @test_atomic_rmw_or_32_ignored(i32 %iptr, i32 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i32*
  %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6)
  ret i32 %v
}
; CHECK-LABEL: test_atomic_rmw_or_32_ignored
; Could just "lock or", if we inspect the liveness information first.
; Would also need a way to introduce "lock"'edness to binary
; operators without introducing overhead on the more common binary ops.
; CHECK: mov eax,DWORD PTR
; CHECK: or [[REG:e[^a].]]
; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}],[[REG]]
; CHECK: jne
; ARM32-LABEL: test_atomic_rmw_or_32_ignored
; ARM32: dmb
; ARM32: ldrex
; ARM32: orr
; ARM32: strex
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_or_32_ignored
; MIPS32: sync
; MIPS32: ll
; MIPS32: or
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: sync

;; and

define internal i32 @test_atomic_rmw_and_8(i32 %iptr, i32 %v) {
entry:
  %trunc = trunc i32 %v to i8
  %ptr = inttoptr i32 %iptr to i8*
  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 4, i8* %ptr, i8 %trunc, i32 6)
  %a_ext = zext i8 %a to i32
  ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_and_8
; CHECK: mov al,BYTE PTR
; CHECK: and [[REG:[^a].]]
; CHECK: lock cmpxchg BYTE PTR [e{{[^a].}}],[[REG]]
; CHECK: jne
; ARM32-LABEL: test_atomic_rmw_and_8
; ARM32: dmb
; ARM32: ldrexb
; ARM32: and
; ARM32: strexb
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_and_8
; MIPS32: sync
; MIPS32: addiu {{.*}}, $zero, -4
; MIPS32: and
; MIPS32: andi {{.*}}, {{.*}}, 3
; MIPS32: sll {{.*}}, {{.*}}, 3
; MIPS32: ori {{.*}}, $zero, 255
; MIPS32: sllv
; MIPS32: nor
; MIPS32: sllv
; MIPS32: ll
; MIPS32: and
; MIPS32: and
; MIPS32: and
; MIPS32: or
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: and
; MIPS32: srlv
; MIPS32: sll {{.*}}, {{.*}}, 24
; MIPS32: sra {{.*}}, {{.*}}, 24
; MIPS32: sync

define internal i32 @test_atomic_rmw_and_16(i32 %iptr, i32 %v) {
entry:
  %trunc = trunc i32 %v to i16
  %ptr = inttoptr i32 %iptr to i16*
  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 4, i16* %ptr, i16 %trunc, i32 6)
  %a_ext = zext i16 %a to i32
  ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_and_16
; CHECK: mov ax,WORD PTR
; CHECK: and
; CHECK: lock cmpxchg WORD PTR [e{{[^a].}}]
; CHECK: jne
; ARM32-LABEL: test_atomic_rmw_and_16
; ARM32: dmb
; ARM32: ldrexh
; ARM32: and
; ARM32: strexh
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_and_16
; MIPS32: sync
; MIPS32: addiu {{.*}}, $zero, -4
; MIPS32: and
; MIPS32: andi {{.*}}, {{.*}}, 3
; MIPS32: sll {{.*}}, {{.*}}, 3
; MIPS32: ori {{.*}}, {{.*}}, 65535
; MIPS32: sllv
; MIPS32: nor
; MIPS32: sllv
; MIPS32: ll
; MIPS32: and
; MIPS32: and
; MIPS32: and
; MIPS32: or
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: and
; MIPS32: srlv
; MIPS32: sll {{.*}}, {{.*}}, 16
; MIPS32: sra {{.*}}, {{.*}}, 16
; MIPS32: sync

define internal i32 @test_atomic_rmw_and_32(i32 %iptr, i32 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i32*
  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %v, i32 6)
  ret i32 %a
}
; CHECK-LABEL: test_atomic_rmw_and_32
; CHECK: mov eax,DWORD PTR
; CHECK: and
; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}]
; CHECK: jne
; ARM32-LABEL: test_atomic_rmw_and_32
; ARM32: dmb
; ARM32: ldrex
; ARM32: and
; ARM32: strex
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_and_32
; MIPS32: sync
; MIPS32: ll
; MIPS32: and
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: sync

define internal i64 @test_atomic_rmw_and_64(i32 %iptr, i64 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i64*
  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 4, i64* %ptr, i64 %v, i32 6)
  ret i64 %a
}
; CHECK-LABEL: test_atomic_rmw_and_64
; CHECK: push ebx
; CHECK: mov eax,DWORD PTR [{{.*}}]
; CHECK: mov edx,DWORD PTR [{{.*}}+0x4]
; CHECK: [[LABEL:[^ ]*]]: {{.*}} mov ebx,eax
; CHECK: and ebx,{{.*e.[^x]}}
; CHECK: mov ecx,edx
; CHECK: and ecx,{{.*e.[^x]}}
; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}
; CHECK: jne [[LABEL]]
; ARM32-LABEL: test_atomic_rmw_and_64
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: and
; ARM32: and
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_and_64
; MIPS32: sync
; MIPS32: jal __sync_fetch_and_and_8
; MIPS32: sync

define internal i32 @test_atomic_rmw_and_32_ignored(i32 %iptr, i32 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i32*
  %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %v, i32 6)
  ret i32 %v
}
; CHECK-LABEL: test_atomic_rmw_and_32_ignored
; Could just "lock and"
; CHECK: mov eax,DWORD PTR
; CHECK: and
; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}]
; CHECK: jne
; ARM32-LABEL: test_atomic_rmw_and_32_ignored
; ARM32: dmb
; ARM32: ldrex
; ARM32: and
; ARM32: strex
; ARM32: bne
; ARM32: dmb
; MIPS32-LABEL: test_atomic_rmw_and_32_ignored
; MIPS32: sync
; MIPS32: ll
; MIPS32: and
; MIPS32: sc
; MIPS32: beq {{.*}}, $zero, {{.*}}
; MIPS32: sync

;; xor

define internal i32 @test_atomic_rmw_xor_8(i32 %iptr, i32 %v) {
entry:
  %trunc = trunc i32 %v to i8
  %ptr = inttoptr i32 %iptr to i8*
  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 5, i8*
%ptr, i8 %trunc, i32 6) 1235 %a_ext = zext i8 %a to i32 1236 ret i32 %a_ext 1237} 1238; CHECK-LABEL: test_atomic_rmw_xor_8 1239; CHECK: mov al,BYTE PTR 1240; CHECK: xor [[REG:[^a].]] 1241; CHECK: lock cmpxchg BYTE PTR [e{{[^a].}}],[[REG]] 1242; CHECK: jne 1243; ARM32-LABEL: test_atomic_rmw_xor_8 1244; ARM32: dmb 1245; ARM32: ldrexb 1246; ARM32: eor 1247; ARM32: strexb 1248; ARM32: bne 1249; ARM32: dmb 1250; MIPS32-LABEL: test_atomic_rmw_xor_8 1251; MIPS32: sync 1252; MIPS32: addiu {{.*}}, $zero, -4 1253; MIPS32: and 1254; MIPS32: andi {{.*}}, {{.*}}, 3 1255; MIPS32: sll {{.*}}, {{.*}}, 3 1256; MIPS32: ori {{.*}}, $zero, 255 1257; MIPS32: sllv 1258; MIPS32: nor 1259; MIPS32: sllv 1260; MIPS32: ll 1261; MIPS32: xor 1262; MIPS32: and 1263; MIPS32: and 1264; MIPS32: or 1265; MIPS32: sc 1266; MIPS32: beq {{.*}}, $zero, {{.*}} 1267; MIPS32: and 1268; MIPS32: srlv 1269; MIPS32: sll {{.*}}, {{.*}}, 24 1270; MIPS32: sra {{.*}}, {{.*}}, 24 1271; MIPS32: sync 1272 1273define internal i32 @test_atomic_rmw_xor_16(i32 %iptr, i32 %v) { 1274entry: 1275 %trunc = trunc i32 %v to i16 1276 %ptr = inttoptr i32 %iptr to i16* 1277 %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 5, i16* %ptr, i16 %trunc, i32 6) 1278 %a_ext = zext i16 %a to i32 1279 ret i32 %a_ext 1280} 1281; CHECK-LABEL: test_atomic_rmw_xor_16 1282; CHECK: mov ax,WORD PTR 1283; CHECK: xor 1284; CHECK: lock cmpxchg WORD PTR [e{{[^a].}}] 1285; CHECK: jne 1286; ARM32-LABEL: test_atomic_rmw_xor_16 1287; ARM32: dmb 1288; ARM32: ldrexh 1289; ARM32: eor 1290; ARM32: strexh 1291; ARM32: bne 1292; ARM32: dmb 1293; MIPS32-LABEL: test_atomic_rmw_xor_16 1294; MIPS32: sync 1295; MIPS32: addiu {{.*}}, $zero, -4 1296; MIPS32: and 1297; MIPS32: andi {{.*}}, {{.*}}, 3 1298; MIPS32: sll {{.*}}, {{.*}}, 3 1299; MIPS32: ori {{.*}}, {{.*}}, 65535 1300; MIPS32: sllv 1301; MIPS32: nor 1302; MIPS32: sllv 1303; MIPS32: ll 1304; MIPS32: xor 1305; MIPS32: and 1306; MIPS32: and 1307; MIPS32: or 1308; MIPS32: sc 1309; MIPS32: beq {{.*}}, $zero, {{.*}} 
1310; MIPS32: and 1311; MIPS32: srlv 1312; MIPS32: sll {{.*}}, {{.*}}, 16 1313; MIPS32: sra {{.*}}, {{.*}}, 16 1314; MIPS32: sync 1315 1316define internal i32 @test_atomic_rmw_xor_32(i32 %iptr, i32 %v) { 1317entry: 1318 %ptr = inttoptr i32 %iptr to i32* 1319 %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %v, i32 6) 1320 ret i32 %a 1321} 1322; CHECK-LABEL: test_atomic_rmw_xor_32 1323; CHECK: mov eax,DWORD PTR 1324; CHECK: xor 1325; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}] 1326; CHECK: jne 1327; ARM32-LABEL: test_atomic_rmw_xor_32 1328; ARM32: dmb 1329; ARM32: ldrex 1330; ARM32: eor 1331; ARM32: strex 1332; ARM32: bne 1333; ARM32: dmb 1334; MIPS32-LABEL: test_atomic_rmw_xor_32 1335; MIPS32: sync 1336; MIPS32: ll 1337; MIPS32: xor 1338; MIPS32: sc 1339; MIPS32: beq {{.*}}, $zero, {{.*}} 1340; MIPS32: sync 1341 1342define internal i64 @test_atomic_rmw_xor_64(i32 %iptr, i64 %v) { 1343entry: 1344 %ptr = inttoptr i32 %iptr to i64* 1345 %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 5, i64* %ptr, i64 %v, i32 6) 1346 ret i64 %a 1347} 1348; CHECK-LABEL: test_atomic_rmw_xor_64 1349; CHECK: push ebx 1350; CHECK: mov eax,DWORD PTR [{{.*}}] 1351; CHECK: mov edx,DWORD PTR [{{.*}}+0x4] 1352; CHECK: mov ebx,eax 1353; CHECK: xor ebx,{{.*e.[^x]}} 1354; CHECK: mov ecx,edx 1355; CHECK: xor ecx,{{.*e.[^x]}} 1356; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}} 1357; CHECK: jne 1358; ARM32-LABEL: test_atomic_rmw_xor_64 1359; ARM32: dmb 1360; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] 1361; ARM32: eor 1362; ARM32: eor 1363; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] 1364; ARM32: bne 1365; ARM32: dmb 1366; MIPS32-LABEL: test_atomic_rmw_xor_64 1367; MIPS32: sync 1368; MIPS32: jal __sync_fetch_and_xor_8 1369; MIPS32: sync 1370 1371define internal i32 @test_atomic_rmw_xor_32_ignored(i32 %iptr, i32 %v) { 1372entry: 1373 %ptr = inttoptr i32 %iptr to i32* 1374 %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %v, i32 6) 1375 ret i32 %v
1376} 1377; CHECK-LABEL: test_atomic_rmw_xor_32_ignored 1378; CHECK: mov eax,DWORD PTR 1379; CHECK: xor 1380; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}] 1381; CHECK: jne 1382; ARM32-LABEL: test_atomic_rmw_xor_32_ignored 1383; ARM32: dmb 1384; ARM32: ldrex 1385; ARM32: eor 1386; ARM32: strex 1387; ARM32: bne 1388; ARM32: dmb 1389; MIPS32-LABEL: test_atomic_rmw_xor_32_ignored 1390; MIPS32: sync 1391; MIPS32: ll 1392; MIPS32: xor 1393; MIPS32: sc 1394; MIPS32: beq {{.*}}, $zero, {{.*}} 1395; MIPS32: sync 1396 1397;; exchange 1398 1399define internal i32 @test_atomic_rmw_xchg_8(i32 %iptr, i32 %v) { 1400entry: 1401 %trunc = trunc i32 %v to i8 1402 %ptr = inttoptr i32 %iptr to i8* 1403 %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 6, i8* %ptr, i8 %trunc, i32 6) 1404 %a_ext = zext i8 %a to i32 1405 ret i32 %a_ext 1406} 1407; CHECK-LABEL: test_atomic_rmw_xchg_8 1408; CHECK: xchg BYTE PTR {{.*}},[[REG:.*]] 1409; ARM32-LABEL: test_atomic_rmw_xchg_8 1410; ARM32: dmb 1411; ARM32: ldrexb 1412; ARM32: strexb 1413; ARM32: cmp 1414; ARM32: bne 1415; ARM32: dmb 1416; MIPS32-LABEL: test_atomic_rmw_xchg_8 1417; MIPS32: sync 1418; MIPS32: addiu {{.*}}, $zero, -4 1419; MIPS32: and 1420; MIPS32: andi {{.*}}, {{.*}}, 3 1421; MIPS32: sll {{.*}}, {{.*}}, 3 1422; MIPS32: ori {{.*}}, $zero, 255 1423; MIPS32: sllv 1424; MIPS32: nor 1425; MIPS32: sllv 1426; MIPS32: ll 1427; MIPS32: and 1428; MIPS32: or 1429; MIPS32: sc 1430; MIPS32: beq {{.*}}, $zero, {{.*}} 1431; MIPS32: and 1432; MIPS32: srlv 1433; MIPS32: sll {{.*}}, {{.*}}, 24 1434; MIPS32: sra {{.*}}, {{.*}}, 24 1435; MIPS32: sync 1436 1437define internal i32 @test_atomic_rmw_xchg_16(i32 %iptr, i32 %v) { 1438entry: 1439 %trunc = trunc i32 %v to i16 1440 %ptr = inttoptr i32 %iptr to i16* 1441 %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 6, i16* %ptr, i16 %trunc, i32 6) 1442 %a_ext = zext i16 %a to i32 1443 ret i32 %a_ext 1444} 1445; CHECK-LABEL: test_atomic_rmw_xchg_16 1446; CHECK: xchg WORD PTR {{.*}},[[REG:.*]] 1447; ARM32-LABEL: 
test_atomic_rmw_xchg_16 1448; ARM32: dmb 1449; ARM32: ldrexh 1450; ARM32: strexh 1451; ARM32: cmp 1452; ARM32: bne 1453; ARM32: dmb 1454; MIPS32-LABEL: test_atomic_rmw_xchg_16 1455; MIPS32: sync 1456; MIPS32: addiu {{.*}}, $zero, -4 1457; MIPS32: and 1458; MIPS32: andi {{.*}}, {{.*}}, 3 1459; MIPS32: sll {{.*}}, {{.*}}, 3 1460; MIPS32: ori {{.*}}, {{.*}}, 65535 1461; MIPS32: sllv 1462; MIPS32: nor 1463; MIPS32: sllv 1464; MIPS32: ll 1465; MIPS32: and 1466; MIPS32: or 1467; MIPS32: sc 1468; MIPS32: beq {{.*}}, $zero, {{.*}} 1469; MIPS32: and 1470; MIPS32: srlv 1471; MIPS32: sll {{.*}}, {{.*}}, 16 1472; MIPS32: sra {{.*}}, {{.*}}, 16 1473; MIPS32: sync 1474 1475define internal i32 @test_atomic_rmw_xchg_32(i32 %iptr, i32 %v) { 1476entry: 1477 %ptr = inttoptr i32 %iptr to i32* 1478 %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %v, i32 6) 1479 ret i32 %a 1480} 1481; CHECK-LABEL: test_atomic_rmw_xchg_32 1482; CHECK: xchg DWORD PTR {{.*}},[[REG:.*]] 1483; ARM32-LABEL: test_atomic_rmw_xchg_32 1484; ARM32: dmb 1485; ARM32: ldrex 1486; ARM32: strex 1487; ARM32: cmp 1488; ARM32: bne 1489; ARM32: dmb 1490; MIPS32-LABEL: test_atomic_rmw_xchg_32 1491; MIPS32: sync 1492; MIPS32: ll 1493; MIPS32: move 1494; MIPS32: sc 1495; MIPS32: beq {{.*}}, $zero, {{.*}} 1496; MIPS32: sync 1497 1498define internal i64 @test_atomic_rmw_xchg_64(i32 %iptr, i64 %v) { 1499entry: 1500 %ptr = inttoptr i32 %iptr to i64* 1501 %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 6, i64* %ptr, i64 %v, i32 6) 1502 ret i64 %a 1503} 1504; CHECK-LABEL: test_atomic_rmw_xchg_64 1505; CHECK: push ebx 1506; CHECK-DAG: mov edx 1507; CHECK-DAG: mov eax 1508; CHECK-DAG: mov ecx 1509; CHECK-DAG: mov ebx 1510; CHECK: lock cmpxchg8b QWORD PTR [{{e.[^x]}} 1511; CHECK: jne 1512; ARM32-LABEL: test_atomic_rmw_xchg_64 1513; ARM32: dmb 1514; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR:r[0-9]+]]{{[]]}} 1515; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}} 1516; ARM32: cmp 1517; 
ARM32: bne 1518; ARM32: dmb 1519; MIPS32-LABEL: test_atomic_rmw_xchg_64 1520; MIPS32: sync 1521; MIPS32: jal __sync_lock_test_and_set_8 1522; MIPS32: sync 1523 1524define internal i32 @test_atomic_rmw_xchg_32_ignored(i32 %iptr, i32 %v) { 1525entry: 1526 %ptr = inttoptr i32 %iptr to i32* 1527 %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %v, i32 6) 1528 ret i32 %v 1529} 1530; In this case, ignoring the return value doesn't help. The xchg is 1531; used to do an atomic store. 1532; CHECK-LABEL: test_atomic_rmw_xchg_32_ignored 1533; CHECK: xchg DWORD PTR {{.*}},[[REG:.*]] 1534; ARM32-LABEL: test_atomic_rmw_xchg_32_ignored 1535; ARM32: dmb 1536; ARM32: ldrex 1537; ARM32: strex 1538; ARM32: cmp 1539; ARM32: bne 1540; ARM32: dmb 1541; MIPS32-LABEL: test_atomic_rmw_xchg_32_ignored 1542; MIPS32: sync 1543; MIPS32: ll 1544; MIPS32: move 1545; MIPS32: sc 1546; MIPS32: beq {{.*}}, $zero, {{.*}} 1547; MIPS32: sync 1548 1549;;;; Cmpxchg 1550 1551define internal i32 @test_atomic_cmpxchg_8(i32 %iptr, i32 %expected, 1552 i32 %desired) { 1553entry: 1554 %trunc_exp = trunc i32 %expected to i8 1555 %trunc_des = trunc i32 %desired to i8 1556 %ptr = inttoptr i32 %iptr to i8* 1557 %old = call i8 @llvm.nacl.atomic.cmpxchg.i8(i8* %ptr, i8 %trunc_exp, 1558 i8 %trunc_des, i32 6, i32 6) 1559 %old_ext = zext i8 %old to i32 1560 ret i32 %old_ext 1561} 1562; CHECK-LABEL: test_atomic_cmpxchg_8 1563; CHECK: mov eax,{{.*}} 1564; Need to check that eax isn't used as the address register or the desired. 1565; since it is already used as the *expected* register. 
1566; CHECK: lock cmpxchg BYTE PTR [e{{[^a].}}],{{[^a]}}l 1567; ARM32-LABEL: test_atomic_cmpxchg_8 1568; ARM32: dmb 1569; ARM32: ldrexb [[V:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}} 1570; ARM32: lsl [[VV:r[0-9]+]], [[V]], #24 1571; ARM32: cmp [[VV]], {{r[0-9]+}}, lsl #24 1572; ARM32: movne [[SUCCESS:r[0-9]+]], 1573; ARM32: strexbeq [[SUCCESS]], {{r[0-9]+}}, {{[[]}}[[A]]{{[]]}} 1574; ARM32: cmp [[SUCCESS]], #0 1575; ARM32: bne 1576; ARM32: dmb 1577; MIPS32-LABEL: test_atomic_cmpxchg_8 1578; MIPS32: addiu {{.*}}, $zero, -4 1579; MIPS32: and 1580; MIPS32: andi {{.*}}, {{.*}}, 3 1581; MIPS32: sll {{.*}}, {{.*}}, 3 1582; MIPS32: ori {{.*}}, $zero, 255 1583; MIPS32: sllv 1584; MIPS32: nor 1585; MIPS32: andi {{.*}}, {{.*}}, 255 1586; MIPS32: sllv 1587; MIPS32: andi {{.*}}, {{.*}}, 255 1588; MIPS32: sllv 1589; MIPS32: sync 1590; MIPS32: ll 1591; MIPS32: and 1592; MIPS32: bne 1593; MIPS32: and 1594; MIPS32: or 1595; MIPS32: sc 1596; MIPS32: beq $zero, {{.*}}, {{.*}} 1597; MIPS32: srlv 1598; MIPS32: sll {{.*}}, {{.*}}, 24 1599; MIPS32: sra {{.*}}, {{.*}}, 24 1600; MIPS32: sync 1601 1602define internal i32 @test_atomic_cmpxchg_16(i32 %iptr, i32 %expected, 1603 i32 %desired) { 1604entry: 1605 %trunc_exp = trunc i32 %expected to i16 1606 %trunc_des = trunc i32 %desired to i16 1607 %ptr = inttoptr i32 %iptr to i16* 1608 %old = call i16 @llvm.nacl.atomic.cmpxchg.i16(i16* %ptr, i16 %trunc_exp, 1609 i16 %trunc_des, i32 6, i32 6) 1610 %old_ext = zext i16 %old to i32 1611 ret i32 %old_ext 1612} 1613; CHECK-LABEL: test_atomic_cmpxchg_16 1614; CHECK: mov {{ax|eax}},{{.*}} 1615; CHECK: lock cmpxchg WORD PTR [e{{[^a].}}],{{[^a]}}x 1616; ARM32-LABEL: test_atomic_cmpxchg_16 1617; ARM32: dmb 1618; ARM32: ldrexh [[V:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}} 1619; ARM32: lsl [[VV:r[0-9]+]], [[V]], #16 1620; ARM32: cmp [[VV]], {{r[0-9]+}}, lsl #16 1621; ARM32: movne [[SUCCESS:r[0-9]+]], 1622; ARM32: strexheq [[SUCCESS]], {{r[0-9]+}}, {{[[]}}[[A]]{{[]]}} 1623; ARM32: cmp [[SUCCESS]], #0 1624; ARM32: 
bne 1625; ARM32: dmb 1626; MIPS32-LABEL: test_atomic_cmpxchg_16 1627; MIPS32: addiu {{.*}}, $zero, -4 1628; MIPS32: and 1629; MIPS32: andi {{.*}}, {{.*}}, 3 1630; MIPS32: sll {{.*}}, {{.*}}, 3 1631; MIPS32: ori {{.*}}, {{.*}}, 65535 1632; MIPS32: sllv 1633; MIPS32: nor 1634; MIPS32: andi {{.*}}, {{.*}}, 65535 1635; MIPS32: sllv 1636; MIPS32: andi {{.*}}, {{.*}}, 65535 1637; MIPS32: sllv 1638; MIPS32: sync 1639; MIPS32: ll 1640; MIPS32: and 1641; MIPS32: bne 1642; MIPS32: and 1643; MIPS32: or 1644; MIPS32: sc 1645; MIPS32: beq $zero, {{.*}}, {{.*}} 1646; MIPS32: srlv 1647; MIPS32: sll {{.*}}, {{.*}}, 16 1648; MIPS32: sra {{.*}}, {{.*}}, 16 1649; MIPS32: sync 1650 1651define internal i32 @test_atomic_cmpxchg_32(i32 %iptr, i32 %expected, 1652 i32 %desired) { 1653entry: 1654 %ptr = inttoptr i32 %iptr to i32* 1655 %old = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %expected, 1656 i32 %desired, i32 6, i32 6) 1657 ret i32 %old 1658} 1659; CHECK-LABEL: test_atomic_cmpxchg_32 1660; CHECK: mov eax,{{.*}} 1661; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}],e{{[^a]}} 1662; ARM32-LABEL: test_atomic_cmpxchg_32 1663; ARM32: dmb 1664; ARM32: ldrex [[V:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}} 1665; ARM32: cmp [[V]], {{r[0-9]+}} 1666; ARM32: movne [[SUCCESS:r[0-9]+]], 1667; ARM32: strexeq [[SUCCESS]], {{r[0-9]+}}, {{[[]}}[[A]]{{[]]}} 1668; ARM32: cmp [[SUCCESS]], #0 1669; ARM32: bne 1670; ARM32: dmb 1671; MIPS32-LABEL: test_atomic_cmpxchg_32 1672; MIPS32: sync 1673; MIPS32: ll 1674; MIPS32: bne 1675; MIPS32: sc 1676; MIPS32: beq {{.*}}, $zero, {{.*}} 1677; MIPS32: sync 1678 1679define internal i64 @test_atomic_cmpxchg_64(i32 %iptr, i64 %expected, 1680 i64 %desired) { 1681entry: 1682 %ptr = inttoptr i32 %iptr to i64* 1683 %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected, 1684 i64 %desired, i32 6, i32 6) 1685 ret i64 %old 1686} 1687; CHECK-LABEL: test_atomic_cmpxchg_64 1688; CHECK: push ebx 1689; CHECK-DAG: mov edx 1690; CHECK-DAG: mov eax 1691; CHECK-DAG: 
mov ecx 1692; CHECK-DAG: mov ebx 1693; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}+0x0] 1694; edx and eax are already the return registers, so they don't actually 1695; need to be reshuffled via movs. The next test stores the result 1696; somewhere, so in that case they do need to be mov'ed. 1697; ARM32-LABEL: test_atomic_cmpxchg_64 1698; ARM32: dmb 1699; ARM32: ldrexd [[V0:r[0-9]+]], [[V1:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}} 1700; ARM32: cmp [[V0]], {{r[0-9]+}} 1701; ARM32: cmpeq [[V1]], {{r[0-9]+}} 1702; ARM32: movne [[SUCCESS:r[0-9]+]], 1703; ARM32: strexdeq [[SUCCESS]], r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[A]]{{[]]}} 1704; ARM32: cmp [[SUCCESS]], #0 1705; ARM32: bne 1706; ARM32: dmb 1707; MIPS32-LABEL: test_atomic_cmpxchg_64 1708; MIPS32: sync 1709; MIPS32: jal __sync_val_compare_and_swap_8 1710; MIPS32: sync 1711 1712 1713define internal i64 @test_atomic_cmpxchg_64_undef(i32 %iptr, i64 %desired) { 1714entry: 1715 %ptr = inttoptr i32 %iptr to i64* 1716 %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 undef, 1717 i64 %desired, i32 6, i32 6) 1718 ret i64 %old 1719} 1720; CHECK-LABEL: test_atomic_cmpxchg_64_undef 1721; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}+0x0] 1722; ARM32-LABEL: test_atomic_cmpxchg_64_undef 1723; ARM32: mov r{{[0-9]+}}, #0 1724; ARM32: mov r{{[0-9]+}}, #0 1725; ARM32: dmb 1726; ARM32: ldrexd [[V0:r[0-9]+]], [[V1:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}} 1727; ARM32: cmp [[V0]], {{r[0-9]+}} 1728; ARM32: cmpeq [[V1]], {{r[0-9]+}} 1729; ARM32: movne [[SUCCESS:r[0-9]+]], 1730; ARM32: strexdeq [[SUCCESS]], r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[A]]{{[]]}} 1731; ARM32: cmp [[SUCCESS]], #0 1732; ARM32: bne 1733; ARM32: dmb 1734; MIPS32-LABEL: test_atomic_cmpxchg_64_undef 1735; MIPS32: sync 1736; MIPS32: jal __sync_val_compare_and_swap_8 1737; MIPS32: sync 1738 1739; Test a case where %old really does need to be copied out of edx:eax. 
1740define internal void @test_atomic_cmpxchg_64_store( 1741 i32 %ret_iptr, i32 %iptr, i64 %expected, i64 %desired) { 1742entry: 1743 %ptr = inttoptr i32 %iptr to i64* 1744 %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected, 1745 i64 %desired, i32 6, i32 6) 1746 %__6 = inttoptr i32 %ret_iptr to i64* 1747 store i64 %old, i64* %__6, align 1 1748 ret void 1749} 1750; CHECK-LABEL: test_atomic_cmpxchg_64_store 1751; CHECK: push ebx 1752; CHECK-DAG: mov edx 1753; CHECK-DAG: mov eax 1754; CHECK-DAG: mov ecx 1755; CHECK-DAG: mov ebx 1756; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}} 1757; CHECK-DAG: mov {{.*}},edx 1758; CHECK-DAG: mov {{.*}},eax 1759; ARM32-LABEL: test_atomic_cmpxchg_64_store 1760; ARM32: dmb 1761; ARM32: ldrexd [[V0:r[0-9]+]], [[V1:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}} 1762; ARM32: cmp [[V0]], {{r[0-9]+}} 1763; ARM32: cmpeq [[V1]], {{r[0-9]+}} 1764; ARM32: movne [[SUCCESS:r[0-9]+]], 1765; ARM32: strexdeq [[SUCCESS]], r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[A]]{{[]]}} 1766; ARM32: cmp [[SUCCESS]], #0 1767; ARM32: bne 1768; ARM32: dmb 1769; ARM32: str 1770; ARM32: str 1771; MIPS32-LABEL: test_atomic_cmpxchg_64_store 1772; MIPS32: sync 1773; MIPS32: jal __sync_val_compare_and_swap_8 1774; MIPS32: sync 1775 1776 1777; Test with some more register pressure. When we have an alloca, ebp is 1778; used to manage the stack frame, so it cannot be used as a register either. 
1779define internal i64 @test_atomic_cmpxchg_64_alloca(i32 %iptr, i64 %expected, 1780 i64 %desired) { 1781entry: 1782 br label %eblock ; Disable alloca optimization 1783eblock: 1784 %alloca_ptr = alloca i8, i32 16, align 16 1785 %ptr = inttoptr i32 %iptr to i64* 1786 %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected, 1787 i64 %desired, i32 6, i32 6) 1788 store i8 0, i8* %alloca_ptr, align 1 1789 store i8 1, i8* %alloca_ptr, align 1 1790 store i8 2, i8* %alloca_ptr, align 1 1791 store i8 3, i8* %alloca_ptr, align 1 1792 %__6 = ptrtoint i8* %alloca_ptr to i32 1793 call void @use_ptr(i32 %__6) 1794 ret i64 %old 1795} 1796; CHECK-LABEL: test_atomic_cmpxchg_64_alloca 1797; CHECK: push ebx 1798; CHECK-DAG: mov edx 1799; CHECK-DAG: mov eax 1800; CHECK-DAG: mov ecx 1801; CHECK-DAG: mov ebx 1802; Ptr cannot be eax, ebx, ecx, or edx (used up for the expected and desired). 1803; It also cannot be ebp since we use that for alloca. Also make sure it's 1804; not esp, since that's the stack pointer and mucking with it will break 1805; the later use_ptr function call. 1806; That pretty much leaves esi, or edi as the only viable registers. 
1807; CHECK: lock cmpxchg8b QWORD PTR [e{{[ds]}}i] 1808; CHECK: call {{.*}} R_{{.*}} use_ptr 1809; ARM32-LABEL: test_atomic_cmpxchg_64_alloca 1810; ARM32: dmb 1811; ARM32: ldrexd [[V0:r[0-9]+]], [[V1:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}} 1812; ARM32: cmp [[V0]], {{r[0-9]+}} 1813; ARM32: cmpeq [[V1]], {{r[0-9]+}} 1814; ARM32: movne [[SUCCESS:r[0-9]+]], 1815; ARM32: strexdeq [[SUCCESS]], r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[A]]{{[]]}} 1816; ARM32: cmp [[SUCCESS]], #0 1817; ARM32: bne 1818; ARM32: dmb 1819; MIPS32-LABEL: test_atomic_cmpxchg_64_alloca 1820; MIPS32: sync 1821; MIPS32: jal __sync_val_compare_and_swap_8 1822; MIPS32: sync 1823 1824define internal i32 @test_atomic_cmpxchg_32_ignored(i32 %iptr, i32 %expected, 1825 i32 %desired) { 1826entry: 1827 %ptr = inttoptr i32 %iptr to i32* 1828 %ignored = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %expected, 1829 i32 %desired, i32 6, i32 6) 1830 ret i32 0 1831} 1832; CHECK-LABEL: test_atomic_cmpxchg_32_ignored 1833; CHECK: mov eax,{{.*}} 1834; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}] 1835; ARM32-LABEL: test_atomic_cmpxchg_32_ignored 1836; ARM32: dmb 1837; ARM32: ldrex [[V:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}} 1838; ARM32: cmp [[V]], {{r[0-9]+}} 1839; ARM32: movne [[SUCCESS:r[0-9]+]], 1840; ARM32: strexeq [[SUCCESS]] 1841; ARM32: cmp [[SUCCESS]], #0 1842; ARM32: bne 1843; ARM32: dmb 1844; MIPS32-LABEL: test_atomic_cmpxchg_32_ignored 1845; MIPS32: sync 1846; MIPS32: ll 1847; MIPS32: bne 1848; MIPS32: sc 1849; MIPS32: beq {{.*}}, $zero, {{.*}} 1850; MIPS32: sync 1851 1852define internal i64 @test_atomic_cmpxchg_64_ignored(i32 %iptr, i64 %expected, 1853 i64 %desired) { 1854entry: 1855 %ptr = inttoptr i32 %iptr to i64* 1856 %ignored = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected, 1857 i64 %desired, i32 6, i32 6) 1858 ret i64 0 1859} 1860; CHECK-LABEL: test_atomic_cmpxchg_64_ignored 1861; CHECK: push ebx 1862; CHECK-DAG: mov edx 1863; CHECK-DAG: mov eax 1864; CHECK-DAG: mov ecx 1865; 
CHECK-DAG: mov ebx 1866; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}+0x0] 1867; ARM32-LABEL: test_atomic_cmpxchg_64_ignored 1868; ARM32: dmb 1869; ARM32: ldrexd [[V0:r[0-9]+]], [[V1:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}} 1870; ARM32: cmp [[V0]], {{r[0-9]+}} 1871; ARM32: cmpeq [[V1]], {{r[0-9]+}} 1872; ARM32: movne [[SUCCESS:r[0-9]+]], 1873; ARM32: strexdeq [[SUCCESS]], r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[A]]{{[]]}} 1874; ARM32: cmp [[SUCCESS]], #0 1875; ARM32: bne 1876; ARM32: dmb 1877; MIPS32-LABEL: test_atomic_cmpxchg_64_ignored 1878; MIPS32: sync 1879; MIPS32: jal __sync_val_compare_and_swap_8 1880; MIPS32: sync 1881 1882;;;; Fence and is-lock-free. 1883 1884define internal void @test_atomic_fence() { 1885entry: 1886 call void @llvm.nacl.atomic.fence(i32 6) 1887 ret void 1888} 1889; CHECK-LABEL: test_atomic_fence 1890; CHECK: mfence 1891; ARM32-LABEL: test_atomic_fence 1892; ARM32: dmb sy 1893; MIPS32-LABEL: test_atomic_fence 1894; MIPS32: sync 1895 1896define internal void @test_atomic_fence_all() { 1897entry: 1898 call void @llvm.nacl.atomic.fence.all() 1899 ret void 1900} 1901; CHECK-LABEL: test_atomic_fence_all 1902; CHECK: mfence 1903; ARM32-LABEL: test_atomic_fence_all 1904; ARM32: dmb sy 1905; MIPS32-LABEL: test_atomic_fence_all 1906; MIPS32: sync 1907 1908define internal i32 @test_atomic_is_lock_free(i32 %iptr) { 1909entry: 1910 %ptr = inttoptr i32 %iptr to i8* 1911 %i = call i1 @llvm.nacl.atomic.is.lock.free(i32 4, i8* %ptr) 1912 %r = zext i1 %i to i32 1913 ret i32 %r 1914} 1915; CHECK-LABEL: test_atomic_is_lock_free 1916; CHECK: mov {{.*}},0x1 1917; ARM32-LABEL: test_atomic_is_lock_free 1918; ARM32: mov {{.*}}, #1 1919; MIPS32-LABEL: test_atomic_is_lock_free 1920; MIPS32: addiu {{.*}}, $zero, 1 1921 1922define internal i32 @test_not_lock_free(i32 %iptr) { 1923entry: 1924 %ptr = inttoptr i32 %iptr to i8* 1925 %i = call i1 @llvm.nacl.atomic.is.lock.free(i32 7, i8* %ptr) 1926 %r = zext i1 %i to i32 1927 ret i32 %r 1928} 1929; CHECK-LABEL:
test_not_lock_free 1930; CHECK: mov {{.*}},0x0 1931; ARM32-LABEL: test_not_lock_free 1932; ARM32: mov {{.*}}, #0 1933; MIPS32-LABEL: test_not_lock_free 1934; MIPS32: addiu {{.*}}, $zero, 0 1935 1936define internal i32 @test_atomic_is_lock_free_ignored(i32 %iptr) { 1937entry: 1938 %ptr = inttoptr i32 %iptr to i8* 1939 %ignored = call i1 @llvm.nacl.atomic.is.lock.free(i32 4, i8* %ptr) 1940 ret i32 0 1941} 1942; CHECK-LABEL: test_atomic_is_lock_free_ignored 1943; CHECK: mov {{.*}},0x0 1944; This can get optimized out, because it's side-effect-free. 1945; O2-LABEL: test_atomic_is_lock_free_ignored 1946; O2-NOT: mov {{.*}}, 1 1947; O2: mov {{.*}},0x0 1948; ARM32O2-LABEL: test_atomic_is_lock_free_ignored 1949; ARM32O2-NOT: mov {{.*}}, #1 1950; ARM32O2: mov {{.*}}, #0 1951; MIPS32O2-LABEL: test_atomic_is_lock_free_ignored 1952; MIPS32O2-NOT: addiu {{.*}}, $zero, 1 1953; MIPS32O2: addiu {{.*}}, $zero, 0 1954 1955; TODO(jvoung): at some point we can take advantage of the 1956; fact that nacl.atomic.is.lock.free will resolve to a constant 1957; (which adds DCE opportunities). Once we optimize, the test expectations 1958; for this case should change. 1959define internal i32 @test_atomic_is_lock_free_can_dce(i32 %iptr, i32 %x, 1960 i32 %y) { 1961entry: 1962 %ptr = inttoptr i32 %iptr to i8* 1963 %i = call i1 @llvm.nacl.atomic.is.lock.free(i32 4, i8* %ptr) 1964 %i_ext = zext i1 %i to i32 1965 %cmp = icmp eq i32 %i_ext, 1 1966 br i1 %cmp, label %lock_free, label %not_lock_free 1967lock_free: 1968 ret i32 %i_ext 1969 1970not_lock_free: 1971 %z = add i32 %x, %y 1972 ret i32 %z 1973} 1974; CHECK-LABEL: test_atomic_is_lock_free_can_dce 1975; CHECK: mov {{.*}},0x1 1976; CHECK: ret 1977; CHECK: add 1978; CHECK: ret 1979 1980; Test the liveness / register allocation properties of the xadd instruction. 1981; Make sure we model that the Src register is modified and therefore it can't 1982; share a register with an overlapping live range, even if the result of the 1983; xadd instruction is unused.
1984define internal void @test_xadd_regalloc() { 1985entry: 1986 br label %body 1987body: 1988 %i = phi i32 [ 1, %entry ], [ %i_plus_1, %body ] 1989 %g = bitcast [4 x i8]* @SzGlobal32 to i32* 1990 %unused = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %g, i32 %i, i32 6) 1991 %i_plus_1 = add i32 %i, 1 1992 %cmp = icmp eq i32 %i_plus_1, 1001 1993 br i1 %cmp, label %done, label %body 1994done: 1995 ret void 1996} 1997; O2-LABEL: test_xadd_regalloc 1998;;; Some register will be used in the xadd instruction. 1999; O2: lock xadd DWORD PTR {{.*}},[[REG:e..]] 2000;;; Make sure that register isn't used again, e.g. as the induction variable. 2001; O2-NOT: ,[[REG]] 2002; O2: ret 2003 2004; Do the same test for the xchg instruction instead of xadd. 2005define internal void @test_xchg_regalloc() { 2006entry: 2007 br label %body 2008body: 2009 %i = phi i32 [ 1, %entry ], [ %i_plus_1, %body ] 2010 %g = bitcast [4 x i8]* @SzGlobal32 to i32* 2011 %unused = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %g, i32 %i, i32 6) 2012 %i_plus_1 = add i32 %i, 1 2013 %cmp = icmp eq i32 %i_plus_1, 1001 2014 br i1 %cmp, label %done, label %body 2015done: 2016 ret void 2017} 2018; O2-LABEL: test_xchg_regalloc 2019;;; Some register will be used in the xchg instruction. 2020; O2: xchg DWORD PTR {{.*}},[[REG:e..]] 2021;;; Make sure that register isn't used again, e.g. as the induction variable. 2022; O2-NOT: ,[[REG]] 2023; O2: ret 2024 2025; Same test for cmpxchg. 
2026define internal void @test_cmpxchg_regalloc() { 2027entry: 2028 br label %body 2029body: 2030 %i = phi i32 [ 1, %entry ], [ %i_plus_1, %body ] 2031 %g = bitcast [4 x i8]* @SzGlobal32 to i32* 2032 %unused = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %g, i32 %i, i32 %i, i32 6, i32 6) 2033 %i_plus_1 = add i32 %i, 1 2034 %cmp = icmp eq i32 %i_plus_1, 1001 2035 br i1 %cmp, label %done, label %body 2036done: 2037 ret void 2038} 2039; O2-LABEL: test_cmpxchg_regalloc 2040;;; eax and some other register will be used in the cmpxchg instruction. 2041; O2: lock cmpxchg DWORD PTR {{.*}},[[REG:e..]] 2042;;; Make sure eax isn't used again, e.g. as the induction variable. 2043; O2-NOT: ,eax 2044; O2: ret 2045 2046; Same test for cmpxchg8b. 2047define internal void @test_cmpxchg8b_regalloc() { 2048entry: 2049 br label %body 2050body: 2051 %i = phi i32 [ 1, %entry ], [ %i_plus_1, %body ] 2052 %g = bitcast [8 x i8]* @SzGlobal64 to i64* 2053 %i_64 = zext i32 %i to i64 2054 %unused = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %g, i64 %i_64, i64 %i_64, i32 6, i32 6) 2055 %i_plus_1 = add i32 %i, 1 2056 %cmp = icmp eq i32 %i_plus_1, 1001 2057 br i1 %cmp, label %done, label %body 2058done: 2059 ret void 2060} 2061; O2-LABEL: test_cmpxchg8b_regalloc 2062;;; eax and some other register will be used in the cmpxchg instruction. 2063; O2: lock cmpxchg8b QWORD PTR 2064;;; Make sure eax/ecx/edx/ebx aren't used again, e.g. as the induction variable. 2065; O2-NOT: ,{{eax|ecx|edx|ebx}} 2066; O2: pop ebx 2067