; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s
; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=cx16 | FileCheck %s -check-prefixes=CHECK32
; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=-cx16 | FileCheck %s -check-prefixes=CHECK32

@var = global i128 0

; cmpxchg is scheduled right after instruction selection, and neither the
; machine scheduler nor the copy coalescer touches physical-register live
; ranges, so we end up with a useless copy.
define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
; CHECK-LABEL: val_compare_and_swap:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rcx, %rbx
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: val_compare_and_swap:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    pushl %edi
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 12
; CHECK32-NEXT:    subl $20, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32-NEXT:    .cfi_offset %esi, -12
; CHECK32-NEXT:    .cfi_offset %edi, -8
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    subl $8, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    calll __sync_val_compare_and_swap_16
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
; CHECK32-NEXT:    addl $44, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -44
; CHECK32-NEXT:    movl (%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK32-NEXT:    movl %edi, 8(%esi)
; CHECK32-NEXT:    movl %edx, 12(%esi)
; CHECK32-NEXT:    movl %eax, (%esi)
; CHECK32-NEXT:    movl %ecx, 4(%esi)
; CHECK32-NEXT:    movl %esi, %eax
; CHECK32-NEXT:    addl $20, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 12
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    popl %edi
; CHECK32-NEXT:    .cfi_def_cfa_offset 4
; CHECK32-NEXT:    retl $4
  %result = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
  %prev = extractvalue { i128, i1 } %result, 0
  ret i128 %prev
}

@cmpxchg16b_global = external dso_local global { i128, i128 }, align 16

;; Make sure we retain the offset of the global variable.
define void @cmpxchg16b_global_with_offset() nounwind {
; CHECK-LABEL: cmpxchg16b_global_with_offset:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    xorl %ebx, %ebx
; CHECK-NEXT:    lock cmpxchg16b _cmpxchg16b_global+16(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: cmpxchg16b_global_with_offset:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    subl $36, %esp
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    pushl $cmpxchg16b_global+16
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    calll __sync_val_compare_and_swap_16
; CHECK32-NEXT:    addl $72, %esp
; CHECK32-NEXT:    retl
;; The x86-64 assertion above pins the +16 displacement by hand (field 1 of
;; the { i128, i128 } global). The RIP scrubbing performed by
;; update_llc_test_checks.py would replace it with a wildcard, which defeats
;; the purpose of this test. When regenerating, pass --no_x86_scrub_rip (if
;; the script version in use supports it) or re-apply the literal offset.
entry:
  %0 = load atomic i128, i128* getelementptr inbounds ({i128, i128}, {i128, i128}* @cmpxchg16b_global, i64 0, i32 1) acquire, align 16
  ret void
}

; The atomicrmw tests below all follow the same pattern: on x86-64 the
; operation is expanded to a cmpxchg16b retry loop, on i386 it becomes a
; __sync_fetch_and_<op>_16 libcall. The fetched value is stored to @var.
define void @fetch_and_nand(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_nand:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB2_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    andq %r8, %rcx
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    andq %rsi, %rbx
; CHECK-NEXT:    notq %rbx
; CHECK-NEXT:    notq %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB2_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: fetch_and_nand:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    subl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32-NEXT:    .cfi_offset %esi, -8
; CHECK32-NEXT:    subl $8, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    calll __sync_fetch_and_nand_16
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
; CHECK32-NEXT:    addl $28, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    movl %esi, var+8
; CHECK32-NEXT:    movl %edx, var+12
; CHECK32-NEXT:    movl %eax, var
; CHECK32-NEXT:    movl %ecx, var+4
; CHECK32-NEXT:    addl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 4
; CHECK32-NEXT:    retl
  %val = atomicrmw nand i128* %p, i128 %bits release
  store i128 %val, i128* @var, align 16
  ret void
}

define void @fetch_and_or(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_or:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB3_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    orq %rsi, %rbx
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    orq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB3_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: fetch_and_or:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    subl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32-NEXT:    .cfi_offset %esi, -8
; CHECK32-NEXT:    subl $8, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    calll __sync_fetch_and_or_16
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
; CHECK32-NEXT:    addl $28, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    movl %esi, var+8
; CHECK32-NEXT:    movl %edx, var+12
; CHECK32-NEXT:    movl %eax, var
; CHECK32-NEXT:    movl %ecx, var+4
; CHECK32-NEXT:    addl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 4
; CHECK32-NEXT:    retl
  %val = atomicrmw or i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}

define void @fetch_and_add(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_add:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB4_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    addq %rsi, %rbx
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    adcq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB4_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: fetch_and_add:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    subl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32-NEXT:    .cfi_offset %esi, -8
; CHECK32-NEXT:    subl $8, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    calll __sync_fetch_and_add_16
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
; CHECK32-NEXT:    addl $28, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    movl %esi, var+8
; CHECK32-NEXT:    movl %edx, var+12
; CHECK32-NEXT:    movl %eax, var
; CHECK32-NEXT:    movl %ecx, var+4
; CHECK32-NEXT:    addl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 4
; CHECK32-NEXT:    retl
  %val = atomicrmw add i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}

define void @fetch_and_sub(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_sub:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB5_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    subq %rsi, %rbx
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    sbbq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB5_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: fetch_and_sub:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    subl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32-NEXT:    .cfi_offset %esi, -8
; CHECK32-NEXT:    subl $8, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    calll __sync_fetch_and_sub_16
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
; CHECK32-NEXT:    addl $28, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    movl %esi, var+8
; CHECK32-NEXT:    movl %edx, var+12
; CHECK32-NEXT:    movl %eax, var
; CHECK32-NEXT:    movl %ecx, var+4
; CHECK32-NEXT:    addl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 4
; CHECK32-NEXT:    retl
  %val = atomicrmw sub i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}

define void @fetch_and_min(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_min:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB6_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovgeq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovgeq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB6_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: fetch_and_min:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    subl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32-NEXT:    .cfi_offset %esi, -8
; CHECK32-NEXT:    subl $8, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    calll __sync_fetch_and_min_16
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
; CHECK32-NEXT:    addl $28, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    movl %esi, var+8
; CHECK32-NEXT:    movl %edx, var+12
; CHECK32-NEXT:    movl %eax, var
; CHECK32-NEXT:    movl %ecx, var+4
; CHECK32-NEXT:    addl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 4
; CHECK32-NEXT:    retl
  %val = atomicrmw min i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}

define void @fetch_and_max(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_max:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB7_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovlq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovlq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB7_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: fetch_and_max:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    subl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32-NEXT:    .cfi_offset %esi, -8
; CHECK32-NEXT:    subl $8, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    calll __sync_fetch_and_max_16
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
; CHECK32-NEXT:    addl $28, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    movl %esi, var+8
; CHECK32-NEXT:    movl %edx, var+12
; CHECK32-NEXT:    movl %eax, var
; CHECK32-NEXT:    movl %ecx, var+4
; CHECK32-NEXT:    addl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 4
; CHECK32-NEXT:    retl
  %val = atomicrmw max i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}

define void @fetch_and_umin(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umin:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB8_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovaeq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovaeq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB8_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: fetch_and_umin:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    subl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32-NEXT:    .cfi_offset %esi, -8
; CHECK32-NEXT:    subl $8, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    calll __sync_fetch_and_umin_16
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
; CHECK32-NEXT:    addl $28, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    movl %esi, var+8
; CHECK32-NEXT:    movl %edx, var+12
; CHECK32-NEXT:    movl %eax, var
; CHECK32-NEXT:    movl %ecx, var+4
; CHECK32-NEXT:    addl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 4
; CHECK32-NEXT:    retl
  %val = atomicrmw umin i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}

define void @fetch_and_umax(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umax:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB9_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovbq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovbq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB9_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: fetch_and_umax:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    subl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32-NEXT:    .cfi_offset %esi, -8
; CHECK32-NEXT:    subl $8, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    calll __sync_fetch_and_umax_16
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
; CHECK32-NEXT:    addl $28, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    movl %esi, var+8
; CHECK32-NEXT:    movl %edx, var+12
; CHECK32-NEXT:    movl %eax, var
; CHECK32-NEXT:    movl %ecx, var+4
; CHECK32-NEXT:    addl $24, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 4
; CHECK32-NEXT:    retl
  %val = atomicrmw umax i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}

; Atomic i128 loads: on x86-64 a cmpxchg16b with all-zero expected and
; replacement values, on i386 a __sync_val_compare_and_swap_16 libcall.
define i128 @atomic_load_seq_cst(i128* %p) {
; CHECK-LABEL: atomic_load_seq_cst:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    xorl %ebx, %ebx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: atomic_load_seq_cst:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    pushl %edi
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 12
; CHECK32-NEXT:    subl $20, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32-NEXT:    .cfi_offset %esi, -12
; CHECK32-NEXT:    .cfi_offset %edi, -8
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    subl $8, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    calll __sync_val_compare_and_swap_16
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
; CHECK32-NEXT:    addl $44, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -44
; CHECK32-NEXT:    movl (%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK32-NEXT:    movl %edi, 8(%esi)
; CHECK32-NEXT:    movl %edx, 12(%esi)
; CHECK32-NEXT:    movl %eax, (%esi)
; CHECK32-NEXT:    movl %ecx, 4(%esi)
; CHECK32-NEXT:    movl %esi, %eax
; CHECK32-NEXT:    addl $20, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 12
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    popl %edi
; CHECK32-NEXT:    .cfi_def_cfa_offset 4
; CHECK32-NEXT:    retl $4
  %r = load atomic i128, i128* %p seq_cst, align 16
  ret i128 %r
}

define i128 @atomic_load_relaxed(i128* %p) {
; CHECK-LABEL: atomic_load_relaxed:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    xorl %ebx, %ebx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: atomic_load_relaxed:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    pushl %edi
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 12
; CHECK32-NEXT:    subl $20, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32-NEXT:    .cfi_offset %esi, -12
; CHECK32-NEXT:    .cfi_offset %edi, -8
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    subl $8, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl $0
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    calll __sync_val_compare_and_swap_16
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
; CHECK32-NEXT:    addl $44, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -44
; CHECK32-NEXT:    movl (%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK32-NEXT:    movl %edi, 8(%esi)
; CHECK32-NEXT:    movl %edx, 12(%esi)
; CHECK32-NEXT:    movl %eax, (%esi)
; CHECK32-NEXT:    movl %ecx, 4(%esi)
; CHECK32-NEXT:    movl %esi, %eax
; CHECK32-NEXT:    addl $20, %esp
; CHECK32-NEXT:    .cfi_def_cfa_offset 12
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    .cfi_def_cfa_offset 8
; CHECK32-NEXT:    popl %edi
; CHECK32-NEXT:    .cfi_def_cfa_offset 4
; CHECK32-NEXT:    retl $4
  %r = load atomic i128, i128* %p monotonic, align 16
  ret i128 %r
}

; Atomic i128 stores: on x86-64 a cmpxchg16b retry loop, on i386 a
; __sync_lock_test_and_set_16 libcall.
define void @atomic_store_seq_cst(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_seq_cst:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB12_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB12_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: atomic_store_seq_cst:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    subl $36, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 36
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    calll __sync_lock_test_and_set_16
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
; CHECK32-NEXT:    addl $56, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -56
; CHECK32-NEXT:    retl
  store atomic i128 %in, i128* %p seq_cst, align 16
  ret void
}

define void @atomic_store_release(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_release:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB13_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB13_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: atomic_store_release:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    subl $36, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 36
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    calll __sync_lock_test_and_set_16
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
; CHECK32-NEXT:    addl $56, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -56
; CHECK32-NEXT:    retl
  store atomic i128 %in, i128* %p release, align 16
  ret void
}

define void @atomic_store_relaxed(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_relaxed:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB14_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB14_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK32-LABEL: atomic_store_relaxed:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    subl $36, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 36
; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
; CHECK32-NEXT:    calll __sync_lock_test_and_set_16
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
; CHECK32-NEXT:    addl $56, %esp
; CHECK32-NEXT:    .cfi_adjust_cfa_offset -56
; CHECK32-NEXT:    retl
; NOTE(review): despite the "relaxed" name this store uses `unordered`,
; whereas @atomic_load_relaxed uses `monotonic` -- confirm this is intended.
  store atomic i128 %in, i128* %p unordered, align 16
  ret void
}