; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA

; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=64-ALL
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefix=64-ALL
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefix=64-ALL
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefix=64-ALL
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefix=64-ALL

; The two i386 configurations (default and fast pre-RA scheduling) share one
; common check prefix and each add a configuration-specific one; all five
; x86_64 configurations are checked against a single shared prefix, i.e. they
; are expected to produce the same output.

; External callees used by the tests; only their declarations appear in this
; file.
declare i32 @foo()
declare i32 @bar(i64)

; In the following case when using fast scheduling we get a long chain of
; EFLAGS save/restore due to a sequence of:
;   cmpxchg8b (implicit-def eflags)
;   eax = copy eflags
;   adjcallstackdown32
;   ...
;   use of eax
; During PEI the adjcallstackdown32 is replaced with the subl which
; clobbers eflags, effectively interfering in the liveness interval. However,
; we then promote these copies into independent conditions in GPRs, which
; avoids repeated saving and restoring logic and can be trivially managed by
; the register allocator.
define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
; 32-GOOD-RA-LABEL: test_intervening_call:
; 32-GOOD-RA: # %bb.0: # %entry
; 32-GOOD-RA-NEXT: pushl %ebx
; 32-GOOD-RA-NEXT: pushl %esi
; 32-GOOD-RA-NEXT: pushl %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-GOOD-RA-NEXT: lock cmpxchg8b (%esi)
; 32-GOOD-RA-NEXT: setne %bl
; 32-GOOD-RA-NEXT: subl $8, %esp
; 32-GOOD-RA-NEXT: pushl %edx
; 32-GOOD-RA-NEXT: pushl %eax
; 32-GOOD-RA-NEXT: calll bar
; 32-GOOD-RA-NEXT: addl $16, %esp
; 32-GOOD-RA-NEXT: testb %bl, %bl
; 32-GOOD-RA-NEXT: jne .LBB0_3
; 32-GOOD-RA-NEXT: # %bb.1: # %t
; 32-GOOD-RA-NEXT: movl $42, %eax
; 32-GOOD-RA-NEXT: jmp .LBB0_2
; 32-GOOD-RA-NEXT: .LBB0_3: # %f
; 32-GOOD-RA-NEXT: xorl %eax, %eax
; 32-GOOD-RA-NEXT: .LBB0_2: # %t
; 32-GOOD-RA-NEXT: xorl %edx, %edx
; 32-GOOD-RA-NEXT: addl $4, %esp
; 32-GOOD-RA-NEXT: popl %esi
; 32-GOOD-RA-NEXT: popl %ebx
; 32-GOOD-RA-NEXT: retl
;
; 32-FAST-RA-LABEL: test_intervening_call:
; 32-FAST-RA: # %bb.0: # %entry
; 32-FAST-RA-NEXT: pushl %ebx
; 32-FAST-RA-NEXT: pushl %esi
; 32-FAST-RA-NEXT: pushl %eax
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
; 32-FAST-RA-NEXT: lock cmpxchg8b (%esi)
; 32-FAST-RA-NEXT: setne %bl
; 32-FAST-RA-NEXT: subl $8, %esp
; 32-FAST-RA-NEXT: pushl %edx
; 32-FAST-RA-NEXT: pushl %eax
; 32-FAST-RA-NEXT: calll bar
; 32-FAST-RA-NEXT: addl $16, %esp
; 32-FAST-RA-NEXT: testb %bl, %bl
; 32-FAST-RA-NEXT: jne .LBB0_3
; 32-FAST-RA-NEXT: # %bb.1: # %t
; 32-FAST-RA-NEXT: movl $42, %eax
; 32-FAST-RA-NEXT: jmp .LBB0_2
; 32-FAST-RA-NEXT: .LBB0_3: # %f
; 32-FAST-RA-NEXT: xorl %eax, %eax
; 32-FAST-RA-NEXT: .LBB0_2: # %t
; 32-FAST-RA-NEXT: xorl %edx, %edx
; 32-FAST-RA-NEXT: addl $4, %esp
; 32-FAST-RA-NEXT: popl %esi
; 32-FAST-RA-NEXT: popl %ebx
; 32-FAST-RA-NEXT: retl
;
; 64-ALL-LABEL: test_intervening_call:
; 64-ALL: # %bb.0: # %entry
; 64-ALL-NEXT: pushq %rbx
; 64-ALL-NEXT: movq %rsi, %rax
; 64-ALL-NEXT: lock cmpxchgq %rdx, (%rdi)
; 64-ALL-NEXT: setne %bl
; 64-ALL-NEXT: movq %rax, %rdi
; 64-ALL-NEXT: callq bar
; 64-ALL-NEXT: testb %bl, %bl
; 64-ALL-NEXT: jne .LBB0_2
; 64-ALL-NEXT: # %bb.1: # %t
; 64-ALL-NEXT: movl $42, %eax
; 64-ALL-NEXT: popq %rbx
; 64-ALL-NEXT: retq
; 64-ALL-NEXT: .LBB0_2: # %f
; 64-ALL-NEXT: xorl %eax, %eax
; 64-ALL-NEXT: popq %rbx
; 64-ALL-NEXT: retq
entry:
  ; 64-bit compare-and-exchange on %foo; both result fields are used below.
  %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
  ; Field 0 is the loaded value (passed to @bar), field 1 the success bit.
  %v = extractvalue { i64, i1 } %cx, 0
  %p = extractvalue { i64, i1 } %cx, 1
  ; The call sits between the cmpxchg and the branch on its success bit, so
  ; that bit has to stay live across the call. The expected output above does
  ; this with a setne into a byte register before the call and a testb on the
  ; same register after it.
  call i32 @bar(i64 %v)
  br i1 %p, label %t, label %f

t:
  ; Exchange succeeded.
  ret i64 42

f:
  ; Exchange failed.
  ret i64 0
}

; Interesting in that it produces a clobber without any function calls.
define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) nounwind {
; 32-ALL-LABEL: test_control_flow:
; 32-ALL: # %bb.0: # %entry
; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-ALL-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; 32-ALL-NEXT: jle .LBB1_6
; 32-ALL-NEXT: # %bb.1: # %loop_start
; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-ALL-NEXT: .p2align 4, 0x90
; 32-ALL-NEXT: .LBB1_2: # %while.condthread-pre-split.i
; 32-ALL-NEXT: # =>This Loop Header: Depth=1
; 32-ALL-NEXT: # Child Loop BB1_3 Depth 2
; 32-ALL-NEXT: movl (%ecx), %edx
; 32-ALL-NEXT: .p2align 4, 0x90
; 32-ALL-NEXT: .LBB1_3: # %while.cond.i
; 32-ALL-NEXT: # Parent Loop BB1_2 Depth=1
; 32-ALL-NEXT: # => This Inner Loop Header: Depth=2
; 32-ALL-NEXT: movl %edx, %eax
; 32-ALL-NEXT: xorl %edx, %edx
; 32-ALL-NEXT: testl %eax, %eax
; 32-ALL-NEXT: je .LBB1_3
; 32-ALL-NEXT: # %bb.4: # %while.body.i
; 32-ALL-NEXT: # in Loop: Header=BB1_2 Depth=1
; 32-ALL-NEXT: lock cmpxchgl %eax, (%ecx)
; 32-ALL-NEXT: jne .LBB1_2
; 32-ALL-NEXT: # %bb.5:
; 32-ALL-NEXT: xorl %eax, %eax
; 32-ALL-NEXT: .LBB1_6: # %cond.end
; 32-ALL-NEXT: retl
;
; 64-ALL-LABEL: test_control_flow:
; 64-ALL: # %bb.0: # %entry
; 64-ALL-NEXT: movl %esi, %eax
; 64-ALL-NEXT: cmpl %edx, %esi
; 64-ALL-NEXT: jle .LBB1_5
; 64-ALL-NEXT: .p2align 4, 0x90
; 64-ALL-NEXT: .LBB1_1: # %while.condthread-pre-split.i
; 64-ALL-NEXT: # =>This Loop Header: Depth=1
; 64-ALL-NEXT: # Child Loop BB1_2 Depth 2
; 64-ALL-NEXT: movl (%rdi), %ecx
; 64-ALL-NEXT: .p2align 4, 0x90
; 64-ALL-NEXT: .LBB1_2: # %while.cond.i
; 64-ALL-NEXT: # Parent Loop BB1_1 Depth=1
; 64-ALL-NEXT: # => This Inner Loop Header: Depth=2
; 64-ALL-NEXT: movl %ecx, %eax
; 64-ALL-NEXT: xorl %ecx, %ecx
; 64-ALL-NEXT: testl %eax, %eax
; 64-ALL-NEXT: je .LBB1_2
; 64-ALL-NEXT: # %bb.3: # %while.body.i
; 64-ALL-NEXT: # in Loop: Header=BB1_1 Depth=1
; 64-ALL-NEXT: lock cmpxchgl %eax, (%rdi)
; 64-ALL-NEXT: jne .LBB1_1
; 64-ALL-NEXT: # %bb.4:
; 64-ALL-NEXT: xorl %eax, %eax
; 64-ALL-NEXT: .LBB1_5: # %cond.end
; 64-ALL-NEXT: retq
entry:
  ; Only enter the loop when %i > %j (signed); otherwise fall straight through
  ; to %cond.end, which then returns %i.
  %cmp = icmp sgt i32 %i, %j
  br i1 %cmp, label %loop_start, label %cond.end

loop_start:
  br label %while.condthread-pre-split.i

while.condthread-pre-split.i:
  ; Outer loop header: (re)load the current value at %p.
  %.pr.i = load i32, i32* %p, align 4
  br label %while.cond.i

while.cond.i:
  ; Inner self-loop: the back edge feeds the constant 0 into the phi, and the
  ; block branches back to itself while the phi value is zero.
  %0 = phi i32 [ %.pr.i, %while.condthread-pre-split.i ], [ 0, %while.cond.i ]
  %tobool.i = icmp eq i32 %0, 0
  br i1 %tobool.i, label %while.cond.i, label %while.body.i

while.body.i:
  %.lcssa = phi i32 [ %0, %while.cond.i ]
  ; The cmpxchg uses the same value as both the expected and the new operand;
  ; only its success bit is consumed. Success leaves the loop, failure goes
  ; back to the outer header to reload and retry.
  %1 = cmpxchg i32* %p, i32 %.lcssa, i32 %.lcssa seq_cst seq_cst
  %2 = extractvalue { i32, i1 } %1, 1
  br i1 %2, label %cond.end.loopexit, label %while.condthread-pre-split.i

cond.end.loopexit:
  br label %cond.end

cond.end:
  ; %i when the loop was skipped, 0 when the loop exited via a successful
  ; cmpxchg.
  %cond = phi i32 [ %i, %entry ], [ 0, %cond.end.loopexit ]
  ret i32 %cond
}

; This one is an interesting case because CMOV doesn't have a chain
; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind {
; 32-GOOD-RA-LABEL: test_feed_cmov:
; 32-GOOD-RA: # %bb.0: # %entry
; 32-GOOD-RA-NEXT: pushl %ebx
; 32-GOOD-RA-NEXT: pushl %esi
; 32-GOOD-RA-NEXT: pushl %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx)
; 32-GOOD-RA-NEXT: sete %bl
; 32-GOOD-RA-NEXT: calll foo
; 32-GOOD-RA-NEXT: testb %bl, %bl
; 32-GOOD-RA-NEXT: jne .LBB2_2
; 32-GOOD-RA-NEXT: # %bb.1: # %entry
; 32-GOOD-RA-NEXT: movl %eax, %esi
; 32-GOOD-RA-NEXT: .LBB2_2: # %entry
; 32-GOOD-RA-NEXT: movl %esi, %eax
; 32-GOOD-RA-NEXT: addl $4, %esp
; 32-GOOD-RA-NEXT: popl %esi
; 32-GOOD-RA-NEXT: popl %ebx
; 32-GOOD-RA-NEXT: retl
;
; 32-FAST-RA-LABEL: test_feed_cmov:
; 32-FAST-RA: # %bb.0: # %entry
; 32-FAST-RA-NEXT: pushl %ebx
; 32-FAST-RA-NEXT: pushl %esi
; 32-FAST-RA-NEXT: pushl %eax
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx)
; 32-FAST-RA-NEXT: sete %bl
; 32-FAST-RA-NEXT: calll foo
; 32-FAST-RA-NEXT: testb %bl, %bl
; 32-FAST-RA-NEXT: jne .LBB2_2
; 32-FAST-RA-NEXT: # %bb.1: # %entry
; 32-FAST-RA-NEXT: movl %eax, %esi
; 32-FAST-RA-NEXT: .LBB2_2: # %entry
; 32-FAST-RA-NEXT: movl %esi, %eax
; 32-FAST-RA-NEXT: addl $4, %esp
; 32-FAST-RA-NEXT: popl %esi
; 32-FAST-RA-NEXT: popl %ebx
; 32-FAST-RA-NEXT: retl
;
; 64-ALL-LABEL: test_feed_cmov:
; 64-ALL: # %bb.0: # %entry
; 64-ALL-NEXT: pushq %rbp
; 64-ALL-NEXT: pushq %rbx
; 64-ALL-NEXT: pushq %rax
; 64-ALL-NEXT: movl %edx, %ebx
; 64-ALL-NEXT: movl %esi, %eax
; 64-ALL-NEXT: lock cmpxchgl %edx, (%rdi)
; 64-ALL-NEXT: sete %bpl
; 64-ALL-NEXT: callq foo
; 64-ALL-NEXT: testb %bpl, %bpl
; 64-ALL-NEXT: cmovnel %ebx, %eax
; 64-ALL-NEXT: addq $8, %rsp
; 64-ALL-NEXT: popq %rbx
; 64-ALL-NEXT: popq %rbp
; 64-ALL-NEXT: retq
entry:
  ; Only the success bit of the cmpxchg is used; the loaded value is ignored.
  %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
  %success = extractvalue { i32, i1 } %res, 1

  ; The call separates the cmpxchg from the select that consumes its success
  ; bit, so the bit is live across the call. The expected output above
  ; materializes it with sete before the call and re-tests it afterwards.
  %rhs = call i32 @foo()

  ; Returns %new on success, otherwise the value returned by @foo.
  %ret = select i1 %success, i32 %new, i32 %rhs
  ret i32 %ret
}