1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA
3; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA
4
5; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=64-ALL
6; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefix=64-ALL
7; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefix=64-ALL
8; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefix=64-ALL
9; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefix=64-ALL
10
; External functions with no body in this file. The tests below call them
; between a cmpxchg and the later use of that cmpxchg's success flag.
11declare i32 @foo()
12declare i32 @bar(i64)
13
14; In the following case when using fast scheduling we get a long chain of
15; EFLAGS save/restore due to a sequence of:
16; cmpxchg8b (implicit-def eflags)
17; eax = copy eflags
18; adjcallstackdown32
19; ...
20; use of eax
21; During PEI the adjcallstackdown32 is replaced with the subl which
22; clobbers eflags, effectively interfering in the liveness interval. However,
23; we then promote these copies into independent conditions in GPRs, which avoids
24; repeated saving and restoring logic and can be trivially managed by the
25; register allocator.
26define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
27; 32-GOOD-RA-LABEL: test_intervening_call:
28; 32-GOOD-RA:       # %bb.0: # %entry
29; 32-GOOD-RA-NEXT:    pushl %ebx
30; 32-GOOD-RA-NEXT:    pushl %esi
31; 32-GOOD-RA-NEXT:    pushl %eax
32; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax
33; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %edx
34; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %ebx
35; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx
36; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi
37; 32-GOOD-RA-NEXT:    lock cmpxchg8b (%esi)
38; 32-GOOD-RA-NEXT:    setne %bl
39; 32-GOOD-RA-NEXT:    subl $8, %esp
40; 32-GOOD-RA-NEXT:    pushl %edx
41; 32-GOOD-RA-NEXT:    pushl %eax
42; 32-GOOD-RA-NEXT:    calll bar
43; 32-GOOD-RA-NEXT:    addl $16, %esp
44; 32-GOOD-RA-NEXT:    testb %bl, %bl
45; 32-GOOD-RA-NEXT:    jne .LBB0_3
46; 32-GOOD-RA-NEXT:  # %bb.1: # %t
47; 32-GOOD-RA-NEXT:    movl $42, %eax
48; 32-GOOD-RA-NEXT:    jmp .LBB0_2
49; 32-GOOD-RA-NEXT:  .LBB0_3: # %f
50; 32-GOOD-RA-NEXT:    xorl %eax, %eax
51; 32-GOOD-RA-NEXT:  .LBB0_2: # %t
52; 32-GOOD-RA-NEXT:    xorl %edx, %edx
53; 32-GOOD-RA-NEXT:    addl $4, %esp
54; 32-GOOD-RA-NEXT:    popl %esi
55; 32-GOOD-RA-NEXT:    popl %ebx
56; 32-GOOD-RA-NEXT:    retl
57;
58; 32-FAST-RA-LABEL: test_intervening_call:
59; 32-FAST-RA:       # %bb.0: # %entry
60; 32-FAST-RA-NEXT:    pushl %ebx
61; 32-FAST-RA-NEXT:    pushl %esi
62; 32-FAST-RA-NEXT:    pushl %eax
63; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi
64; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %ebx
65; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx
66; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax
67; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %edx
68; 32-FAST-RA-NEXT:    lock cmpxchg8b (%esi)
69; 32-FAST-RA-NEXT:    setne %bl
70; 32-FAST-RA-NEXT:    subl $8, %esp
71; 32-FAST-RA-NEXT:    pushl %edx
72; 32-FAST-RA-NEXT:    pushl %eax
73; 32-FAST-RA-NEXT:    calll bar
74; 32-FAST-RA-NEXT:    addl $16, %esp
75; 32-FAST-RA-NEXT:    testb %bl, %bl
76; 32-FAST-RA-NEXT:    jne .LBB0_3
77; 32-FAST-RA-NEXT:  # %bb.1: # %t
78; 32-FAST-RA-NEXT:    movl $42, %eax
79; 32-FAST-RA-NEXT:    jmp .LBB0_2
80; 32-FAST-RA-NEXT:  .LBB0_3: # %f
81; 32-FAST-RA-NEXT:    xorl %eax, %eax
82; 32-FAST-RA-NEXT:  .LBB0_2: # %t
83; 32-FAST-RA-NEXT:    xorl %edx, %edx
84; 32-FAST-RA-NEXT:    addl $4, %esp
85; 32-FAST-RA-NEXT:    popl %esi
86; 32-FAST-RA-NEXT:    popl %ebx
87; 32-FAST-RA-NEXT:    retl
88;
89; 64-ALL-LABEL: test_intervening_call:
90; 64-ALL:       # %bb.0: # %entry
91; 64-ALL-NEXT:    pushq %rbx
92; 64-ALL-NEXT:    movq %rsi, %rax
93; 64-ALL-NEXT:    lock cmpxchgq %rdx, (%rdi)
94; 64-ALL-NEXT:    setne %bl
95; 64-ALL-NEXT:    movq %rax, %rdi
96; 64-ALL-NEXT:    callq bar
97; 64-ALL-NEXT:    testb %bl, %bl
98; 64-ALL-NEXT:    jne .LBB0_2
99; 64-ALL-NEXT:  # %bb.1: # %t
100; 64-ALL-NEXT:    movl $42, %eax
101; 64-ALL-NEXT:    popq %rbx
102; 64-ALL-NEXT:    retq
103; 64-ALL-NEXT:  .LBB0_2: # %f
104; 64-ALL-NEXT:    xorl %eax, %eax
105; 64-ALL-NEXT:    popq %rbx
106; 64-ALL-NEXT:    retq
107entry:
  ; 64-bit atomic compare-exchange on %foo: expected value %bar, replacement
  ; %baz. The result is a { loaded value, success flag } pair.
108  %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
  ; %v is the value that was loaded, %p is the i1 success flag.
109  %v = extractvalue { i64, i1 } %cx, 0
110  %p = extractvalue { i64, i1 } %cx, 1
  ; The call is placed between the cmpxchg and the branch on %p, so the
  ; success flag must stay live across it. The expected output saves it with
  ; setne into %bl before the call and re-tests %bl with testb afterwards.
111  call i32 @bar(i64 %v)
112  br i1 %p, label %t, label %f
113
114t:
  ; Taken when the exchange succeeded.
115  ret i64 42
116
117f:
  ; Taken when the exchange failed.
118  ret i64 0
119}
120
121; Interesting because it produces a clobber without any function calls.
122define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) nounwind {
123; 32-ALL-LABEL: test_control_flow:
124; 32-ALL:       # %bb.0: # %entry
125; 32-ALL-NEXT:    movl {{[0-9]+}}(%esp), %eax
126; 32-ALL-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
127; 32-ALL-NEXT:    jle .LBB1_6
128; 32-ALL-NEXT:  # %bb.1: # %loop_start
129; 32-ALL-NEXT:    movl {{[0-9]+}}(%esp), %ecx
130; 32-ALL-NEXT:    .p2align 4, 0x90
131; 32-ALL-NEXT:  .LBB1_2: # %while.condthread-pre-split.i
132; 32-ALL-NEXT:    # =>This Loop Header: Depth=1
133; 32-ALL-NEXT:    # Child Loop BB1_3 Depth 2
134; 32-ALL-NEXT:    movl (%ecx), %edx
135; 32-ALL-NEXT:    .p2align 4, 0x90
136; 32-ALL-NEXT:  .LBB1_3: # %while.cond.i
137; 32-ALL-NEXT:    # Parent Loop BB1_2 Depth=1
138; 32-ALL-NEXT:    # => This Inner Loop Header: Depth=2
139; 32-ALL-NEXT:    movl %edx, %eax
140; 32-ALL-NEXT:    xorl %edx, %edx
141; 32-ALL-NEXT:    testl %eax, %eax
142; 32-ALL-NEXT:    je .LBB1_3
143; 32-ALL-NEXT:  # %bb.4: # %while.body.i
144; 32-ALL-NEXT:    # in Loop: Header=BB1_2 Depth=1
145; 32-ALL-NEXT:    lock cmpxchgl %eax, (%ecx)
146; 32-ALL-NEXT:    jne .LBB1_2
147; 32-ALL-NEXT:  # %bb.5:
148; 32-ALL-NEXT:    xorl %eax, %eax
149; 32-ALL-NEXT:  .LBB1_6: # %cond.end
150; 32-ALL-NEXT:    retl
151;
152; 64-ALL-LABEL: test_control_flow:
153; 64-ALL:       # %bb.0: # %entry
154; 64-ALL-NEXT:    movl %esi, %eax
155; 64-ALL-NEXT:    cmpl %edx, %esi
156; 64-ALL-NEXT:    jle .LBB1_5
157; 64-ALL-NEXT:    .p2align 4, 0x90
158; 64-ALL-NEXT:  .LBB1_1: # %while.condthread-pre-split.i
159; 64-ALL-NEXT:    # =>This Loop Header: Depth=1
160; 64-ALL-NEXT:    # Child Loop BB1_2 Depth 2
161; 64-ALL-NEXT:    movl (%rdi), %ecx
162; 64-ALL-NEXT:    .p2align 4, 0x90
163; 64-ALL-NEXT:  .LBB1_2: # %while.cond.i
164; 64-ALL-NEXT:    # Parent Loop BB1_1 Depth=1
165; 64-ALL-NEXT:    # => This Inner Loop Header: Depth=2
166; 64-ALL-NEXT:    movl %ecx, %eax
167; 64-ALL-NEXT:    xorl %ecx, %ecx
168; 64-ALL-NEXT:    testl %eax, %eax
169; 64-ALL-NEXT:    je .LBB1_2
170; 64-ALL-NEXT:  # %bb.3: # %while.body.i
171; 64-ALL-NEXT:    # in Loop: Header=BB1_1 Depth=1
172; 64-ALL-NEXT:    lock cmpxchgl %eax, (%rdi)
173; 64-ALL-NEXT:    jne .LBB1_1
174; 64-ALL-NEXT:  # %bb.4:
175; 64-ALL-NEXT:    xorl %eax, %eax
176; 64-ALL-NEXT:  .LBB1_5: # %cond.end
177; 64-ALL-NEXT:    retq
178entry:
  ; Skip the loop nest entirely unless %i > %j (signed); in that case the
  ; function returns %i through the phi in %cond.end.
179  %cmp = icmp sgt i32 %i, %j
180  br i1 %cmp, label %loop_start, label %cond.end
181
182loop_start:
183  br label %while.condthread-pre-split.i
184
185while.condthread-pre-split.i:
  ; Outer loop header: reload the current value at %p on every retry.
186  %.pr.i = load i32, i32* %p, align 4
187  br label %while.cond.i
188
189while.cond.i:
  ; Inner loop: %0 is the freshly loaded value on entry from the outer header
  ; and the constant 0 when arriving over the self back edge. The block
  ; branches back to itself while %0 is zero.
190  %0 = phi i32 [ %.pr.i, %while.condthread-pre-split.i ], [ 0, %while.cond.i ]
191  %tobool.i = icmp eq i32 %0, 0
192  br i1 %tobool.i, label %while.cond.i, label %while.body.i
193
194while.body.i:
195  %.lcssa = phi i32 [ %0, %while.cond.i ]
  ; Compare-exchange that uses %.lcssa as both the expected and the new value.
  ; Only the success flag is consumed: success leaves the loop, failure goes
  ; back to the outer header to reload. In the expected output the conditional
  ; jump follows the lock cmpxchgl directly, with no setcc in between.
196  %1 = cmpxchg i32* %p, i32 %.lcssa, i32 %.lcssa seq_cst seq_cst
197  %2 = extractvalue { i32, i1 } %1, 1
198  br i1 %2, label %cond.end.loopexit, label %while.condthread-pre-split.i
199
200cond.end.loopexit:
201  br label %cond.end
202
203cond.end:
  ; Result is %i when the loop was skipped and 0 when it exited via a
  ; successful cmpxchg.
204  %cond = phi i32 [ %i, %entry ], [ 0, %cond.end.loopexit ]
205  ret i32 %cond
206}
207
208; This one is an interesting case because CMOV doesn't have a chain
209; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
210define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind {
211; 32-GOOD-RA-LABEL: test_feed_cmov:
212; 32-GOOD-RA:       # %bb.0: # %entry
213; 32-GOOD-RA-NEXT:    pushl %ebx
214; 32-GOOD-RA-NEXT:    pushl %esi
215; 32-GOOD-RA-NEXT:    pushl %eax
216; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax
217; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi
218; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx
219; 32-GOOD-RA-NEXT:    lock cmpxchgl %esi, (%ecx)
220; 32-GOOD-RA-NEXT:    sete %bl
221; 32-GOOD-RA-NEXT:    calll foo
222; 32-GOOD-RA-NEXT:    testb %bl, %bl
223; 32-GOOD-RA-NEXT:    jne .LBB2_2
224; 32-GOOD-RA-NEXT:  # %bb.1: # %entry
225; 32-GOOD-RA-NEXT:    movl %eax, %esi
226; 32-GOOD-RA-NEXT:  .LBB2_2: # %entry
227; 32-GOOD-RA-NEXT:    movl %esi, %eax
228; 32-GOOD-RA-NEXT:    addl $4, %esp
229; 32-GOOD-RA-NEXT:    popl %esi
230; 32-GOOD-RA-NEXT:    popl %ebx
231; 32-GOOD-RA-NEXT:    retl
232;
233; 32-FAST-RA-LABEL: test_feed_cmov:
234; 32-FAST-RA:       # %bb.0: # %entry
235; 32-FAST-RA-NEXT:    pushl %ebx
236; 32-FAST-RA-NEXT:    pushl %esi
237; 32-FAST-RA-NEXT:    pushl %eax
238; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx
239; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi
240; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax
241; 32-FAST-RA-NEXT:    lock cmpxchgl %esi, (%ecx)
242; 32-FAST-RA-NEXT:    sete %bl
243; 32-FAST-RA-NEXT:    calll foo
244; 32-FAST-RA-NEXT:    testb %bl, %bl
245; 32-FAST-RA-NEXT:    jne .LBB2_2
246; 32-FAST-RA-NEXT:  # %bb.1: # %entry
247; 32-FAST-RA-NEXT:    movl %eax, %esi
248; 32-FAST-RA-NEXT:  .LBB2_2: # %entry
249; 32-FAST-RA-NEXT:    movl %esi, %eax
250; 32-FAST-RA-NEXT:    addl $4, %esp
251; 32-FAST-RA-NEXT:    popl %esi
252; 32-FAST-RA-NEXT:    popl %ebx
253; 32-FAST-RA-NEXT:    retl
254;
255; 64-ALL-LABEL: test_feed_cmov:
256; 64-ALL:       # %bb.0: # %entry
257; 64-ALL-NEXT:    pushq %rbp
258; 64-ALL-NEXT:    pushq %rbx
259; 64-ALL-NEXT:    pushq %rax
260; 64-ALL-NEXT:    movl %edx, %ebx
261; 64-ALL-NEXT:    movl %esi, %eax
262; 64-ALL-NEXT:    lock cmpxchgl %edx, (%rdi)
263; 64-ALL-NEXT:    sete %bpl
264; 64-ALL-NEXT:    callq foo
265; 64-ALL-NEXT:    testb %bpl, %bpl
266; 64-ALL-NEXT:    cmovnel %ebx, %eax
267; 64-ALL-NEXT:    addq $8, %rsp
268; 64-ALL-NEXT:    popq %rbx
269; 64-ALL-NEXT:    popq %rbp
270; 64-ALL-NEXT:    retq
271entry:
  ; 32-bit atomic compare-exchange on %addr: expected value %desired,
  ; replacement %new. Only the i1 success flag of the result is used.
272  %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
273  %success = extractvalue { i32, i1 } %res, 1
274
  ; The call sits between the cmpxchg and the select that consumes %success.
  ; The expected output stores the flag with sete before the call and re-tests
  ; it with testb afterwards.
275  %rhs = call i32 @foo()
276
  ; Return %new on success, otherwise the value returned by @foo. The x86-64
  ; checks expect a cmovnel for this select; the i386 checks expect a
  ; conditional jump around a register move instead.
277  %ret = select i1 %success, i32 %new, i32 %rhs
278  ret i32 %ret
279}
280