1; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK32
2; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK64
3; RUN: llc -mtriple=x86_64-pc-win32 -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECKWIN64
4
5; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o /dev/null \
6; RUN:     -print-after postrapseudos -filter-print-funcs pr26023 2>&1 \
7; RUN:    | FileCheck %s --check-prefix=OPERAND32
8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o /dev/null \
9; RUN:     -print-after postrapseudos -filter-print-funcs one64_minsize 2>&1 \
10; RUN:    | FileCheck %s --check-prefix=OPERAND64
11
; Baseline: returns the i32 constant 1 with no size-related attribute.
; Both the i686 and the x86-64 Linux checks expect a plain `movl $1, %eax`.
12define i32 @one32_nooptsize() {
13entry:
14  ret i32 1
15
16; When not optimizing for size, use mov.
17; CHECK32-LABEL: one32_nooptsize:
18; CHECK32:       movl $1, %eax
19; CHECK32-NEXT:  retl
20; CHECK64-LABEL: one32_nooptsize:
21; CHECK64:       movl $1, %eax
22; CHECK64-NEXT:  retq
23}
24
; Returns the i32 constant 1 under `optsize`.
; The i686 checks expect the xor+inc pair; the x86-64 checks still expect
; `movl $1, %eax` (see the FIXME in the body).
25define i32 @one32() optsize {
26entry:
27  ret i32 1
28
29; CHECK32-LABEL: one32:
30; CHECK32:       xorl %eax, %eax
31; CHECK32-NEXT:  incl %eax
32; CHECK32-NEXT:  retl
33
34; FIXME: Figure out the best approach in 64-bit mode.
35; CHECK64-LABEL: one32:
36; CHECK64:       movl $1, %eax
37; CHECK64-NEXT:  retq
38}
39
; Same body as @one32, but with `!prof !14` (function_entry_count 0) attached
; instead of the `optsize` attribute. The expected instruction sequences are
; the same as for @one32: xor+inc on i686, movl on x86-64.
40define i32 @one32_pgso() !prof !14 {
41entry:
42  ret i32 1
43
44; CHECK32-LABEL: one32_pgso:
45; CHECK32:       xorl %eax, %eax
46; CHECK32-NEXT:  incl %eax
47; CHECK32-NEXT:  retl
48
49; FIXME: Figure out the best approach in 64-bit mode.
50; CHECK64-LABEL: one32_pgso:
51; CHECK64:       movl $1, %eax
52; CHECK64-NEXT:  retq
53}
54
; Returns the i32 constant 1 under `minsize`. This is the only 32-bit-constant
; test checked on all three triples:
;   - i686 Linux:   xor+inc
;   - x86-64 Linux: pushq/popq with matching CFA-offset adjustments
;   - x86-64 Win32: plain movl
55define i32 @one32_minsize() minsize {
56entry:
57  ret i32 1
58
59; On 32-bit, xor-inc is preferred over push-pop.
60; CHECK32-LABEL: one32_minsize:
61; CHECK32:       xorl %eax, %eax
62; CHECK32-NEXT:  incl %eax
63; CHECK32-NEXT:  retl
64
65; On 64-bit we don't do xor-inc yet, so push-pop it is. Note that we have to
66; pop into a 64-bit register even when we just need 32 bits.
67; CHECK64-LABEL: one32_minsize:
68; CHECK64:       pushq $1
69; CHECK64:       .cfi_adjust_cfa_offset 8
70; CHECK64:       popq %rax
71; CHECK64:       .cfi_adjust_cfa_offset -8
72; CHECK64-NEXT:  retq
73
74; On Win64 we can't adjust the stack unless there's a frame pointer.
75; CHECKWIN64-LABEL: one32_minsize:
76; CHECKWIN64:       movl $1, %eax
77; CHECKWIN64-NEXT:  retq
78}
79
; `minsize` function that combines a 120-byte stack buffer, volatile byte
; accesses at index 119, and an inline asm that needs the constant 5 in a
; register. The checks look at how that 5 is materialized: `movl` on x86-64
; Linux, push/pop on i686. This function is also the one selected by the i686
; llc invocation at the top of the file that prints MIR after postrapseudos,
; which the OPERAND32 lines at the end of the body match against.
80define i32 @pr26023() minsize {
81entry:
82  %x = alloca [120 x i8]
83  %0 = getelementptr inbounds [120 x i8], [120 x i8]* %x, i64 0, i64 0
  ; Empty asm that receives the buffer address and declares a memory clobber.
84  call void asm sideeffect "", "imr,~{memory},~{dirflag},~{fpsr},~{flags}"(i8* %0)
85  %arrayidx = getelementptr inbounds [120 x i8], [120 x i8]* %x, i64 0, i64 119
86  store volatile i8 -2, i8* %arrayidx
  ; Empty asm whose only input is the constant 5 under an "r" (register)
  ; constraint; this is the constant whose materialization is checked below.
87  call void asm sideeffect "", "r,~{dirflag},~{fpsr},~{flags}"(i32 5)
88  %1 = load volatile i8, i8* %arrayidx
89  %conv = sext i8 %1 to i32
90  ret i32 %conv
91
92; The function writes to the redzone, so push/pop cannot be used.
93; CHECK64-LABEL: pr26023:
94; CHECK64:       movl $5, %ecx
95; CHECK64:       retq
96
97; 32-bit X86 doesn't have a redzone.
98; CHECK32-LABEL: pr26023:
99; CHECK32:       pushl $5
100; CHECK32:       popl %ecx
101; CHECK32:       retl
102
103; Check push/pop have implicit def/use of $esp
104; OPERAND32:      PUSH32i8 5, implicit-def $esp, implicit $esp
105; OPERAND32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4
106; OPERAND32-NEXT: renamable $ecx = POP32r implicit-def $esp, implicit $esp
107; OPERAND32-NEXT: CFI_INSTRUCTION adjust_cfa_offset -4
108}
109
110
; Returns the i64 constant 1 under `minsize`.
; Expected: pushq/popq on x86-64 Linux, plain movl on Win64. This function is
; also the one selected by the x86-64 llc invocation at the top of the file
; that prints MIR after postrapseudos, which the OPERAND64 lines match against.
111define i64 @one64_minsize() minsize {
112entry:
113  ret i64 1
114; On 64-bit we don't do xor-inc yet, so push-pop it is.
115; CHECK64-LABEL: one64_minsize:
116; CHECK64:       pushq $1
117; CHECK64:       .cfi_adjust_cfa_offset 8
118; CHECK64:       popq %rax
119; CHECK64:       .cfi_adjust_cfa_offset -8
120; CHECK64-NEXT:  retq
121
122; On Win64 we can't adjust the stack unless there's a frame pointer.
123; CHECKWIN64-LABEL: one64_minsize:
124; CHECKWIN64:       movl $1, %eax
125; CHECKWIN64-NEXT:  retq
126
127; Check push/pop have implicit def/use of $rsp
128; OPERAND64:      PUSH64i8 1, implicit-def $rsp, implicit $rsp
129; OPERAND64-NEXT: CFI_INSTRUCTION adjust_cfa_offset 8
130; OPERAND64-NEXT: $rax = POP64r implicit-def $rsp, implicit $rsp
131; OPERAND64-NEXT: CFI_INSTRUCTION adjust_cfa_offset -8
132; OPERAND64-NEXT: RET 0, $rax
133}
134
; Returns the i32 constant -1 under `optsize`.
; Only the i686 output is checked: xor followed by dec.
135define i32 @minus_one32() optsize {
136entry:
137  ret i32 -1
138
139; CHECK32-LABEL: minus_one32:
140; CHECK32:       xorl %eax, %eax
141; CHECK32-NEXT:  decl %eax
142; CHECK32-NEXT:  retl
143}
144
; Same body as @minus_one32, with `!prof !14` (function_entry_count 0)
; attached instead of `optsize`. The i686 checks expect the same xor+dec pair.
145define i32 @minus_one32_pgso() !prof !14 {
146entry:
147  ret i32 -1
148
149; CHECK32-LABEL: minus_one32_pgso:
150; CHECK32:       xorl %eax, %eax
151; CHECK32-NEXT:  decl %eax
152; CHECK32-NEXT:  retl
153}
154
; Returns the i32 constant -1 under `minsize`.
; Only the i686 output is checked; it expects xor+dec rather than push/pop.
155define i32 @minus_one32_minsize() minsize {
156entry:
157  ret i32 -1
158
159; xor-dec is preferred over push-pop.
160; CHECK32-LABEL: minus_one32_minsize:
161; CHECK32:       xorl %eax, %eax
162; CHECK32-NEXT:  decl %eax
163; CHECK32-NEXT:  retl
164}
165
; Returns the i16 constant 1 under `optsize`.
; The i686 checks expect the 32-bit xor+inc pair on %eax, then a line
; containing `# kill`, then the return.
166define i16 @one16() optsize {
167entry:
168  ret i16 1
169
170; CHECK32-LABEL: one16:
171; CHECK32:       xorl %eax, %eax
172; CHECK32-NEXT:  incl %eax
173; CHECK32-NEXT:  # kill
174; CHECK32-NEXT:  retl
175}
176
; Returns the i16 constant -1 under `optsize`.
; The i686 checks expect the 32-bit xor+dec pair on %eax, then a line
; containing `# kill`, then the return.
177define i16 @minus_one16() optsize {
178entry:
179  ret i16 -1
180
181; CHECK32-LABEL: minus_one16:
182; CHECK32:       xorl %eax, %eax
183; CHECK32-NEXT:  decl %eax
184; CHECK32-NEXT:  # kill
185; CHECK32-NEXT:  retl
186}
187
; Same body as @one16, with `!prof !14` (function_entry_count 0) attached
; instead of `optsize`. The i686 checks expect the same xor+inc sequence.
188define i16 @one16_pgso() !prof !14 {
189entry:
190  ret i16 1
191
192; CHECK32-LABEL: one16_pgso:
193; CHECK32:       xorl %eax, %eax
194; CHECK32-NEXT:  incl %eax
195; CHECK32-NEXT:  # kill
196; CHECK32-NEXT:  retl
197}
198
; Same body as @minus_one16, with `!prof !14` (function_entry_count 0)
; attached instead of `optsize`. The i686 checks expect the same xor+dec
; sequence.
199define i16 @minus_one16_pgso() !prof !14 {
200entry:
201  ret i16 -1
202
203; CHECK32-LABEL: minus_one16_pgso:
204; CHECK32:       xorl %eax, %eax
205; CHECK32-NEXT:  decl %eax
206; CHECK32-NEXT:  # kill
207; CHECK32-NEXT:  retl
208}
209
; Returns the i32 constant -5 under `minsize`.
; Only the i686 output is checked: the immediate is pushed and then popped
; into %eax.
210define i32 @minus_five32() minsize {
211entry:
212  ret i32 -5
213
214; CHECK32-LABEL: minus_five32:
215; CHECK32: pushl $-5
216; CHECK32: popl %eax
217; CHECK32: retl
218}
219
; Returns the i64 constant -5 under `minsize`.
; Only the x86-64 Linux output is checked: pushq/popq into %rax, each followed
; by the corresponding CFA-offset adjustment directive.
220define i64 @minus_five64() minsize {
221entry:
222  ret i64 -5
223
224; CHECK64-LABEL: minus_five64:
225; CHECK64: pushq $-5
226; CHECK64:       .cfi_adjust_cfa_offset 8
227; CHECK64: popq %rax
228; CHECK64:       .cfi_adjust_cfa_offset -8
229; CHECK64: retq
230}
231
; `optsize` function that needs the constant -1 twice: first as the thiscall
; argument to @f, then as the return value, with a register-clobbering inline
; asm in between. The i686 checks expect an xor+dec pair on %ecx before the
; call, a second xor+dec pair on %eax afterwards, and no further mention of
; %eax before the return.
232define i32 @rematerialize_minus_one() optsize {
233entry:
234  ; Materialize -1 (thiscall forces it into %ecx).
235  tail call x86_thiscallcc void @f(i32 -1)
236
237  ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
238  ; spilling it to the stack.
239  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
240
241  ; -1 should be re-materialized here instead of getting spilled above.
242  ret i32 -1
243
244; CHECK32-LABEL: rematerialize_minus_one
245; CHECK32:       xorl %ecx, %ecx
246; CHECK32-NEXT:  decl %ecx
247; CHECK32:       calll
248; CHECK32:       xorl %eax, %eax
249; CHECK32-NEXT:  decl %eax
250; CHECK32-NOT:   %eax
251; CHECK32:       retl
252}
253
; `optsize` function with the same call-then-clobber prefix as
; @rematerialize_minus_one, but here the second -1 is an operand of a select
; whose condition comes from an icmp. The i686 checks expect xor+dec for the
; call argument, and then `movl $-1` with no `xorl` between the setne and the
; cmov.
254define i32 @rematerialize_minus_one_eflags(i32 %x) optsize {
255entry:
256  ; Materialize -1 (thiscall forces it into %ecx).
257  tail call x86_thiscallcc void @f(i32 -1)
258
259  ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
260  ; spilling it to the stack.
261  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
262
263  ; Define eflags.
264  %a = icmp ne i32 %x, 123
265  %b = zext i1 %a to i32
266  ; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
267  ; It must therefore not use the xor-dec lowering.
268  %c = select i1 %a, i32 %b, i32 -1
269  ret i32 %c
270
271; CHECK32-LABEL: rematerialize_minus_one_eflags
272; CHECK32:       xorl %ecx, %ecx
273; CHECK32-NEXT:  decl %ecx
274; CHECK32:       calll
275; CHECK32:       cmpl
276; CHECK32:       setne
277; CHECK32-NOT:   xorl
278; CHECK32:       movl $-1
279; CHECK32:       cmov
280; CHECK32:       retl
281}
282
; Same body as @rematerialize_minus_one, with `!prof !14`
; (function_entry_count 0) attached instead of `optsize`. The i686 checks
; expect the same sequence: xor+dec on %ecx before the call, xor+dec on %eax
; afterwards, and no further mention of %eax before the return.
283define i32 @rematerialize_minus_one_pgso() !prof !14 {
284entry:
285  ; Materialize -1 (thiscall forces it into %ecx).
286  tail call x86_thiscallcc void @f(i32 -1)
287
288  ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
289  ; spilling it to the stack.
290  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
291
292  ; -1 should be re-materialized here instead of getting spilled above.
293  ret i32 -1
294
295; CHECK32-LABEL: rematerialize_minus_one_pgso
296; CHECK32:       xorl %ecx, %ecx
297; CHECK32-NEXT:  decl %ecx
298; CHECK32:       calll
299; CHECK32:       xorl %eax, %eax
300; CHECK32-NEXT:  decl %eax
301; CHECK32-NOT:   %eax
302; CHECK32:       retl
303}
304
; Same body as @rematerialize_minus_one_eflags, with `!prof !14`
; (function_entry_count 0) attached instead of `optsize`. The i686 checks
; expect the same sequence: xor+dec for the call argument, then `movl $-1`
; with no `xorl` between the setne and the cmov.
305define i32 @rematerialize_minus_one_eflags_pgso(i32 %x) !prof !14 {
306entry:
307  ; Materialize -1 (thiscall forces it into %ecx).
308  tail call x86_thiscallcc void @f(i32 -1)
309
310  ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
311  ; spilling it to the stack.
312  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
313
314  ; Define eflags.
315  %a = icmp ne i32 %x, 123
316  %b = zext i1 %a to i32
317  ; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
318  ; It must therefore not use the xor-dec lowering.
319  %c = select i1 %a, i32 %b, i32 -1
320  ret i32 %c
321
322; CHECK32-LABEL: rematerialize_minus_one_eflags_pgso
323; CHECK32:       xorl %ecx, %ecx
324; CHECK32-NEXT:  decl %ecx
325; CHECK32:       calll
326; CHECK32:       cmpl
327; CHECK32:       setne
328; CHECK32-NOT:   xorl
329; CHECK32:       movl $-1
330; CHECK32:       cmov
331; CHECK32:       retl
332}
333
; External thiscall callee used by the rematerialize_* tests; it is only
; declared here, never defined.
334declare x86_thiscallcc void @f(i32)
335
; Module-level profile data. !0 registers a "ProfileSummary" module flag whose
; fields are !2 through !9; !9 holds the detailed summary entries !11-!13.
; !14 is a function entry count of 0 and is what the *_pgso functions in this
; file attach through `!prof !14`.
; NOTE(review): presumably the zero entry count, read against this summary, is
; what makes those functions eligible for profile-guided size optimization;
; confirm against the profile summary analysis.
336!llvm.module.flags = !{!0}
337!0 = !{i32 1, !"ProfileSummary", !1}
338!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
339!2 = !{!"ProfileFormat", !"InstrProf"}
340!3 = !{!"TotalCount", i64 10000}
341!4 = !{!"MaxCount", i64 10}
342!5 = !{!"MaxInternalCount", i64 1}
343!6 = !{!"MaxFunctionCount", i64 1000}
344!7 = !{!"NumCounts", i64 3}
345!8 = !{!"NumFunctions", i64 3}
346!9 = !{!"DetailedSummary", !10}
347!10 = !{!11, !12, !13}
348!11 = !{i32 10000, i64 100, i32 1}
349!12 = !{i32 999000, i64 100, i32 1}
350!13 = !{i32 999999, i64 1, i32 2}
351!14 = !{!"function_entry_count", i64 0}
352