; RUN: llc < %s -mtriple=x86_64-pc-win32-coreclr | FileCheck %s -check-prefix=WIN_X64
; RUN: llc < %s -mtriple=x86_64-pc-linux         | FileCheck %s -check-prefix=LINUX

; By default, Windows CoreCLR requires an inline prologue stack expansion check
; if 4096 bytes or more are allocated on the stack.

; Prolog stack allocation >= 4096 bytes will require the probe sequence.
;
; For the CoreCLR target the complete inline expansion is checked:
;   * %rcx and %rdx are saved to 8(%rsp) and 16(%rsp) and restored afterwards;
;   * the prospective stack pointer is computed in %rdx (%rsp - %rax) and
;     forced to zero by the cmovb if the subtraction borrowed;
;   * it is compared against the value loaded from %gs:16; when it is not
;     below that value the touch loop is skipped;
;   * otherwise %rdx is rounded down to a multiple of 4096 and the loop walks
;     %rcx downwards in 4096-byte steps, storing a zero byte at each step,
;     until it reaches %rdx.
; For the Linux target the only requirement is that no %gs:16 load appears.
define i32 @main4k() nounwind {
entry:
; WIN_X64-LABEL:main4k:
; WIN_X64: # BB#0:
; WIN_X64:      movl    $4096, %eax
; WIN_X64:      movq    %rcx, 8(%rsp)
; WIN_X64:      movq    %rdx, 16(%rsp)
; WIN_X64:      xorq    %rcx, %rcx
; WIN_X64:      movq    %rsp, %rdx
; WIN_X64:      subq    %rax, %rdx
; WIN_X64:      cmovbq  %rcx, %rdx
; WIN_X64:      movq    %gs:16, %rcx
; WIN_X64:      cmpq    %rcx, %rdx
; WIN_X64:      jae     .LBB0_3
; WIN_X64:# BB#1:
; WIN_X64:      andq    $-4096, %rdx
; WIN_X64:.LBB0_2:
; WIN_X64:      leaq    -4096(%rcx), %rcx
; WIN_X64:      movb    $0, (%rcx)
; WIN_X64:      cmpq    %rcx, %rdx
; WIN_X64:      jne     .LBB0_2
; WIN_X64:.LBB0_3:
; WIN_X64:      movq    8(%rsp), %rcx
; WIN_X64:      movq    16(%rsp), %rdx
; WIN_X64:      subq    %rax, %rsp
; WIN_X64:      xorl    %eax, %eax
; WIN_X64:      addq    $4096, %rsp
; WIN_X64:      retq
; LINUX-LABEL:main4k:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence.
; Case with frame pointer: frame-pointer elimination is disabled through the
; "no-frame-pointer-elim" attribute. The CoreCLR run expects the %rcx save at
; 16(%rsp) (main4k without a frame pointer uses 8(%rsp); presumably the
; frame-pointer push accounts for the extra 8 bytes) followed by the %gs:16
; load. The Linux run must not contain the %gs:16 load.
define i32 @main4k_frame() nounwind "no-frame-pointer-elim"="true" {
entry:
; WIN_X64-LABEL:main4k_frame:
; WIN_X64:      movq    %rcx,   16(%rsp)
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL:main4k_frame:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence.
; Case with INT args: both parameters are integers and both are used by the
; add, so the probe has to save %rcx (checked at 8(%rsp)) before using it as
; a scratch register for the %gs:16 load.
; The label checks pin every directive below to this function's output.
define i32 @main4k_intargs(i32 %x, i32 %y) nounwind {
entry:
; WIN_X64-LABEL:main4k_intargs:
; WIN_X64:      movq    %rcx,   8(%rsp)
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL:main4k_intargs:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  %t = add i32 %x, %y
  ret i32 %t
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence.
; Case with FP regs: both parameters are doubles. The CoreCLR run still
; expects the %rcx save at 8(%rsp) ahead of the %gs:16 load.
; The label checks pin every directive below to this function's output.
define i32 @main4k_fpargs(double %x, double %y) nounwind {
entry:
; WIN_X64-LABEL:main4k_fpargs:
; WIN_X64:      movq    %rcx,   8(%rsp)
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL:main4k_fpargs:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence.
; Case with mixed regs: one double and one integer parameter, with the
; integer returned. Only the presence of the %gs:16 load is checked for
; CoreCLR, and its absence for Linux.
; The label checks pin every directive below to this function's output.
define i32 @main4k_mixargs(double %x, i32 %y) nounwind {
entry:
; WIN_X64-LABEL:main4k_mixargs:
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL:main4k_mixargs:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 %y
}

; Make sure we don't emit the probe for a smaller prolog stack allocation.
; The label checks are required here: a negative check spans from the
; previous positive match to the next one, so without a label anchoring it
; to this function the range would be taken from the preceding function's
; output and this function would never be inspected.
define i32 @main128() nounwind {
entry:
; WIN_X64-LABEL:main128:
; WIN_X64-NOT:  movq    %gs:16, %rcx
; WIN_X64:      retq
; LINUX-LABEL:main128:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [128 x i8]
  ret i32 0
}

; Make sure we don't emit the probe sequence if not on Windows, even if the
; function itself uses the Win64 calling convention. On the CoreCLR target
; the probe (identified by the %gs:16 load) is still expected.
; The label checks pin every directive below to this function's output.
define x86_64_win64cc i32 @main4k_win64() nounwind {
entry:
; WIN_X64-LABEL:main4k_win64:
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL:main4k_win64:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; External callee; main4k_alloca passes it each alloca'd buffer so the two
; calls can serve as ordering anchors in the checked output.
declare i32 @bar(i8*) nounwind

; Within-body inline probe expansion.
; The first alloca has a constant size of 1024 bytes; the second is sized by
; the runtime value %n. The CoreCLR run expects a %gs:16 load into some
; r-register between the two calls to bar, i.e. after the first buffer has
; been handed out and before the second one is used. The Linux run requires
; that no such load appears between the two calls.
; The label checks pin every directive below to this function's output.
define x86_64_win64cc i32 @main4k_alloca(i64 %n) nounwind {
entry:
; WIN_X64-LABEL:main4k_alloca:
; WIN_X64:      callq   bar
; WIN_X64:      movq    %gs:16, {{%r.*}}
; WIN_X64:      callq   bar
; LINUX-LABEL:main4k_alloca:
; LINUX:        callq   bar
; LINUX-NOT:    movq    %gs:16, {{%r.*}}
; LINUX:        callq   bar
  %a = alloca i8, i64 1024
  %ra = call i32 @bar(i8* %a) nounwind
  %b = alloca i8, i64 %n
  %rb = call i32 @bar(i8* %b) nounwind
  %r = add i32 %ra, %rb
  ret i32 %r
}

; Influence of stack-probe-size attribute.
; Note this is not exposed in coreclr.
; The attribute is set to 8192 while the function allocates 4096 bytes, and
; neither target may emit the %gs:16 load.
; The label checks are required here: without them the negative check's
; range would be taken from the preceding function's output and this
; function would never be inspected.
define i32 @test_probe_size() "stack-probe-size"="8192" nounwind {
; WIN_X64-LABEL:test_probe_size:
; WIN_X64-NOT:  movq    %gs:16, %rcx
; WIN_X64:      retq
; LINUX-LABEL:test_probe_size:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}
