; RUN: llc < %s -stack-symbol-ordering=0 -mcpu=generic -mattr=+avx -mtriple=x86_64-apple-darwin10 | FileCheck %s
; RUN: llc < %s -stack-symbol-ordering=0 -mcpu=generic -stackrealign -stack-alignment=32 -mattr=+avx -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=FORCE-ALIGN
; rdar://11496434

; no VLAs or dynamic alignment
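; A 4-byte alloca does not exceed the 16-byte ABI stack alignment, so the
; prologue should not realign %rsp (hence the CHECK-NOT on 'andq' below).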
define i32 @t1() nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  call void @t1_helper(i32* %a) nounwind
  %0 = load i32, i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t1
; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
; CHECK: leaq [[OFFSET:[0-9]*]](%rsp), %rdi
; CHECK: callq _t1_helper
; CHECK: movl [[OFFSET]](%rsp), %eax
; CHECK: addl $13, %eax
}

declare void @t1_helper(i32*)

; dynamic realignment
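; The 32-byte-aligned <8 x float> alloca exceeds the 16-byte ABI stack
; alignment, so the prologue must set up a frame pointer and realign the
; stack with 'andq $-32, %rsp'.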
define i32 @t2() nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %v = alloca <8 x float>, align 32
  call void @t2_helper(i32* %a, <8 x float>* %v) nounwind
  %0 = load i32, i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t2
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: leaq {{[0-9]*}}(%rsp), %rdi
; CHECK: movq %rsp, %rsi
; CHECK: callq _t2_helper
;
; CHECK: movq %rbp, %rsp
; CHECK: popq %rbp
}

declare void @t2_helper(i32*, <8 x float>*)

; VLAs
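; A VLA forces a frame pointer so %rsp can be restored on return, but its
; 16-byte alignment fits the ABI stack alignment, so no realigning 'andq'
; should be emitted (hence the CHECK-NOT below).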
define i32 @t3(i64 %sz) nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %vla = alloca i32, i64 %sz, align 16
  call void @t3_helper(i32* %a, i32* %vla) nounwind
  %0 = load i32, i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t3
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: movq %rbp, %rsp
; CHECK: popq %rbp
}

declare void @t3_helper(i32*, i32*)

; VLAs + Dynamic realignment
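; With both a VLA and an over-aligned alloca, %rsp moves dynamically and
; %rbp-relative offsets are unknown after the 'andq', so a base pointer
; (%rbx) is set up to address the fixed stack objects.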
define i32 @t4(i64 %sz) nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %v = alloca <8 x float>, align 32
  %vla = alloca i32, i64 %sz, align 16
  call void @t4_helper(i32* %a, i32* %vla, <8 x float>* %v) nounwind
  %0 = load i32, i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t4
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: pushq %rbx
; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
; CHECK: movq %rsp, %rbx
;
; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
; CHECK: movq %rbx, %rdx
; CHECK: callq   _t4_helper
;
; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp
; CHECK: popq %rbx
; CHECK: popq %rbp
}

declare void @t4_helper(i32*, i32*, <8 x float>*)

; Spilling an AVX register shouldn't cause dynamic realignment
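; The ymm value is spilled around the first call with an unaligned
; 'vmovups' to a slot at (%rsp), rather than realigning the stack for an
; aligned 32-byte spill slot.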
define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %0 = bitcast float* %f to <8 x float>*
  %1 = load <8 x float>, <8 x float>* %0, align 32
  call void @t5_helper1(i32* %a) nounwind
  call void @t5_helper2(<8 x float> %1) nounwind
  %2 = load i32, i32* %a, align 4
  %add = add nsw i32 %2, 13
  ret i32 %add

; CHECK: _t5
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
; CHECK: vmovups [[AVXREG]], (%rsp)
; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
; CHECK: callq   _t5_helper1
; CHECK: vmovups (%rsp), %ymm0
; CHECK: callq   _t5_helper2
; CHECK: movl {{[0-9]+}}(%rsp), %eax
}

declare void @t5_helper1(i32*)

declare void @t5_helper2(<8 x float>)

; VLAs + Dynamic realignment + Spill
; FIXME: RA has already reserved RBX, so we can't do dynamic realignment.
define i32 @t6(i64 %sz, float* nocapture %f) nounwind uwtable ssp {
entry:
; CHECK: _t6
  %a = alloca i32, align 4
  %0 = bitcast float* %f to <8 x float>*
  %1 = load <8 x float>, <8 x float>* %0, align 32
  %vla = alloca i32, i64 %sz, align 16
  call void @t6_helper1(i32* %a, i32* %vla) nounwind
  call void @t6_helper2(<8 x float> %1) nounwind
  %2 = load i32, i32* %a, align 4
  %add = add nsw i32 %2, 13
  ret i32 %add
}

declare void @t6_helper1(i32*, i32*)

declare void @t6_helper2(<8 x float>)

; VLAs + Dynamic realignment + byval
; The byval argument adjusts the SP after the prologue, but if we are
; restoring the SP from the base pointer we use the original adjustment.
%struct.struct_t = type { [5 x i32] }

define void @t7(i32 %size, %struct.struct_t* byval(%struct.struct_t) align 8 %arg1) nounwind uwtable {
entry:
  %x = alloca i32, align 32
  store i32 0, i32* %x, align 32
  %0 = zext i32 %size to i64
  %vla = alloca i32, i64 %0, align 16
  %1 = load i32, i32* %x, align 32
  call void @bar(i32 %1, i32* %vla, %struct.struct_t* byval(%struct.struct_t) align 8 %arg1)
  ret void

; CHECK: _t7
; CHECK:     pushq %rbp
; CHECK:     movq %rsp, %rbp
; CHECK:     pushq %rbx
; CHECK:     andq $-32, %rsp
; CHECK:     subq ${{[0-9]+}}, %rsp
; CHECK:     movq %rsp, %rbx

; Stack adjustment for byval
; CHECK:     subq {{.*}}, %rsp
; CHECK:     callq _bar
; CHECK-NOT: addq {{.*}}, %rsp
; CHECK:     leaq -8(%rbp), %rsp
; CHECK:     popq %rbx
; CHECK:     popq %rbp
}

declare i8* @llvm.stacksave() nounwind

declare void @bar(i32, i32*, %struct.struct_t* byval(%struct.struct_t) align 8)

declare void @llvm.stackrestore(i8*) nounwind


; Test when forcing stack alignment
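; With -stackrealign -stack-alignment=32, even a plain 4-byte alloca gets
; a 32-byte-realigned stack; compare with t1, which needs no realignment
; under the default RUN line.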
define i32 @t8() nounwind uwtable {
entry:
  %a = alloca i32, align 4
  call void @t1_helper(i32* %a) nounwind
  %0 = load i32, i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; FORCE-ALIGN: _t8
; FORCE-ALIGN:      movq %rsp, %rbp
; FORCE-ALIGN:      andq $-32, %rsp
; FORCE-ALIGN-NEXT: subq $32, %rsp
; FORCE-ALIGN:      movq %rbp, %rsp
; FORCE-ALIGN:      popq %rbp
}

; VLAs
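; Under forced 32-byte alignment the VLA function needs both realignment
; and a base pointer (%rbx), unlike t3 under the default RUN line.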
define i32 @t9(i64 %sz) nounwind uwtable {
entry:
  %a = alloca i32, align 4
  %vla = alloca i32, i64 %sz, align 16
  call void @t3_helper(i32* %a, i32* %vla) nounwind
  %0 = load i32, i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; FORCE-ALIGN: _t9
; FORCE-ALIGN: pushq %rbp
; FORCE-ALIGN: movq %rsp, %rbp
; FORCE-ALIGN: pushq %rbx
; FORCE-ALIGN: andq $-32, %rsp
; FORCE-ALIGN: subq $32, %rsp
; FORCE-ALIGN: movq %rsp, %rbx

; FORCE-ALIGN: leaq -8(%rbp), %rsp
; FORCE-ALIGN: popq %rbx
; FORCE-ALIGN: popq %rbp
}