1; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -o - %s | FileCheck %s
2
; Layout probe: every wider field is preceded by a single i8, so the offset of
; each wider field exposes the alignment the target's data layout gives it.
; The test_*_align functions below read those offsets back.
%struct = type {
  i8, i64,
  i8, double,
  i8, <2 x float>,
  i8, <4 x float>
}
4
; Returns the byte offset of the i64 field (index 1) of %struct, computed as a
; constant expression from a null base pointer. The only field before it is a
; single i8, so the expected offset of 8 pins i64 to 8-byte alignment.
define i32 @test_i64_align() {
; CHECK-LABEL: test_i64_align:
; CHECK: movs r0, #8
  ret i32 ptrtoint(i64* getelementptr(%struct, %struct* null, i32 0, i32 1) to i32)
}
10
; Returns the byte offset of the double field (index 3) of %struct, computed as
; a constant expression from a null base pointer. The expected offset is 24:
; the i8 at index 2 is followed by padding up to the next 8-byte boundary.
define i32 @test_f64_align() {
; CHECK-LABEL: test_f64_align:
; CHECK: movs r0, #24
  ret i32 ptrtoint(double* getelementptr(%struct, %struct* null, i32 0, i32 3) to i32)
}
16
; Returns the byte offset of the <2 x float> field (index 5) of %struct,
; computed as a constant expression from a null base pointer. The expected
; offset is 40, i.e. the 64-bit vector is placed on an 8-byte boundary after
; the i8 at index 4.
define i32 @test_v2f32_align() {
; CHECK-LABEL: test_v2f32_align:
; CHECK: movs r0, #40
  ret i32 ptrtoint(<2 x float>* getelementptr(%struct, %struct* null, i32 0, i32 5) to i32)
}
22
; Returns the byte offset of the <4 x float> field (index 7) of %struct,
; computed as a constant expression from a null base pointer. The expected
; offset is 64, i.e. the 128-bit vector is placed on a 16-byte boundary after
; the i8 at index 6.
define i32 @test_v4f32_align() {
; CHECK-LABEL: test_v4f32_align:
; CHECK: movs r0, #64
  ret i32 ptrtoint(<4 x float>* getelementptr(%struct, %struct* null, i32 0, i32 7) to i32)
}
28
; The key point here is that an extra register has to be saved so that the
; DPRs end up in an aligned location (as the prologue/epilogue inserter had
; calculated). The inline asm only clobbers r6, d8 and d9; r5 shows up in the
; push list purely as padding, and no explicit SP adjustment is expected on
; either side of the d-register save/restore.
define void @test_dpr_unwind_align() {
; CHECK-LABEL: test_dpr_unwind_align:
; CHECK: push {r5, r6, r7, lr}
; CHECK-NOT: sub sp
; CHECK: vpush {d8, d9}
; CHECK: .cfi_offset d9, -24
; CHECK: .cfi_offset d8, -32
; (remainder of the prologue is not inspected)
; CHECK: bl _test_i64_align
; CHECK-NOT: add sp,
; CHECK: vpop {d8, d9}
; CHECK-NOT: add sp,
; CHECK: pop {r5, r6, r7, pc}

  ; Force r6, d8 and d9 into the callee-saved set without emitting any code.
  call void asm sideeffect "", "~{r6},~{d8},~{d9}"()

  ; Any call will do: it keeps this from being a leaf function and marks the
  ; point between prologue and epilogue in the output.
  %ignored = call i32 @test_i64_align()
  ret void
}
51
; This time there is no viable way to tack callee-saved registers onto the
; push list: a real SP adjustment has to be performed to put d8 and d9 where
; they should be. The expected output therefore has an explicit 4-byte
; "sub sp" just before the vpush, mirrored by an "add sp" just after the vpop.
define void @test_dpr_unwind_align_manually() {
; CHECK-LABEL: test_dpr_unwind_align_manually:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK-NOT: sub sp
; CHECK: push.w {r8, r11}
; CHECK: sub sp, #4
; CHECK: vpush {d8, d9}
; CHECK: .cfi_offset d9, -40
; CHECK: .cfi_offset d8, -48
; (remainder of the prologue is not inspected)
; CHECK: bl _test_i64_align
; CHECK-NOT: add sp,
; CHECK: vpop {d8, d9}
; CHECK: add sp, #4
; CHECK: pop.w {r8, r11}
; CHECK: pop {r4, r5, r6, r7, pc}

  ; Force r4-r8, d8 and d9 into the callee-saved set without emitting any code.
  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{d8},~{d9}"()

  ; Any call will do: it keeps this from being a leaf function and marks the
  ; point between prologue and epilogue in the output.
  %ignored = call i32 @test_i64_align()
  ret void
}
77
; If there is only a CS1 area (no second GPR push), the alignment "sub sp"
; should still be in the right place: directly between the GPR push and the
; vpush. The later 8-byte adjustment after the vpush is a separate one and is
; undone before the vpop.
define void @test_dpr_unwind_align_just_cs1() {
; CHECK-LABEL: test_dpr_unwind_align_just_cs1:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK: sub sp, #4
; CHECK: vpush {d8, d9}
; CHECK: .cfi_offset d9, -32
; CHECK: .cfi_offset d8, -40
; CHECK: sub sp, #8
; (remainder of the prologue is not inspected)
; CHECK: bl _test_i64_align
; CHECK: add sp, #8
; CHECK: vpop {d8, d9}
; CHECK: add sp, #4
; CHECK: pop {r4, r5, r6, r7, pc}

  ; Force r4-r7, d8 and d9 into the callee-saved set without emitting any code.
  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{d8},~{d9}"()

  ; Any call will do: it keeps this from being a leaf function and marks the
  ; point between prologue and epilogue in the output.
  %ignored = call i32 @test_i64_align()
  ret void
}
100
; If there are no DPRs to save, we should not try to align the stack in stages
; anyway: a single 12-byte adjustment after the GPR push is expected, undone by
; a single matching "add sp" before the pop.
define void @test_dpr_unwind_align_no_dprs() {
; CHECK-LABEL: test_dpr_unwind_align_no_dprs:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK: sub sp, #12
; (remainder of the prologue is not inspected)
; CHECK: bl _test_i64_align
; CHECK: add sp, #12
; CHECK: pop {r4, r5, r6, r7, pc}

  ; Force r4-r7 (and no d-registers) into the callee-saved set without emitting
  ; any code.
  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7}"()

  ; Any call will do: it keeps this from being a leaf function and marks the
  ; point between prologue and epilogue in the output.
  %ignored = call i32 @test_i64_align()
  ret void
}
117
; 128-bit vectors should use 128-bit (i.e. correctly aligned) slots on the
; stack. The expected code loads %in from sp+16 with a :128 alignment hint.
; NOTE(review): the two leading arguments are never read; presumably the
; [8 x double] uses up the FP argument registers and the float takes the first
; stack slot, so that %in has to be realigned on the stack -- confirm against
; the v7k calling convention.
define <4 x float> @test_v128_stack_pass([8 x double] %fill_fp_regs, float %fill_first_slot, <4 x float> %in) {
; CHECK-LABEL: test_v128_stack_pass:
; CHECK: add r[[ADDR:[0-9]+]], sp, #16
; CHECK: vld1.64 {d0, d1}, [r[[ADDR]]:128]

  ; Returning the vector forces it to be loaded from its incoming stack slot.
  ret <4 x float> %in
}
127
; External variadic callee used only as a call target below.
declare void @varargs(i32, ...)

; When the callee is variadic we go down a different argument-lowering route,
; but the 128-bit vector should still get a 128-bit aligned slot: the expected
; code stores %in to sp+16 with a :128 alignment hint.
; NOTE(review): the undef operands are padding; presumably the i32 and the
; [3 x i32] use up r0-r3 and the float takes the first outgoing stack slot --
; confirm against the v7k calling convention.
define void @test_v128_stack_pass_varargs(<4 x float> %in) {
; CHECK-LABEL: test_v128_stack_pass_varargs:
; CHECK: add r[[ADDR:[0-9]+]], sp, #16
; CHECK: vst1.64 {d0, d1}, [r[[ADDR]]:128]

  call void(i32, ...) @varargs(i32 undef, [3 x i32] undef,
                               float undef, <4 x float> %in)
  ret void
}
140
; To be compatible with AAPCS's va_start model (store r0-r3 at incoming SP,
; give a single pointer), 64-bit quantities must be passed in an aligned
; register pair. Here the i64 is expected in r2/r3 rather than r1/r2, which
; leaves no core register for the trailing i32: it is loaded from [sp].
define i64 @test_64bit_gpr_align(i32, i64 %r2_r3, i32 %sp) {
; CHECK-LABEL: test_64bit_gpr_align:
; CHECK: ldr [[RHS:r[0-9]+]], [sp]
; CHECK: adds r0, [[RHS]], r2
; CHECK: adc r1, r3, #0

  ; Adding the zero-extended stack argument to the i64 makes the output name
  ; the location of both operands.
  %sp.wide = zext i32 %sp to i64
  %total = add i64 %sp.wide, %r2_r3
  ret i64 %total
}
153