1; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DARWINPCS
2; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AAPCS
3
4declare void @callee(...)
5
; A leading float followed by a two-element float array: the expected output
; adds s1 and s2 into s0, so both elements of %in are expected in the FP
; registers directly after the first argument.
define float @test_hfa_regs(float, [2 x float] %in) {
; CHECK-LABEL: test_hfa_regs:
; CHECK: fadd s0, s1, s2

  %elt0 = extractvalue [2 x float] %in, 0
  %elt1 = extractvalue [2 x float] %in, 1
  %total = fadd float %elt0, %elt1
  ret float %total
}
15
; The two-element float array follows seven float elements and is expected to
; be read back with one paired load from [sp]: it must occupy a contiguous
; block on the stack instead of the default layout of two separate 8-byte
; slots.
define float @test_hfa_block([7 x float], [2 x float] %in) {
; CHECK-LABEL: test_hfa_block:
; CHECK: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp]
; CHECK: fadd s0, [[LHS]], [[RHS]]

  %elt0 = extractvalue [2 x float] %in, 0
  %elt1 = extractvalue [2 x float] %in, 1
  %total = fadd float %elt0, %elt1
  ret float %total
}
28
; An HFA that has been sent to the stack must stop later FP arguments from
; backfilling the VFP registers: %rhs is expected at [sp, #8] rather than in
; s7. The two loads are matched in either order.
define float @test_hfa_block_consume([7 x float], [2 x float] %in, float %rhs) {
; CHECK-LABEL: test_hfa_block_consume:
; CHECK-DAG: ldr [[LHS:s[0-9]+]], [sp]
; CHECK-DAG: ldr [[RHS:s[0-9]+]], [sp, #8]
; CHECK: fadd s0, [[LHS]], [[RHS]]

  %elt0 = extractvalue [2 x float] %in, 0
  %total = fadd float %elt0, %rhs
  ret float %total
}
41
; %in comes after an [8 x float] and a [1 x float]. The expected offset of the
; paired load differs per triple: [sp, #8] for aarch64-linux-gnu and [sp, #4]
; for aarch64-apple-ios7.0 (presumably the [1 x float] ahead of it takes an
; 8-byte slot on the former and only 4 bytes on the latter; confirm against
; the respective ABI documents).
define float @test_hfa_stackalign([8 x float], [1 x float], [2 x float] %in) {
; CHECK-LABEL: test_hfa_stackalign:
; CHECK-AAPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #8]
; CHECK-DARWINPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #4]
; CHECK: fadd s0, [[LHS]], [[RHS]]
  %elt0 = extractvalue [2 x float] %in, 0
  %elt1 = extractvalue [2 x float] %in, 1
  %total = fadd float %elt0, %elt1
  ret float %total
}
52
; Sending an HFA to the stack must leave the placement of integer arguments
; untouched: %res is the second i64 in the signature and is expected to
; arrive in x1, from where it is moved to the return register x0.
define i64 @test_hfa_ignores_gprs([7 x float], [2 x float] %in, i64, i64 %res) {
; CHECK-LABEL: test_hfa_ignores_gprs:
; CHECK: mov x0, x1

  ret i64 %res
}
60
; Darwin's varargs handling must not promote the [2 x float] to double. It
; should instead be placed in an 8-byte aligned slot, where the two element
; stores can be merged into a single integer store; the expected output is one
; x-register store to [sp, #16]. Only the apple-ios configuration has an
; expectation for the store.
define void @test_varargs_stackalign() {
; CHECK-LABEL: test_varargs_stackalign:
; CHECK-DARWINPCS: str {{x[0-9]+}}, [sp, #16]

  call void (...) @callee([3 x float] undef, [2 x float] [float 1.000000e+00, float 2.000000e+00])
  ret void
}
70
; Integer version of the contiguous-block case: %in follows seven i64 elements
; and both of its elements are expected to come from one paired load at [sp].
define i64 @test_smallstruct_block([7 x i64], [2 x i64] %in) {
; CHECK-LABEL: test_smallstruct_block:
; CHECK: ldp [[LHS:x[0-9]+]], [[RHS:x[0-9]+]], [sp]
; CHECK: add x0, [[LHS]], [[RHS]]

  %elt0 = extractvalue [2 x i64] %in, 0
  %elt1 = extractvalue [2 x i64] %in, 1
  %total = add i64 %elt0, %elt1
  ret i64 %total
}
80
; A small struct that went to the stack must stop later integer arguments
; from backfilling the remaining register: %rhs is expected at [sp, #16]
; rather than in x7. The two loads are matched in either order.
define i64 @test_smallstruct_block_consume([7 x i64], [2 x i64] %in, i64 %rhs) {
; CHECK-LABEL: test_smallstruct_block_consume:
; CHECK-DAG: ldr [[LHS:x[0-9]+]], [sp]
; CHECK-DAG: ldr [[RHS:x[0-9]+]], [sp, #16]
; CHECK: add x0, [[LHS]], [[RHS]]

  %elt0 = extractvalue [2 x i64] %in, 0
  %total = add i64 %elt0, %rhs
  ret i64 %total
}
93
; [2 x <1 x i64>] after seven doubles: element 0 is expected to be loaded
; directly from [sp] into d0.
define <1 x i64> @test_v1i64_blocked([7 x double], [2 x <1 x i64>] %in) {
; CHECK-LABEL: test_v1i64_blocked:
; CHECK: ldr d0, [sp]

  %elt0 = extractvalue [2 x <1 x i64>] %in, 0
  ret <1 x i64> %elt0
}
100
; [2 x <1 x double>] after seven doubles: element 0 is expected to be loaded
; directly from [sp] into d0.
define <1 x double> @test_v1f64_blocked([7 x double], [2 x <1 x double>] %in) {
; CHECK-LABEL: test_v1f64_blocked:
; CHECK: ldr d0, [sp]

  %elt0 = extractvalue [2 x <1 x double>] %in, 0
  ret <1 x double> %elt0
}
107
; [2 x <2 x i32>] after seven doubles: element 0 is expected to be loaded
; directly from [sp] into d0.
define <2 x i32> @test_v2i32_blocked([7 x double], [2 x <2 x i32>] %in) {
; CHECK-LABEL: test_v2i32_blocked:
; CHECK: ldr d0, [sp]

  %elt0 = extractvalue [2 x <2 x i32>] %in, 0
  ret <2 x i32> %elt0
}
114
; [2 x <2 x float>] after seven doubles: element 0 is expected to be loaded
; directly from [sp] into d0.
define <2 x float> @test_v2f32_blocked([7 x double], [2 x <2 x float>] %in) {
; CHECK-LABEL: test_v2f32_blocked:
; CHECK: ldr d0, [sp]

  %elt0 = extractvalue [2 x <2 x float>] %in, 0
  ret <2 x float> %elt0
}
121
; [2 x <4 x i16>] after seven doubles: element 0 is expected to be loaded
; directly from [sp] into d0.
define <4 x i16> @test_v4i16_blocked([7 x double], [2 x <4 x i16>] %in) {
; CHECK-LABEL: test_v4i16_blocked:
; CHECK: ldr d0, [sp]

  %elt0 = extractvalue [2 x <4 x i16>] %in, 0
  ret <4 x i16> %elt0
}
128
; [2 x <4 x half>] after seven doubles: element 0 is expected to be loaded
; directly from [sp] into d0.
define <4 x half> @test_v4f16_blocked([7 x double], [2 x <4 x half>] %in) {
; CHECK-LABEL: test_v4f16_blocked:
; CHECK: ldr d0, [sp]

  %elt0 = extractvalue [2 x <4 x half>] %in, 0
  ret <4 x half> %elt0
}
135
; [2 x <8 x i8>] after seven doubles: element 0 is expected to be loaded
; directly from [sp] into d0.
define <8 x i8> @test_v8i8_blocked([7 x double], [2 x <8 x i8>] %in) {
; CHECK-LABEL: test_v8i8_blocked:
; CHECK: ldr d0, [sp]

  %elt0 = extractvalue [2 x <8 x i8>] %in, 0
  ret <8 x i8> %elt0
}
142
; [2 x <2 x i64>] after seven doubles: element 0 is expected to be loaded
; directly from [sp] into q0.
define <2 x i64> @test_v2i64_blocked([7 x double], [2 x <2 x i64>] %in) {
; CHECK-LABEL: test_v2i64_blocked:
; CHECK: ldr q0, [sp]

  %elt0 = extractvalue [2 x <2 x i64>] %in, 0
  ret <2 x i64> %elt0
}
149
; [2 x <2 x double>] after seven doubles: element 0 is expected to be loaded
; directly from [sp] into q0.
define <2 x double> @test_v2f64_blocked([7 x double], [2 x <2 x double>] %in) {
; CHECK-LABEL: test_v2f64_blocked:
; CHECK: ldr q0, [sp]

  %elt0 = extractvalue [2 x <2 x double>] %in, 0
  ret <2 x double> %elt0
}
156
; [2 x <4 x i32>] after seven doubles: element 0 is expected to be loaded
; directly from [sp] into q0.
define <4 x i32> @test_v4i32_blocked([7 x double], [2 x <4 x i32>] %in) {
; CHECK-LABEL: test_v4i32_blocked:
; CHECK: ldr q0, [sp]

  %elt0 = extractvalue [2 x <4 x i32>] %in, 0
  ret <4 x i32> %elt0
}
163
; [2 x <4 x float>] after seven doubles: element 0 is expected to be loaded
; directly from [sp] into q0.
define <4 x float> @test_v4f32_blocked([7 x double], [2 x <4 x float>] %in) {
; CHECK-LABEL: test_v4f32_blocked:
; CHECK: ldr q0, [sp]

  %elt0 = extractvalue [2 x <4 x float>] %in, 0
  ret <4 x float> %elt0
}
170
; [2 x <8 x i16>] after seven doubles: element 0 is expected to be loaded
; directly from [sp] into q0.
define <8 x i16> @test_v8i16_blocked([7 x double], [2 x <8 x i16>] %in) {
; CHECK-LABEL: test_v8i16_blocked:
; CHECK: ldr q0, [sp]

  %elt0 = extractvalue [2 x <8 x i16>] %in, 0
  ret <8 x i16> %elt0
}
177
; [2 x <8 x half>] after seven doubles: element 0 is expected to be loaded
; directly from [sp] into q0.
define <8 x half> @test_v8f16_blocked([7 x double], [2 x <8 x half>] %in) {
; CHECK-LABEL: test_v8f16_blocked:
; CHECK: ldr q0, [sp]

  %elt0 = extractvalue [2 x <8 x half>] %in, 0
  ret <8 x half> %elt0
}
184
; [2 x <16 x i8>] after seven doubles: element 0 is expected to be loaded
; directly from [sp] into q0.
define <16 x i8> @test_v16i8_blocked([7 x double], [2 x <16 x i8>] %in) {
; CHECK-LABEL: test_v16i8_blocked:
; CHECK: ldr q0, [sp]

  %elt0 = extractvalue [2 x <16 x i8>] %in, 0
  ret <16 x i8> %elt0
}
191
; [2 x half] after seven doubles: element 0 is expected to be loaded directly
; from [sp] into h0.
define half @test_f16_blocked([7 x double], [2 x half] %in) {
; CHECK-LABEL: test_f16_blocked:
; CHECK: ldr h0, [sp]

  %elt0 = extractvalue [2 x half] %in, 0
  ret half %elt0
}
198