; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -aarch64-load-store-opt=0 | FileCheck %s

; External callees used as tail-call targets by the tests below. The numeric
; suffix of each name equals the combined size in bytes of the i64 parameters
; that follow the [8 x i32] parameter (none, one i64, two i64s).
; NOTE(review): presumably the [8 x i32] is there to use up the argument
; registers so that the trailing i64s are passed on the stack -- confirm.
declare void @callee_stack0()
declare void @callee_stack8([8 x i32], i64)
declare void @callee_stack16([8 x i32], i64, i64)

; Neither the caller nor the callee takes any arguments. The call is expected
; to be emitted as a plain branch on the line right after the entry-block
; label, i.e. with no other instruction in between.
define void @caller_to0_from0() nounwind {
; CHECK-LABEL: caller_to0_from0:
; CHECK-NEXT: // BB
  tail call void @callee_stack0()
  ret void
; CHECK-NEXT: b callee_stack0
}

; The caller receives an [8 x i32] plus one i64 but forwards nothing to the
; callee. As in the zero-argument case, the call is expected to become a plain
; branch immediately after the entry-block label.
define void @caller_to0_from8([8 x i32], i64) nounwind {
; CHECK-LABEL: caller_to0_from8:
; CHECK-NEXT: // BB

  tail call void @callee_stack0()
  ret void
; CHECK-NEXT: b callee_stack0
}

; The caller has no incoming arguments, but the callee takes an i64 after the
; [8 x i32]. The expected output is an ordinary branch-and-link call rather
; than a tail-call branch.
define void @caller_to8_from0() {
; CHECK-LABEL: caller_to8_from0:

; Caller isn't going to clean up any extra stack we allocate, so it
; can't be a tail call.
  tail call void @callee_stack8([8 x i32] undef, i64 42)
  ret void
; CHECK: bl callee_stack8
}

; Caller and callee have the same signature ([8 x i32] followed by one i64).
; The expected output stores the outgoing value at [sp] without first
; decrementing sp, and then branches to the callee on the very next line.
define void @caller_to8_from8([8 x i32], i64 %a) {
; CHECK-LABEL: caller_to8_from8:
; CHECK-NOT: sub sp, sp,

; This should reuse our stack area for the 42
  tail call void @callee_stack8([8 x i32] undef, i64 42)
  ret void
; CHECK: str {{x[0-9]+}}, [sp]
; CHECK-NEXT: b callee_stack8
}

; The caller receives one i64 after the [8 x i32], while the callee takes two.
; The expected output is an ordinary branch-and-link call.
define void @caller_to16_from8([8 x i32], i64 %a) {
; CHECK-LABEL: caller_to16_from8:

; Shouldn't be a tail call: we can't use SP+8 because our caller might
; have something there. This may sound obvious, but the implementation does
; some funky aligning.
  tail call void @callee_stack16([8 x i32] undef, i64 undef, i64 undef)
; CHECK: bl callee_stack16
  ret void
}

; The caller receives three i64s after the [8 x i32]; the callee takes only
; one. The expected output stores the outgoing value at [sp] without
; decrementing sp, and then branches to the callee on the very next line.
define void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: caller_to8_from24:
; CHECK-NOT: sub sp, sp

; Reuse our area, putting "42" at incoming sp
  tail call void @callee_stack8([8 x i32] undef, i64 42)
  ret void
; CHECK: str {{x[0-9]+}}, [sp]
; CHECK-NEXT: b callee_stack8
}

; Caller and callee both take two i64s after the [8 x i32], but the call
; passes them in swapped order (%b first, then %a). The expected output is two
; loads followed by two stores of the loaded values, with no sp adjustment
; before the final branch to the callee.
define void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
; CHECK-LABEL: caller_to16_from16:
; CHECK-NOT: sub sp, sp,

; Here we want to make sure that both loads happen before the stores:
; otherwise either %a or %b will be wrongly clobbered.
  tail call void @callee_stack16([8 x i32] undef, i64 %b, i64 %a)
  ret void

; CHECK: ldr [[VAL0:x[0-9]+]],
; CHECK: ldr [[VAL1:x[0-9]+]],
; CHECK: str [[VAL0]],
; CHECK: str [[VAL1]],

; CHECK-NOT: add sp, sp,
; CHECK: b callee_stack16
}

; Global function pointer that indirect_tail loads and calls through.
@func = global void(i32)* null

; Tail call through a function pointer loaded from @func. The expected output
; loads the pointer into an x register, materialises the argument 42 in w0,
; and branches through that same register, with no sp decrement beforehand.
define void @indirect_tail() {
; CHECK-LABEL: indirect_tail:
; CHECK-NOT: sub sp, sp

  %fptr = load void(i32)*, void(i32)** @func
  tail call void %fptr(i32 42)
  ret void
; The register capture must accept any digit, including 0; a narrower class
; would reject registers such as x10 or x20 and fail for no real reason.
; CHECK: ldr [[FPTR:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:func]
; CHECK: mov w0, #{{42|0x2a}}
; CHECK: br [[FPTR]]
}