; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false -disable-post-ra | FileCheck %s
; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks.

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

; Check that we don't try to tail-call with a non-forwarded sret parameter.
; External callee used by the direct-call tests in this file: it takes a single
; explicit i1024 sret pointer and returns void.
declare void @test_explicit_sret(i1024* sret) #0

; This is the only OK case, where we forward the explicit sret pointer.
; The caller's own sret argument is handed to the callee unchanged, so the
; checks expect a plain branch to the callee right after the function label.

; CHECK-LABEL: _test_tailcall_explicit_sret:
; CHECK-NEXT: b _test_explicit_sret
define void @test_tailcall_explicit_sret(i1024* sret %arg) #0 {
  tail call void @test_explicit_sret(i1024* %arg)
  ret void
}

; Same forwarding as the tail-call case, but with a regular (non-tail) call.
; The checks expect x8 not to be rewritten before the call, and the call to be
; emitted as a branch-and-link followed by a normal return.

; CHECK-LABEL: _test_call_explicit_sret:
; CHECK-NOT: mov  x8
; CHECK: bl _test_explicit_sret
; CHECK: ret
define void @test_call_explicit_sret(i1024* sret %arg) #0 {
  call void @test_explicit_sret(i1024* %arg)
  ret void
}

; The sret pointer handed to the callee is a local alloca that is otherwise
; unused. The call is marked `tail`, but the checks expect x8 to be pointed at
; the stack slot and a real call (bl ... ret) to be emitted instead of a branch.

; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_unused:
; CHECK: mov  x8, sp
; CHECK-NEXT: bl _test_explicit_sret
; CHECK: ret
define void @test_tailcall_explicit_sret_alloca_unused() #0 {
  %l = alloca i1024, align 8
  tail call void @test_explicit_sret(i1024* %l)
  ret void
}

; Like the unused-alloca case, but the alloca is first initialised by copying
; an i1024 from %ptr. The checks pin the first copied word (loaded from [x0],
; stored to [sp]) and then expect x8 = sp followed by a real call, not a branch.

; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_dummyusers:
; CHECK: ldr [[PTRLOAD1:x[0-9]+]], [x0]
; CHECK: str [[PTRLOAD1]], [sp]
; CHECK: mov  x8, sp
; CHECK-NEXT: bl _test_explicit_sret
; CHECK: ret
define void @test_tailcall_explicit_sret_alloca_dummyusers(i1024* %ptr) #0 {
  %l = alloca i1024, align 8
  %r = load i1024, i1024* %ptr, align 8
  store i1024 %r, i1024* %l, align 8
  tail call void @test_explicit_sret(i1024* %l)
  ret void
}

; This is too conservative, but doesn't really happen in practice.
; The sret pointer is derived from an ordinary pointer argument via a GEP of
; one i1024 element (128 bytes), so the checks expect x8 = x0 + 128 and a real
; call rather than a branch.

; CHECK-LABEL: _test_tailcall_explicit_sret_gep:
; CHECK: add  x8, x0, #128
; CHECK-NEXT: bl _test_explicit_sret
; CHECK: ret
define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 {
  %ptr2 = getelementptr i1024, i1024* %ptr, i32 1
  tail call void @test_explicit_sret(i1024* %ptr2)
  ret void
}

; The function returns an i1024 by value, loaded from the local alloca the
; callee filled in. The checks expect the incoming x8 to be saved in another
; register, x8 to be repointed at the stack slot for the call, and afterwards
; the result to be copied from [sp] to the address held in the saved register.
; The store check reuses the register captured by the load check (rather than
; matching any x register) so the load and store are tied together.

; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_returned:
; CHECK: mov  x[[CALLERX8NUM:[0-9]+]], x8
; CHECK: mov  x8, sp
; CHECK-NEXT: bl _test_explicit_sret
; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
; CHECK: str [[CALLERSRET1]], [x[[CALLERX8NUM]]]
; CHECK: ret
define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 {
  %l = alloca i1024, align 8
  tail call void @test_explicit_sret(i1024* %l)
  %r = load i1024, i1024* %l, align 8
  ret i1024 %r
}

; Indirect call through %f, whose pointer parameter is NOT marked sret, from a
; caller that does have an sret argument. The checks expect the caller's x8 and
; the function pointer (x0) to be moved aside, the alloca address to be passed
; as an ordinary first argument in x0, and after the call the result to be
; copied from [sp] to the address held in the saved copy of x8.
; The store check reuses the register captured by the load check (rather than
; matching any x register) so the load and store are tied together.

; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_nosret_arg:
; CHECK-DAG: mov  x[[CALLERX8NUM:[0-9]+]], x8
; CHECK-DAG: mov  [[FPTR:x[0-9]+]], x0
; CHECK: mov  x0, sp
; CHECK-NEXT: blr [[FPTR]]
; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
; CHECK: str [[CALLERSRET1]], [x[[CALLERX8NUM]]]
; CHECK: ret
define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, void (i1024*)* %f) #0 {
  %l = alloca i1024, align 8
  tail call void %f(i1024* %l)
  %r = load i1024, i1024* %l, align 8
  store i1024 %r, i1024* %arg, align 8
  ret void
}

; Indirect call to a function that returns i1024 by value, from a caller with
; an explicit sret argument. The checks expect the caller's x8 to be saved, x8
; to be pointed at a stack slot for the callee's result, the call to go through
; x0, and the result to then be copied from [sp] to the address held in the
; saved copy of x8.
; The store check reuses the register captured by the load check (rather than
; matching any x register) so the load and store are tied together.

; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_:
; CHECK: mov  x[[CALLERX8NUM:[0-9]+]], x8
; CHECK: mov  x8, sp
; CHECK-NEXT: blr x0
; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
; CHECK: str [[CALLERSRET1]], [x[[CALLERX8NUM]]]
; CHECK: ret
define void @test_indirect_tailcall_explicit_sret_(i1024* sret %arg, i1024 ()* %f) #0 {
  %ret = tail call i1024 %f()
  store i1024 %ret, i1024* %arg, align 8
  ret void
}

; Attribute group referenced as #0 by every declaration and definition above.
attributes #0 = { nounwind }
