1; RUN: llc -verify-machineinstrs < %s -enable-tail-merge=0 -mtriple=x86_64-linux | FileCheck %s --check-prefix=LINUX
2; RUN: llc -verify-machineinstrs < %s -enable-tail-merge=0 -mtriple=x86_64-linux-gnux32 | FileCheck %s --check-prefix=LINUX-X32
3; RUN: llc -verify-machineinstrs < %s -enable-tail-merge=0 -mtriple=x86_64-windows | FileCheck %s --check-prefix=WINDOWS
4; RUN: llc -verify-machineinstrs < %s -enable-tail-merge=0 -mtriple=i686-windows | FileCheck %s --check-prefix=X86
5; RUN: llc -verify-machineinstrs < %s -enable-tail-merge=0 -mtriple=i686-windows -mattr=+sse2 | FileCheck %s --check-prefix=X86
6
7; Test that we actually spill and reload all arguments in the variadic argument
8; pack. Doing a normal call will clobber all argument registers, and we will
9; spill around it. A simple adjustment should not require any XMM spills.
10
11declare void @llvm.va_start(i8*) nounwind
12
13declare void(i8*, ...)* @get_f(i8* %this)
14
; Forwarding thunk: fetch the real target from @get_f, then musttail-forward
; the entire variadic pack to it. Because the intervening call to @get_f
; clobbers every argument register, the backend must spill all register-passed
; varargs before the call and reload them before the tail jump (this is what
; the CHECK lines below verify per target).
define void @f_thunk(i8* %this, ...) {
  ; Use va_start so that we exercise the combination.
  %ap = alloca [4 x i8*], align 16
  %ap_i8 = bitcast [4 x i8*]* %ap to i8*
  call void @llvm.va_start(i8* %ap_i8)

  ; This call clobbers all argument registers, forcing the vararg spills.
  %fptr = call void(i8*, ...)*(i8*) @get_f(i8* %this)
  ; Forward the variadic arguments unchanged to the returned function pointer.
  musttail call void (i8*, ...) %fptr(i8* %this, ...)
  ret void
}
25
26; Save and restore 6 GPRs, 8 XMMs, and AL around the call.
27
28; LINUX-LABEL: f_thunk:
29; LINUX-DAG: movq %rdi, {{.*}}
30; LINUX-DAG: movq %rsi, {{.*}}
31; LINUX-DAG: movq %rdx, {{.*}}
32; LINUX-DAG: movq %rcx, {{.*}}
33; LINUX-DAG: movq %r8, {{.*}}
34; LINUX-DAG: movq %r9, {{.*}}
35; LINUX-DAG: movb %al, {{.*}}
36; LINUX-DAG: movaps %xmm0, {{[0-9]*}}(%rsp)
37; LINUX-DAG: movaps %xmm1, {{[0-9]*}}(%rsp)
38; LINUX-DAG: movaps %xmm2, {{[0-9]*}}(%rsp)
39; LINUX-DAG: movaps %xmm3, {{[0-9]*}}(%rsp)
40; LINUX-DAG: movaps %xmm4, {{[0-9]*}}(%rsp)
41; LINUX-DAG: movaps %xmm5, {{[0-9]*}}(%rsp)
42; LINUX-DAG: movaps %xmm6, {{[0-9]*}}(%rsp)
43; LINUX-DAG: movaps %xmm7, {{[0-9]*}}(%rsp)
44; LINUX: callq get_f
45; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm0
46; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm1
47; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm2
48; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm3
49; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm4
50; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm5
51; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm6
52; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm7
53; LINUX-DAG: movq {{.*}}, %rdi
54; LINUX-DAG: movq {{.*}}, %rsi
55; LINUX-DAG: movq {{.*}}, %rdx
56; LINUX-DAG: movq {{.*}}, %rcx
57; LINUX-DAG: movq {{.*}}, %r8
58; LINUX-DAG: movq {{.*}}, %r9
59; LINUX-DAG: movb {{.*}}, %al
60; LINUX: jmpq *{{.*}}  # TAILCALL
61
62; LINUX-X32-LABEL: f_thunk:
63; LINUX-X32-DAG: movl %edi, {{.*}}
64; LINUX-X32-DAG: movq %rsi, {{.*}}
65; LINUX-X32-DAG: movq %rdx, {{.*}}
66; LINUX-X32-DAG: movq %rcx, {{.*}}
67; LINUX-X32-DAG: movq %r8, {{.*}}
68; LINUX-X32-DAG: movq %r9, {{.*}}
69; LINUX-X32-DAG: movb %al, {{.*}}
70; LINUX-X32-DAG: movaps %xmm0, {{[0-9]*}}(%esp)
71; LINUX-X32-DAG: movaps %xmm1, {{[0-9]*}}(%esp)
72; LINUX-X32-DAG: movaps %xmm2, {{[0-9]*}}(%esp)
73; LINUX-X32-DAG: movaps %xmm3, {{[0-9]*}}(%esp)
74; LINUX-X32-DAG: movaps %xmm4, {{[0-9]*}}(%esp)
75; LINUX-X32-DAG: movaps %xmm5, {{[0-9]*}}(%esp)
76; LINUX-X32-DAG: movaps %xmm6, {{[0-9]*}}(%esp)
77; LINUX-X32-DAG: movaps %xmm7, {{[0-9]*}}(%esp)
78; LINUX-X32: callq get_f
79; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm0
80; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm1
81; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm2
82; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm3
83; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm4
84; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm5
85; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm6
86; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm7
87; LINUX-X32-DAG: movl {{.*}}, %edi
88; LINUX-X32-DAG: movq {{.*}}, %rsi
89; LINUX-X32-DAG: movq {{.*}}, %rdx
90; LINUX-X32-DAG: movq {{.*}}, %rcx
91; LINUX-X32-DAG: movq {{.*}}, %r8
92; LINUX-X32-DAG: movq {{.*}}, %r9
93; LINUX-X32-DAG: movb {{.*}}, %al
94; LINUX-X32: jmpq *{{.*}}  # TAILCALL
95
96; WINDOWS-LABEL: f_thunk:
97; WINDOWS-NOT: mov{{.}}ps
98; WINDOWS-DAG: movq %rdx, {{.*}}
99; WINDOWS-DAG: movq %rcx, {{.*}}
100; WINDOWS-DAG: movq %r8, {{.*}}
101; WINDOWS-DAG: movq %r9, {{.*}}
102; WINDOWS-NOT: mov{{.}}ps
103; WINDOWS: callq get_f
104; WINDOWS-NOT: mov{{.}}ps
105; WINDOWS-DAG: movq {{.*}}, %rdx
106; WINDOWS-DAG: movq {{.*}}, %rcx
107; WINDOWS-DAG: movq {{.*}}, %r8
108; WINDOWS-DAG: movq {{.*}}, %r9
109; WINDOWS-NOT: mov{{.}}ps
110; WINDOWS: jmpq *{{.*}} # TAILCALL
111
112; No regparms on normal x86 conventions.
113
114; X86-LABEL: _f_thunk:
115; X86: calll _get_f
116; X86: jmpl *{{.*}} # TAILCALL
117
118; This thunk shouldn't require any spills and reloads, assuming the register
119; allocator knows what it's doing.
120
; Trivial forwarding thunk: the callee is the first argument itself, so there
; is no intervening call and no argument register should need to be spilled —
; codegen should reduce to a single indirect tail jump.
define void @g_thunk(i8* %fptr_i8, ...) {
  %fptr = bitcast i8* %fptr_i8 to void (i8*, ...)*
  musttail call void (i8*, ...) %fptr(i8* %fptr_i8, ...)
  ret void
}
126
127; LINUX-LABEL: g_thunk:
128; LINUX-NOT: movq
129; LINUX: jmpq *%rdi  # TAILCALL
130
131; LINUX-X32-LABEL: g_thunk:
132; LINUX-X32-DAG: movl %edi, %[[REG:e[abcd]x|ebp|esi|edi|r8|r9|r1[0-5]]]
133; LINUX-X32-DAG: jmpq *%[[REG]]  # TAILCALL
134
135; WINDOWS-LABEL: g_thunk:
136; WINDOWS-NOT: movq
137; WINDOWS: jmpq *%rcx # TAILCALL
138
139; X86-LABEL: _g_thunk:
140; X86-NOT: push %ebp
141; X86-NOT: andl {{.*}}, %esp
142; X86: jmpl *%eax # TAILCALL
143
144; Do a simple multi-exit multi-bb test.
145
146%struct.Foo = type { i1, i8*, i8* }
147
148@g = external global i32
149
; Multi-exit thunk: branch on a flag loaded from %this and musttail-forward the
; varargs to one of two function pointers stored in the struct. Exercises
; musttail forwarding across multiple basic blocks with two distinct tail-call
; exits (tail merging is disabled in the RUN lines so both jumps stay visible).
define void @h_thunk(%struct.Foo* %this, ...) {
  ; Load the i1 selector from field 0 of %struct.Foo.
  %cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0
  %cond = load i1, i1* %cond_p
  br i1 %cond, label %then, label %else

then:
  ; Field 1 holds the target for the 'then' path.
  %a_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 1
  %a_i8 = load i8*, i8** %a_p
  %a = bitcast i8* %a_i8 to void (%struct.Foo*, ...)*
  musttail call void (%struct.Foo*, ...) %a(%struct.Foo* %this, ...)
  ret void

else:
  ; Field 2 holds the target for the 'else' path; the store to @g adds a
  ; side effect so the two blocks cannot be trivially merged.
  %b_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 2
  %b_i8 = load i8*, i8** %b_p
  %b = bitcast i8* %b_i8 to void (%struct.Foo*, ...)*
  store i32 42, i32* @g
  musttail call void (%struct.Foo*, ...) %b(%struct.Foo* %this, ...)
  ret void
}
170
171; LINUX-LABEL: h_thunk:
172; LINUX: jne
173; LINUX: jmpq *{{.*}} # TAILCALL
174; LINUX: jmpq *{{.*}} # TAILCALL
175; LINUX-X32-LABEL: h_thunk:
176; LINUX-X32: jne
177; LINUX-X32: jmpq *{{.*}} # TAILCALL
178; LINUX-X32: jmpq *{{.*}} # TAILCALL
179; WINDOWS-LABEL: h_thunk:
180; WINDOWS: jne
181; WINDOWS: jmpq *{{.*}} # TAILCALL
182; WINDOWS: jmpq *{{.*}} # TAILCALL
183; X86-LABEL: _h_thunk:
184; X86: jne
185; X86: jmpl *{{.*}} # TAILCALL
186; X86: jmpl *{{.*}} # TAILCALL
187