; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-linux | FileCheck %s --check-prefix=LINUX
; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-windows | FileCheck %s --check-prefix=WINDOWS
; RUN: llc < %s -enable-tail-merge=0 -mtriple=i686-windows | FileCheck %s --check-prefix=X86

; Test that we actually spill and reload all arguments in the variadic argument
; pack. Doing a normal call will clobber all argument registers, and we will
; spill around it. A simple adjustment should not require any XMM spills.

declare void @llvm.va_start(i8*) nounwind

; @get_f returns a pointer to a variadic function taking the same leading
; argument; the thunk below forwards its entire vararg pack to that result.
declare void(i8*, ...)* @get_f(i8* %this)

; Forwarding thunk: the call to @get_f clobbers all argument registers, so
; the incoming register arguments (GPRs, XMMs, and AL on SysV) must be
; spilled before the call and reloaded before the musttail jump.
define void @f_thunk(i8* %this, ...) {
  ; Use va_start so that we exercise the combination.
  %ap = alloca [4 x i8*], align 16
  %ap_i8 = bitcast [4 x i8*]* %ap to i8*
  call void @llvm.va_start(i8* %ap_i8)

  %fptr = call void(i8*, ...)*(i8*) @get_f(i8* %this)
  ; musttail with "..." forwards the caller's variadic arguments unchanged.
  musttail call void (i8*, ...) %fptr(i8* %this, ...)
  ret void
}

; Save and restore 6 GPRs, 8 XMMs, and AL around the call.
; LINUX-LABEL: f_thunk:
; LINUX-DAG: movq %rdi, {{.*}}
; LINUX-DAG: movq %rsi, {{.*}}
; LINUX-DAG: movq %rdx, {{.*}}
; LINUX-DAG: movq %rcx, {{.*}}
; LINUX-DAG: movq %r8, {{.*}}
; LINUX-DAG: movq %r9, {{.*}}
; LINUX-DAG: movb %al, {{.*}}
; LINUX-DAG: movaps %xmm0, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm1, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm2, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm3, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm4, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm5, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm6, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm7, {{[0-9]*}}(%rsp)
; LINUX: callq get_f
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm0
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm1
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm2
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm3
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm4
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm5
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm6
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm7
; LINUX-DAG: movq {{.*}}, %rdi
; LINUX-DAG: movq {{.*}}, %rsi
; LINUX-DAG: movq {{.*}}, %rdx
; LINUX-DAG: movq {{.*}}, %rcx
; LINUX-DAG: movq {{.*}}, %r8
; LINUX-DAG: movq {{.*}}, %r9
; LINUX-DAG: movb {{.*}}, %al
; LINUX: jmpq *{{.*}} # TAILCALL

; Win64 passes only 4 register args and never uses XMMs for varargs, so no
; vector spills/reloads should appear anywhere in the function.
; WINDOWS-LABEL: f_thunk:
; WINDOWS-NOT: mov{{.}}ps
; WINDOWS-DAG: movq %rdx, {{.*}}
; WINDOWS-DAG: movq %rcx, {{.*}}
; WINDOWS-DAG: movq %r8, {{.*}}
; WINDOWS-DAG: movq %r9, {{.*}}
; WINDOWS-NOT: mov{{.}}ps
; WINDOWS: callq get_f
; WINDOWS-NOT: mov{{.}}ps
; WINDOWS-DAG: movq {{.*}}, %rdx
; WINDOWS-DAG: movq {{.*}}, %rcx
; WINDOWS-DAG: movq {{.*}}, %r8
; WINDOWS-DAG: movq {{.*}}, %r9
; WINDOWS-NOT: mov{{.}}ps
; WINDOWS: jmpq *{{.*}} # TAILCALL

; No regparms on normal x86 conventions.
; X86-LABEL: _f_thunk:
; X86: calll _get_f
; X86: jmpl *{{.*}} # TAILCALL

; This thunk shouldn't require any spills and reloads, assuming the register
; allocator knows what it's doing.

; The callee pointer arrives in the first argument register and the forwarded
; arguments are already in place, so the thunk should lower to a bare
; indirect jump through that register.
define void @g_thunk(i8* %fptr_i8, ...) {
  %fptr = bitcast i8* %fptr_i8 to void (i8*, ...)*
  musttail call void (i8*, ...) %fptr(i8* %fptr_i8, ...)
  ret void
}

; LINUX-LABEL: g_thunk:
; LINUX-NOT: movq
; LINUX: jmpq *%rdi # TAILCALL

; WINDOWS-LABEL: g_thunk:
; WINDOWS-NOT: movq
; WINDOWS: jmpq *%rcx # TAILCALL

; X86-LABEL: _g_thunk:
; X86: jmpl *%eax # TAILCALL

; Do a simple multi-exit multi-bb test.

%struct.Foo = type { i1, i8*, i8* }

@g = external global i32

; Branches on %this->cond and musttail-calls either %this->a or %this->b,
; giving two separate exit blocks that each end in an indirect tail call.
; The store to @g keeps the two exits distinct (tail merging is also
; disabled via -enable-tail-merge=0 on the RUN lines).
define void @h_thunk(%struct.Foo* %this, ...) {
  %cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0
  %cond = load i1, i1* %cond_p
  br i1 %cond, label %then, label %else

then:
  %a_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 1
  %a_i8 = load i8*, i8** %a_p
  %a = bitcast i8* %a_i8 to void (%struct.Foo*, ...)*
  musttail call void (%struct.Foo*, ...) %a(%struct.Foo* %this, ...)
  ret void

else:
  %b_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 2
  %b_i8 = load i8*, i8** %b_p
  %b = bitcast i8* %b_i8 to void (%struct.Foo*, ...)*
  store i32 42, i32* @g
  musttail call void (%struct.Foo*, ...) %b(%struct.Foo* %this, ...)
  ret void
}

; LINUX-LABEL: h_thunk:
; LINUX: jne
; LINUX: jmpq *{{.*}} # TAILCALL
; LINUX: jmpq *{{.*}} # TAILCALL
; WINDOWS-LABEL: h_thunk:
; WINDOWS: jne
; WINDOWS: jmpq *{{.*}} # TAILCALL
; WINDOWS: jmpq *{{.*}} # TAILCALL
; X86-LABEL: _h_thunk:
; X86: jne
; X86: jmpl *{{.*}} # TAILCALL
; X86: jmpl *{{.*}} # TAILCALL