; REQUIRES: x86-registered-target

; Test devirtualization through the thin link and backend, ensuring that
; it is only applied when the type test corresponding to a devirtualization
; dominates an indirect call using the same vtable pointer. Indirect
; call promotion and inlining may introduce a guarded indirect call
; that can be promoted, which uses the same vtable address as the fallback
; indirect call that cannot be devirtualized.

; The code below illustrates the structure when we started with code like:
;
; class A {
; public:
;   virtual int foo() { return 1; }
;   virtual int bar() { return 1; }
; };
; class B : public A {
; public:
;   virtual int foo();
;   virtual int bar();
; };
;
; int baz(A *a) {
;   return a->foo(); // ICP profile says most calls are to B::foo()
; }
;
; int B::foo() {
;   return bar();
; }
;
; After the compile step, which will perform ICP and a round of inlining, we
; have something like:
; int baz(A *a) {
;   if (&a->foo == &B::foo)  // Compare inserted by ICP
;     return ((B*)a)->bar(); // Inlined from promoted direct call to B::foo()
;   else
;     return a->foo();
; }
;
; The inlined code sequence will have a type test against "_ZTS1B",
; which will allow us to devirtualize the indirect call ((B*)a)->bar() to
; B::bar(). Both that type test and the one for the fallback a->foo() indirect
; call will use the same vtable pointer. Without a dominance check, we could
; incorrectly devirtualize a->foo() to B::foo().

; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s

; Legacy PM
; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. \
; RUN:   -whole-program-visibility \
; RUN:   -o %t3 \
; RUN:   -r=%t.o,_Z3bazP1A,px \
; RUN:   -r=%t.o,_ZN1A3fooEv, \
; RUN:   -r=%t.o,_ZN1A3barEv, \
; RUN:   -r=%t.o,_ZN1B3fooEv, \
; RUN:   -r=%t.o,_ZN1B3barEv, \
; RUN:   -r=%t.o,_ZTV1A, \
; RUN:   -r=%t.o,_ZTV1B, \
; RUN:   -r=%t.o,_ZN1A3fooEv, \
; RUN:   -r=%t.o,_ZN1A3barEv, \
; RUN:   -r=%t.o,_ZN1B3fooEv, \
; RUN:   -r=%t.o,_ZN1B3barEv, \
; RUN:   -r=%t.o,_ZTV1A,px \
; RUN:   -r=%t.o,_ZTV1B,px 2>&1 | FileCheck %s --check-prefix=REMARK
; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR

; New PM
; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \
; RUN:   -whole-program-visibility \
; RUN:   -o %t3 \
; RUN:   -r=%t.o,_Z3bazP1A,px \
; RUN:   -r=%t.o,_ZN1A3fooEv, \
; RUN:   -r=%t.o,_ZN1A3barEv, \
; RUN:   -r=%t.o,_ZN1B3fooEv, \
; RUN:   -r=%t.o,_ZN1B3barEv, \
; RUN:   -r=%t.o,_ZTV1A, \
; RUN:   -r=%t.o,_ZTV1B, \
; RUN:   -r=%t.o,_ZN1A3fooEv, \
; RUN:   -r=%t.o,_ZN1A3barEv, \
; RUN:   -r=%t.o,_ZN1B3fooEv, \
; RUN:   -r=%t.o,_ZN1B3barEv, \
; RUN:   -r=%t.o,_ZTV1A,px \
; RUN:   -r=%t.o,_ZTV1B,px 2>&1 | FileCheck %s --check-prefix=REMARK
; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR

; We should only devirtualize the inlined call to bar().
; REMARK-NOT: single-impl: devirtualized a call to _ZN1B3fooEv
; REMARK: single-impl: devirtualized a call to _ZN1B3barEv
; REMARK-NOT: single-impl: devirtualized a call to _ZN1B3fooEv

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"

%class.A = type { i32 (...)** }
%class.B = type { %class.A }

; Both vtables hold four i8* slots: null, undef, then the foo and bar entries.
; @_ZTV1A carries only the _ZTS1A type id, while @_ZTV1B carries both _ZTS1A
; and _ZTS1B (see !0 and !1 at the end of the file).
@_ZTV1A = linkonce_odr hidden unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%class.A*)* @_ZN1A3fooEv to i8*), i8* bitcast (i32 (%class.A*)* @_ZN1A3barEv to i8*)] }, align 8, !type !0
@_ZTV1B = hidden unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%class.B*)* @_ZN1B3fooEv to i8*), i8* bitcast (i32 (%class.B*)* @_ZN1B3barEv to i8*)] }, align 8, !type !0, !type !1

; baz(A*): a single %vtable value loaded in the entry block feeds the type test
; in entry, the type test in if.true.direct_targ, and both indirect calls.
define hidden i32 @_Z3bazP1A(%class.A* %a) local_unnamed_addr {
entry:
  ; Load the vtable pointer from the object and assume that it passes the type
  ; test for _ZTS1A.
  %0 = bitcast %class.A* %a to i32 (%class.A*)***
  %vtable = load i32 (%class.A*)**, i32 (%class.A*)*** %0, align 8
  %1 = bitcast i32 (%class.A*)** %vtable to i8*
  %2 = tail call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A")
  tail call void @llvm.assume(i1 %2)
  ; Function pointer stored at offset 0 from the vtable pointer. It is used by
  ; the compare below and by the fallback call in if.false.orig_indirect.
  %3 = load i32 (%class.A*)*, i32 (%class.A*)** %vtable, align 8
  ; This is the compare instruction inserted by ICP
  %4 = icmp eq i32 (%class.A*)* %3, bitcast (i32 (%class.B*)* @_ZN1B3fooEv to i32 (%class.A*)*)
  br i1 %4, label %if.true.direct_targ, label %if.false.orig_indirect

; This block contains the promoted and inlined call to B::foo();
; CHECK-IR: if.true.direct_targ: ; preds = %entry
if.true.direct_targ:                              ; preds = %entry
  %5 = bitcast %class.A* %a to %class.B*
  ; Second type test on the same %vtable value, this time against _ZTS1B. It is
  ; only executed on this path, not on the fallback path.
  %6 = bitcast i32 (%class.A*)** %vtable to i8*
  %7 = tail call i1 @llvm.type.test(i8* %6, metadata !"_ZTS1B")
  tail call void @llvm.assume(i1 %7)
  ; Load the function pointer one slot past %vtable; it is called below with %a
  ; cast to %class.B*.
  %vfn.i1 = getelementptr inbounds i32 (%class.A*)*, i32 (%class.A*)** %vtable, i64 1
  %vfn.i = bitcast i32 (%class.A*)** %vfn.i1 to i32 (%class.B*)**
  %8 = load i32 (%class.B*)*, i32 (%class.B*)** %vfn.i, align 8
; Call to bar() can be devirtualized to call to B::bar(), since it was
; inlined from B::foo() after ICP introduced the guarded promotion.
; NOTE(review): the %3 operand expected below is a value number from the
; optimized output, not from this input; presumably it shifts if the pipeline
; changes which instructions survive - confirm when updating.
; CHECK-IR: %call.i = tail call i32 @_ZN1B3barEv(%class.B* %3)
  %call.i = tail call i32 %8(%class.B* %5)
  br label %if.end.icp

; This block contains the fallback indirect call a->foo()
; CHECK-IR: if.false.orig_indirect:
if.false.orig_indirect:                           ; preds = %entry
; Fallback indirect call to foo() cannot be devirtualized.
; The pattern below ends in a bare '%', so it only matches a call through a
; local value (an indirect call), not a call to a named function.
; CHECK-IR: %call = tail call i32 %
  %call = tail call i32 %3(%class.A* nonnull %a)
  br label %if.end.icp

; Merge the results of the promoted path and the fallback path.
if.end.icp:                                       ; preds = %if.false.orig_indirect, %if.true.direct_targ
  %9 = phi i32 [ %call, %if.false.orig_indirect ], [ %call.i, %if.true.direct_targ ]
  ret i32 %9
}

declare i1 @llvm.type.test(i8*, metadata)

declare void @llvm.assume(i1)

; The virtual functions are only declared here; no bodies are provided in this
; module.
declare dso_local i32 @_ZN1B3fooEv(%class.B* %this) unnamed_addr
declare dso_local i32 @_ZN1B3barEv(%class.B*) unnamed_addr
declare dso_local i32 @_ZN1A3barEv(%class.A* %this) unnamed_addr
declare dso_local i32 @_ZN1A3fooEv(%class.A* %this) unnamed_addr

; Type ids attached to the vtables through !type: (byte offset, type id).
; Offset 16 corresponds to the third i8* slot (the foo entry), assuming 8-byte
; pointers for this x86_64 target.
!0 = !{i64 16, !"_ZTS1A"}
!1 = !{i64 16, !"_ZTS1B"}