1 //===-- IndirectCallPromotion.cpp - Promote indirect calls to direct calls ===//
2 //
3 //                      The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the transformation that promotes indirect calls to
11 // conditional direct calls when the indirect-call value profile metadata is
12 // available.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/Analysis/CFG.h"
20 #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
21 #include "llvm/Analysis/IndirectCallSiteVisitor.h"
22 #include "llvm/IR/CallSite.h"
23 #include "llvm/IR/DiagnosticInfo.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/InstIterator.h"
26 #include "llvm/IR/InstVisitor.h"
27 #include "llvm/IR/Instructions.h"
28 #include "llvm/IR/IntrinsicInst.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/Pass.h"
32 #include "llvm/ProfileData/InstrProfReader.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Transforms/Instrumentation.h"
35 #include "llvm/Transforms/PGOInstrumentation.h"
36 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37 #include <string>
38 #include <utility>
39 #include <vector>
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "pgo-icall-prom"
44 
45 STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
46 STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
47 
48 // Command line option to disable indirect-call promotion with the default as
49 // false. This is for debug purpose.
50 static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
51                                 cl::desc("Disable indirect call promotion"));
52 
53 // Set the cutoff value for the promotion. If the value is other than 0, we
54 // stop the transformation once the total number of promotions equals the cutoff
55 // value.
56 // For debug use only.
57 static cl::opt<unsigned>
58     ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore,
59               cl::desc("Max number of promotions for this compilaiton"));
60 
61 // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
62 // For debug use only.
63 static cl::opt<unsigned>
64     ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore,
65               cl::desc("Skip Callsite up to this number for this compilaiton"));
66 
67 // Set if the pass is called in LTO optimization. The difference for LTO mode
68 // is the pass won't prefix the source module name to the internal linkage
69 // symbols.
70 static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
71                                 cl::desc("Run indirect-call promotion in LTO "
72                                          "mode"));
73 
74 // If the option is set to true, only call instructions will be considered for
75 // transformation -- invoke instructions will be ignored.
76 static cl::opt<bool>
77     ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
78                 cl::desc("Run indirect-call promotion for call instructions "
79                          "only"));
80 
81 // If the option is set to true, only invoke instructions will be considered for
82 // transformation -- call instructions will be ignored.
83 static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
84                                    cl::Hidden,
85                                    cl::desc("Run indirect-call promotion for "
86                                             "invoke instruction only"));
87 
88 // Dump the function level IR if the transformation happened in this
89 // function. For debug use only.
90 static cl::opt<bool>
91     ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
92                  cl::desc("Dump IR after transformation happens"));
93 
94 namespace {
95 class PGOIndirectCallPromotionLegacyPass : public ModulePass {
96 public:
97   static char ID;
98 
PGOIndirectCallPromotionLegacyPass(bool InLTO=false)99   PGOIndirectCallPromotionLegacyPass(bool InLTO = false)
100       : ModulePass(ID), InLTO(InLTO) {
101     initializePGOIndirectCallPromotionLegacyPassPass(
102         *PassRegistry::getPassRegistry());
103   }
104 
getPassName() const105   const char *getPassName() const override {
106     return "PGOIndirectCallPromotion";
107   }
108 
109 private:
110   bool runOnModule(Module &M) override;
111 
112   // If this pass is called in LTO. We need to special handling the PGOFuncName
113   // for the static variables due to LTO's internalization.
114   bool InLTO;
115 };
116 } // end anonymous namespace
117 
118 char PGOIndirectCallPromotionLegacyPass::ID = 0;
119 INITIALIZE_PASS(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
120                 "Use PGO instrumentation profile to promote indirect calls to "
121                 "direct calls.",
122                 false, false)
123 
createPGOIndirectCallPromotionLegacyPass(bool InLTO)124 ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO) {
125   return new PGOIndirectCallPromotionLegacyPass(InLTO);
126 }
127 
128 namespace {
129 // The class for main data structure to promote indirect calls to conditional
130 // direct calls.
131 class ICallPromotionFunc {
132 private:
133   Function &F;
134   Module *M;
135 
136   // Symtab that maps indirect call profile values to function names and
137   // defines.
138   InstrProfSymtab *Symtab;
139 
140   enum TargetStatus {
141     OK,                   // Should be able to promote.
142     NotAvailableInModule, // Cannot find the target in current module.
143     ReturnTypeMismatch,   // Return type mismatch b/w target and indirect-call.
144     NumArgsMismatch,      // Number of arguments does not match.
145     ArgTypeMismatch       // Type mismatch in the arguments (cannot bitcast).
146   };
147 
148   // Test if we can legally promote this direct-call of Target.
149   TargetStatus isPromotionLegal(Instruction *Inst, uint64_t Target,
150                                 Function *&F);
151 
152   // A struct that records the direct target and it's call count.
153   struct PromotionCandidate {
154     Function *TargetFunction;
155     uint64_t Count;
PromotionCandidate__anon2c77f5680211::ICallPromotionFunc::PromotionCandidate156     PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
157   };
158 
159   // Check if the indirect-call call site should be promoted. Return the number
160   // of promotions. Inst is the candidate indirect call, ValueDataRef
161   // contains the array of value profile data for profiled targets,
162   // TotalCount is the total profiled count of call executions, and
163   // NumCandidates is the number of candidate entries in ValueDataRef.
164   std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
165       Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
166       uint64_t TotalCount, uint32_t NumCandidates);
167 
168   // Main function that transforms Inst (either a indirect-call instruction, or
169   // an invoke instruction , to a conditional call to F. This is like:
170   //     if (Inst.CalledValue == F)
171   //        F(...);
172   //     else
173   //        Inst(...);
174   //     end
175   // TotalCount is the profile count value that the instruction executes.
176   // Count is the profile count value that F is the target function.
177   // These two values are being used to update the branch weight.
178   void promote(Instruction *Inst, Function *F, uint64_t Count,
179                uint64_t TotalCount);
180 
181   // Promote a list of targets for one indirect-call callsite. Return
182   // the number of promotions.
183   uint32_t tryToPromote(Instruction *Inst,
184                         const std::vector<PromotionCandidate> &Candidates,
185                         uint64_t &TotalCount);
186 
StatusToString(const TargetStatus S)187   static const char *StatusToString(const TargetStatus S) {
188     switch (S) {
189     case OK:
190       return "OK to promote";
191     case NotAvailableInModule:
192       return "Cannot find the target";
193     case ReturnTypeMismatch:
194       return "Return type mismatch";
195     case NumArgsMismatch:
196       return "The number of arguments mismatch";
197     case ArgTypeMismatch:
198       return "Argument Type mismatch";
199     }
200     llvm_unreachable("Should not reach here");
201   }
202 
203   // Noncopyable
204   ICallPromotionFunc(const ICallPromotionFunc &other) = delete;
205   ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete;
206 
207 public:
ICallPromotionFunc(Function & Func,Module * Modu,InstrProfSymtab * Symtab)208   ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab)
209       : F(Func), M(Modu), Symtab(Symtab) {
210   }
211   bool processFunction();
212 };
213 } // end anonymous namespace
214 
215 ICallPromotionFunc::TargetStatus
isPromotionLegal(Instruction * Inst,uint64_t Target,Function * & TargetFunction)216 ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target,
217                                      Function *&TargetFunction) {
218   Function *DirectCallee = Symtab->getFunction(Target);
219   if (DirectCallee == nullptr)
220     return NotAvailableInModule;
221   // Check the return type.
222   Type *CallRetType = Inst->getType();
223   if (!CallRetType->isVoidTy()) {
224     Type *FuncRetType = DirectCallee->getReturnType();
225     if (FuncRetType != CallRetType &&
226         !CastInst::isBitCastable(FuncRetType, CallRetType))
227       return ReturnTypeMismatch;
228   }
229 
230   // Check if the arguments are compatible with the parameters
231   FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
232   unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
233   CallSite CS(Inst);
234   unsigned ArgNum = CS.arg_size();
235 
236   if (ParamNum != ArgNum && !DirectCalleeType->isVarArg())
237     return NumArgsMismatch;
238 
239   for (unsigned I = 0; I < ParamNum; ++I) {
240     Type *PTy = DirectCalleeType->getFunctionParamType(I);
241     Type *ATy = CS.getArgument(I)->getType();
242     if (PTy == ATy)
243       continue;
244     if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy))
245       return ArgTypeMismatch;
246   }
247 
248   DEBUG(dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to "
249                << Symtab->getFuncName(Target) << "\n");
250   TargetFunction = DirectCallee;
251   return OK;
252 }
253 
254 // Indirect-call promotion heuristic. The direct targets are sorted based on
255 // the count. Stop at the first target that is not promoted.
256 std::vector<ICallPromotionFunc::PromotionCandidate>
getPromotionCandidatesForCallSite(Instruction * Inst,const ArrayRef<InstrProfValueData> & ValueDataRef,uint64_t TotalCount,uint32_t NumCandidates)257 ICallPromotionFunc::getPromotionCandidatesForCallSite(
258     Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
259     uint64_t TotalCount, uint32_t NumCandidates) {
260   std::vector<PromotionCandidate> Ret;
261 
262   DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst
263                << " Num_targets: " << ValueDataRef.size()
264                << " Num_candidates: " << NumCandidates << "\n");
265   NumOfPGOICallsites++;
266   if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
267     DEBUG(dbgs() << " Skip: User options.\n");
268     return Ret;
269   }
270 
271   for (uint32_t I = 0; I < NumCandidates; I++) {
272     uint64_t Count = ValueDataRef[I].Count;
273     assert(Count <= TotalCount);
274     uint64_t Target = ValueDataRef[I].Value;
275     DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
276                  << "  Target_func: " << Target << "\n");
277 
278     if (ICPInvokeOnly && dyn_cast<CallInst>(Inst)) {
279       DEBUG(dbgs() << " Not promote: User options.\n");
280       break;
281     }
282     if (ICPCallOnly && dyn_cast<InvokeInst>(Inst)) {
283       DEBUG(dbgs() << " Not promote: User option.\n");
284       break;
285     }
286     if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
287       DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
288       break;
289     }
290     Function *TargetFunction = nullptr;
291     TargetStatus Status = isPromotionLegal(Inst, Target, TargetFunction);
292     if (Status != OK) {
293       StringRef TargetFuncName = Symtab->getFuncName(Target);
294       const char *Reason = StatusToString(Status);
295       DEBUG(dbgs() << " Not promote: " << Reason << "\n");
296       emitOptimizationRemarkMissed(
297           F.getContext(), "pgo-icall-prom", F, Inst->getDebugLoc(),
298           Twine("Cannot promote indirect call to ") +
299               (TargetFuncName.empty() ? Twine(Target) : Twine(TargetFuncName)) +
300               Twine(" with count of ") + Twine(Count) + ": " + Reason);
301       break;
302     }
303     Ret.push_back(PromotionCandidate(TargetFunction, Count));
304     TotalCount -= Count;
305   }
306   return Ret;
307 }
308 
309 // Create a diamond structure for If_Then_Else. Also update the profile
310 // count. Do the fix-up for the invoke instruction.
createIfThenElse(Instruction * Inst,Function * DirectCallee,uint64_t Count,uint64_t TotalCount,BasicBlock ** DirectCallBB,BasicBlock ** IndirectCallBB,BasicBlock ** MergeBB)311 static void createIfThenElse(Instruction *Inst, Function *DirectCallee,
312                              uint64_t Count, uint64_t TotalCount,
313                              BasicBlock **DirectCallBB,
314                              BasicBlock **IndirectCallBB,
315                              BasicBlock **MergeBB) {
316   CallSite CS(Inst);
317   Value *OrigCallee = CS.getCalledValue();
318 
319   IRBuilder<> BBBuilder(Inst);
320   LLVMContext &Ctx = Inst->getContext();
321   Value *BCI1 =
322       BBBuilder.CreateBitCast(OrigCallee, Type::getInt8PtrTy(Ctx), "");
323   Value *BCI2 =
324       BBBuilder.CreateBitCast(DirectCallee, Type::getInt8PtrTy(Ctx), "");
325   Value *PtrCmp = BBBuilder.CreateICmpEQ(BCI1, BCI2, "");
326 
327   uint64_t ElseCount = TotalCount - Count;
328   uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
329   uint64_t Scale = calculateCountScale(MaxCount);
330   MDBuilder MDB(Inst->getContext());
331   MDNode *BranchWeights = MDB.createBranchWeights(
332       scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
333   TerminatorInst *ThenTerm, *ElseTerm;
334   SplitBlockAndInsertIfThenElse(PtrCmp, Inst, &ThenTerm, &ElseTerm,
335                                 BranchWeights);
336   *DirectCallBB = ThenTerm->getParent();
337   (*DirectCallBB)->setName("if.true.direct_targ");
338   *IndirectCallBB = ElseTerm->getParent();
339   (*IndirectCallBB)->setName("if.false.orig_indirect");
340   *MergeBB = Inst->getParent();
341   (*MergeBB)->setName("if.end.icp");
342 
343   // Special handing of Invoke instructions.
344   InvokeInst *II = dyn_cast<InvokeInst>(Inst);
345   if (!II)
346     return;
347 
348   // We don't need branch instructions for invoke.
349   ThenTerm->eraseFromParent();
350   ElseTerm->eraseFromParent();
351 
352   // Add jump from Merge BB to the NormalDest. This is needed for the newly
353   // created direct invoke stmt -- as its NormalDst will be fixed up to MergeBB.
354   BranchInst::Create(II->getNormalDest(), *MergeBB);
355 }
356 
357 // Find the PHI in BB that have the CallResult as the operand.
getCallRetPHINode(BasicBlock * BB,Instruction * Inst)358 static bool getCallRetPHINode(BasicBlock *BB, Instruction *Inst) {
359   BasicBlock *From = Inst->getParent();
360   for (auto &I : *BB) {
361     PHINode *PHI = dyn_cast<PHINode>(&I);
362     if (!PHI)
363       continue;
364     int IX = PHI->getBasicBlockIndex(From);
365     if (IX == -1)
366       continue;
367     Value *V = PHI->getIncomingValue(IX);
368     if (dyn_cast<Instruction>(V) == Inst)
369       return true;
370   }
371   return false;
372 }
373 
374 // This method fixes up PHI nodes in BB where BB is the UnwindDest of an
375 // invoke instruction. In BB, there may be PHIs with incoming block being
376 // OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
377 // instructions to its own BB, OrigBB is no longer the predecessor block of BB.
378 // Instead two new predecessors are added: IndirectCallBB and DirectCallBB,
379 // so the PHI node's incoming BBs need to be fixed up accordingly.
fixupPHINodeForUnwind(Instruction * Inst,BasicBlock * BB,BasicBlock * OrigBB,BasicBlock * IndirectCallBB,BasicBlock * DirectCallBB)380 static void fixupPHINodeForUnwind(Instruction *Inst, BasicBlock *BB,
381                                   BasicBlock *OrigBB,
382                                   BasicBlock *IndirectCallBB,
383                                   BasicBlock *DirectCallBB) {
384   for (auto &I : *BB) {
385     PHINode *PHI = dyn_cast<PHINode>(&I);
386     if (!PHI)
387       continue;
388     int IX = PHI->getBasicBlockIndex(OrigBB);
389     if (IX == -1)
390       continue;
391     Value *V = PHI->getIncomingValue(IX);
392     PHI->addIncoming(V, IndirectCallBB);
393     PHI->setIncomingBlock(IX, DirectCallBB);
394   }
395 }
396 
397 // This method fixes up PHI nodes in BB where BB is the NormalDest of an
398 // invoke instruction. In BB, there may be PHIs with incoming block being
399 // OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
400 // instructions to its own BB, a new incoming edge will be added to the original
401 // NormalDstBB from the IndirectCallBB.
fixupPHINodeForNormalDest(Instruction * Inst,BasicBlock * BB,BasicBlock * OrigBB,BasicBlock * IndirectCallBB,Instruction * NewInst)402 static void fixupPHINodeForNormalDest(Instruction *Inst, BasicBlock *BB,
403                                       BasicBlock *OrigBB,
404                                       BasicBlock *IndirectCallBB,
405                                       Instruction *NewInst) {
406   for (auto &I : *BB) {
407     PHINode *PHI = dyn_cast<PHINode>(&I);
408     if (!PHI)
409       continue;
410     int IX = PHI->getBasicBlockIndex(OrigBB);
411     if (IX == -1)
412       continue;
413     Value *V = PHI->getIncomingValue(IX);
414     if (dyn_cast<Instruction>(V) == Inst) {
415       PHI->setIncomingBlock(IX, IndirectCallBB);
416       PHI->addIncoming(NewInst, OrigBB);
417       continue;
418     }
419     PHI->addIncoming(V, IndirectCallBB);
420   }
421 }
422 
423 // Add a bitcast instruction to the direct-call return value if needed.
insertCallRetCast(const Instruction * Inst,Instruction * DirectCallInst,Function * DirectCallee)424 static Instruction *insertCallRetCast(const Instruction *Inst,
425                                       Instruction *DirectCallInst,
426                                       Function *DirectCallee) {
427   if (Inst->getType()->isVoidTy())
428     return DirectCallInst;
429 
430   Type *CallRetType = Inst->getType();
431   Type *FuncRetType = DirectCallee->getReturnType();
432   if (FuncRetType == CallRetType)
433     return DirectCallInst;
434 
435   BasicBlock *InsertionBB;
436   if (CallInst *CI = dyn_cast<CallInst>(DirectCallInst))
437     InsertionBB = CI->getParent();
438   else
439     InsertionBB = (dyn_cast<InvokeInst>(DirectCallInst))->getNormalDest();
440 
441   return (new BitCastInst(DirectCallInst, CallRetType, "",
442                           InsertionBB->getTerminator()));
443 }
444 
445 // Create a DirectCall instruction in the DirectCallBB.
446 // Parameter Inst is the indirect-call (invoke) instruction.
447 // DirectCallee is the decl of the direct-call (invoke) target.
448 // DirecallBB is the BB that the direct-call (invoke) instruction is inserted.
449 // MergeBB is the bottom BB of the if-then-else-diamond after the
450 // transformation. For invoke instruction, the edges from DirectCallBB and
451 // IndirectCallBB to MergeBB are removed before this call (during
452 // createIfThenElse).
createDirectCallInst(const Instruction * Inst,Function * DirectCallee,BasicBlock * DirectCallBB,BasicBlock * MergeBB)453 static Instruction *createDirectCallInst(const Instruction *Inst,
454                                          Function *DirectCallee,
455                                          BasicBlock *DirectCallBB,
456                                          BasicBlock *MergeBB) {
457   Instruction *NewInst = Inst->clone();
458   if (CallInst *CI = dyn_cast<CallInst>(NewInst)) {
459     CI->setCalledFunction(DirectCallee);
460     CI->mutateFunctionType(DirectCallee->getFunctionType());
461   } else {
462     // Must be an invoke instruction. Direct invoke's normal destination is
463     // fixed up to MergeBB. MergeBB is the place where return cast is inserted.
464     // Also since IndirectCallBB does not have an edge to MergeBB, there is no
465     // need to insert new PHIs into MergeBB.
466     InvokeInst *II = dyn_cast<InvokeInst>(NewInst);
467     assert(II);
468     II->setCalledFunction(DirectCallee);
469     II->mutateFunctionType(DirectCallee->getFunctionType());
470     II->setNormalDest(MergeBB);
471   }
472 
473   DirectCallBB->getInstList().insert(DirectCallBB->getFirstInsertionPt(),
474                                      NewInst);
475 
476   // Clear the value profile data.
477   NewInst->setMetadata(LLVMContext::MD_prof, 0);
478   CallSite NewCS(NewInst);
479   FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
480   unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
481   for (unsigned I = 0; I < ParamNum; ++I) {
482     Type *ATy = NewCS.getArgument(I)->getType();
483     Type *PTy = DirectCalleeType->getParamType(I);
484     if (ATy != PTy) {
485       BitCastInst *BI = new BitCastInst(NewCS.getArgument(I), PTy, "", NewInst);
486       NewCS.setArgument(I, BI);
487     }
488   }
489 
490   return insertCallRetCast(Inst, NewInst, DirectCallee);
491 }
492 
493 // Create a PHI to unify the return values of calls.
insertCallRetPHI(Instruction * Inst,Instruction * CallResult,Function * DirectCallee)494 static void insertCallRetPHI(Instruction *Inst, Instruction *CallResult,
495                              Function *DirectCallee) {
496   if (Inst->getType()->isVoidTy())
497     return;
498 
499   BasicBlock *RetValBB = CallResult->getParent();
500 
501   BasicBlock *PHIBB;
502   if (InvokeInst *II = dyn_cast<InvokeInst>(CallResult))
503     RetValBB = II->getNormalDest();
504 
505   PHIBB = RetValBB->getSingleSuccessor();
506   if (getCallRetPHINode(PHIBB, Inst))
507     return;
508 
509   PHINode *CallRetPHI = PHINode::Create(Inst->getType(), 0);
510   PHIBB->getInstList().push_front(CallRetPHI);
511   Inst->replaceAllUsesWith(CallRetPHI);
512   CallRetPHI->addIncoming(Inst, Inst->getParent());
513   CallRetPHI->addIncoming(CallResult, RetValBB);
514 }
515 
516 // This function does the actual indirect-call promotion transformation:
517 // For an indirect-call like:
518 //     Ret = (*Foo)(Args);
519 // It transforms to:
520 //     if (Foo == DirectCallee)
521 //        Ret1 = DirectCallee(Args);
522 //     else
523 //        Ret2 = (*Foo)(Args);
524 //     Ret = phi(Ret1, Ret2);
525 // It adds type casts for the args do not match the parameters and the return
526 // value. Branch weights metadata also updated.
promote(Instruction * Inst,Function * DirectCallee,uint64_t Count,uint64_t TotalCount)527 void ICallPromotionFunc::promote(Instruction *Inst, Function *DirectCallee,
528                                  uint64_t Count, uint64_t TotalCount) {
529   assert(DirectCallee != nullptr);
530   BasicBlock *BB = Inst->getParent();
531   // Just to suppress the non-debug build warning.
532   (void)BB;
533   DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
534   DEBUG(dbgs() << *BB << "\n");
535 
536   BasicBlock *DirectCallBB, *IndirectCallBB, *MergeBB;
537   createIfThenElse(Inst, DirectCallee, Count, TotalCount, &DirectCallBB,
538                    &IndirectCallBB, &MergeBB);
539 
540   Instruction *NewInst =
541       createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB);
542 
543   // Move Inst from MergeBB to IndirectCallBB.
544   Inst->removeFromParent();
545   IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(),
546                                        Inst);
547 
548   if (InvokeInst *II = dyn_cast<InvokeInst>(Inst)) {
549     // At this point, the original indirect invoke instruction has the original
550     // UnwindDest and NormalDest. For the direct invoke instruction, the
551     // NormalDest points to MergeBB, and MergeBB jumps to the original
552     // NormalDest. MergeBB might have a new bitcast instruction for the return
553     // value. The PHIs are with the original NormalDest. Since we now have two
554     // incoming edges to NormalDest and UnwindDest, we have to do some fixups.
555     //
556     // UnwindDest will not use the return value. So pass nullptr here.
557     fixupPHINodeForUnwind(Inst, II->getUnwindDest(), MergeBB, IndirectCallBB,
558                           DirectCallBB);
559     // We don't need to update the operand from NormalDest for DirectCallBB.
560     // Pass nullptr here.
561     fixupPHINodeForNormalDest(Inst, II->getNormalDest(), MergeBB,
562                               IndirectCallBB, NewInst);
563   }
564 
565   insertCallRetPHI(Inst, NewInst, DirectCallee);
566 
567   DEBUG(dbgs() << "\n== Basic Blocks After ==\n");
568   DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n");
569 
570   emitOptimizationRemark(
571       F.getContext(), "pgo-icall-prom", F, Inst->getDebugLoc(),
572       Twine("Promote indirect call to ") + DirectCallee->getName() +
573           " with count " + Twine(Count) + " out of " + Twine(TotalCount));
574 }
575 
576 // Promote indirect-call to conditional direct-call for one callsite.
tryToPromote(Instruction * Inst,const std::vector<PromotionCandidate> & Candidates,uint64_t & TotalCount)577 uint32_t ICallPromotionFunc::tryToPromote(
578     Instruction *Inst, const std::vector<PromotionCandidate> &Candidates,
579     uint64_t &TotalCount) {
580   uint32_t NumPromoted = 0;
581 
582   for (auto &C : Candidates) {
583     uint64_t Count = C.Count;
584     promote(Inst, C.TargetFunction, Count, TotalCount);
585     assert(TotalCount >= Count);
586     TotalCount -= Count;
587     NumOfPGOICallPromotion++;
588     NumPromoted++;
589   }
590   return NumPromoted;
591 }
592 
593 // Traverse all the indirect-call callsite and get the value profile
594 // annotation to perform indirect-call promotion.
processFunction()595 bool ICallPromotionFunc::processFunction() {
596   bool Changed = false;
597   ICallPromotionAnalysis ICallAnalysis;
598   for (auto &I : findIndirectCallSites(F)) {
599     uint32_t NumVals, NumCandidates;
600     uint64_t TotalCount;
601     auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
602         I, NumVals, TotalCount, NumCandidates);
603     if (!NumCandidates)
604       continue;
605     auto PromotionCandidates = getPromotionCandidatesForCallSite(
606         I, ICallProfDataRef, TotalCount, NumCandidates);
607     uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount);
608     if (NumPromoted == 0)
609       continue;
610 
611     Changed = true;
612     // Adjust the MD.prof metadata. First delete the old one.
613     I->setMetadata(LLVMContext::MD_prof, 0);
614     // If all promoted, we don't need the MD.prof metadata.
615     if (TotalCount == 0 || NumPromoted == NumVals)
616       continue;
617     // Otherwise we need update with the un-promoted records back.
618     annotateValueSite(*M, *I, ICallProfDataRef.slice(NumPromoted), TotalCount,
619                       IPVK_IndirectCallTarget, NumCandidates);
620   }
621   return Changed;
622 }
623 
624 // A wrapper function that does the actual work.
promoteIndirectCalls(Module & M,bool InLTO)625 static bool promoteIndirectCalls(Module &M, bool InLTO) {
626   if (DisableICP)
627     return false;
628   InstrProfSymtab Symtab;
629   Symtab.create(M, InLTO);
630   bool Changed = false;
631   for (auto &F : M) {
632     if (F.isDeclaration())
633       continue;
634     if (F.hasFnAttribute(Attribute::OptimizeNone))
635       continue;
636     ICallPromotionFunc ICallPromotion(F, &M, &Symtab);
637     bool FuncChanged = ICallPromotion.processFunction();
638     if (ICPDUMPAFTER && FuncChanged) {
639       DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
640       DEBUG(dbgs() << "\n");
641     }
642     Changed |= FuncChanged;
643     if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
644       DEBUG(dbgs() << " Stop: Cutoff reached.\n");
645       break;
646     }
647   }
648   return Changed;
649 }
650 
runOnModule(Module & M)651 bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) {
652   // Command-line option has the priority for InLTO.
653   return promoteIndirectCalls(M, InLTO | ICPLTOMode);
654 }
655 
run(Module & M,AnalysisManager<Module> & AM)656 PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, AnalysisManager<Module> &AM) {
657   if (!promoteIndirectCalls(M, InLTO | ICPLTOMode))
658     return PreservedAnalyses::all();
659 
660   return PreservedAnalyses::none();
661 }
662