1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs loop invariant code motion, attempting to remove as much
11 // code from the body of a loop as possible.  It does this by either hoisting
12 // code into the preheader block, or by sinking code to the exit blocks if it is
13 // safe.  This pass also promotes must-aliased memory locations in the loop to
14 // live in registers, thus hoisting and sinking "invariant" loads and stores.
15 //
16 // This pass uses alias analysis for two purposes:
17 //
18 //  1. Moving loop invariant loads and calls out of loops.  If we can determine
19 //     that a load or call inside of a loop never aliases anything stored to,
20 //     we can hoist it or sink it like any other instruction.
21 //  2. Scalar Promotion of Memory - If there is a store instruction inside of
22 //     the loop, we try to move the store to happen AFTER the loop instead of
23 //     inside of the loop.  This can only happen if a few conditions are true:
24 //       A. The pointer stored through is loop invariant
25 //       B. There are no stores or loads in the loop which _may_ alias the
26 //          pointer.  There are no calls in the loop which mod/ref the pointer.
27 //     If these conditions are true, we can promote the loads and stores in the
28 //     loop of the pointer to use a temporary alloca'd variable.  We then use
29 //     the SSAUpdater to construct the appropriate SSA form for the value.
30 //
31 //===----------------------------------------------------------------------===//
32 
33 #include "llvm/Transforms/Scalar.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/Analysis/AliasAnalysis.h"
36 #include "llvm/Analysis/AliasSetTracker.h"
37 #include "llvm/Analysis/ConstantFolding.h"
38 #include "llvm/Analysis/LoopInfo.h"
39 #include "llvm/Analysis/LoopPass.h"
40 #include "llvm/Analysis/ScalarEvolution.h"
41 #include "llvm/Analysis/TargetLibraryInfo.h"
42 #include "llvm/Analysis/ValueTracking.h"
43 #include "llvm/IR/CFG.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DataLayout.h"
46 #include "llvm/IR/DerivedTypes.h"
47 #include "llvm/IR/Dominators.h"
48 #include "llvm/IR/Instructions.h"
49 #include "llvm/IR/IntrinsicInst.h"
50 #include "llvm/IR/LLVMContext.h"
51 #include "llvm/IR/Metadata.h"
52 #include "llvm/IR/PredIteratorCache.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Debug.h"
55 #include "llvm/Support/raw_ostream.h"
56 #include "llvm/Transforms/Utils/Local.h"
57 #include "llvm/Transforms/Utils/LoopUtils.h"
58 #include "llvm/Transforms/Utils/SSAUpdater.h"
59 #include <algorithm>
60 using namespace llvm;
61 
62 #define DEBUG_TYPE "licm"
63 
64 STATISTIC(NumSunk      , "Number of instructions sunk out of loop");
65 STATISTIC(NumHoisted   , "Number of instructions hoisted out of loop");
66 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
67 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
68 STATISTIC(NumPromoted  , "Number of memory locations promoted to registers");
69 
70 static cl::opt<bool>
71 DisablePromotion("disable-licm-promotion", cl::Hidden,
72                  cl::desc("Disable memory promotion in LICM pass"));
73 
74 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
75 static bool isNotUsedInLoop(Instruction &I, Loop *CurLoop);
76 static bool hoist(Instruction &I, BasicBlock *Preheader);
77 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
78                  Loop *CurLoop, AliasSetTracker *CurAST );
79 static bool isGuaranteedToExecute(Instruction &Inst, DominatorTree *DT,
80                                   Loop *CurLoop, LICMSafetyInfo *SafetyInfo);
81 static bool isSafeToExecuteUnconditionally(Instruction &Inst, DominatorTree *DT,
82                                            Loop *CurLoop,
83                                            LICMSafetyInfo *SafetyInfo);
84 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
85                                      const AAMDNodes &AAInfo,
86                                      AliasSetTracker *CurAST);
87 static Instruction *CloneInstructionInExitBlock(Instruction &I,
88                                                 BasicBlock &ExitBlock,
89                                                 PHINode &PN, LoopInfo *LI);
90 static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
91                                DominatorTree *DT, Loop *CurLoop,
92                                AliasSetTracker *CurAST,
93                                LICMSafetyInfo *SafetyInfo);
94 
95 namespace {
96   struct LICM : public LoopPass {
97     static char ID; // Pass identification, replacement for typeid
LICM__anon68105c7a0111::LICM98     LICM() : LoopPass(ID) {
99       initializeLICMPass(*PassRegistry::getPassRegistry());
100     }
101 
102     bool runOnLoop(Loop *L, LPPassManager &LPM) override;
103 
104     /// This transformation requires natural loop information & requires that
105     /// loop preheaders be inserted into the CFG...
106     ///
getAnalysisUsage__anon68105c7a0111::LICM107     void getAnalysisUsage(AnalysisUsage &AU) const override {
108       AU.setPreservesCFG();
109       AU.addRequired<DominatorTreeWrapperPass>();
110       AU.addRequired<LoopInfoWrapperPass>();
111       AU.addRequiredID(LoopSimplifyID);
112       AU.addPreservedID(LoopSimplifyID);
113       AU.addRequiredID(LCSSAID);
114       AU.addPreservedID(LCSSAID);
115       AU.addRequired<AliasAnalysis>();
116       AU.addPreserved<AliasAnalysis>();
117       AU.addPreserved<ScalarEvolution>();
118       AU.addRequired<TargetLibraryInfoWrapperPass>();
119     }
120 
121     using llvm::Pass::doFinalization;
122 
doFinalization__anon68105c7a0111::LICM123     bool doFinalization() override {
124       assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
125       return false;
126     }
127 
128   private:
129     AliasAnalysis *AA;       // Current AliasAnalysis information
130     LoopInfo      *LI;       // Current LoopInfo
131     DominatorTree *DT;       // Dominator Tree for the current Loop.
132 
133     TargetLibraryInfo *TLI;  // TargetLibraryInfo for constant folding.
134 
135     // State that is updated as we process loops.
136     bool Changed;            // Set to true when we change anything.
137     BasicBlock *Preheader;   // The preheader block of the current loop...
138     Loop *CurLoop;           // The current loop we are working on...
139     AliasSetTracker *CurAST; // AliasSet information for the current loop...
140     DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
141 
142     /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
143     void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
144                                  Loop *L) override;
145 
146     /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
147     /// set.
148     void deleteAnalysisValue(Value *V, Loop *L) override;
149 
150     /// Simple Analysis hook. Delete loop L from alias set map.
151     void deleteAnalysisLoop(Loop *L) override;
152   };
153 }
154 
155 char LICM::ID = 0;
156 INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)157 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
158 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
159 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
160 INITIALIZE_PASS_DEPENDENCY(LCSSA)
161 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
162 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
163 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
164 INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
165 
166 Pass *llvm::createLICMPass() { return new LICM(); }
167 
168 /// Hoist expressions out of the specified loop. Note, alias info for inner
169 /// loop is not preserved so it is not a good idea to run LICM multiple
170 /// times on one loop.
171 ///
runOnLoop(Loop * L,LPPassManager & LPM)172 bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
173   if (skipOptnoneFunction(L))
174     return false;
175 
176   Changed = false;
177 
178   // Get our Loop and Alias Analysis information...
179   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
180   AA = &getAnalysis<AliasAnalysis>();
181   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
182 
183   TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
184 
185   assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
186 
187   CurAST = new AliasSetTracker(*AA);
188   // Collect Alias info from subloops.
189   for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
190        LoopItr != LoopItrE; ++LoopItr) {
191     Loop *InnerL = *LoopItr;
192     AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
193     assert(InnerAST && "Where is my AST?");
194 
195     // What if InnerLoop was modified by other passes ?
196     CurAST->add(*InnerAST);
197 
198     // Once we've incorporated the inner loop's AST into ours, we don't need the
199     // subloop's anymore.
200     delete InnerAST;
201     LoopToAliasSetMap.erase(InnerL);
202   }
203 
204   CurLoop = L;
205 
206   // Get the preheader block to move instructions into...
207   Preheader = L->getLoopPreheader();
208 
209   // Loop over the body of this loop, looking for calls, invokes, and stores.
210   // Because subloops have already been incorporated into AST, we skip blocks in
211   // subloops.
212   //
213   for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
214        I != E; ++I) {
215     BasicBlock *BB = *I;
216     if (LI->getLoopFor(BB) == L)        // Ignore blocks in subloops.
217       CurAST->add(*BB);                 // Incorporate the specified basic block
218   }
219 
220   // Compute loop safety information.
221   LICMSafetyInfo SafetyInfo;
222   computeLICMSafetyInfo(&SafetyInfo, CurLoop);
223 
224   // We want to visit all of the instructions in this loop... that are not parts
225   // of our subloops (they have already had their invariants hoisted out of
226   // their loop, into this loop, so there is no need to process the BODIES of
227   // the subloops).
228   //
229   // Traverse the body of the loop in depth first order on the dominator tree so
230   // that we are guaranteed to see definitions before we see uses.  This allows
231   // us to sink instructions in one pass, without iteration.  After sinking
232   // instructions, we perform another pass to hoist them out of the loop.
233   //
234   if (L->hasDedicatedExits())
235     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, CurLoop,
236                           CurAST, &SafetyInfo);
237   if (Preheader)
238     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI,
239                            CurLoop, CurAST, &SafetyInfo);
240 
241   // Now that all loop invariants have been removed from the loop, promote any
242   // memory references to scalars that we can.
243   if (!DisablePromotion && (Preheader || L->hasDedicatedExits())) {
244     SmallVector<BasicBlock *, 8> ExitBlocks;
245     SmallVector<Instruction *, 8> InsertPts;
246     PredIteratorCache PIC;
247 
248     // Loop over all of the alias sets in the tracker object.
249     for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
250          I != E; ++I)
251       Changed |= promoteLoopAccessesToScalars(*I, ExitBlocks, InsertPts,
252                                               PIC, LI, DT, CurLoop,
253                                               CurAST, &SafetyInfo);
254 
255     // Once we have promoted values across the loop body we have to recursively
256     // reform LCSSA as any nested loop may now have values defined within the
257     // loop used in the outer loop.
258     // FIXME: This is really heavy handed. It would be a bit better to use an
259     // SSAUpdater strategy during promotion that was LCSSA aware and reformed
260     // it as it went.
261     if (Changed)
262       formLCSSARecursively(*L, *DT, LI,
263                            getAnalysisIfAvailable<ScalarEvolution>());
264   }
265 
266   // Check that neither this loop nor its parent have had LCSSA broken. LICM is
267   // specifically moving instructions across the loop boundary and so it is
268   // especially in need of sanity checking here.
269   assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
270   assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
271          "Parent loop not left in LCSSA form after LICM!");
272 
273   // Clear out loops state information for the next iteration
274   CurLoop = nullptr;
275   Preheader = nullptr;
276 
277   // If this loop is nested inside of another one, save the alias information
278   // for when we process the outer loop.
279   if (L->getParentLoop())
280     LoopToAliasSetMap[L] = CurAST;
281   else
282     delete CurAST;
283   return Changed;
284 }
285 
286 /// Walk the specified region of the CFG (defined by all blocks dominated by
287 /// the specified block, and that are in the current loop) in reverse depth
288 /// first order w.r.t the DominatorTree.  This allows us to visit uses before
289 /// definitions, allowing us to sink a loop body in one pass without iteration.
290 ///
sinkRegion(DomTreeNode * N,AliasAnalysis * AA,LoopInfo * LI,DominatorTree * DT,TargetLibraryInfo * TLI,Loop * CurLoop,AliasSetTracker * CurAST,LICMSafetyInfo * SafetyInfo)291 bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
292                       DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
293                       AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
294 
295   // Verify inputs.
296   assert(N != nullptr && AA != nullptr && LI != nullptr &&
297          DT != nullptr && CurLoop != nullptr && CurAST != nullptr &&
298          SafetyInfo != nullptr && "Unexpected input to sinkRegion");
299 
300   // Set changed as false.
301   bool Changed = false;
302   // Get basic block
303   BasicBlock *BB = N->getBlock();
304   // If this subregion is not in the top level loop at all, exit.
305   if (!CurLoop->contains(BB)) return Changed;
306 
307   // We are processing blocks in reverse dfo, so process children first.
308   const std::vector<DomTreeNode*> &Children = N->getChildren();
309   for (unsigned i = 0, e = Children.size(); i != e; ++i)
310     Changed |=
311         sinkRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
312   // Only need to process the contents of this block if it is not part of a
313   // subloop (which would already have been processed).
314   if (inSubLoop(BB,CurLoop,LI)) return Changed;
315 
316   for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
317     Instruction &I = *--II;
318 
319     // If the instruction is dead, we would try to sink it because it isn't used
320     // in the loop, instead, just delete it.
321     if (isInstructionTriviallyDead(&I, TLI)) {
322       DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
323       ++II;
324       CurAST->deleteValue(&I);
325       I.eraseFromParent();
326       Changed = true;
327       continue;
328     }
329 
330     // Check to see if we can sink this instruction to the exit blocks
331     // of the loop.  We can do this if the all users of the instruction are
332     // outside of the loop.  In this case, it doesn't even matter if the
333     // operands of the instruction are loop invariant.
334     //
335     if (isNotUsedInLoop(I, CurLoop) &&
336         canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo)) {
337       ++II;
338       Changed |= sink(I, LI, DT, CurLoop, CurAST);
339     }
340   }
341   return Changed;
342 }
343 
344 /// Walk the specified region of the CFG (defined by all blocks dominated by
345 /// the specified block, and that are in the current loop) in depth first
346 /// order w.r.t the DominatorTree.  This allows us to visit definitions before
347 /// uses, allowing us to hoist a loop body in one pass without iteration.
348 ///
hoistRegion(DomTreeNode * N,AliasAnalysis * AA,LoopInfo * LI,DominatorTree * DT,TargetLibraryInfo * TLI,Loop * CurLoop,AliasSetTracker * CurAST,LICMSafetyInfo * SafetyInfo)349 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
350                        DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
351                        AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
352   // Verify inputs.
353   assert(N != nullptr && AA != nullptr && LI != nullptr &&
354          DT != nullptr && CurLoop != nullptr && CurAST != nullptr &&
355          SafetyInfo != nullptr && "Unexpected input to hoistRegion");
356   // Set changed as false.
357   bool Changed = false;
358   // Get basic block
359   BasicBlock *BB = N->getBlock();
360   // If this subregion is not in the top level loop at all, exit.
361   if (!CurLoop->contains(BB)) return Changed;
362   // Only need to process the contents of this block if it is not part of a
363   // subloop (which would already have been processed).
364   if (!inSubLoop(BB, CurLoop, LI))
365     for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
366       Instruction &I = *II++;
367       // Try constant folding this instruction.  If all the operands are
368       // constants, it is technically hoistable, but it would be better to just
369       // fold it.
370       if (Constant *C = ConstantFoldInstruction(
371               &I, I.getModule()->getDataLayout(), TLI)) {
372         DEBUG(dbgs() << "LICM folding inst: " << I << "  --> " << *C << '\n');
373         CurAST->copyValue(&I, C);
374         CurAST->deleteValue(&I);
375         I.replaceAllUsesWith(C);
376         I.eraseFromParent();
377         continue;
378       }
379 
380       // Try hoisting the instruction out to the preheader.  We can only do this
381       // if all of the operands of the instruction are loop invariant and if it
382       // is safe to hoist the instruction.
383       //
384       if (CurLoop->hasLoopInvariantOperands(&I) &&
385           canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo) &&
386           isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo))
387         Changed |= hoist(I, CurLoop->getLoopPreheader());
388     }
389 
390   const std::vector<DomTreeNode*> &Children = N->getChildren();
391   for (unsigned i = 0, e = Children.size(); i != e; ++i)
392     Changed |=
393         hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
394   return Changed;
395 }
396 
397 /// Computes loop safety information, checks loop body & header
398 /// for the possiblity of may throw exception.
399 ///
computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo,Loop * CurLoop)400 void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
401   assert(CurLoop != nullptr && "CurLoop cant be null");
402   BasicBlock *Header = CurLoop->getHeader();
403   // Setting default safety values.
404   SafetyInfo->MayThrow = false;
405   SafetyInfo->HeaderMayThrow = false;
406   // Iterate over header and compute dafety info.
407   for (BasicBlock::iterator I = Header->begin(), E = Header->end();
408        (I != E) && !SafetyInfo->HeaderMayThrow; ++I)
409     SafetyInfo->HeaderMayThrow |= I->mayThrow();
410 
411   SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
412   // Iterate over loop instructions and compute safety info.
413   for (Loop::block_iterator BB = CurLoop->block_begin(),
414        BBE = CurLoop->block_end(); (BB != BBE) && !SafetyInfo->MayThrow ; ++BB)
415     for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
416          (I != E) && !SafetyInfo->MayThrow; ++I)
417       SafetyInfo->MayThrow |= I->mayThrow();
418 }
419 
420 /// canSinkOrHoistInst - Return true if the hoister and sinker can handle this
421 /// instruction.
422 ///
canSinkOrHoistInst(Instruction & I,AliasAnalysis * AA,DominatorTree * DT,Loop * CurLoop,AliasSetTracker * CurAST,LICMSafetyInfo * SafetyInfo)423 bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
424                         Loop *CurLoop, AliasSetTracker *CurAST,
425                         LICMSafetyInfo *SafetyInfo) {
426   // Loads have extra constraints we have to verify before we can hoist them.
427   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
428     if (!LI->isUnordered())
429       return false;        // Don't hoist volatile/atomic loads!
430 
431     // Loads from constant memory are always safe to move, even if they end up
432     // in the same alias set as something that ends up being modified.
433     if (AA->pointsToConstantMemory(LI->getOperand(0)))
434       return true;
435     if (LI->getMetadata(LLVMContext::MD_invariant_load))
436       return true;
437 
438     // Don't hoist loads which have may-aliased stores in loop.
439     uint64_t Size = 0;
440     if (LI->getType()->isSized())
441       Size = AA->getTypeStoreSize(LI->getType());
442 
443     AAMDNodes AAInfo;
444     LI->getAAMetadata(AAInfo);
445 
446     return !pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST);
447   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
448     // Don't sink or hoist dbg info; it's legal, but not useful.
449     if (isa<DbgInfoIntrinsic>(I))
450       return false;
451 
452     // Handle simple cases by querying alias analysis.
453     AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
454     if (Behavior == AliasAnalysis::DoesNotAccessMemory)
455       return true;
456     if (AliasAnalysis::onlyReadsMemory(Behavior)) {
457       // If this call only reads from memory and there are no writes to memory
458       // in the loop, we can hoist or sink the call as appropriate.
459       bool FoundMod = false;
460       for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
461            I != E; ++I) {
462         AliasSet &AS = *I;
463         if (!AS.isForwardingAliasSet() && AS.isMod()) {
464           FoundMod = true;
465           break;
466         }
467       }
468       if (!FoundMod) return true;
469     }
470 
471     // FIXME: This should use mod/ref information to see if we can hoist or
472     // sink the call.
473 
474     return false;
475   }
476 
477   // Only these instructions are hoistable/sinkable.
478   if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) &&
479       !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) &&
480       !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) &&
481       !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) &&
482       !isa<InsertValueInst>(I))
483     return false;
484 
485   return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo);
486 }
487 
488 /// Returns true if a PHINode is a trivially replaceable with an
489 /// Instruction.
490 /// This is true when all incoming values are that instruction.
491 /// This pattern occurs most often with LCSSA PHI nodes.
492 ///
isTriviallyReplacablePHI(PHINode & PN,Instruction & I)493 static bool isTriviallyReplacablePHI(PHINode &PN, Instruction &I) {
494   for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
495     if (PN.getIncomingValue(i) != &I)
496       return false;
497 
498   return true;
499 }
500 
501 /// Return true if the only users of this instruction are outside of
502 /// the loop. If this is true, we can sink the instruction to the exit
503 /// blocks of the loop.
504 ///
isNotUsedInLoop(Instruction & I,Loop * CurLoop)505 static bool isNotUsedInLoop(Instruction &I, Loop *CurLoop) {
506   for (User *U : I.users()) {
507     Instruction *UI = cast<Instruction>(U);
508     if (PHINode *PN = dyn_cast<PHINode>(UI)) {
509       // A PHI node where all of the incoming values are this instruction are
510       // special -- they can just be RAUW'ed with the instruction and thus
511       // don't require a use in the predecessor. This is a particular important
512       // special case because it is the pattern found in LCSSA form.
513       if (isTriviallyReplacablePHI(*PN, I)) {
514         if (CurLoop->contains(PN))
515           return false;
516         else
517           continue;
518       }
519 
520       // Otherwise, PHI node uses occur in predecessor blocks if the incoming
521       // values. Check for such a use being inside the loop.
522       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
523         if (PN->getIncomingValue(i) == &I)
524           if (CurLoop->contains(PN->getIncomingBlock(i)))
525             return false;
526 
527       continue;
528     }
529 
530     if (CurLoop->contains(UI))
531       return false;
532   }
533   return true;
534 }
535 
CloneInstructionInExitBlock(Instruction & I,BasicBlock & ExitBlock,PHINode & PN,LoopInfo * LI)536 static Instruction *CloneInstructionInExitBlock(Instruction &I,
537                                                 BasicBlock &ExitBlock,
538                                                 PHINode &PN, LoopInfo *LI) {
539   Instruction *New = I.clone();
540   ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
541   if (!I.getName().empty()) New->setName(I.getName() + ".le");
542 
543   // Build LCSSA PHI nodes for any in-loop operands. Note that this is
544   // particularly cheap because we can rip off the PHI node that we're
545   // replacing for the number and blocks of the predecessors.
546   // OPT: If this shows up in a profile, we can instead finish sinking all
547   // invariant instructions, and then walk their operands to re-establish
548   // LCSSA. That will eliminate creating PHI nodes just to nuke them when
549   // sinking bottom-up.
550   for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
551        ++OI)
552     if (Instruction *OInst = dyn_cast<Instruction>(*OI))
553       if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
554         if (!OLoop->contains(&PN)) {
555           PHINode *OpPN =
556               PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
557                               OInst->getName() + ".lcssa", ExitBlock.begin());
558           for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
559             OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
560           *OI = OpPN;
561         }
562   return New;
563 }
564 
565 /// When an instruction is found to only be used outside of the loop, this
566 /// function moves it to the exit blocks and patches up SSA form as needed.
567 /// This method is guaranteed to remove the original instruction from its
568 /// position, and may either delete it or move it to outside of the loop.
569 ///
sink(Instruction & I,LoopInfo * LI,DominatorTree * DT,Loop * CurLoop,AliasSetTracker * CurAST)570 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
571                  Loop *CurLoop, AliasSetTracker *CurAST ) {
572   DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
573   bool Changed = false;
574   if (isa<LoadInst>(I)) ++NumMovedLoads;
575   else if (isa<CallInst>(I)) ++NumMovedCalls;
576   ++NumSunk;
577   Changed = true;
578 
579 #ifndef NDEBUG
580   SmallVector<BasicBlock *, 32> ExitBlocks;
581   CurLoop->getUniqueExitBlocks(ExitBlocks);
582   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
583                                              ExitBlocks.end());
584 #endif
585 
586   // Clones of this instruction. Don't create more than one per exit block!
587   SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
588 
589   // If this instruction is only used outside of the loop, then all users are
590   // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
591   // the instruction.
592   while (!I.use_empty()) {
593     Instruction *User = I.user_back();
594     if (!DT->isReachableFromEntry(User->getParent())) {
595       User->replaceUsesOfWith(&I, UndefValue::get(I.getType()));
596       continue;
597     }
598     // The user must be a PHI node.
599     PHINode *PN = cast<PHINode>(User);
600 
601     BasicBlock *ExitBlock = PN->getParent();
602     assert(ExitBlockSet.count(ExitBlock) &&
603            "The LCSSA PHI is not in an exit block!");
604 
605     Instruction *New;
606     auto It = SunkCopies.find(ExitBlock);
607     if (It != SunkCopies.end())
608       New = It->second;
609     else
610       New = SunkCopies[ExitBlock] =
611             CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI);
612 
613     PN->replaceAllUsesWith(New);
614     PN->eraseFromParent();
615   }
616 
617   CurAST->deleteValue(&I);
618   I.eraseFromParent();
619   return Changed;
620 }
621 
622 /// When an instruction is found to only use loop invariant operands that
623 /// is safe to hoist, this instruction is called to do the dirty work.
624 ///
hoist(Instruction & I,BasicBlock * Preheader)625 static bool hoist(Instruction &I, BasicBlock *Preheader) {
626   DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
627         << I << "\n");
628   // Move the new node to the Preheader, before its terminator.
629   I.moveBefore(Preheader->getTerminator());
630 
631   if (isa<LoadInst>(I)) ++NumMovedLoads;
632   else if (isa<CallInst>(I)) ++NumMovedCalls;
633   ++NumHoisted;
634   return true;
635 }
636 
637 /// Only sink or hoist an instruction if it is not a trapping instruction
638 /// or if it is a trapping instruction and is guaranteed to execute.
639 ///
isSafeToExecuteUnconditionally(Instruction & Inst,DominatorTree * DT,Loop * CurLoop,LICMSafetyInfo * SafetyInfo)640 static bool isSafeToExecuteUnconditionally(Instruction &Inst, DominatorTree *DT,
641                                            Loop *CurLoop,
642                                            LICMSafetyInfo *SafetyInfo) {
643   // If it is not a trapping instruction, it is always safe to hoist.
644   if (isSafeToSpeculativelyExecute(&Inst))
645     return true;
646 
647   return isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo);
648 }
649 
isGuaranteedToExecute(Instruction & Inst,DominatorTree * DT,Loop * CurLoop,LICMSafetyInfo * SafetyInfo)650 static bool isGuaranteedToExecute(Instruction &Inst, DominatorTree *DT,
651                                   Loop *CurLoop, LICMSafetyInfo * SafetyInfo) {
652 
653   // We have to check to make sure that the instruction dominates all
654   // of the exit blocks.  If it doesn't, then there is a path out of the loop
655   // which does not execute this instruction, so we can't hoist it.
656 
657   // If the instruction is in the header block for the loop (which is very
658   // common), it is always guaranteed to dominate the exit blocks.  Since this
659   // is a common case, and can save some work, check it now.
660   if (Inst.getParent() == CurLoop->getHeader())
661     // If there's a throw in the header block, we can't guarantee we'll reach
662     // Inst.
663     return !SafetyInfo->HeaderMayThrow;
664 
665   // Somewhere in this loop there is an instruction which may throw and make us
666   // exit the loop.
667   if (SafetyInfo->MayThrow)
668     return false;
669 
670   // Get the exit blocks for the current loop.
671   SmallVector<BasicBlock*, 8> ExitBlocks;
672   CurLoop->getExitBlocks(ExitBlocks);
673 
674   // Verify that the block dominates each of the exit blocks of the loop.
675   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
676     if (!DT->dominates(Inst.getParent(), ExitBlocks[i]))
677       return false;
678 
679   // As a degenerate case, if the loop is statically infinite then we haven't
680   // proven anything since there are no exit blocks.
681   if (ExitBlocks.empty())
682     return false;
683 
684   return true;
685 }
686 
687 namespace {
688   class LoopPromoter : public LoadAndStorePromoter {
689     Value *SomePtr;  // Designated pointer to store to.
690     SmallPtrSetImpl<Value*> &PointerMustAliases;
691     SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
692     SmallVectorImpl<Instruction*> &LoopInsertPts;
693     PredIteratorCache &PredCache;
694     AliasSetTracker &AST;
695     LoopInfo &LI;
696     DebugLoc DL;
697     int Alignment;
698     AAMDNodes AATags;
699 
maybeInsertLCSSAPHI(Value * V,BasicBlock * BB) const700     Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
701       if (Instruction *I = dyn_cast<Instruction>(V))
702         if (Loop *L = LI.getLoopFor(I->getParent()))
703           if (!L->contains(BB)) {
704             // We need to create an LCSSA PHI node for the incoming value and
705             // store that.
706             PHINode *PN = PHINode::Create(
707                 I->getType(), PredCache.GetNumPreds(BB),
708                 I->getName() + ".lcssa", BB->begin());
709             for (BasicBlock **PI = PredCache.GetPreds(BB); *PI; ++PI)
710               PN->addIncoming(I, *PI);
711             return PN;
712           }
713       return V;
714     }
715 
716   public:
LoopPromoter(Value * SP,const SmallVectorImpl<Instruction * > & Insts,SSAUpdater & S,SmallPtrSetImpl<Value * > & PMA,SmallVectorImpl<BasicBlock * > & LEB,SmallVectorImpl<Instruction * > & LIP,PredIteratorCache & PIC,AliasSetTracker & ast,LoopInfo & li,DebugLoc dl,int alignment,const AAMDNodes & AATags)717     LoopPromoter(Value *SP, const SmallVectorImpl<Instruction *> &Insts,
718                  SSAUpdater &S, SmallPtrSetImpl<Value *> &PMA,
719                  SmallVectorImpl<BasicBlock *> &LEB,
720                  SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
721                  AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
722                  const AAMDNodes &AATags)
723         : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
724           LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
725           LI(li), DL(dl), Alignment(alignment), AATags(AATags) {}
726 
isInstInList(Instruction * I,const SmallVectorImpl<Instruction * > &) const727     bool isInstInList(Instruction *I,
728                       const SmallVectorImpl<Instruction*> &) const override {
729       Value *Ptr;
730       if (LoadInst *LI = dyn_cast<LoadInst>(I))
731         Ptr = LI->getOperand(0);
732       else
733         Ptr = cast<StoreInst>(I)->getPointerOperand();
734       return PointerMustAliases.count(Ptr);
735     }
736 
doExtraRewritesBeforeFinalDeletion() const737     void doExtraRewritesBeforeFinalDeletion() const override {
738       // Insert stores after in the loop exit blocks.  Each exit block gets a
739       // store of the live-out values that feed them.  Since we've already told
740       // the SSA updater about the defs in the loop and the preheader
741       // definition, it is all set and we can start using it.
742       for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
743         BasicBlock *ExitBlock = LoopExitBlocks[i];
744         Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
745         LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
746         Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
747         Instruction *InsertPos = LoopInsertPts[i];
748         StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
749         NewSI->setAlignment(Alignment);
750         NewSI->setDebugLoc(DL);
751         if (AATags) NewSI->setAAMetadata(AATags);
752       }
753     }
754 
replaceLoadWithValue(LoadInst * LI,Value * V) const755     void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
756       // Update alias analysis.
757       AST.copyValue(LI, V);
758     }
instructionDeleted(Instruction * I) const759     void instructionDeleted(Instruction *I) const override {
760       AST.deleteValue(I);
761     }
762   };
763 } // end anon namespace
764 
765 /// Try to promote memory values to scalars by sinking stores out of the
766 /// loop and moving loads to before the loop.  We do this by looping over
767 /// the stores in the loop, looking for stores to Must pointers which are
768 /// loop invariant.
769 ///
promoteLoopAccessesToScalars(AliasSet & AS,SmallVectorImpl<BasicBlock * > & ExitBlocks,SmallVectorImpl<Instruction * > & InsertPts,PredIteratorCache & PIC,LoopInfo * LI,DominatorTree * DT,Loop * CurLoop,AliasSetTracker * CurAST,LICMSafetyInfo * SafetyInfo)770 bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
771                                         SmallVectorImpl<BasicBlock*>&ExitBlocks,
772                                         SmallVectorImpl<Instruction*>&InsertPts,
773                                         PredIteratorCache &PIC, LoopInfo *LI,
774                                         DominatorTree *DT, Loop *CurLoop,
775                                         AliasSetTracker *CurAST,
776                                         LICMSafetyInfo * SafetyInfo) {
777   // Verify inputs.
778   assert(LI != nullptr && DT != nullptr &&
779          CurLoop != nullptr && CurAST != nullptr &&
780          SafetyInfo != nullptr &&
781          "Unexpected Input to promoteLoopAccessesToScalars");
782   // Initially set Changed status to false.
783   bool Changed = false;
784   // We can promote this alias set if it has a store, if it is a "Must" alias
785   // set, if the pointer is loop invariant, and if we are not eliminating any
786   // volatile loads or stores.
787   if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
788       AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
789     return Changed;
790 
791   assert(!AS.empty() &&
792          "Must alias set should have at least one pointer element in it!");
793 
794   Value *SomePtr = AS.begin()->getValue();
795   BasicBlock * Preheader = CurLoop->getLoopPreheader();
796 
797   // It isn't safe to promote a load/store from the loop if the load/store is
798   // conditional.  For example, turning:
799   //
800   //    for () { if (c) *P += 1; }
801   //
802   // into:
803   //
804   //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
805   //
806   // is not safe, because *P may only be valid to access if 'c' is true.
807   //
808   // It is safe to promote P if all uses are direct load/stores and if at
809   // least one is guaranteed to be executed.
810   bool GuaranteedToExecute = false;
811 
812   SmallVector<Instruction*, 64> LoopUses;
813   SmallPtrSet<Value*, 4> PointerMustAliases;
814 
815   // We start with an alignment of one and try to find instructions that allow
816   // us to prove better alignment.
817   unsigned Alignment = 1;
818   AAMDNodes AATags;
819   bool HasDedicatedExits = CurLoop->hasDedicatedExits();
820 
821   // Check that all of the pointers in the alias set have the same type.  We
822   // cannot (yet) promote a memory location that is loaded and stored in
823   // different sizes.  While we are at it, collect alignment and AA info.
824   for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
825     Value *ASIV = ASI->getValue();
826     PointerMustAliases.insert(ASIV);
827 
828     // Check that all of the pointers in the alias set have the same type.  We
829     // cannot (yet) promote a memory location that is loaded and stored in
830     // different sizes.
831     if (SomePtr->getType() != ASIV->getType())
832       return Changed;
833 
834     for (User *U : ASIV->users()) {
835       // Ignore instructions that are outside the loop.
836       Instruction *UI = dyn_cast<Instruction>(U);
837       if (!UI || !CurLoop->contains(UI))
838         continue;
839 
840       // If there is an non-load/store instruction in the loop, we can't promote
841       // it.
842       if (LoadInst *load = dyn_cast<LoadInst>(UI)) {
843         assert(!load->isVolatile() && "AST broken");
844         if (!load->isSimple())
845           return Changed;
846       } else if (StoreInst *store = dyn_cast<StoreInst>(UI)) {
847         // Stores *of* the pointer are not interesting, only stores *to* the
848         // pointer.
849         if (UI->getOperand(1) != ASIV)
850           continue;
851         assert(!store->isVolatile() && "AST broken");
852         if (!store->isSimple())
853           return Changed;
854         // Don't sink stores from loops without dedicated block exits. Exits
855         // containing indirect branches are not transformed by loop simplify,
856         // make sure we catch that. An additional load may be generated in the
857         // preheader for SSA updater, so also avoid sinking when no preheader
858         // is available.
859         if (!HasDedicatedExits || !Preheader)
860           return Changed;
861 
862         // Note that we only check GuaranteedToExecute inside the store case
863         // so that we do not introduce stores where they did not exist before
864         // (which would break the LLVM concurrency model).
865 
866         // If the alignment of this instruction allows us to specify a more
867         // restrictive (and performant) alignment and if we are sure this
868         // instruction will be executed, update the alignment.
869         // Larger is better, with the exception of 0 being the best alignment.
870         unsigned InstAlignment = store->getAlignment();
871         if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0)
872           if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
873             GuaranteedToExecute = true;
874             Alignment = InstAlignment;
875           }
876 
877         if (!GuaranteedToExecute)
878           GuaranteedToExecute = isGuaranteedToExecute(*UI, DT,
879                                                       CurLoop, SafetyInfo);
880 
881       } else
882         return Changed; // Not a load or store.
883 
884       // Merge the AA tags.
885       if (LoopUses.empty()) {
886         // On the first load/store, just take its AA tags.
887         UI->getAAMetadata(AATags);
888       } else if (AATags) {
889         UI->getAAMetadata(AATags, /* Merge = */ true);
890       }
891 
892       LoopUses.push_back(UI);
893     }
894   }
895 
896   // If there isn't a guaranteed-to-execute instruction, we can't promote.
897   if (!GuaranteedToExecute)
898     return Changed;
899 
900   // Otherwise, this is safe to promote, lets do it!
901   DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
902   Changed = true;
903   ++NumPromoted;
904 
905   // Grab a debug location for the inserted loads/stores; given that the
906   // inserted loads/stores have little relation to the original loads/stores,
907   // this code just arbitrarily picks a location from one, since any debug
908   // location is better than none.
909   DebugLoc DL = LoopUses[0]->getDebugLoc();
910 
911   // Figure out the loop exits and their insertion points, if this is the
912   // first promotion.
913   if (ExitBlocks.empty()) {
914     CurLoop->getUniqueExitBlocks(ExitBlocks);
915     InsertPts.resize(ExitBlocks.size());
916     for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
917       InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt();
918   }
919 
920   // We use the SSAUpdater interface to insert phi nodes as required.
921   SmallVector<PHINode*, 16> NewPHIs;
922   SSAUpdater SSA(&NewPHIs);
923   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
924                         InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
925 
926   // Set up the preheader to have a definition of the value.  It is the live-out
927   // value from the preheader that uses in the loop will use.
928   LoadInst *PreheaderLoad =
929     new LoadInst(SomePtr, SomePtr->getName()+".promoted",
930                  Preheader->getTerminator());
931   PreheaderLoad->setAlignment(Alignment);
932   PreheaderLoad->setDebugLoc(DL);
933   if (AATags) PreheaderLoad->setAAMetadata(AATags);
934   SSA.AddAvailableValue(Preheader, PreheaderLoad);
935 
936   // Rewrite all the loads in the loop and remember all the definitions from
937   // stores in the loop.
938   Promoter.run(LoopUses);
939 
940   // If the SSAUpdater didn't use the load in the preheader, just zap it now.
941   if (PreheaderLoad->use_empty())
942     PreheaderLoad->eraseFromParent();
943 
944   return Changed;
945 }
946 
947 /// Simple Analysis hook. Clone alias set info.
948 ///
cloneBasicBlockAnalysis(BasicBlock * From,BasicBlock * To,Loop * L)949 void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
950   AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
951   if (!AST)
952     return;
953 
954   AST->copyValue(From, To);
955 }
956 
957 /// Simple Analysis hook. Delete value V from alias set
958 ///
deleteAnalysisValue(Value * V,Loop * L)959 void LICM::deleteAnalysisValue(Value *V, Loop *L) {
960   AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
961   if (!AST)
962     return;
963 
964   AST->deleteValue(V);
965 }
966 
967 /// Simple Analysis hook. Delete value L from alias set map.
968 ///
deleteAnalysisLoop(Loop * L)969 void LICM::deleteAnalysisLoop(Loop *L) {
970   AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
971   if (!AST)
972     return;
973 
974   delete AST;
975   LoopToAliasSetMap.erase(L);
976 }
977 
978 
979 /// Return true if the body of this loop may store into the memory
980 /// location pointed to by V.
981 ///
pointerInvalidatedByLoop(Value * V,uint64_t Size,const AAMDNodes & AAInfo,AliasSetTracker * CurAST)982 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
983                                      const AAMDNodes &AAInfo,
984                                      AliasSetTracker *CurAST) {
985   // Check to see if any of the basic blocks in CurLoop invalidate *V.
986   return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod();
987 }
988 
989 /// Little predicate that returns true if the specified basic block is in
990 /// a subloop of the current one, not the current one itself.
991 ///
inSubLoop(BasicBlock * BB,Loop * CurLoop,LoopInfo * LI)992 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
993   assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
994   return LI->getLoopFor(BB) != CurLoop;
995 }
996 
997