1 //===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass transforms loops by placing phi nodes at the end of the loops for
11 // all values that are live across the loop boundary. For example, it turns
12 // the left into the right code:
13 //
14 // for (...) for (...)
15 // if (c) if (c)
16 // X1 = ... X1 = ...
17 // else else
18 // X2 = ... X2 = ...
19 // X3 = phi(X1, X2) X3 = phi(X1, X2)
20 // ... = X3 + 4 X4 = phi(X3)
21 // ... = X4 + 4
22 //
23 // This is still valid LLVM; the extra phi nodes are purely redundant, and will
24 // be trivially eliminated by InstCombine. The major benefit of this
25 // transformation is that it makes many other loop optimizations, such as
26 // LoopUnswitching, simpler.
27 //
28 //===----------------------------------------------------------------------===//
29
30 #include "llvm/Transforms/Utils/LCSSA.h"
31 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/Analysis/AliasAnalysis.h"
34 #include "llvm/Analysis/BasicAliasAnalysis.h"
35 #include "llvm/Analysis/GlobalsModRef.h"
36 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/Analysis/ScalarEvolution.h"
38 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
39 #include "llvm/Transforms/Utils/Local.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/Dominators.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicInst.h"
45 #include "llvm/IR/PredIteratorCache.h"
46 #include "llvm/Pass.h"
47 #include "llvm/Transforms/Utils.h"
48 #include "llvm/Transforms/Utils/LoopUtils.h"
49 #include "llvm/Transforms/Utils/SSAUpdater.h"
50 using namespace llvm;
51
52 #define DEBUG_TYPE "lcssa"
53
54 STATISTIC(NumLCSSA, "Number of live out of a loop variables");
55
56 #ifdef EXPENSIVE_CHECKS
57 static bool VerifyLoopLCSSA = true;
58 #else
59 static bool VerifyLoopLCSSA = false;
60 #endif
61 static cl::opt<bool, true>
62 VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA),
63 cl::Hidden,
64 cl::desc("Verify loop lcssa form (time consuming)"));
65
66 /// Return true if the specified block is in the list.
isExitBlock(BasicBlock * BB,const SmallVectorImpl<BasicBlock * > & ExitBlocks)67 static bool isExitBlock(BasicBlock *BB,
68 const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
69 return is_contained(ExitBlocks, BB);
70 }
71
72 /// For every instruction from the worklist, check to see if it has any uses
73 /// that are outside the current loop. If so, insert LCSSA PHI nodes and
74 /// rewrite the uses.
formLCSSAForInstructions(SmallVectorImpl<Instruction * > & Worklist,DominatorTree & DT,LoopInfo & LI)75 bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
76 DominatorTree &DT, LoopInfo &LI) {
77 SmallVector<Use *, 16> UsesToRewrite;
78 SmallSetVector<PHINode *, 16> PHIsToRemove;
79 PredIteratorCache PredCache;
80 bool Changed = false;
81
82 // Cache the Loop ExitBlocks across this loop. We expect to get a lot of
83 // instructions within the same loops, computing the exit blocks is
84 // expensive, and we're not mutating the loop structure.
85 SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks;
86
87 while (!Worklist.empty()) {
88 UsesToRewrite.clear();
89
90 Instruction *I = Worklist.pop_back_val();
91 assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist");
92 BasicBlock *InstBB = I->getParent();
93 Loop *L = LI.getLoopFor(InstBB);
94 assert(L && "Instruction belongs to a BB that's not part of a loop");
95 if (!LoopExitBlocks.count(L))
96 L->getExitBlocks(LoopExitBlocks[L]);
97 assert(LoopExitBlocks.count(L));
98 const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L];
99
100 if (ExitBlocks.empty())
101 continue;
102
103 for (Use &U : I->uses()) {
104 Instruction *User = cast<Instruction>(U.getUser());
105 BasicBlock *UserBB = User->getParent();
106 if (auto *PN = dyn_cast<PHINode>(User))
107 UserBB = PN->getIncomingBlock(U);
108
109 if (InstBB != UserBB && !L->contains(UserBB))
110 UsesToRewrite.push_back(&U);
111 }
112
113 // If there are no uses outside the loop, exit with no change.
114 if (UsesToRewrite.empty())
115 continue;
116
117 ++NumLCSSA; // We are applying the transformation
118
119 // Invoke instructions are special in that their result value is not
120 // available along their unwind edge. The code below tests to see whether
121 // DomBB dominates the value, so adjust DomBB to the normal destination
122 // block, which is effectively where the value is first usable.
123 BasicBlock *DomBB = InstBB;
124 if (auto *Inv = dyn_cast<InvokeInst>(I))
125 DomBB = Inv->getNormalDest();
126
127 DomTreeNode *DomNode = DT.getNode(DomBB);
128
129 SmallVector<PHINode *, 16> AddedPHIs;
130 SmallVector<PHINode *, 8> PostProcessPHIs;
131
132 SmallVector<PHINode *, 4> InsertedPHIs;
133 SSAUpdater SSAUpdate(&InsertedPHIs);
134 SSAUpdate.Initialize(I->getType(), I->getName());
135
136 // Insert the LCSSA phi's into all of the exit blocks dominated by the
137 // value, and add them to the Phi's map.
138 for (BasicBlock *ExitBB : ExitBlocks) {
139 if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
140 continue;
141
142 // If we already inserted something for this BB, don't reprocess it.
143 if (SSAUpdate.HasValueForBlock(ExitBB))
144 continue;
145
146 PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
147 I->getName() + ".lcssa", &ExitBB->front());
148 // Get the debug location from the original instruction.
149 PN->setDebugLoc(I->getDebugLoc());
150 // Add inputs from inside the loop for this PHI.
151 for (BasicBlock *Pred : PredCache.get(ExitBB)) {
152 PN->addIncoming(I, Pred);
153
154 // If the exit block has a predecessor not within the loop, arrange for
155 // the incoming value use corresponding to that predecessor to be
156 // rewritten in terms of a different LCSSA PHI.
157 if (!L->contains(Pred))
158 UsesToRewrite.push_back(
159 &PN->getOperandUse(PN->getOperandNumForIncomingValue(
160 PN->getNumIncomingValues() - 1)));
161 }
162
163 AddedPHIs.push_back(PN);
164
165 // Remember that this phi makes the value alive in this block.
166 SSAUpdate.AddAvailableValue(ExitBB, PN);
167
168 // LoopSimplify might fail to simplify some loops (e.g. when indirect
169 // branches are involved). In such situations, it might happen that an
170 // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we
171 // create PHIs in such an exit block, we are also inserting PHIs into L2's
172 // header. This could break LCSSA form for L2 because these inserted PHIs
173 // can also have uses outside of L2. Remember all PHIs in such situation
174 // as to revisit than later on. FIXME: Remove this if indirectbr support
175 // into LoopSimplify gets improved.
176 if (auto *OtherLoop = LI.getLoopFor(ExitBB))
177 if (!L->contains(OtherLoop))
178 PostProcessPHIs.push_back(PN);
179 }
180
181 // Rewrite all uses outside the loop in terms of the new PHIs we just
182 // inserted.
183 for (Use *UseToRewrite : UsesToRewrite) {
184 // If this use is in an exit block, rewrite to use the newly inserted PHI.
185 // This is required for correctness because SSAUpdate doesn't handle uses
186 // in the same block. It assumes the PHI we inserted is at the end of the
187 // block.
188 Instruction *User = cast<Instruction>(UseToRewrite->getUser());
189 BasicBlock *UserBB = User->getParent();
190 if (auto *PN = dyn_cast<PHINode>(User))
191 UserBB = PN->getIncomingBlock(*UseToRewrite);
192
193 if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
194 // Tell the VHs that the uses changed. This updates SCEV's caches.
195 if (UseToRewrite->get()->hasValueHandle())
196 ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
197 UseToRewrite->set(&UserBB->front());
198 continue;
199 }
200
201 // Otherwise, do full PHI insertion.
202 SSAUpdate.RewriteUse(*UseToRewrite);
203 }
204
205 SmallVector<DbgValueInst *, 4> DbgValues;
206 llvm::findDbgValues(DbgValues, I);
207
208 // Update pre-existing debug value uses that reside outside the loop.
209 auto &Ctx = I->getContext();
210 for (auto DVI : DbgValues) {
211 BasicBlock *UserBB = DVI->getParent();
212 if (InstBB == UserBB || L->contains(UserBB))
213 continue;
214 // We currently only handle debug values residing in blocks where we have
215 // inserted a PHI instruction.
216 if (Value *V = SSAUpdate.FindValueForBlock(UserBB))
217 DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V)));
218 }
219
220 // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
221 // to post-process them to keep LCSSA form.
222 for (PHINode *InsertedPN : InsertedPHIs) {
223 if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
224 if (!L->contains(OtherLoop))
225 PostProcessPHIs.push_back(InsertedPN);
226 }
227
228 // Post process PHI instructions that were inserted into another disjoint
229 // loop and update their exits properly.
230 for (auto *PostProcessPN : PostProcessPHIs)
231 if (!PostProcessPN->use_empty())
232 Worklist.push_back(PostProcessPN);
233
234 // Keep track of PHI nodes that we want to remove because they did not have
235 // any uses rewritten. If the new PHI is used, store it so that we can
236 // try to propagate dbg.value intrinsics to it.
237 SmallVector<PHINode *, 2> NeedDbgValues;
238 for (PHINode *PN : AddedPHIs)
239 if (PN->use_empty())
240 PHIsToRemove.insert(PN);
241 else
242 NeedDbgValues.push_back(PN);
243 insertDebugValuesForPHIs(InstBB, NeedDbgValues);
244 Changed = true;
245 }
246 // Remove PHI nodes that did not have any uses rewritten. We need to redo the
247 // use_empty() check here, because even if the PHI node wasn't used when added
248 // to PHIsToRemove, later added PHI nodes can be using it. This cleanup is
249 // not guaranteed to handle trees/cycles of PHI nodes that only are used by
250 // each other. Such situations has only been noticed when the input IR
251 // contains unreachable code, and leaving some extra redundant PHI nodes in
252 // such situations is considered a minor problem.
253 for (PHINode *PN : PHIsToRemove)
254 if (PN->use_empty())
255 PN->eraseFromParent();
256 return Changed;
257 }
258
259 // Compute the set of BasicBlocks in the loop `L` dominating at least one exit.
computeBlocksDominatingExits(Loop & L,DominatorTree & DT,SmallVector<BasicBlock *,8> & ExitBlocks,SmallSetVector<BasicBlock *,8> & BlocksDominatingExits)260 static void computeBlocksDominatingExits(
261 Loop &L, DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
262 SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) {
263 SmallVector<BasicBlock *, 8> BBWorklist;
264
265 // We start from the exit blocks, as every block trivially dominates itself
266 // (not strictly).
267 for (BasicBlock *BB : ExitBlocks)
268 BBWorklist.push_back(BB);
269
270 while (!BBWorklist.empty()) {
271 BasicBlock *BB = BBWorklist.pop_back_val();
272
273 // Check if this is a loop header. If this is the case, we're done.
274 if (L.getHeader() == BB)
275 continue;
276
277 // Otherwise, add its immediate predecessor in the dominator tree to the
278 // worklist, unless we visited it already.
279 BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock();
280
281 // Exit blocks can have an immediate dominator not beloinging to the
282 // loop. For an exit block to be immediately dominated by another block
283 // outside the loop, it implies not all paths from that dominator, to the
284 // exit block, go through the loop.
285 // Example:
286 //
287 // |---- A
288 // | |
289 // | B<--
290 // | | |
291 // |---> C --
292 // |
293 // D
294 //
295 // C is the exit block of the loop and it's immediately dominated by A,
296 // which doesn't belong to the loop.
297 if (!L.contains(IDomBB))
298 continue;
299
300 if (BlocksDominatingExits.insert(IDomBB))
301 BBWorklist.push_back(IDomBB);
302 }
303 }
304
formLCSSA(Loop & L,DominatorTree & DT,LoopInfo * LI,ScalarEvolution * SE)305 bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
306 ScalarEvolution *SE) {
307 bool Changed = false;
308
309 SmallVector<BasicBlock *, 8> ExitBlocks;
310 L.getExitBlocks(ExitBlocks);
311 if (ExitBlocks.empty())
312 return false;
313
314 SmallSetVector<BasicBlock *, 8> BlocksDominatingExits;
315
316 // We want to avoid use-scanning leveraging dominance informations.
317 // If a block doesn't dominate any of the loop exits, the none of the values
318 // defined in the loop can be used outside.
319 // We compute the set of blocks fullfilling the conditions in advance
320 // walking the dominator tree upwards until we hit a loop header.
321 computeBlocksDominatingExits(L, DT, ExitBlocks, BlocksDominatingExits);
322
323 SmallVector<Instruction *, 8> Worklist;
324
325 // Look at all the instructions in the loop, checking to see if they have uses
326 // outside the loop. If so, put them into the worklist to rewrite those uses.
327 for (BasicBlock *BB : BlocksDominatingExits) {
328 for (Instruction &I : *BB) {
329 // Reject two common cases fast: instructions with no uses (like stores)
330 // and instructions with one use that is in the same block as this.
331 if (I.use_empty() ||
332 (I.hasOneUse() && I.user_back()->getParent() == BB &&
333 !isa<PHINode>(I.user_back())))
334 continue;
335
336 // Tokens cannot be used in PHI nodes, so we skip over them.
337 // We can run into tokens which are live out of a loop with catchswitch
338 // instructions in Windows EH if the catchswitch has one catchpad which
339 // is inside the loop and another which is not.
340 if (I.getType()->isTokenTy())
341 continue;
342
343 Worklist.push_back(&I);
344 }
345 }
346 Changed = formLCSSAForInstructions(Worklist, DT, *LI);
347
348 // If we modified the code, remove any caches about the loop from SCEV to
349 // avoid dangling entries.
350 // FIXME: This is a big hammer, can we clear the cache more selectively?
351 if (SE && Changed)
352 SE->forgetLoop(&L);
353
354 assert(L.isLCSSAForm(DT));
355
356 return Changed;
357 }
358
359 /// Process a loop nest depth first.
formLCSSARecursively(Loop & L,DominatorTree & DT,LoopInfo * LI,ScalarEvolution * SE)360 bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
361 ScalarEvolution *SE) {
362 bool Changed = false;
363
364 // Recurse depth-first through inner loops.
365 for (Loop *SubLoop : L.getSubLoops())
366 Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE);
367
368 Changed |= formLCSSA(L, DT, LI, SE);
369 return Changed;
370 }
371
372 /// Process all loops in the function, inner-most out.
formLCSSAOnAllLoops(LoopInfo * LI,DominatorTree & DT,ScalarEvolution * SE)373 static bool formLCSSAOnAllLoops(LoopInfo *LI, DominatorTree &DT,
374 ScalarEvolution *SE) {
375 bool Changed = false;
376 for (auto &L : *LI)
377 Changed |= formLCSSARecursively(*L, DT, LI, SE);
378 return Changed;
379 }
380
381 namespace {
382 struct LCSSAWrapperPass : public FunctionPass {
383 static char ID; // Pass identification, replacement for typeid
LCSSAWrapperPass__anon010b3f9e0111::LCSSAWrapperPass384 LCSSAWrapperPass() : FunctionPass(ID) {
385 initializeLCSSAWrapperPassPass(*PassRegistry::getPassRegistry());
386 }
387
388 // Cached analysis information for the current function.
389 DominatorTree *DT;
390 LoopInfo *LI;
391 ScalarEvolution *SE;
392
393 bool runOnFunction(Function &F) override;
verifyAnalysis__anon010b3f9e0111::LCSSAWrapperPass394 void verifyAnalysis() const override {
395 // This check is very expensive. On the loop intensive compiles it may cause
396 // up to 10x slowdown. Currently it's disabled by default. LPPassManager
397 // always does limited form of the LCSSA verification. Similar reasoning
398 // was used for the LoopInfo verifier.
399 if (VerifyLoopLCSSA) {
400 assert(all_of(*LI,
401 [&](Loop *L) {
402 return L->isRecursivelyLCSSAForm(*DT, *LI);
403 }) &&
404 "LCSSA form is broken!");
405 }
406 };
407
408 /// This transformation requires natural loop information & requires that
409 /// loop preheaders be inserted into the CFG. It maintains both of these,
410 /// as well as the CFG. It also requires dominator information.
getAnalysisUsage__anon010b3f9e0111::LCSSAWrapperPass411 void getAnalysisUsage(AnalysisUsage &AU) const override {
412 AU.setPreservesCFG();
413
414 AU.addRequired<DominatorTreeWrapperPass>();
415 AU.addRequired<LoopInfoWrapperPass>();
416 AU.addPreservedID(LoopSimplifyID);
417 AU.addPreserved<AAResultsWrapperPass>();
418 AU.addPreserved<BasicAAWrapperPass>();
419 AU.addPreserved<GlobalsAAWrapperPass>();
420 AU.addPreserved<ScalarEvolutionWrapperPass>();
421 AU.addPreserved<SCEVAAWrapperPass>();
422
423 // This is needed to perform LCSSA verification inside LPPassManager
424 AU.addRequired<LCSSAVerificationPass>();
425 AU.addPreserved<LCSSAVerificationPass>();
426 }
427 };
428 }
429
430 char LCSSAWrapperPass::ID = 0;
431 INITIALIZE_PASS_BEGIN(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
432 false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)433 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
434 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
435 INITIALIZE_PASS_DEPENDENCY(LCSSAVerificationPass)
436 INITIALIZE_PASS_END(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
437 false, false)
438
439 Pass *llvm::createLCSSAPass() { return new LCSSAWrapperPass(); }
440 char &llvm::LCSSAID = LCSSAWrapperPass::ID;
441
442 /// Transform \p F into loop-closed SSA form.
runOnFunction(Function & F)443 bool LCSSAWrapperPass::runOnFunction(Function &F) {
444 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
445 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
446 auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
447 SE = SEWP ? &SEWP->getSE() : nullptr;
448
449 return formLCSSAOnAllLoops(LI, *DT, SE);
450 }
451
run(Function & F,FunctionAnalysisManager & AM)452 PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) {
453 auto &LI = AM.getResult<LoopAnalysis>(F);
454 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
455 auto *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
456 if (!formLCSSAOnAllLoops(&LI, DT, SE))
457 return PreservedAnalyses::all();
458
459 PreservedAnalyses PA;
460 PA.preserveSet<CFGAnalyses>();
461 PA.preserve<BasicAA>();
462 PA.preserve<GlobalsAA>();
463 PA.preserve<SCEVAA>();
464 PA.preserve<ScalarEvolutionAnalysis>();
465 return PA;
466 }
467