1 //===- StructurizeCFG.cpp -------------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/MapVector.h"
12 #include "llvm/ADT/PostOrderIterator.h"
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/ADT/SmallPtrSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/Analysis/DivergenceAnalysis.h"
17 #include "llvm/Analysis/LoopInfo.h"
18 #include "llvm/Analysis/RegionInfo.h"
19 #include "llvm/Analysis/RegionIterator.h"
20 #include "llvm/Analysis/RegionPass.h"
21 #include "llvm/IR/Argument.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/CFG.h"
24 #include "llvm/IR/Constant.h"
25 #include "llvm/IR/Constants.h"
26 #include "llvm/IR/Dominators.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/InstrTypes.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/IR/PatternMatch.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Use.h"
35 #include "llvm/IR/User.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Pass.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Transforms/Scalar.h"
43 #include "llvm/Transforms/Utils.h"
44 #include "llvm/Transforms/Utils/SSAUpdater.h"
45 #include <algorithm>
46 #include <cassert>
47 #include <utility>
48
49 using namespace llvm;
50 using namespace llvm::PatternMatch;
51
52 #define DEBUG_TYPE "structurizecfg"
53
54 // The name for newly created blocks.
55 static const char *const FlowBlockName = "Flow";
56
57 namespace {
58
59 static cl::opt<bool> ForceSkipUniformRegions(
60 "structurizecfg-skip-uniform-regions",
61 cl::Hidden,
62 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
63 cl::init(false));
64
65 // Definition of the complex types used in this pass.
66
67 using BBValuePair = std::pair<BasicBlock *, Value *>;
68
69 using RNVector = SmallVector<RegionNode *, 8>;
70 using BBVector = SmallVector<BasicBlock *, 8>;
71 using BranchVector = SmallVector<BranchInst *, 8>;
72 using BBValueVector = SmallVector<BBValuePair, 2>;
73
74 using BBSet = SmallPtrSet<BasicBlock *, 8>;
75
76 using PhiMap = MapVector<PHINode *, BBValueVector>;
77 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
78
79 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
80 using BBPredicates = DenseMap<BasicBlock *, Value *>;
81 using PredMap = DenseMap<BasicBlock *, BBPredicates>;
82 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
83
84 /// Finds the nearest common dominator of a set of BasicBlocks.
85 ///
86 /// For every BB you add to the set, you can specify whether we "remember" the
87 /// block. When you get the common dominator, you can also ask whether it's one
88 /// of the blocks we remembered.
89 class NearestCommonDominator {
90 DominatorTree *DT;
91 BasicBlock *Result = nullptr;
92 bool ResultIsRemembered = false;
93
94 /// Add BB to the resulting dominator.
addBlock(BasicBlock * BB,bool Remember)95 void addBlock(BasicBlock *BB, bool Remember) {
96 if (!Result) {
97 Result = BB;
98 ResultIsRemembered = Remember;
99 return;
100 }
101
102 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
103 if (NewResult != Result)
104 ResultIsRemembered = false;
105 if (NewResult == BB)
106 ResultIsRemembered |= Remember;
107 Result = NewResult;
108 }
109
110 public:
NearestCommonDominator(DominatorTree * DomTree)111 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
112
addBlock(BasicBlock * BB)113 void addBlock(BasicBlock *BB) {
114 addBlock(BB, /* Remember = */ false);
115 }
116
addAndRememberBlock(BasicBlock * BB)117 void addAndRememberBlock(BasicBlock *BB) {
118 addBlock(BB, /* Remember = */ true);
119 }
120
121 /// Get the nearest common dominator of all the BBs added via addBlock() and
122 /// addAndRememberBlock().
result()123 BasicBlock *result() { return Result; }
124
125 /// Is the BB returned by getResult() one of the blocks we added to the set
126 /// with addAndRememberBlock()?
resultIsRememberedBlock()127 bool resultIsRememberedBlock() { return ResultIsRemembered; }
128 };
129
130 /// Transforms the control flow graph on one single entry/exit region
131 /// at a time.
132 ///
133 /// After the transform all "If"/"Then"/"Else" style control flow looks like
134 /// this:
135 ///
136 /// \verbatim
137 /// 1
138 /// ||
139 /// | |
140 /// 2 |
141 /// | /
142 /// |/
143 /// 3
144 /// || Where:
145 /// | | 1 = "If" block, calculates the condition
146 /// 4 | 2 = "Then" subregion, runs if the condition is true
147 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
148 /// |/ 4 = "Else" optional subregion, runs if the condition is false
149 /// 5 5 = "End" block, also rejoins the control flow
150 /// \endverbatim
151 ///
152 /// Control flow is expressed as a branch where the true exit goes into the
153 /// "Then"/"Else" region, while the false exit skips the region
154 /// The condition for the optional "Else" region is expressed as a PHI node.
155 /// The incoming values of the PHI node are true for the "If" edge and false
156 /// for the "Then" edge.
157 ///
158 /// Additionally to that even complicated loops look like this:
159 ///
160 /// \verbatim
161 /// 1
162 /// ||
163 /// | |
164 /// 2 ^ Where:
165 /// | / 1 = "Entry" block
166 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
167 /// 3 3 = "Flow" block, with back edge to entry block
168 /// |
169 /// \endverbatim
170 ///
171 /// The back edge of the "Flow" block is always on the false side of the branch
172 /// while the true side continues the general flow. So the loop condition
173 /// consist of a network of PHI nodes where the true incoming values expresses
174 /// breaks and the false values expresses continue states.
175 class StructurizeCFG : public RegionPass {
176 bool SkipUniformRegions;
177
178 Type *Boolean;
179 ConstantInt *BoolTrue;
180 ConstantInt *BoolFalse;
181 UndefValue *BoolUndef;
182
183 Function *Func;
184 Region *ParentRegion;
185
186 DivergenceAnalysis *DA;
187 DominatorTree *DT;
188 LoopInfo *LI;
189
190 SmallVector<RegionNode *, 8> Order;
191 BBSet Visited;
192
193 BBPhiMap DeletedPhis;
194 BB2BBVecMap AddedPhis;
195
196 PredMap Predicates;
197 BranchVector Conditions;
198
199 BB2BBMap Loops;
200 PredMap LoopPreds;
201 BranchVector LoopConds;
202
203 RegionNode *PrevNode;
204
205 void orderNodes();
206
207 Loop *getAdjustedLoop(RegionNode *RN);
208 unsigned getAdjustedLoopDepth(RegionNode *RN);
209
210 void analyzeLoops(RegionNode *N);
211
212 Value *invert(Value *Condition);
213
214 Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
215
216 void gatherPredicates(RegionNode *N);
217
218 void collectInfos();
219
220 void insertConditions(bool Loops);
221
222 void delPhiValues(BasicBlock *From, BasicBlock *To);
223
224 void addPhiValues(BasicBlock *From, BasicBlock *To);
225
226 void setPhiValues();
227
228 void killTerminator(BasicBlock *BB);
229
230 void changeExit(RegionNode *Node, BasicBlock *NewExit,
231 bool IncludeDominator);
232
233 BasicBlock *getNextFlow(BasicBlock *Dominator);
234
235 BasicBlock *needPrefix(bool NeedEmpty);
236
237 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
238
239 void setPrevNode(BasicBlock *BB);
240
241 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
242
243 bool isPredictableTrue(RegionNode *Node);
244
245 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
246
247 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
248
249 void createFlow();
250
251 void rebuildSSA();
252
253 public:
254 static char ID;
255
StructurizeCFG(bool SkipUniformRegions_=false)256 explicit StructurizeCFG(bool SkipUniformRegions_ = false)
257 : RegionPass(ID),
258 SkipUniformRegions(SkipUniformRegions_) {
259 if (ForceSkipUniformRegions.getNumOccurrences())
260 SkipUniformRegions = ForceSkipUniformRegions.getValue();
261 initializeStructurizeCFGPass(*PassRegistry::getPassRegistry());
262 }
263
264 bool doInitialization(Region *R, RGPassManager &RGM) override;
265
266 bool runOnRegion(Region *R, RGPassManager &RGM) override;
267
getPassName() const268 StringRef getPassName() const override { return "Structurize control flow"; }
269
getAnalysisUsage(AnalysisUsage & AU) const270 void getAnalysisUsage(AnalysisUsage &AU) const override {
271 if (SkipUniformRegions)
272 AU.addRequired<DivergenceAnalysis>();
273 AU.addRequiredID(LowerSwitchID);
274 AU.addRequired<DominatorTreeWrapperPass>();
275 AU.addRequired<LoopInfoWrapperPass>();
276
277 AU.addPreserved<DominatorTreeWrapperPass>();
278 RegionPass::getAnalysisUsage(AU);
279 }
280 };
281
282 } // end anonymous namespace
283
284 char StructurizeCFG::ID = 0;
285
286 INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG",
287 false, false)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)288 INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
289 INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
290 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
291 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
292 INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG",
293 false, false)
294
295 /// Initialize the types and constants used in the pass
296 bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
297 LLVMContext &Context = R->getEntry()->getContext();
298
299 Boolean = Type::getInt1Ty(Context);
300 BoolTrue = ConstantInt::getTrue(Context);
301 BoolFalse = ConstantInt::getFalse(Context);
302 BoolUndef = UndefValue::get(Boolean);
303
304 return false;
305 }
306
307 /// Use the exit block to determine the loop if RN is a SubRegion.
getAdjustedLoop(RegionNode * RN)308 Loop *StructurizeCFG::getAdjustedLoop(RegionNode *RN) {
309 if (RN->isSubRegion()) {
310 Region *SubRegion = RN->getNodeAs<Region>();
311 return LI->getLoopFor(SubRegion->getExit());
312 }
313
314 return LI->getLoopFor(RN->getEntry());
315 }
316
317 /// Use the exit block to determine the loop depth if RN is a SubRegion.
getAdjustedLoopDepth(RegionNode * RN)318 unsigned StructurizeCFG::getAdjustedLoopDepth(RegionNode *RN) {
319 if (RN->isSubRegion()) {
320 Region *SubR = RN->getNodeAs<Region>();
321 return LI->getLoopDepth(SubR->getExit());
322 }
323
324 return LI->getLoopDepth(RN->getEntry());
325 }
326
327 /// Build up the general order of nodes
orderNodes()328 void StructurizeCFG::orderNodes() {
329 ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
330 SmallDenseMap<Loop*, unsigned, 8> LoopBlocks;
331
332 // The reverse post-order traversal of the list gives us an ordering close
333 // to what we want. The only problem with it is that sometimes backedges
334 // for outer loops will be visited before backedges for inner loops.
335 for (RegionNode *RN : RPOT) {
336 Loop *Loop = getAdjustedLoop(RN);
337 ++LoopBlocks[Loop];
338 }
339
340 unsigned CurrentLoopDepth = 0;
341 Loop *CurrentLoop = nullptr;
342 for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
343 RegionNode *RN = cast<RegionNode>(*I);
344 unsigned LoopDepth = getAdjustedLoopDepth(RN);
345
346 if (is_contained(Order, *I))
347 continue;
348
349 if (LoopDepth < CurrentLoopDepth) {
350 // Make sure we have visited all blocks in this loop before moving back to
351 // the outer loop.
352
353 auto LoopI = I;
354 while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) {
355 LoopI++;
356 if (getAdjustedLoop(cast<RegionNode>(*LoopI)) == CurrentLoop) {
357 --BlockCount;
358 Order.push_back(*LoopI);
359 }
360 }
361 }
362
363 CurrentLoop = getAdjustedLoop(RN);
364 if (CurrentLoop)
365 LoopBlocks[CurrentLoop]--;
366
367 CurrentLoopDepth = LoopDepth;
368 Order.push_back(*I);
369 }
370
371 // This pass originally used a post-order traversal and then operated on
372 // the list in reverse. Now that we are using a reverse post-order traversal
373 // rather than re-working the whole pass to operate on the list in order,
374 // we just reverse the list and continue to operate on it in reverse.
375 std::reverse(Order.begin(), Order.end());
376 }
377
378 /// Determine the end of the loops
analyzeLoops(RegionNode * N)379 void StructurizeCFG::analyzeLoops(RegionNode *N) {
380 if (N->isSubRegion()) {
381 // Test for exit as back edge
382 BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
383 if (Visited.count(Exit))
384 Loops[Exit] = N->getEntry();
385
386 } else {
387 // Test for successors as back edge
388 BasicBlock *BB = N->getNodeAs<BasicBlock>();
389 BranchInst *Term = cast<BranchInst>(BB->getTerminator());
390
391 for (BasicBlock *Succ : Term->successors())
392 if (Visited.count(Succ))
393 Loops[Succ] = BB;
394 }
395 }
396
397 /// Invert the given condition
invert(Value * Condition)398 Value *StructurizeCFG::invert(Value *Condition) {
399 // First: Check if it's a constant
400 if (Constant *C = dyn_cast<Constant>(Condition))
401 return ConstantExpr::getNot(C);
402
403 // Second: If the condition is already inverted, return the original value
404 Value *NotCondition;
405 if (match(Condition, m_Not(m_Value(NotCondition))))
406 return NotCondition;
407
408 if (Instruction *Inst = dyn_cast<Instruction>(Condition)) {
409 // Third: Check all the users for an invert
410 BasicBlock *Parent = Inst->getParent();
411 for (User *U : Condition->users())
412 if (Instruction *I = dyn_cast<Instruction>(U))
413 if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition))))
414 return I;
415
416 // Last option: Create a new instruction
417 return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
418 }
419
420 if (Argument *Arg = dyn_cast<Argument>(Condition)) {
421 BasicBlock &EntryBlock = Arg->getParent()->getEntryBlock();
422 return BinaryOperator::CreateNot(Condition,
423 Arg->getName() + ".inv",
424 EntryBlock.getTerminator());
425 }
426
427 llvm_unreachable("Unhandled condition to invert");
428 }
429
430 /// Build the condition for one edge
buildCondition(BranchInst * Term,unsigned Idx,bool Invert)431 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
432 bool Invert) {
433 Value *Cond = Invert ? BoolFalse : BoolTrue;
434 if (Term->isConditional()) {
435 Cond = Term->getCondition();
436
437 if (Idx != (unsigned)Invert)
438 Cond = invert(Cond);
439 }
440 return Cond;
441 }
442
443 /// Analyze the predecessors of each block and build up predicates
gatherPredicates(RegionNode * N)444 void StructurizeCFG::gatherPredicates(RegionNode *N) {
445 RegionInfo *RI = ParentRegion->getRegionInfo();
446 BasicBlock *BB = N->getEntry();
447 BBPredicates &Pred = Predicates[BB];
448 BBPredicates &LPred = LoopPreds[BB];
449
450 for (BasicBlock *P : predecessors(BB)) {
451 // Ignore it if it's a branch from outside into our region entry
452 if (!ParentRegion->contains(P))
453 continue;
454
455 Region *R = RI->getRegionFor(P);
456 if (R == ParentRegion) {
457 // It's a top level block in our region
458 BranchInst *Term = cast<BranchInst>(P->getTerminator());
459 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
460 BasicBlock *Succ = Term->getSuccessor(i);
461 if (Succ != BB)
462 continue;
463
464 if (Visited.count(P)) {
465 // Normal forward edge
466 if (Term->isConditional()) {
467 // Try to treat it like an ELSE block
468 BasicBlock *Other = Term->getSuccessor(!i);
469 if (Visited.count(Other) && !Loops.count(Other) &&
470 !Pred.count(Other) && !Pred.count(P)) {
471
472 Pred[Other] = BoolFalse;
473 Pred[P] = BoolTrue;
474 continue;
475 }
476 }
477 Pred[P] = buildCondition(Term, i, false);
478 } else {
479 // Back edge
480 LPred[P] = buildCondition(Term, i, true);
481 }
482 }
483 } else {
484 // It's an exit from a sub region
485 while (R->getParent() != ParentRegion)
486 R = R->getParent();
487
488 // Edge from inside a subregion to its entry, ignore it
489 if (*R == *N)
490 continue;
491
492 BasicBlock *Entry = R->getEntry();
493 if (Visited.count(Entry))
494 Pred[Entry] = BoolTrue;
495 else
496 LPred[Entry] = BoolFalse;
497 }
498 }
499 }
500
501 /// Collect various loop and predicate infos
collectInfos()502 void StructurizeCFG::collectInfos() {
503 // Reset predicate
504 Predicates.clear();
505
506 // and loop infos
507 Loops.clear();
508 LoopPreds.clear();
509
510 // Reset the visited nodes
511 Visited.clear();
512
513 for (RegionNode *RN : reverse(Order)) {
514 LLVM_DEBUG(dbgs() << "Visiting: "
515 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
516 << RN->getEntry()->getName() << " Loop Depth: "
517 << LI->getLoopDepth(RN->getEntry()) << "\n");
518
519 // Analyze all the conditions leading to a node
520 gatherPredicates(RN);
521
522 // Remember that we've seen this node
523 Visited.insert(RN->getEntry());
524
525 // Find the last back edges
526 analyzeLoops(RN);
527 }
528 }
529
530 /// Insert the missing branch conditions
insertConditions(bool Loops)531 void StructurizeCFG::insertConditions(bool Loops) {
532 BranchVector &Conds = Loops ? LoopConds : Conditions;
533 Value *Default = Loops ? BoolTrue : BoolFalse;
534 SSAUpdater PhiInserter;
535
536 for (BranchInst *Term : Conds) {
537 assert(Term->isConditional());
538
539 BasicBlock *Parent = Term->getParent();
540 BasicBlock *SuccTrue = Term->getSuccessor(0);
541 BasicBlock *SuccFalse = Term->getSuccessor(1);
542
543 PhiInserter.Initialize(Boolean, "");
544 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
545 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
546
547 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
548
549 NearestCommonDominator Dominator(DT);
550 Dominator.addBlock(Parent);
551
552 Value *ParentValue = nullptr;
553 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
554 BasicBlock *BB = BBAndPred.first;
555 Value *Pred = BBAndPred.second;
556
557 if (BB == Parent) {
558 ParentValue = Pred;
559 break;
560 }
561 PhiInserter.AddAvailableValue(BB, Pred);
562 Dominator.addAndRememberBlock(BB);
563 }
564
565 if (ParentValue) {
566 Term->setCondition(ParentValue);
567 } else {
568 if (!Dominator.resultIsRememberedBlock())
569 PhiInserter.AddAvailableValue(Dominator.result(), Default);
570
571 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
572 }
573 }
574 }
575
576 /// Remove all PHI values coming from "From" into "To" and remember
577 /// them in DeletedPhis
delPhiValues(BasicBlock * From,BasicBlock * To)578 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
579 PhiMap &Map = DeletedPhis[To];
580 for (PHINode &Phi : To->phis()) {
581 while (Phi.getBasicBlockIndex(From) != -1) {
582 Value *Deleted = Phi.removeIncomingValue(From, false);
583 Map[&Phi].push_back(std::make_pair(From, Deleted));
584 }
585 }
586 }
587
588 /// Add a dummy PHI value as soon as we knew the new predecessor
addPhiValues(BasicBlock * From,BasicBlock * To)589 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
590 for (PHINode &Phi : To->phis()) {
591 Value *Undef = UndefValue::get(Phi.getType());
592 Phi.addIncoming(Undef, From);
593 }
594 AddedPhis[To].push_back(From);
595 }
596
597 /// Add the real PHI value as soon as everything is set up
setPhiValues()598 void StructurizeCFG::setPhiValues() {
599 SSAUpdater Updater;
600 for (const auto &AddedPhi : AddedPhis) {
601 BasicBlock *To = AddedPhi.first;
602 const BBVector &From = AddedPhi.second;
603
604 if (!DeletedPhis.count(To))
605 continue;
606
607 PhiMap &Map = DeletedPhis[To];
608 for (const auto &PI : Map) {
609 PHINode *Phi = PI.first;
610 Value *Undef = UndefValue::get(Phi->getType());
611 Updater.Initialize(Phi->getType(), "");
612 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
613 Updater.AddAvailableValue(To, Undef);
614
615 NearestCommonDominator Dominator(DT);
616 Dominator.addBlock(To);
617 for (const auto &VI : PI.second) {
618 Updater.AddAvailableValue(VI.first, VI.second);
619 Dominator.addAndRememberBlock(VI.first);
620 }
621
622 if (!Dominator.resultIsRememberedBlock())
623 Updater.AddAvailableValue(Dominator.result(), Undef);
624
625 for (BasicBlock *FI : From) {
626 int Idx = Phi->getBasicBlockIndex(FI);
627 assert(Idx != -1);
628 Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(FI));
629 }
630 }
631
632 DeletedPhis.erase(To);
633 }
634 assert(DeletedPhis.empty());
635 }
636
637 /// Remove phi values from all successors and then remove the terminator.
killTerminator(BasicBlock * BB)638 void StructurizeCFG::killTerminator(BasicBlock *BB) {
639 TerminatorInst *Term = BB->getTerminator();
640 if (!Term)
641 return;
642
643 for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
644 SI != SE; ++SI)
645 delPhiValues(BB, *SI);
646
647 if (DA)
648 DA->removeValue(Term);
649 Term->eraseFromParent();
650 }
651
652 /// Let node exit(s) point to NewExit
changeExit(RegionNode * Node,BasicBlock * NewExit,bool IncludeDominator)653 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
654 bool IncludeDominator) {
655 if (Node->isSubRegion()) {
656 Region *SubRegion = Node->getNodeAs<Region>();
657 BasicBlock *OldExit = SubRegion->getExit();
658 BasicBlock *Dominator = nullptr;
659
660 // Find all the edges from the sub region to the exit
661 for (auto BBI = pred_begin(OldExit), E = pred_end(OldExit); BBI != E;) {
662 // Incrememt BBI before mucking with BB's terminator.
663 BasicBlock *BB = *BBI++;
664
665 if (!SubRegion->contains(BB))
666 continue;
667
668 // Modify the edges to point to the new exit
669 delPhiValues(BB, OldExit);
670 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
671 addPhiValues(BB, NewExit);
672
673 // Find the new dominator (if requested)
674 if (IncludeDominator) {
675 if (!Dominator)
676 Dominator = BB;
677 else
678 Dominator = DT->findNearestCommonDominator(Dominator, BB);
679 }
680 }
681
682 // Change the dominator (if requested)
683 if (Dominator)
684 DT->changeImmediateDominator(NewExit, Dominator);
685
686 // Update the region info
687 SubRegion->replaceExit(NewExit);
688 } else {
689 BasicBlock *BB = Node->getNodeAs<BasicBlock>();
690 killTerminator(BB);
691 BranchInst::Create(NewExit, BB);
692 addPhiValues(BB, NewExit);
693 if (IncludeDominator)
694 DT->changeImmediateDominator(NewExit, BB);
695 }
696 }
697
698 /// Create a new flow node and update dominator tree and region info
getNextFlow(BasicBlock * Dominator)699 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
700 LLVMContext &Context = Func->getContext();
701 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
702 Order.back()->getEntry();
703 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
704 Func, Insert);
705 DT->addNewBlock(Flow, Dominator);
706 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
707 return Flow;
708 }
709
710 /// Create a new or reuse the previous node as flow node
needPrefix(bool NeedEmpty)711 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
712 BasicBlock *Entry = PrevNode->getEntry();
713
714 if (!PrevNode->isSubRegion()) {
715 killTerminator(Entry);
716 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
717 return Entry;
718 }
719
720 // create a new flow node
721 BasicBlock *Flow = getNextFlow(Entry);
722
723 // and wire it up
724 changeExit(PrevNode, Flow, true);
725 PrevNode = ParentRegion->getBBNode(Flow);
726 return Flow;
727 }
728
729 /// Returns the region exit if possible, otherwise just a new flow node
needPostfix(BasicBlock * Flow,bool ExitUseAllowed)730 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
731 bool ExitUseAllowed) {
732 if (!Order.empty() || !ExitUseAllowed)
733 return getNextFlow(Flow);
734
735 BasicBlock *Exit = ParentRegion->getExit();
736 DT->changeImmediateDominator(Exit, Flow);
737 addPhiValues(Flow, Exit);
738 return Exit;
739 }
740
741 /// Set the previous node
setPrevNode(BasicBlock * BB)742 void StructurizeCFG::setPrevNode(BasicBlock *BB) {
743 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
744 : nullptr;
745 }
746
747 /// Does BB dominate all the predicates of Node?
dominatesPredicates(BasicBlock * BB,RegionNode * Node)748 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
749 BBPredicates &Preds = Predicates[Node->getEntry()];
750 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
751 return DT->dominates(BB, Pred.first);
752 });
753 }
754
755 /// Can we predict that this node will always be called?
isPredictableTrue(RegionNode * Node)756 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
757 BBPredicates &Preds = Predicates[Node->getEntry()];
758 bool Dominated = false;
759
760 // Regionentry is always true
761 if (!PrevNode)
762 return true;
763
764 for (std::pair<BasicBlock*, Value*> Pred : Preds) {
765 BasicBlock *BB = Pred.first;
766 Value *V = Pred.second;
767
768 if (V != BoolTrue)
769 return false;
770
771 if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
772 Dominated = true;
773 }
774
775 // TODO: The dominator check is too strict
776 return Dominated;
777 }
778
779 /// Take one node from the order vector and wire it up
wireFlow(bool ExitUseAllowed,BasicBlock * LoopEnd)780 void StructurizeCFG::wireFlow(bool ExitUseAllowed,
781 BasicBlock *LoopEnd) {
782 RegionNode *Node = Order.pop_back_val();
783 Visited.insert(Node->getEntry());
784
785 if (isPredictableTrue(Node)) {
786 // Just a linear flow
787 if (PrevNode) {
788 changeExit(PrevNode, Node->getEntry(), true);
789 }
790 PrevNode = Node;
791 } else {
792 // Insert extra prefix node (or reuse last one)
793 BasicBlock *Flow = needPrefix(false);
794
795 // Insert extra postfix node (or use exit instead)
796 BasicBlock *Entry = Node->getEntry();
797 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
798
799 // let it point to entry and next block
800 Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow));
801 addPhiValues(Flow, Entry);
802 DT->changeImmediateDominator(Entry, Flow);
803
804 PrevNode = Node;
805 while (!Order.empty() && !Visited.count(LoopEnd) &&
806 dominatesPredicates(Entry, Order.back())) {
807 handleLoops(false, LoopEnd);
808 }
809
810 changeExit(PrevNode, Next, false);
811 setPrevNode(Next);
812 }
813 }
814
handleLoops(bool ExitUseAllowed,BasicBlock * LoopEnd)815 void StructurizeCFG::handleLoops(bool ExitUseAllowed,
816 BasicBlock *LoopEnd) {
817 RegionNode *Node = Order.back();
818 BasicBlock *LoopStart = Node->getEntry();
819
820 if (!Loops.count(LoopStart)) {
821 wireFlow(ExitUseAllowed, LoopEnd);
822 return;
823 }
824
825 if (!isPredictableTrue(Node))
826 LoopStart = needPrefix(true);
827
828 LoopEnd = Loops[Node->getEntry()];
829 wireFlow(false, LoopEnd);
830 while (!Visited.count(LoopEnd)) {
831 handleLoops(false, LoopEnd);
832 }
833
834 // If the start of the loop is the entry block, we can't branch to it so
835 // insert a new dummy entry block.
836 Function *LoopFunc = LoopStart->getParent();
837 if (LoopStart == &LoopFunc->getEntryBlock()) {
838 LoopStart->setName("entry.orig");
839
840 BasicBlock *NewEntry =
841 BasicBlock::Create(LoopStart->getContext(),
842 "entry",
843 LoopFunc,
844 LoopStart);
845 BranchInst::Create(LoopStart, NewEntry);
846 DT->setNewRoot(NewEntry);
847 }
848
849 // Create an extra loop end node
850 LoopEnd = needPrefix(false);
851 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
852 LoopConds.push_back(BranchInst::Create(Next, LoopStart,
853 BoolUndef, LoopEnd));
854 addPhiValues(LoopEnd, LoopStart);
855 setPrevNode(Next);
856 }
857
858 /// After this function control flow looks like it should be, but
859 /// branches and PHI nodes only have undefined conditions.
createFlow()860 void StructurizeCFG::createFlow() {
861 BasicBlock *Exit = ParentRegion->getExit();
862 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
863
864 DeletedPhis.clear();
865 AddedPhis.clear();
866 Conditions.clear();
867 LoopConds.clear();
868
869 PrevNode = nullptr;
870 Visited.clear();
871
872 while (!Order.empty()) {
873 handleLoops(EntryDominatesExit, nullptr);
874 }
875
876 if (PrevNode)
877 changeExit(PrevNode, Exit, EntryDominatesExit);
878 else
879 assert(EntryDominatesExit);
880 }
881
882 /// Handle a rare case where the disintegrated nodes instructions
883 /// no longer dominate all their uses. Not sure if this is really necessary
rebuildSSA()884 void StructurizeCFG::rebuildSSA() {
885 SSAUpdater Updater;
886 for (BasicBlock *BB : ParentRegion->blocks())
887 for (Instruction &I : *BB) {
888 bool Initialized = false;
889 // We may modify the use list as we iterate over it, so be careful to
890 // compute the next element in the use list at the top of the loop.
891 for (auto UI = I.use_begin(), E = I.use_end(); UI != E;) {
892 Use &U = *UI++;
893 Instruction *User = cast<Instruction>(U.getUser());
894 if (User->getParent() == BB) {
895 continue;
896 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
897 if (UserPN->getIncomingBlock(U) == BB)
898 continue;
899 }
900
901 if (DT->dominates(&I, User))
902 continue;
903
904 if (!Initialized) {
905 Value *Undef = UndefValue::get(I.getType());
906 Updater.Initialize(I.getType(), "");
907 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
908 Updater.AddAvailableValue(BB, &I);
909 Initialized = true;
910 }
911 Updater.RewriteUseAfterInsertions(U);
912 }
913 }
914 }
915
hasOnlyUniformBranches(Region * R,unsigned UniformMDKindID,const DivergenceAnalysis & DA)916 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
917 const DivergenceAnalysis &DA) {
918 for (auto E : R->elements()) {
919 if (!E->isSubRegion()) {
920 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
921 if (!Br || !Br->isConditional())
922 continue;
923
924 if (!DA.isUniform(Br))
925 return false;
926 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
927 << " has uniform terminator\n");
928 } else {
929 // Explicitly refuse to treat regions as uniform if they have non-uniform
930 // subregions. We cannot rely on DivergenceAnalysis for branches in
931 // subregions because those branches may have been removed and re-created,
932 // so we look for our metadata instead.
933 //
934 // Warning: It would be nice to treat regions as uniform based only on
935 // their direct child basic blocks' terminators, regardless of whether
936 // subregions are uniform or not. However, this requires a very careful
937 // look at SIAnnotateControlFlow to make sure nothing breaks there.
938 for (auto BB : E->getNodeAs<Region>()->blocks()) {
939 auto Br = dyn_cast<BranchInst>(BB->getTerminator());
940 if (!Br || !Br->isConditional())
941 continue;
942
943 if (!Br->getMetadata(UniformMDKindID))
944 return false;
945 }
946 }
947 }
948 return true;
949 }
950
951 /// Run the transformation for each region found
runOnRegion(Region * R,RGPassManager & RGM)952 bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
953 if (R->isTopLevelRegion())
954 return false;
955
956 DA = nullptr;
957
958 if (SkipUniformRegions) {
959 // TODO: We could probably be smarter here with how we handle sub-regions.
960 // We currently rely on the fact that metadata is set by earlier invocations
961 // of the pass on sub-regions, and that this metadata doesn't get lost --
962 // but we shouldn't rely on metadata for correctness!
963 unsigned UniformMDKindID =
964 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
965 DA = &getAnalysis<DivergenceAnalysis>();
966
967 if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) {
968 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
969 << '\n');
970
971 // Mark all direct child block terminators as having been treated as
972 // uniform. To account for a possible future in which non-uniform
973 // sub-regions are treated more cleverly, indirect children are not
974 // marked as uniform.
975 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
976 for (RegionNode *E : R->elements()) {
977 if (E->isSubRegion())
978 continue;
979
980 if (Instruction *Term = E->getEntry()->getTerminator())
981 Term->setMetadata(UniformMDKindID, MD);
982 }
983
984 return false;
985 }
986 }
987
988 Func = R->getEntry()->getParent();
989 ParentRegion = R;
990
991 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
992 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
993
994 orderNodes();
995 collectInfos();
996 createFlow();
997 insertConditions(false);
998 insertConditions(true);
999 setPhiValues();
1000 rebuildSSA();
1001
1002 // Cleanup
1003 Order.clear();
1004 Visited.clear();
1005 DeletedPhis.clear();
1006 AddedPhis.clear();
1007 Predicates.clear();
1008 Conditions.clear();
1009 Loops.clear();
1010 LoopPreds.clear();
1011 LoopConds.clear();
1012
1013 return true;
1014 }
1015
createStructurizeCFGPass(bool SkipUniformRegions)1016 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1017 return new StructurizeCFG(SkipUniformRegions);
1018 }
1019