1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 
10 #define DEBUGME 0
11 #define DEBUG_TYPE "structcfg"
12 
13 #include "AMDGPUInstrInfo.h"
14 #include "AMDIL.h"
15 #include "AMDILUtilityFunctions.h"
16 #include "llvm/ADT/SCCIterator.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/DominatorInternals.h"
20 #include "llvm/Analysis/Dominators.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineJumpTableInfo.h"
29 #include "llvm/CodeGen/MachineLoopInfo.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 
33 #define FirstNonDebugInstr(A) A->begin()
34 using namespace llvm;
35 
36 // TODO: move-begin.
37 
38 //===----------------------------------------------------------------------===//
39 //
40 // Statistics for CFGStructurizer.
41 //
42 //===----------------------------------------------------------------------===//
43 
44 STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
45     "matched");
46 STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
47     "matched");
48 STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
49     "pattern matched");
50 STATISTIC(numLoopcontPatternMatch,  "CFGStructurizer number of loop-continue "
51     "pattern matched");
52 STATISTIC(numLoopPatternMatch,      "CFGStructurizer number of loop pattern "
53     "matched");
54 STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
55 STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
56 
57 //===----------------------------------------------------------------------===//
58 //
59 // Miscellaneous utility for CFGStructurizer.
60 //
61 //===----------------------------------------------------------------------===//
62 namespace llvmCFGStruct
63 {
64 #define SHOWNEWINSTR(i) \
65   if (DEBUGME) errs() << "New instr: " << *i << "\n"
66 
67 #define SHOWNEWBLK(b, msg) \
68 if (DEBUGME) { \
69   errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
70   errs() << "\n"; \
71 }
72 
73 #define SHOWBLK_DETAIL(b, msg) \
74 if (DEBUGME) { \
75   if (b) { \
76   errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
77   b->print(errs()); \
78   errs() << "\n"; \
79   } \
80 }
81 
82 #define INVALIDSCCNUM -1
83 #define INVALIDREGNUM 0
84 
85 template<class LoopinfoT>
PrintLoopinfo(const LoopinfoT & LoopInfo,llvm::raw_ostream & OS)86 void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
87   for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
88        iterEnd = LoopInfo.end();
89        iter != iterEnd; ++iter) {
90     (*iter)->print(OS, 0);
91   }
92 }
93 
94 template<class NodeT>
ReverseVector(SmallVector<NodeT *,DEFAULT_VEC_SLOTS> & Src)95 void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
96   size_t sz = Src.size();
97   for (size_t i = 0; i < sz/2; ++i) {
98     NodeT *t = Src[i];
99     Src[i] = Src[sz - i - 1];
100     Src[sz - i - 1] = t;
101   }
102 }
103 
104 } //end namespace llvmCFGStruct
105 
106 
107 //===----------------------------------------------------------------------===//
108 //
109 // MachinePostDominatorTree
110 //
111 //===----------------------------------------------------------------------===//
112 
113 namespace llvm {
114 
115 /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
116 /// to compute the a post-dominator tree.
117 ///
118 struct MachinePostDominatorTree : public MachineFunctionPass {
119   static char ID; // Pass identification, replacement for typeid
120   DominatorTreeBase<MachineBasicBlock> *DT;
MachinePostDominatorTreellvm::MachinePostDominatorTree121   MachinePostDominatorTree() : MachineFunctionPass(ID)
122   {
123     DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
124     // postdominator
125   }
126 
127   ~MachinePostDominatorTree();
128 
129   virtual bool runOnMachineFunction(MachineFunction &MF);
130 
getAnalysisUsagellvm::MachinePostDominatorTree131   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
132     AU.setPreservesAll();
133     MachineFunctionPass::getAnalysisUsage(AU);
134   }
135 
getRootsllvm::MachinePostDominatorTree136   inline const std::vector<MachineBasicBlock *> &getRoots() const {
137     return DT->getRoots();
138   }
139 
getRootNodellvm::MachinePostDominatorTree140   inline MachineDomTreeNode *getRootNode() const {
141     return DT->getRootNode();
142   }
143 
operator []llvm::MachinePostDominatorTree144   inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
145     return DT->getNode(BB);
146   }
147 
getNodellvm::MachinePostDominatorTree148   inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
149     return DT->getNode(BB);
150   }
151 
dominatesllvm::MachinePostDominatorTree152   inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
153     return DT->dominates(A, B);
154   }
155 
dominatesllvm::MachinePostDominatorTree156   inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
157     return DT->dominates(A, B);
158   }
159 
160   inline bool
properlyDominatesllvm::MachinePostDominatorTree161   properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
162     return DT->properlyDominates(A, B);
163   }
164 
165   inline bool
properlyDominatesllvm::MachinePostDominatorTree166   properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
167     return DT->properlyDominates(A, B);
168   }
169 
170   inline MachineBasicBlock *
findNearestCommonDominatorllvm::MachinePostDominatorTree171   findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
172     return DT->findNearestCommonDominator(A, B);
173   }
174 
printllvm::MachinePostDominatorTree175   virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
176     DT->print(OS);
177   }
178 };
179 } //end of namespace llvm
180 
181 char MachinePostDominatorTree::ID = 0;
182 static RegisterPass<MachinePostDominatorTree>
183 machinePostDominatorTreePass("machinepostdomtree",
184                              "MachinePostDominator Tree Construction",
185                              true, true);
186 
187 //const PassInfo *const llvm::MachinePostDominatorsID
188 //= &machinePostDominatorTreePass;
189 
runOnMachineFunction(MachineFunction & F)190 bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
191   DT->recalculate(F);
192   //DEBUG(DT->dump());
193   return false;
194 }
195 
~MachinePostDominatorTree()196 MachinePostDominatorTree::~MachinePostDominatorTree() {
197   delete DT;
198 }
199 
200 //===----------------------------------------------------------------------===//
201 //
202 // supporting data structure for CFGStructurizer
203 //
204 //===----------------------------------------------------------------------===//
205 
206 namespace llvmCFGStruct
207 {
208 template<class PassT>
209 struct CFGStructTraits {
210 };
211 
212 template <class InstrT>
213 class BlockInformation {
214 public:
215   bool isRetired;
216   int  sccNum;
217   //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
218   //Instructions defining the corresponding successor.
BlockInformation()219   BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
220 };
221 
222 template <class BlockT, class InstrT, class RegiT>
223 class LandInformation {
224 public:
225   BlockT *landBlk;
226   std::set<RegiT> breakInitRegs;  //Registers that need to "reg = 0", before
227                                   //WHILELOOP(thisloop) init before entering
228                                   //thisloop.
229   std::set<RegiT> contInitRegs;   //Registers that need to "reg = 0", after
230                                   //WHILELOOP(thisloop) init after entering
231                                   //thisloop.
232   std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
233                                      //land block, branch cond on this reg.
234   std::set<RegiT> breakOnRegs;       //registers that need to "if (reg) break
235                                      //endif" after ENDLOOP(thisloop) break
236                                      //outerLoopOf(thisLoop).
237   std::set<RegiT> contOnRegs;       //registers that need to "if (reg) continue
238                                     //endif" after ENDLOOP(thisloop) continue on
239                                     //outerLoopOf(thisLoop).
LandInformation()240   LandInformation() : landBlk(NULL) {}
241 };
242 
243 } //end of namespace llvmCFGStruct
244 
245 //===----------------------------------------------------------------------===//
246 //
247 // CFGStructurizer
248 //
249 //===----------------------------------------------------------------------===//
250 
251 namespace llvmCFGStruct
252 {
253 // bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
254 template<class PassT>
255 class  CFGStructurizer
256 {
257 public:
258   typedef enum {
259     Not_SinglePath = 0,
260     SinglePath_InPath = 1,
261     SinglePath_NotInPath = 2
262   } PathToKind;
263 
264 public:
265   typedef typename PassT::InstructionType         InstrT;
266   typedef typename PassT::FunctionType            FuncT;
267   typedef typename PassT::DominatortreeType       DomTreeT;
268   typedef typename PassT::PostDominatortreeType   PostDomTreeT;
269   typedef typename PassT::DomTreeNodeType         DomTreeNodeT;
270   typedef typename PassT::LoopinfoType            LoopInfoT;
271 
272   typedef GraphTraits<FuncT *>                    FuncGTraits;
273   //typedef FuncGTraits::nodes_iterator BlockIterator;
274   typedef typename FuncT::iterator                BlockIterator;
275 
276   typedef typename FuncGTraits::NodeType          BlockT;
277   typedef GraphTraits<BlockT *>                   BlockGTraits;
278   typedef GraphTraits<Inverse<BlockT *> >         InvBlockGTraits;
279   //typedef BlockGTraits::succ_iterator InstructionIterator;
280   typedef typename BlockT::iterator               InstrIterator;
281 
282   typedef CFGStructTraits<PassT>                  CFGTraits;
283   typedef BlockInformation<InstrT>                BlockInfo;
284   typedef std::map<BlockT *, BlockInfo *>         BlockInfoMap;
285 
286   typedef int                                     RegiT;
287   typedef typename PassT::LoopType                LoopT;
288   typedef LandInformation<BlockT, InstrT, RegiT>  LoopLandInfo;
289         typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
290         //landing info for loop break
291   typedef SmallVector<BlockT *, 32>               BlockTSmallerVector;
292 
293 public:
294   CFGStructurizer();
295   ~CFGStructurizer();
296 
297   /// Perform the CFG structurization
298   bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
299 
300   /// Perform the CFG preparation
301   bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
302 
303 private:
304   void reversePredicateSetter(typename BlockT::iterator);
305   void   orderBlocks();
306   void   printOrderedBlocks(llvm::raw_ostream &OS);
307   int patternMatch(BlockT *CurBlock);
308   int patternMatchGroup(BlockT *CurBlock);
309 
310   int serialPatternMatch(BlockT *CurBlock);
311   int ifPatternMatch(BlockT *CurBlock);
312   int switchPatternMatch(BlockT *CurBlock);
313   int loopendPatternMatch(BlockT *CurBlock);
314   int loopPatternMatch(BlockT *CurBlock);
315 
316   int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
317   int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
318   //int loopWithoutBreak(BlockT *);
319 
320   void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
321                         BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
322   void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
323                            BlockT *ContBlock, LoopT *contLoop);
324   bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
325   int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
326                        BlockT *FalseBlock);
327   int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
328                           BlockT *FalseBlock);
329   int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
330                               BlockT *FalseBlock, BlockT **LandBlockPtr);
331   void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
332                                    BlockT *FalseBlock, BlockT *LandBlock,
333                                    bool Detail = false);
334   PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
335                           bool AllowSideEntry = true);
336   BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
337                         bool AllowSideEntry = true);
338   int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
339   void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
340 
341   void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
342                             BlockT *TrueBlock, BlockT *FalseBlock,
343                             BlockT *LandBlock);
344   void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
345   void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
346                            BlockT *ExitLandBlock, RegiT SetReg);
347   void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
348                            RegiT SetReg);
349   BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
350                                 std::set<BlockT*> &ExitBlockSet,
351                                 BlockT *ExitLandBlk);
352   BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
353                                 BlockTSmallerVector &ExitingBlocks,
354                                 BlockTSmallerVector &ExitBlocks);
355   BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
356   void removeUnconditionalBranch(BlockT *SrcBlock);
357   void removeRedundantConditionalBranch(BlockT *SrcBlock);
358   void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
359 
360   void removeSuccessor(BlockT *SrcBlock);
361   BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
362   BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
363 
364   void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
365                           InstrIterator InsertPos);
366 
367   void recordSccnum(BlockT *SrcBlock, int SCCNum);
368   int getSCCNum(BlockT *srcBlk);
369 
370   void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
371   bool isRetiredBlock(BlockT *SrcBlock);
372   bool isActiveLoophead(BlockT *CurBlock);
373   bool needMigrateBlock(BlockT *Block);
374 
375   BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
376                               BlockTSmallerVector &exitBlocks,
377                               std::set<BlockT*> &ExitBlockSet);
378   void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
379   BlockT *getLoopLandBlock(LoopT *LoopRep);
380   LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
381 
382   void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
383   void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
384   void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
385   void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
386   void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
387 
388   bool hasBackEdge(BlockT *curBlock);
389   unsigned getLoopDepth  (LoopT *LoopRep);
390   int countActiveBlock(
391     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
392     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
393     BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
394   BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
395 
396 private:
397   DomTreeT *domTree;
398   PostDomTreeT *postDomTree;
399   LoopInfoT *loopInfo;
400   PassT *passRep;
401   FuncT *funcRep;
402 
403   BlockInfoMap blockInfoMap;
404   LoopLandInfoMap loopLandInfoMap;
405   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
406   const AMDGPURegisterInfo *TRI;
407 
408 };  //template class CFGStructurizer
409 
CFGStructurizer()410 template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
411   : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
412 }
413 
~CFGStructurizer()414 template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
415   for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
416        E = blockInfoMap.end(); I != E; ++I) {
417     delete I->second;
418   }
419 }
420 
421 template<class PassT>
prepare(FuncT & func,PassT & pass,const AMDGPURegisterInfo * tri)422 bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
423                                      const AMDGPURegisterInfo * tri) {
424   passRep = &pass;
425   funcRep = &func;
426   TRI = tri;
427 
428   bool changed = false;
429   //func.RenumberBlocks();
430 
431   //to do, if not reducible flow graph, make it so ???
432 
433   if (DEBUGME) {
434         errs() << "AMDGPUCFGStructurizer::prepare\n";
435     //func.viewCFG();
436     //func.viewCFGOnly();
437     //func.dump();
438   }
439 
440   //FIXME: gcc complains on this.
441   //domTree = &pass.getAnalysis<DomTreeT>();
442       //domTree = CFGTraits::getDominatorTree(pass);
443       //if (DEBUGME) {
444       //    domTree->print(errs());
445     //}
446 
447   //FIXME: gcc complains on this.
448   //domTree = &pass.getAnalysis<DomTreeT>();
449       //postDomTree = CFGTraits::getPostDominatorTree(pass);
450       //if (DEBUGME) {
451       //   postDomTree->print(errs());
452     //}
453 
454   //FIXME: gcc complains on this.
455   //loopInfo = &pass.getAnalysis<LoopInfoT>();
456   loopInfo = CFGTraits::getLoopInfo(pass);
457   if (DEBUGME) {
458     errs() << "LoopInfo:\n";
459     PrintLoopinfo(*loopInfo, errs());
460   }
461 
462   orderBlocks();
463   if (DEBUGME) {
464     errs() << "Ordered blocks:\n";
465     printOrderedBlocks(errs());
466   }
467 
468   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
469 
470   for (typename LoopInfoT::iterator iter = loopInfo->begin(),
471        iterEnd = loopInfo->end();
472        iter != iterEnd; ++iter) {
473     LoopT* loopRep = (*iter);
474     BlockTSmallerVector exitingBlks;
475     loopRep->getExitingBlocks(exitingBlks);
476 
477     if (exitingBlks.size() == 0) {
478       BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
479       if (dummyExitBlk != NULL)
480         retBlks.push_back(dummyExitBlk);
481     }
482   }
483 
484   // Remove unconditional branch instr.
485   // Add dummy exit block iff there are multiple returns.
486 
487   for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
488        iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
489        iterBlk != iterEndBlk;
490        ++iterBlk) {
491     BlockT *curBlk = *iterBlk;
492     removeUnconditionalBranch(curBlk);
493     removeRedundantConditionalBranch(curBlk);
494     if (CFGTraits::isReturnBlock(curBlk)) {
495       retBlks.push_back(curBlk);
496     }
497     assert(curBlk->succ_size() <= 2);
498     //assert(curBlk->size() > 0);
499     //removeEmptyBlock(curBlk) ??
500   } //for
501 
502   if (retBlks.size() >= 2) {
503     addDummyExitBlock(retBlks);
504     changed = true;
505   }
506 
507   return changed;
508 } //CFGStructurizer::prepare
509 
510 template<class PassT>
run(FuncT & func,PassT & pass,const AMDGPURegisterInfo * tri)511 bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
512     const AMDGPURegisterInfo * tri) {
513   passRep = &pass;
514   funcRep = &func;
515   TRI = tri;
516 
517   //func.RenumberBlocks();
518 
519   //Assume reducible CFG...
520   if (DEBUGME) {
521     errs() << "AMDGPUCFGStructurizer::run\n";
522     //errs() << func.getFunction()->getNameStr() << "\n";
523     func.viewCFG();
524     //func.viewCFGOnly();
525     //func.dump();
526   }
527 
528 #if 1
529   //FIXME: gcc complains on this.
530   //domTree = &pass.getAnalysis<DomTreeT>();
531   domTree = CFGTraits::getDominatorTree(pass);
532   if (DEBUGME) {
533     domTree->print(errs(), (const llvm::Module*)0);
534   }
535 #endif
536 
537   //FIXME: gcc complains on this.
538   //domTree = &pass.getAnalysis<DomTreeT>();
539   postDomTree = CFGTraits::getPostDominatorTree(pass);
540   if (DEBUGME) {
541     postDomTree->print(errs());
542   }
543 
544   //FIXME: gcc complains on this.
545   //loopInfo = &pass.getAnalysis<LoopInfoT>();
546   loopInfo = CFGTraits::getLoopInfo(pass);
547   if (DEBUGME) {
548     errs() << "LoopInfo:\n";
549     PrintLoopinfo(*loopInfo, errs());
550   }
551 
552   orderBlocks();
553 //#define STRESSTEST
554 #ifdef STRESSTEST
555   //Use the worse block ordering to test the algorithm.
556   ReverseVector(orderedBlks);
557 #endif
558 
559   if (DEBUGME) {
560     errs() << "Ordered blocks:\n";
561     printOrderedBlocks(errs());
562   }
563   int numIter = 0;
564   bool finish = false;
565   BlockT *curBlk;
566   bool makeProgress = false;
567   int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
568                                         orderedBlks.end());
569 
570   do {
571     ++numIter;
572     if (DEBUGME) {
573       errs() << "numIter = " << numIter
574              << ", numRemaintedBlk = " << numRemainedBlk << "\n";
575     }
576 
577     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
578       iterBlk = orderedBlks.begin();
579     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
580       iterBlkEnd = orderedBlks.end();
581 
582     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
583       sccBeginIter = iterBlk;
584     BlockT *sccBeginBlk = NULL;
585     int sccNumBlk = 0;  // The number of active blocks, init to a
586                         // maximum possible number.
587     int sccNumIter;     // Number of iteration in this SCC.
588 
589     while (iterBlk != iterBlkEnd) {
590       curBlk = *iterBlk;
591 
592       if (sccBeginBlk == NULL) {
593         sccBeginIter = iterBlk;
594         sccBeginBlk = curBlk;
595         sccNumIter = 0;
596         sccNumBlk = numRemainedBlk; // Init to maximum possible number.
597         if (DEBUGME) {
598               errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
599               errs() << "\n";
600         }
601       }
602 
603       if (!isRetiredBlock(curBlk)) {
604         patternMatch(curBlk);
605       }
606 
607       ++iterBlk;
608 
609       bool contNextScc = true;
610       if (iterBlk == iterBlkEnd
611           || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
612         // Just finish one scc.
613         ++sccNumIter;
614         int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
615         if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
616           if (DEBUGME) {
617             errs() << "Can't reduce SCC " << getSCCNum(curBlk)
618                    << ", sccNumIter = " << sccNumIter;
619             errs() << "doesn't make any progress\n";
620           }
621           contNextScc = true;
622         } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
623           sccNumBlk = sccRemainedNumBlk;
624           iterBlk = sccBeginIter;
625           contNextScc = false;
626           if (DEBUGME) {
627             errs() << "repeat processing SCC" << getSCCNum(curBlk)
628                    << "sccNumIter = " << sccNumIter << "\n";
629             func.viewCFG();
630             //func.viewCFGOnly();
631           }
632         } else {
633           // Finish the current scc.
634           contNextScc = true;
635         }
636       } else {
637         // Continue on next component in the current scc.
638         contNextScc = false;
639       }
640 
641       if (contNextScc) {
642         sccBeginBlk = NULL;
643       }
644     } //while, "one iteration" over the function.
645 
646     BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
647     if (entryBlk->succ_size() == 0) {
648       finish = true;
649       if (DEBUGME) {
650         errs() << "Reduce to one block\n";
651       }
652     } else {
653       int newnumRemainedBlk
654         = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
655       // consider cloned blocks ??
656       if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
657         makeProgress = true;
658         numRemainedBlk = newnumRemainedBlk;
659       } else {
660         makeProgress = false;
661         if (DEBUGME) {
662           errs() << "No progress\n";
663         }
664       }
665     }
666   } while (!finish && makeProgress);
667 
668   // Misc wrap up to maintain the consistency of the Function representation.
669   CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
670 
671   // Detach retired Block, release memory.
672   for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
673        iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
674     if ((*iterMap).second && (*iterMap).second->isRetired) {
675       assert(((*iterMap).first)->getNumber() != -1);
676       if (DEBUGME) {
677         errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
678       }
679       (*iterMap).first->eraseFromParent();  //Remove from the parent Function.
680     }
681     delete (*iterMap).second;
682   }
683   blockInfoMap.clear();
684 
685   // clear loopLandInfoMap
686   for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
687        iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
688     delete (*iterMap).second;
689   }
690   loopLandInfoMap.clear();
691 
692   if (DEBUGME) {
693     func.viewCFG();
694     //func.dump();
695   }
696 
697   if (!finish) {
698     assert(!"IRREDUCIBL_CF");
699   }
700 
701   return true;
702 } //CFGStructurizer::run
703 
704 /// Print the ordered Blocks.
705 ///
706 template<class PassT>
printOrderedBlocks(llvm::raw_ostream & os)707 void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
708   size_t i = 0;
709   for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
710       iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
711        iterBlk != iterBlkEnd;
712        ++iterBlk, ++i) {
713     os << "BB" << (*iterBlk)->getNumber();
714     os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
715     if (i != 0 && i % 10 == 0) {
716       os << "\n";
717     } else {
718       os << " ";
719     }
720   }
721 } //printOrderedBlocks
722 
723 /// Compute the reversed DFS post order of Blocks
724 ///
orderBlocks()725 template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
726   int sccNum = 0;
727   BlockT *bb;
728   for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
729        sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
730     std::vector<BlockT *> &sccNext = *sccIter;
731     for (typename std::vector<BlockT *>::const_iterator
732          blockIter = sccNext.begin(), blockEnd = sccNext.end();
733          blockIter != blockEnd; ++blockIter) {
734       bb = *blockIter;
735       orderedBlks.push_back(bb);
736       recordSccnum(bb, sccNum);
737     }
738   }
739 
740   //walk through all the block in func to check for unreachable
741   for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
742        blockEnd1 = FuncGTraits::nodes_end(funcRep);
743        blockIter1 != blockEnd1; ++blockIter1) {
744     BlockT *bb = &(*blockIter1);
745     sccNum = getSCCNum(bb);
746     if (sccNum == INVALIDSCCNUM) {
747       errs() << "unreachable block BB" << bb->getNumber() << "\n";
748     }
749   } //end of for
750 } //orderBlocks
751 
patternMatch(BlockT * curBlk)752 template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
753   int numMatch = 0;
754   int curMatch;
755 
756   if (DEBUGME) {
757         errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
758   }
759 
760   while ((curMatch = patternMatchGroup(curBlk)) > 0) {
761     numMatch += curMatch;
762   }
763 
764   if (DEBUGME) {
765         errs() << "End patternMatch BB" << curBlk->getNumber()
766       << ", numMatch = " << numMatch << "\n";
767   }
768 
769   return numMatch;
770 } //patternMatch
771 
772 template<class PassT>
patternMatchGroup(BlockT * curBlk)773 int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
774   int numMatch = 0;
775   numMatch += serialPatternMatch(curBlk);
776   numMatch += ifPatternMatch(curBlk);
777   //numMatch += switchPatternMatch(curBlk);
778   numMatch += loopendPatternMatch(curBlk);
779   numMatch += loopPatternMatch(curBlk);
780   return numMatch;
781 }//patternMatchGroup
782 
783 template<class PassT>
serialPatternMatch(BlockT * curBlk)784 int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
785   if (curBlk->succ_size() != 1) {
786     return 0;
787   }
788 
789   BlockT *childBlk = *curBlk->succ_begin();
790   if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
791     return 0;
792   }
793 
794   mergeSerialBlock(curBlk, childBlk);
795   ++numSerialPatternMatch;
796   return 1;
797 } //serialPatternMatch
798 
799 template<class PassT>
ifPatternMatch(BlockT * curBlk)800 int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
801   //two edges
802   if (curBlk->succ_size() != 2) {
803     return 0;
804   }
805 
806   if (hasBackEdge(curBlk)) {
807     return 0;
808   }
809 
810   InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
811   if (branchInstr == NULL) {
812     return 0;
813   }
814 
815   assert(CFGTraits::isCondBranch(branchInstr));
816 
817   BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
818   BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
819   BlockT *landBlk;
820   int cloned = 0;
821 
822   // TODO: Simplify
823   if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
824     && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
825     landBlk = *trueBlk->succ_begin();
826   } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
827     landBlk = NULL;
828   } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
829     landBlk = falseBlk;
830     falseBlk = NULL;
831   } else if (falseBlk->succ_size() == 1
832              && *falseBlk->succ_begin() == trueBlk) {
833     landBlk = trueBlk;
834     trueBlk = NULL;
835   } else if (falseBlk->succ_size() == 1
836              && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
837     landBlk = *falseBlk->succ_begin();
838   } else if (trueBlk->succ_size() == 1
839     && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
840     landBlk = *trueBlk->succ_begin();
841   } else {
842     return handleJumpintoIf(curBlk, trueBlk, falseBlk);
843   }
844 
845   // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
846   // new BB created for landBlk==NULL may introduce new challenge to the
847   // reduction process.
848   if (landBlk != NULL &&
849       ((trueBlk && trueBlk->pred_size() > 1)
850       || (falseBlk && falseBlk->pred_size() > 1))) {
851      cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
852   }
853 
854   if (trueBlk && trueBlk->pred_size() > 1) {
855     trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
856     ++cloned;
857   }
858 
859   if (falseBlk && falseBlk->pred_size() > 1) {
860     falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
861     ++cloned;
862   }
863 
864   mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
865 
866   ++numIfPatternMatch;
867 
868   numClonedBlock += cloned;
869 
870   return 1 + cloned;
871 } //ifPatternMatch
872 
873 template<class PassT>
switchPatternMatch(BlockT * curBlk)874 int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
875   return 0;
876 } //switchPatternMatch
877 
878 template<class PassT>
loopendPatternMatch(BlockT * curBlk)879 int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
880   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
881   typename std::vector<LoopT *> nestedLoops;
882   while (loopRep) {
883     nestedLoops.push_back(loopRep);
884     loopRep = loopRep->getParentLoop();
885   }
886 
887   if (nestedLoops.size() == 0) {
888     return 0;
889   }
890 
891   // Process nested loop outside->inside, so "continue" to a outside loop won't
892   // be mistaken as "break" of the current loop.
893   int num = 0;
894   for (typename std::vector<LoopT *>::reverse_iterator
895        iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
896        iter != iterEnd; ++iter) {
897     loopRep = *iter;
898 
899     if (getLoopLandBlock(loopRep) != NULL) {
900       continue;
901     }
902 
903     BlockT *loopHeader = loopRep->getHeader();
904 
905     int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
906 
907     if (numBreak == -1) {
908       break;
909     }
910 
911     int numCont = loopcontPatternMatch(loopRep, loopHeader);
912     num += numBreak + numCont;
913   }
914 
915   return num;
916 } //loopendPatternMatch
917 
918 template<class PassT>
loopPatternMatch(BlockT * curBlk)919 int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
920   if (curBlk->succ_size() != 0) {
921     return 0;
922   }
923 
924   int numLoop = 0;
925   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
926   while (loopRep && loopRep->getHeader() == curBlk) {
927     LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
928     if (loopLand) {
929       BlockT *landBlk = loopLand->landBlk;
930       assert(landBlk);
931       if (!isRetiredBlock(landBlk)) {
932         mergeLooplandBlock(curBlk, loopLand);
933         ++numLoop;
934       }
935     }
936     loopRep = loopRep->getParentLoop();
937   }
938 
939   numLoopPatternMatch += numLoop;
940 
941   return numLoop;
942 } //loopPatternMatch
943 
944 template<class PassT>
loopbreakPatternMatch(LoopT * loopRep,BlockT * loopHeader)945 int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
946                                                   BlockT *loopHeader) {
947   BlockTSmallerVector exitingBlks;
948   loopRep->getExitingBlocks(exitingBlks);
949 
950   if (DEBUGME) {
951     errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
952   }
953 
954   if (exitingBlks.size() == 0) {
955     setLoopLandBlock(loopRep);
956     return 0;
957   }
958 
959   // Compute the corresponding exitBlks and exit block set.
960   BlockTSmallerVector exitBlks;
961   std::set<BlockT *> exitBlkSet;
962   for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
963        iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
964     BlockT *exitingBlk = *iter;
965     BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
966     exitBlks.push_back(exitBlk);
967     exitBlkSet.insert(exitBlk);  //non-duplicate insert
968   }
969 
970   assert(exitBlkSet.size() > 0);
971   assert(exitBlks.size() == exitingBlks.size());
972 
973   if (DEBUGME) {
974     errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
975   }
976 
977   // Find exitLandBlk.
978   BlockT *exitLandBlk = NULL;
979   int numCloned = 0;
980   int numSerial = 0;
981 
982   if (exitBlkSet.size() == 1)
983   {
984     exitLandBlk = *exitBlkSet.begin();
985   } else {
986     exitLandBlk = findNearestCommonPostDom(exitBlkSet);
987 
988     if (exitLandBlk == NULL) {
989       return -1;
990     }
991 
992     bool allInPath = true;
993     bool allNotInPath = true;
994     for (typename std::set<BlockT*>::const_iterator
995          iter = exitBlkSet.begin(),
996          iterEnd = exitBlkSet.end();
997          iter != iterEnd; ++iter) {
998       BlockT *exitBlk = *iter;
999 
1000       PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
1001       if (DEBUGME) {
1002         errs() << "BB" << exitBlk->getNumber()
1003                << " to BB" << exitLandBlk->getNumber() << " PathToKind="
1004                << pathKind << "\n";
1005       }
1006 
1007       allInPath = allInPath && (pathKind == SinglePath_InPath);
1008       allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
1009 
1010       if (!allInPath && !allNotInPath) {
1011         if (DEBUGME) {
1012               errs() << "singlePath check fail\n";
1013         }
1014         return -1;
1015       }
1016     } // check all exit blocks
1017 
1018     if (allNotInPath) {
1019 #if 1
1020 
1021       // TODO: Simplify, maybe separate function?
1022       //funcRep->viewCFG();
1023       LoopT *parentLoopRep = loopRep->getParentLoop();
1024       BlockT *parentLoopHeader = NULL;
1025       if (parentLoopRep)
1026         parentLoopHeader = parentLoopRep->getHeader();
1027 
1028       if (exitLandBlk == parentLoopHeader &&
1029           (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
1030                                                loopRep,
1031                                                exitBlkSet,
1032                                                exitLandBlk)) != NULL) {
1033         if (DEBUGME) {
1034           errs() << "relocateLoopcontBlock success\n";
1035         }
1036       } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
1037                                                       exitingBlks,
1038                                                       exitBlks)) != NULL) {
1039         if (DEBUGME) {
1040           errs() << "insertEndbranchBlock success\n";
1041         }
1042       } else {
1043         if (DEBUGME) {
1044           errs() << "loop exit fail\n";
1045         }
1046         return -1;
1047       }
1048 #else
1049       return -1;
1050 #endif
1051     }
1052 
1053     // Handle side entry to exit path.
1054     exitBlks.clear();
1055     exitBlkSet.clear();
1056     for (typename BlockTSmallerVector::iterator iterExiting =
1057            exitingBlks.begin(),
1058          iterExitingEnd = exitingBlks.end();
1059          iterExiting != iterExitingEnd; ++iterExiting) {
1060       BlockT *exitingBlk = *iterExiting;
1061       BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
1062       BlockT *newExitBlk = exitBlk;
1063 
1064       if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
1065         newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
1066         ++numCloned;
1067       }
1068 
1069       numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
1070 
1071       exitBlks.push_back(newExitBlk);
1072       exitBlkSet.insert(newExitBlk);
1073     }
1074 
1075     for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
1076          iterExitEnd = exitBlks.end();
1077          iterExit != iterExitEnd; ++iterExit) {
1078       BlockT *exitBlk = *iterExit;
1079       numSerial += serialPatternMatch(exitBlk);
1080     }
1081 
1082     for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
1083          iterExitEnd = exitBlks.end();
1084          iterExit != iterExitEnd; ++iterExit) {
1085       BlockT *exitBlk = *iterExit;
1086       if (exitBlk->pred_size() > 1) {
1087         if (exitBlk != exitLandBlk) {
1088           return -1;
1089         }
1090       } else {
1091         if (exitBlk != exitLandBlk &&
1092             (exitBlk->succ_size() != 1 ||
1093             *exitBlk->succ_begin() != exitLandBlk)) {
1094           return -1;
1095         }
1096       }
1097     }
1098   } // else
1099 
1100   // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
1101   exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
1102 
1103   // Fold break into the breaking block. Leverage across level breaks.
1104   assert(exitingBlks.size() == exitBlks.size());
1105   for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
1106        iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
1107        iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
1108     BlockT *exitBlk = *iterExit;
1109     BlockT *exitingBlk = *iterExiting;
1110     assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
1111     LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
1112     handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
1113   }
1114 
1115   int numBreak = static_cast<int>(exitingBlks.size());
1116   numLoopbreakPatternMatch += numBreak;
1117   numClonedBlock += numCloned;
1118   return numBreak + numSerial + numCloned;
1119 } //loopbreakPatternMatch
1120 
1121 template<class PassT>
loopcontPatternMatch(LoopT * loopRep,BlockT * loopHeader)1122 int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
1123                                                  BlockT *loopHeader) {
1124   int numCont = 0;
1125   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
1126   for (typename InvBlockGTraits::ChildIteratorType iter =
1127        InvBlockGTraits::child_begin(loopHeader),
1128        iterEnd = InvBlockGTraits::child_end(loopHeader);
1129        iter != iterEnd; ++iter) {
1130     BlockT *curBlk = *iter;
1131     if (loopRep->contains(curBlk)) {
1132       handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
1133                           loopHeader, loopRep);
1134       contBlk.push_back(curBlk);
1135       ++numCont;
1136     }
1137   }
1138 
1139   for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
1140        iter = contBlk.begin(), iterEnd = contBlk.end();
1141        iter != iterEnd; ++iter) {
1142     (*iter)->removeSuccessor(loopHeader);
1143   }
1144 
1145   numLoopcontPatternMatch += numCont;
1146 
1147   return numCont;
1148 } //loopcontPatternMatch
1149 
1150 
1151 template<class PassT>
isSameloopDetachedContbreak(BlockT * src1Blk,BlockT * src2Blk)1152 bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
1153                                                          BlockT *src2Blk) {
1154   // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
1155   // same loop with LoopLandInfo without explicitly keeping track of
1156   // loopContBlks and loopBreakBlks, this is a method to get the information.
1157   //
1158   if (src1Blk->succ_size() == 0) {
1159     LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
1160     if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
1161       LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
1162       if (theEntry != NULL) {
1163         if (DEBUGME) {
1164           errs() << "isLoopContBreakBlock yes src1 = BB"
1165                  << src1Blk->getNumber()
1166                  << " src2 = BB" << src2Blk->getNumber() << "\n";
1167         }
1168         return true;
1169       }
1170     }
1171   }
1172   return false;
1173 }  //isSameloopDetachedContbreak
1174 
1175 template<class PassT>
handleJumpintoIf(BlockT * headBlk,BlockT * trueBlk,BlockT * falseBlk)1176 int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
1177                                              BlockT *trueBlk,
1178                                              BlockT *falseBlk) {
1179   int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
1180   if (num == 0) {
1181     if (DEBUGME) {
1182       errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1183     }
1184     num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
1185   }
1186   return num;
1187 }
1188 
1189 template<class PassT>
handleJumpintoIfImp(BlockT * headBlk,BlockT * trueBlk,BlockT * falseBlk)1190 int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
1191                                                 BlockT *trueBlk,
1192                                                 BlockT *falseBlk) {
1193   int num = 0;
1194   BlockT *downBlk;
1195 
1196   //trueBlk could be the common post dominator
1197   downBlk = trueBlk;
1198 
1199   if (DEBUGME) {
1200     errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
1201            << " true = BB" << trueBlk->getNumber()
1202            << ", numSucc=" << trueBlk->succ_size()
1203            << " false = BB" << falseBlk->getNumber() << "\n";
1204   }
1205 
1206   while (downBlk) {
1207     if (DEBUGME) {
1208       errs() << "check down = BB" << downBlk->getNumber();
1209     }
1210 
1211     if (//postDomTree->dominates(downBlk, falseBlk) &&
1212         singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
1213       if (DEBUGME) {
1214         errs() << " working\n";
1215       }
1216 
1217       num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
1218       num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
1219 
1220       numClonedBlock += num;
1221       num += serialPatternMatch(*headBlk->succ_begin());
1222       num += serialPatternMatch(*(++headBlk->succ_begin()));
1223       num += ifPatternMatch(headBlk);
1224       assert(num > 0); //
1225 
1226       break;
1227     }
1228     if (DEBUGME) {
1229       errs() << " not working\n";
1230     }
1231     downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
1232   } // walk down the postDomTree
1233 
1234   return num;
1235 } //handleJumpintoIf
1236 
1237 template<class PassT>
showImproveSimpleJumpintoIf(BlockT * headBlk,BlockT * trueBlk,BlockT * falseBlk,BlockT * landBlk,bool detail)1238 void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
1239                                                          BlockT *trueBlk,
1240                                                          BlockT *falseBlk,
1241                                                          BlockT *landBlk,
1242                                                          bool detail) {
1243   errs() << "head = BB" << headBlk->getNumber()
1244          << " size = " << headBlk->size();
1245   if (detail) {
1246     errs() << "\n";
1247     headBlk->print(errs());
1248     errs() << "\n";
1249   }
1250 
1251   if (trueBlk) {
1252     errs() << ", true = BB" << trueBlk->getNumber() << " size = "
1253            << trueBlk->size() << " numPred = " << trueBlk->pred_size();
1254     if (detail) {
1255       errs() << "\n";
1256       trueBlk->print(errs());
1257       errs() << "\n";
1258     }
1259   }
1260   if (falseBlk) {
1261     errs() << ", false = BB" << falseBlk->getNumber() << " size = "
1262            << falseBlk->size() << " numPred = " << falseBlk->pred_size();
1263     if (detail) {
1264       errs() << "\n";
1265       falseBlk->print(errs());
1266       errs() << "\n";
1267     }
1268   }
1269   if (landBlk) {
1270     errs() << ", land = BB" << landBlk->getNumber() << " size = "
1271            << landBlk->size() << " numPred = " << landBlk->pred_size();
1272     if (detail) {
1273       errs() << "\n";
1274       landBlk->print(errs());
1275       errs() << "\n";
1276     }
1277   }
1278 
1279     errs() << "\n";
1280 } //showImproveSimpleJumpintoIf
1281 
1282 template<class PassT>
improveSimpleJumpintoIf(BlockT * headBlk,BlockT * trueBlk,BlockT * falseBlk,BlockT ** plandBlk)1283 int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
1284                                                     BlockT *trueBlk,
1285                                                     BlockT *falseBlk,
1286                                                     BlockT **plandBlk) {
1287   bool migrateTrue = false;
1288   bool migrateFalse = false;
1289 
1290   BlockT *landBlk = *plandBlk;
1291 
1292   assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
1293          && (falseBlk == NULL || falseBlk->succ_size() <= 1));
1294 
1295   if (trueBlk == falseBlk) {
1296     return 0;
1297   }
1298 
1299 #if 0
1300   if (DEBUGME) {
1301     errs() << "improveSimpleJumpintoIf: ";
1302     showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1303   }
1304 #endif
1305 
1306   // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
1307   // May consider the # landBlk->pred_size() as it represents the number of
1308   // assignment initReg = .. needed to insert.
1309   migrateTrue = needMigrateBlock(trueBlk);
1310   migrateFalse = needMigrateBlock(falseBlk);
1311 
1312   if (!migrateTrue && !migrateFalse) {
1313     return 0;
1314   }
1315 
1316   // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1317   // have more than one predecessors.  without doing this, its predecessor
1318   // rather than headBlk will have undefined value in initReg.
1319   if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
1320     migrateTrue = true;
1321   }
1322   if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
1323     migrateFalse = true;
1324   }
1325 
1326   if (DEBUGME) {
1327     errs() << "before improveSimpleJumpintoIf: ";
1328     showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1329     //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
1330   }
1331 
1332   // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1333   //
1334   // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1335   //      {initReg = 0; org falseBlk branch }
1336   //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1337   //      => org landBlk
1338   //      if landBlk->pred_size() > 2, put the about if-else inside
1339   //      if (initReg !=2) {...}
1340   //
1341   // add initReg = initVal to headBlk
1342 
1343   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1344   unsigned initReg =
1345     funcRep->getRegInfo().createVirtualRegister(I32RC);
1346   if (!migrateTrue || !migrateFalse) {
1347     int initVal = migrateTrue ? 0 : 1;
1348     CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
1349   }
1350 
1351   int numNewBlk = 0;
1352 
1353   if (landBlk == NULL) {
1354     landBlk = funcRep->CreateMachineBasicBlock();
1355     funcRep->push_back(landBlk);  //insert to function
1356 
1357     if (trueBlk) {
1358       trueBlk->addSuccessor(landBlk);
1359     } else {
1360       headBlk->addSuccessor(landBlk);
1361     }
1362 
1363     if (falseBlk) {
1364       falseBlk->addSuccessor(landBlk);
1365     } else {
1366       headBlk->addSuccessor(landBlk);
1367     }
1368 
1369     numNewBlk ++;
1370   }
1371 
1372   bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
1373 
1374   //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1375   typename BlockT::iterator insertPos =
1376     CFGTraits::getInstrPos
1377     (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
1378 
1379   if (landBlkHasOtherPred) {
1380     unsigned immReg =
1381       funcRep->getRegInfo().createVirtualRegister(I32RC);
1382     CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
1383     unsigned cmpResReg =
1384       funcRep->getRegInfo().createVirtualRegister(I32RC);
1385 
1386     CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
1387                                         initReg, immReg);
1388     CFGTraits::insertCondBranchBefore(landBlk, insertPos,
1389                                       AMDGPU::IF_LOGICALZ_i32, passRep,
1390                                       cmpResReg, DebugLoc());
1391   }
1392 
1393   CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32,
1394                                     passRep, initReg, DebugLoc());
1395 
1396   if (migrateTrue) {
1397     migrateInstruction(trueBlk, landBlk, insertPos);
1398     // need to uncondionally insert the assignment to ensure a path from its
1399     // predecessor rather than headBlk has valid value in initReg if
1400     // (initVal != 1).
1401     CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
1402   }
1403   CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
1404 
1405   if (migrateFalse) {
1406     migrateInstruction(falseBlk, landBlk, insertPos);
1407     // need to uncondionally insert the assignment to ensure a path from its
1408     // predecessor rather than headBlk has valid value in initReg if
1409     // (initVal != 0)
1410     CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
1411   }
1412   //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1413 
1414   if (landBlkHasOtherPred) {
1415     // add endif
1416     CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1417 
1418     // put initReg = 2 to other predecessors of landBlk
1419     for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
1420          predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
1421          ++predIter) {
1422       BlockT *curBlk = *predIter;
1423       if (curBlk != trueBlk && curBlk != falseBlk) {
1424         CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
1425       }
1426     } //for
1427   }
1428   if (DEBUGME) {
1429     errs() << "result from improveSimpleJumpintoIf: ";
1430     showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1431     //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
1432   }
1433 
1434   // update landBlk
1435   *plandBlk = landBlk;
1436 
1437   return numNewBlk;
1438 } //improveSimpleJumpintoIf
1439 
1440 template<class PassT>
handleLoopbreak(BlockT * exitingBlk,LoopT * exitingLoop,BlockT * exitBlk,LoopT * exitLoop,BlockT * landBlk)1441 void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
1442                                               LoopT *exitingLoop,
1443                                              BlockT *exitBlk,
1444                                               LoopT *exitLoop,
1445                                              BlockT *landBlk) {
1446   if (DEBUGME) {
1447     errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
1448            << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
1449   }
1450   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1451 
1452   RegiT initReg = INVALIDREGNUM;
1453   if (exitingLoop != exitLoop) {
1454     initReg = static_cast<int>
1455       (funcRep->getRegInfo().createVirtualRegister(I32RC));
1456     assert(initReg != INVALIDREGNUM);
1457     addLoopBreakInitReg(exitLoop, initReg);
1458     while (exitingLoop != exitLoop && exitingLoop) {
1459       addLoopBreakOnReg(exitingLoop, initReg);
1460       exitingLoop = exitingLoop->getParentLoop();
1461     }
1462     assert(exitingLoop == exitLoop);
1463   }
1464 
1465   mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
1466 
1467 } //handleLoopbreak
1468 
1469 template<class PassT>
handleLoopcontBlock(BlockT * contingBlk,LoopT * contingLoop,BlockT * contBlk,LoopT * contLoop)1470 void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
1471                                                   LoopT *contingLoop,
1472                                                  BlockT *contBlk,
1473                                                   LoopT *contLoop) {
1474   if (DEBUGME) {
1475     errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
1476            << " header = BB" << contBlk->getNumber() << "\n";
1477 
1478     errs() << "Trying to continue loop-depth = "
1479            << getLoopDepth(contLoop)
1480            << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
1481   }
1482 
1483   RegiT initReg = INVALIDREGNUM;
1484   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1485   if (contingLoop != contLoop) {
1486     initReg = static_cast<int>
1487       (funcRep->getRegInfo().createVirtualRegister(I32RC));
1488     assert(initReg != INVALIDREGNUM);
1489     addLoopContInitReg(contLoop, initReg);
1490     while (contingLoop && contingLoop->getParentLoop() != contLoop) {
1491       addLoopBreakOnReg(contingLoop, initReg);  //not addLoopContOnReg
1492       contingLoop = contingLoop->getParentLoop();
1493     }
1494     assert(contingLoop && contingLoop->getParentLoop() == contLoop);
1495     addLoopContOnReg(contingLoop, initReg);
1496   }
1497 
1498   settleLoopcontBlock(contingBlk, contBlk, initReg);
1499   //contingBlk->removeSuccessor(loopHeader);
1500 } //handleLoopcontBlock
1501 
1502 template<class PassT>
mergeSerialBlock(BlockT * dstBlk,BlockT * srcBlk)1503 void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
1504   if (DEBUGME) {
1505     errs() << "serialPattern BB" << dstBlk->getNumber()
1506            << " <= BB" << srcBlk->getNumber() << "\n";
1507   }
1508   //removeUnconditionalBranch(dstBlk);
1509   dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());
1510 
1511   dstBlk->removeSuccessor(srcBlk);
1512   CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
1513 
1514   removeSuccessor(srcBlk);
1515   retireBlock(dstBlk, srcBlk);
1516 } //mergeSerialBlock
1517 
1518 template<class PassT>
mergeIfthenelseBlock(InstrT * branchInstr,BlockT * curBlk,BlockT * trueBlk,BlockT * falseBlk,BlockT * landBlk)1519 void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
1520                                                   BlockT *curBlk,
1521                                                   BlockT *trueBlk,
1522                                                   BlockT *falseBlk,
1523                                                   BlockT *landBlk) {
1524   if (DEBUGME) {
1525     errs() << "ifPattern BB" << curBlk->getNumber();
1526     errs() << "{  ";
1527     if (trueBlk) {
1528       errs() << "BB" << trueBlk->getNumber();
1529     }
1530     errs() << "  } else ";
1531     errs() << "{  ";
1532     if (falseBlk) {
1533       errs() << "BB" << falseBlk->getNumber();
1534     }
1535     errs() << "  }\n ";
1536     errs() << "landBlock: ";
1537     if (landBlk == NULL) {
1538       errs() << "NULL";
1539     } else {
1540       errs() << "BB" << landBlk->getNumber();
1541     }
1542     errs() << "\n";
1543   }
1544 
1545   int oldOpcode = branchInstr->getOpcode();
1546   DebugLoc branchDL = branchInstr->getDebugLoc();
1547 
1548 //    transform to
1549 //    if cond
1550 //       trueBlk
1551 //    else
1552 //       falseBlk
1553 //    endif
1554 //    landBlk
1555 
1556   typename BlockT::iterator branchInstrPos =
1557     CFGTraits::getInstrPos(curBlk, branchInstr);
1558   CFGTraits::insertCondBranchBefore(branchInstrPos,
1559                                     CFGTraits::getBranchNzeroOpcode(oldOpcode),
1560                                     passRep,
1561 									branchDL);
1562 
1563   if (trueBlk) {
1564     curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
1565     curBlk->removeSuccessor(trueBlk);
1566     if (landBlk && trueBlk->succ_size()!=0) {
1567       trueBlk->removeSuccessor(landBlk);
1568     }
1569     retireBlock(curBlk, trueBlk);
1570   }
1571   CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
1572 
1573   if (falseBlk) {
1574     curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
1575                    falseBlk->end());
1576     curBlk->removeSuccessor(falseBlk);
1577     if (landBlk && falseBlk->succ_size() != 0) {
1578       falseBlk->removeSuccessor(landBlk);
1579     }
1580     retireBlock(curBlk, falseBlk);
1581   }
1582   CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1583 
1584   //curBlk->remove(branchInstrPos);
1585   branchInstr->eraseFromParent();
1586 
1587   if (landBlk && trueBlk && falseBlk) {
1588     curBlk->addSuccessor(landBlk);
1589   }
1590 
1591 } //mergeIfthenelseBlock
1592 
1593 template<class PassT>
mergeLooplandBlock(BlockT * dstBlk,LoopLandInfo * loopLand)1594 void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
1595                                                 LoopLandInfo *loopLand) {
1596   BlockT *landBlk = loopLand->landBlk;
1597 
1598   if (DEBUGME) {
1599     errs() << "loopPattern header = BB" << dstBlk->getNumber()
1600            << " land = BB" << landBlk->getNumber() << "\n";
1601   }
1602 
1603   // Loop contInitRegs are init at the beginning of the loop.
1604   for (typename std::set<RegiT>::const_iterator iter =
1605          loopLand->contInitRegs.begin(),
1606        iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
1607     CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1608   }
1609 
1610   /* we last inserterd the DebugLoc in the
1611    * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
1612    * search for the DebugLoc in the that statement.
1613    * if not found, we have to insert the empty/default DebugLoc */
1614   InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
1615   DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
1616 
1617   CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
1618   // Loop breakInitRegs are init before entering the loop.
1619   for (typename std::set<RegiT>::const_iterator iter =
1620          loopLand->breakInitRegs.begin(),
1621        iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter)
1622   {
1623     CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1624   }
1625   // Loop endbranchInitRegs are init before entering the loop.
1626   for (typename std::set<RegiT>::const_iterator iter =
1627          loopLand->endbranchInitRegs.begin(),
1628        iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
1629     CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1630   }
1631 
1632   /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
1633    * search for the DebugLoc in the continue statement.
1634    * if not found, we have to insert the empty/default DebugLoc */
1635   InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
1636   DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
1637 
1638   CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
1639   // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
1640   // loop.
1641   for (typename std::set<RegiT>::const_iterator iter =
1642          loopLand->breakOnRegs.begin(),
1643        iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
1644     CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep,
1645                                    *iter);
1646   }
1647 
1648   // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
1649   // loop.
1650   for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
1651        iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
1652     CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
1653                                    passRep, *iter);
1654   }
1655 
1656   dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
1657 
1658   for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
1659        iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
1660     dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of.
1661   }
1662 
1663   removeSuccessor(landBlk);
1664   retireBlock(dstBlk, landBlk);
1665 } //mergeLooplandBlock
1666 
1667 template<class PassT>
reversePredicateSetter(typename BlockT::iterator I)1668 void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I)
1669 {
1670   while (I--) {
1671     if (I->getOpcode() == AMDGPU::PRED_X) {
1672       switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
1673       case OPCODE_IS_ZERO_INT:
1674         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
1675         return;
1676       case OPCODE_IS_NOT_ZERO_INT:
1677         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
1678         return;
1679       case OPCODE_IS_ZERO:
1680         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
1681         return;
1682       case OPCODE_IS_NOT_ZERO:
1683         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
1684         return;
1685       default:
1686         assert(0 && "PRED_X Opcode invalid!");
1687       }
1688     }
1689   }
1690 }
1691 
1692 template<class PassT>
mergeLoopbreakBlock(BlockT * exitingBlk,BlockT * exitBlk,BlockT * exitLandBlk,RegiT setReg)1693 void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
1694                                                  BlockT *exitBlk,
1695                                                  BlockT *exitLandBlk,
1696                                                  RegiT  setReg) {
1697   if (DEBUGME) {
1698     errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
1699            << " exit = BB" << exitBlk->getNumber()
1700            << " land = BB" << exitLandBlk->getNumber() << "\n";
1701   }
1702 
1703   InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
1704   assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
1705 
1706   DebugLoc DL = branchInstr->getDebugLoc();
1707 
1708   BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1709   int oldOpcode = branchInstr->getOpcode();
1710 
1711   //    transform exitingBlk to
1712   //    if ( ) {
1713   //       exitBlk (if exitBlk != exitLandBlk)
1714   //       setReg = 1
1715   //       break
1716   //    }endif
1717   //    successor = {orgSuccessor(exitingBlk) - exitBlk}
1718 
1719   typename BlockT::iterator branchInstrPos =
1720     CFGTraits::getInstrPos(exitingBlk, branchInstr);
1721 
1722   if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
1723     //break_logical
1724 
1725     if (trueBranch != exitBlk) {
1726       reversePredicateSetter(branchInstrPos);
1727     }
1728     int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
1729     CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
1730   } else {
1731     if (trueBranch != exitBlk) {
1732       reversePredicateSetter(branchInstr);
1733     }
1734     int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
1735     CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
1736     if (exitBlk != exitLandBlk) {
1737       //splice is insert-before ...
1738       exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
1739                          exitBlk->end());
1740     }
1741     if (setReg != INVALIDREGNUM) {
1742       CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1743     }
1744     CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
1745     CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1746   } //if_logical
1747 
1748   //now branchInst can be erase safely
1749   //exitingBlk->eraseFromParent(branchInstr);
1750   branchInstr->eraseFromParent();
1751 
1752   //now take care of successors, retire blocks
1753   exitingBlk->removeSuccessor(exitBlk);
1754   if (exitBlk != exitLandBlk) {
1755     //splice is insert-before ...
1756     exitBlk->removeSuccessor(exitLandBlk);
1757     retireBlock(exitingBlk, exitBlk);
1758   }
1759 
1760 } //mergeLoopbreakBlock
1761 
1762 template<class PassT>
settleLoopcontBlock(BlockT * contingBlk,BlockT * contBlk,RegiT setReg)1763 void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
1764                                                  BlockT *contBlk,
1765                                                  RegiT   setReg) {
1766   if (DEBUGME) {
1767     errs() << "settleLoopcontBlock conting = BB"
1768            << contingBlk->getNumber()
1769            << ", cont = BB" << contBlk->getNumber() << "\n";
1770   }
1771 
1772   InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
1773   if (branchInstr) {
1774     assert(CFGTraits::isCondBranch(branchInstr));
1775     typename BlockT::iterator branchInstrPos =
1776       CFGTraits::getInstrPos(contingBlk, branchInstr);
1777     BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1778     int oldOpcode = branchInstr->getOpcode();
1779 	DebugLoc DL = branchInstr->getDebugLoc();
1780 
1781     //    transform contingBlk to
1782     //     if () {
1783     //          move instr after branchInstr
1784     //          continue
1785     //        or
1786     //          setReg = 1
1787     //          break
1788     //     }endif
1789     //     successor = {orgSuccessor(contingBlk) - loopHeader}
1790 
1791     bool useContinueLogical =
1792       (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
1793 
1794     if (useContinueLogical == false)
1795     {
1796       int branchOpcode =
1797         trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
1798                               : CFGTraits::getBranchZeroOpcode(oldOpcode);
1799 
1800       CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1801 
1802       if (setReg != INVALIDREGNUM) {
1803         CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1804         // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1805         CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
1806       } else {
1807         // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1808         CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
1809       }
1810 
1811       CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
1812     } else {
1813       int branchOpcode =
1814         trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
1815                               : CFGTraits::getContinueZeroOpcode(oldOpcode);
1816 
1817       CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1818     }
1819 
1820     //contingBlk->eraseFromParent(branchInstr);
1821     branchInstr->eraseFromParent();
1822   } else {
1823     /* if we've arrived here then we've already erased the branch instruction
1824 	 * travel back up the basic block to see the last reference of our debug location
1825 	 * we've just inserted that reference here so it should be representative */
1826     if (setReg != INVALIDREGNUM) {
1827       CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
1828       // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1829       CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1830     } else {
1831       // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1832       CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1833     }
1834   } //else
1835 
1836 } //settleLoopcontBlock
1837 
1838 // BBs in exitBlkSet are determined as in break-path for loopRep,
1839 // before we can put code for BBs as inside loop-body for loopRep
1840 // check whether those BBs are determined as cont-BB for parentLoopRep
1841 // earlier.
1842 // If so, generate a new BB newBlk
1843 //    (1) set newBlk common successor of BBs in exitBlkSet
1844 //    (2) change the continue-instr in BBs in exitBlkSet to break-instr
1845 //    (3) generate continue-instr in newBlk
1846 //
1847 template<class PassT>
1848 typename CFGStructurizer<PassT>::BlockT *
relocateLoopcontBlock(LoopT * parentLoopRep,LoopT * loopRep,std::set<BlockT * > & exitBlkSet,BlockT * exitLandBlk)1849 CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
1850                                               LoopT *loopRep,
1851                                               std::set<BlockT *> &exitBlkSet,
1852                                               BlockT *exitLandBlk) {
1853   std::set<BlockT *> endBlkSet;
1854 
1855 //  BlockT *parentLoopHead = parentLoopRep->getHeader();
1856 
1857 
1858   for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
1859        iterEnd = exitBlkSet.end();
1860        iter != iterEnd; ++iter) {
1861     BlockT *exitBlk = *iter;
1862     BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
1863 
1864     if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
1865       return NULL;
1866 
1867     endBlkSet.insert(endBlk);
1868   }
1869 
1870   BlockT *newBlk = funcRep->CreateMachineBasicBlock();
1871   funcRep->push_back(newBlk);  //insert to function
1872   CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
1873   SHOWNEWBLK(newBlk, "New continue block: ");
1874 
1875   for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
1876        iterEnd = endBlkSet.end();
1877        iter != iterEnd; ++iter) {
1878       BlockT *endBlk = *iter;
1879       InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
1880       if (contInstr) {
1881         contInstr->eraseFromParent();
1882       }
1883       endBlk->addSuccessor(newBlk);
1884       if (DEBUGME) {
1885         errs() << "Add new continue Block to BB"
1886                << endBlk->getNumber() << " successors\n";
1887       }
1888   }
1889 
1890   return newBlk;
1891 } //relocateLoopcontBlock
1892 
1893 
1894 // LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
1895 // LoopLandBlock. This BB branch on the loop endBranchInit register to the
1896 // pathes corresponding to the loop exiting branches.
1897 
1898 template<class PassT>
1899 typename CFGStructurizer<PassT>::BlockT *
addLoopEndbranchBlock(LoopT * loopRep,BlockTSmallerVector & exitingBlks,BlockTSmallerVector & exitBlks)1900 CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
1901                                               BlockTSmallerVector &exitingBlks,
1902                                               BlockTSmallerVector &exitBlks) {
1903   const AMDGPUInstrInfo *tii =
1904              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
1905   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1906 
1907   RegiT endBranchReg = static_cast<int>
1908     (funcRep->getRegInfo().createVirtualRegister(I32RC));
1909   assert(endBranchReg >= 0);
1910 
1911   // reg = 0 before entering the loop
1912   addLoopEndbranchInitReg(loopRep, endBranchReg);
1913 
1914   uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
1915   assert(numBlks >=2 && numBlks == exitBlks.size());
1916 
1917   BlockT *preExitingBlk = exitingBlks[0];
1918   BlockT *preExitBlk = exitBlks[0];
1919   BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
1920   funcRep->push_back(preBranchBlk);  //insert to function
1921   SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
1922 
1923   BlockT *newLandBlk = preBranchBlk;
1924 
1925       CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
1926         newLandBlk);
1927   preExitingBlk->removeSuccessor(preExitBlk);
1928   preExitingBlk->addSuccessor(newLandBlk);
1929 
1930   //it is redundant to add reg = 0 to exitingBlks[0]
1931 
1932   // For 1..n th exiting path (the last iteration handles two pathes) create the
1933   // branch to the previous path and the current path.
1934   for (uint32_t i = 1; i < numBlks; ++i) {
1935     BlockT *curExitingBlk = exitingBlks[i];
1936     BlockT *curExitBlk = exitBlks[i];
1937     BlockT *curBranchBlk;
1938 
1939     if (i == numBlks - 1) {
1940       curBranchBlk = curExitBlk;
1941     } else {
1942       curBranchBlk = funcRep->CreateMachineBasicBlock();
1943       funcRep->push_back(curBranchBlk);  //insert to function
1944       SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
1945     }
1946 
1947     // Add reg = i to exitingBlks[i].
1948     CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
1949                                        endBranchReg, i);
1950 
1951     // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
1952     // (exitingBlks[i], newLandBlk).
1953     CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
1954                                           newLandBlk);
1955     curExitingBlk->removeSuccessor(curExitBlk);
1956     curExitingBlk->addSuccessor(newLandBlk);
1957 
1958     // add to preBranchBlk the branch instruction:
1959     // if (endBranchReg == preVal)
1960     //    preExitBlk
1961     // else
1962     //    curBranchBlk
1963     //
1964     // preValReg = i - 1
1965 
1966   DebugLoc DL;
1967   RegiT preValReg = static_cast<int>
1968     (funcRep->getRegInfo().createVirtualRegister(I32RC));
1969 
1970   preBranchBlk->insert(preBranchBlk->begin(),
1971                        tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
1972                        i - 1));
1973 
1974   // condResReg = (endBranchReg == preValReg)
1975     RegiT condResReg = static_cast<int>
1976       (funcRep->getRegInfo().createVirtualRegister(I32RC));
1977     BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
1978       .addReg(endBranchReg).addReg(preValReg);
1979 
1980     BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
1981       .addMBB(preExitBlk).addReg(condResReg);
1982 
1983     preBranchBlk->addSuccessor(preExitBlk);
1984     preBranchBlk->addSuccessor(curBranchBlk);
1985 
1986     // Update preExitingBlk, preExitBlk, preBranchBlk.
1987     preExitingBlk = curExitingBlk;
1988     preExitBlk = curExitBlk;
1989     preBranchBlk = curBranchBlk;
1990 
1991   }  //end for 1 .. n blocks
1992 
1993   return newLandBlk;
1994 } //addLoopEndbranchBlock
1995 
1996 template<class PassT>
1997 typename CFGStructurizer<PassT>::PathToKind
singlePathTo(BlockT * srcBlk,BlockT * dstBlk,bool allowSideEntry)1998 CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
1999                                      bool allowSideEntry) {
2000   assert(dstBlk);
2001 
2002   if (srcBlk == dstBlk) {
2003     return SinglePath_InPath;
2004   }
2005 
2006   while (srcBlk && srcBlk->succ_size() == 1) {
2007     srcBlk = *srcBlk->succ_begin();
2008     if (srcBlk == dstBlk) {
2009       return SinglePath_InPath;
2010     }
2011 
2012     if (!allowSideEntry && srcBlk->pred_size() > 1) {
2013       return Not_SinglePath;
2014     }
2015   }
2016 
2017   if (srcBlk && srcBlk->succ_size()==0) {
2018     return SinglePath_NotInPath;
2019   }
2020 
2021   return Not_SinglePath;
2022 } //singlePathTo
2023 
2024 // If there is a single path from srcBlk to dstBlk, return the last block before
2025 // dstBlk If there is a single path from srcBlk->end without dstBlk, return the
2026 // last block in the path Otherwise, return NULL
2027 template<class PassT>
2028 typename CFGStructurizer<PassT>::BlockT *
singlePathEnd(BlockT * srcBlk,BlockT * dstBlk,bool allowSideEntry)2029 CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
2030                                       bool allowSideEntry) {
2031   assert(dstBlk);
2032 
2033   if (srcBlk == dstBlk) {
2034     return srcBlk;
2035   }
2036 
2037   if (srcBlk->succ_size() == 0) {
2038     return srcBlk;
2039   }
2040 
2041   while (srcBlk && srcBlk->succ_size() == 1) {
2042     BlockT *preBlk = srcBlk;
2043 
2044     srcBlk = *srcBlk->succ_begin();
2045     if (srcBlk == NULL) {
2046       return preBlk;
2047     }
2048 
2049     if (!allowSideEntry && srcBlk->pred_size() > 1) {
2050       return NULL;
2051     }
2052   }
2053 
2054   if (srcBlk && srcBlk->succ_size()==0) {
2055     return srcBlk;
2056   }
2057 
2058   return NULL;
2059 
2060 } //singlePathEnd
2061 
2062 template<class PassT>
cloneOnSideEntryTo(BlockT * preBlk,BlockT * srcBlk,BlockT * dstBlk)2063 int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
2064                                                BlockT *dstBlk) {
2065   int cloned = 0;
2066   assert(preBlk->isSuccessor(srcBlk));
2067   while (srcBlk && srcBlk != dstBlk) {
2068     assert(srcBlk->succ_size() == 1);
2069     if (srcBlk->pred_size() > 1) {
2070       srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
2071       ++cloned;
2072     }
2073 
2074     preBlk = srcBlk;
2075     srcBlk = *srcBlk->succ_begin();
2076   }
2077 
2078   return cloned;
2079 } //cloneOnSideEntryTo
2080 
2081 template<class PassT>
2082 typename CFGStructurizer<PassT>::BlockT *
cloneBlockForPredecessor(BlockT * curBlk,BlockT * predBlk)2083 CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
2084                                                  BlockT *predBlk) {
2085   assert(predBlk->isSuccessor(curBlk) &&
2086          "succBlk is not a prececessor of curBlk");
2087 
2088   BlockT *cloneBlk = CFGTraits::clone(curBlk);  //clone instructions
2089   CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
2090   //srcBlk, oldBlk, newBlk
2091 
2092   predBlk->removeSuccessor(curBlk);
2093   predBlk->addSuccessor(cloneBlk);
2094 
2095   // add all successor to cloneBlk
2096   CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
2097 
2098   numClonedInstr += curBlk->size();
2099 
2100   if (DEBUGME) {
2101     errs() << "Cloned block: " << "BB"
2102            << curBlk->getNumber() << "size " << curBlk->size() << "\n";
2103   }
2104 
2105   SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
2106 
2107   return cloneBlk;
2108 } //cloneBlockForPredecessor
2109 
2110 template<class PassT>
2111 typename CFGStructurizer<PassT>::BlockT *
exitingBlock2ExitBlock(LoopT * loopRep,BlockT * exitingBlk)2112 CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
2113                                                BlockT *exitingBlk) {
2114   BlockT *exitBlk = NULL;
2115 
2116   for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
2117        iterSuccEnd = exitingBlk->succ_end();
2118        iterSucc != iterSuccEnd; ++iterSucc) {
2119     BlockT *curBlk = *iterSucc;
2120     if (!loopRep->contains(curBlk)) {
2121       assert(exitBlk == NULL);
2122       exitBlk = curBlk;
2123     }
2124   }
2125 
2126   assert(exitBlk != NULL);
2127 
2128   return exitBlk;
2129 } //exitingBlock2ExitBlock
2130 
2131 template<class PassT>
migrateInstruction(BlockT * srcBlk,BlockT * dstBlk,InstrIterator insertPos)2132 void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
2133                                                 BlockT *dstBlk,
2134                                                 InstrIterator insertPos) {
2135   InstrIterator spliceEnd;
2136   //look for the input branchinstr, not the AMDGPU branchinstr
2137   InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2138   if (branchInstr == NULL) {
2139     if (DEBUGME) {
2140       errs() << "migrateInstruction don't see branch instr\n" ;
2141     }
2142     spliceEnd = srcBlk->end();
2143   } else {
2144     if (DEBUGME) {
2145       errs() << "migrateInstruction see branch instr\n" ;
2146       branchInstr->dump();
2147     }
2148     spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
2149   }
2150   if (DEBUGME) {
2151     errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
2152       << "srcSize = " << srcBlk->size() << "\n";
2153   }
2154 
2155   //splice insert before insertPos
2156   dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
2157 
2158   if (DEBUGME) {
2159     errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
2160       << "srcSize = " << srcBlk->size() << "\n";
2161   }
2162 } //migrateInstruction
2163 
2164 // normalizeInfiniteLoopExit change
2165 //   B1:
2166 //        uncond_br LoopHeader
2167 //
2168 // to
2169 //   B1:
2170 //        cond_br 1 LoopHeader dummyExit
2171 // and return the newly added dummy exit block
2172 //
2173 template<class PassT>
2174 typename CFGStructurizer<PassT>::BlockT *
normalizeInfiniteLoopExit(LoopT * LoopRep)2175 CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
2176   BlockT *loopHeader;
2177   BlockT *loopLatch;
2178   loopHeader = LoopRep->getHeader();
2179   loopLatch = LoopRep->getLoopLatch();
2180   BlockT *dummyExitBlk = NULL;
2181   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
2182   if (loopHeader!=NULL && loopLatch!=NULL) {
2183     InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
2184     if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
2185       dummyExitBlk = funcRep->CreateMachineBasicBlock();
2186       funcRep->push_back(dummyExitBlk);  //insert to function
2187       SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
2188 
2189       if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
2190 
2191       typename BlockT::iterator insertPos =
2192         CFGTraits::getInstrPos(loopLatch, branchInstr);
2193       unsigned immReg =
2194         funcRep->getRegInfo().createVirtualRegister(I32RC);
2195       CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
2196       InstrT *newInstr =
2197         CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
2198       MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
2199 
2200       SHOWNEWINSTR(newInstr);
2201 
2202       branchInstr->eraseFromParent();
2203       loopLatch->addSuccessor(dummyExitBlk);
2204     }
2205   }
2206 
2207   return dummyExitBlk;
2208 } //normalizeInfiniteLoopExit
2209 
2210 template<class PassT>
removeUnconditionalBranch(BlockT * srcBlk)2211 void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
2212   InstrT *branchInstr;
2213 
2214   // I saw two unconditional branch in one basic block in example
2215   // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
2216   while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
2217           && CFGTraits::isUncondBranch(branchInstr)) {
2218     if (DEBUGME) {
2219           errs() << "Removing unconditional branch instruction" ;
2220       branchInstr->dump();
2221     }
2222     branchInstr->eraseFromParent();
2223   }
2224 } //removeUnconditionalBranch
2225 
2226 template<class PassT>
removeRedundantConditionalBranch(BlockT * srcBlk)2227 void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
2228   if (srcBlk->succ_size() == 2) {
2229     BlockT *blk1 = *srcBlk->succ_begin();
2230     BlockT *blk2 = *(++srcBlk->succ_begin());
2231 
2232     if (blk1 == blk2) {
2233       InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2234       assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
2235       if (DEBUGME) {
2236         errs() << "Removing unneeded conditional branch instruction" ;
2237         branchInstr->dump();
2238       }
2239       branchInstr->eraseFromParent();
2240       SHOWNEWBLK(blk1, "Removing redundant successor");
2241       srcBlk->removeSuccessor(blk1);
2242     }
2243   }
2244 } //removeRedundantConditionalBranch
2245 
2246 template<class PassT>
addDummyExitBlock(SmallVector<BlockT *,DEFAULT_VEC_SLOTS> & retBlks)2247 void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
2248                                                DEFAULT_VEC_SLOTS> &retBlks) {
2249   BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
2250   funcRep->push_back(dummyExitBlk);  //insert to function
2251   CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
2252 
2253   for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
2254          retBlks.begin(),
2255        iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
2256     BlockT *curBlk = *iter;
2257     InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
2258     if (curInstr) {
2259       curInstr->eraseFromParent();
2260     }
2261 #if 0
2262     if (curBlk->size()==0 && curBlk->pred_size() == 1) {
2263       if (DEBUGME) {
2264         errs() << "Replace empty block BB" <<  curBlk->getNumber()
2265           << " with dummyExitBlock\n";
2266       }
2267       BlockT *predb = *curBlk->pred_begin();
2268       predb->removeSuccessor(curBlk);
2269       curBlk = predb;
2270     } //handle empty curBlk
2271 #endif
2272     curBlk->addSuccessor(dummyExitBlk);
2273     if (DEBUGME) {
2274       errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
2275              << " successors\n";
2276     }
2277   } //for
2278 
2279   SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
2280 } //addDummyExitBlock
2281 
2282 template<class PassT>
removeSuccessor(BlockT * srcBlk)2283 void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
2284   while (srcBlk->succ_size()) {
2285     srcBlk->removeSuccessor(*srcBlk->succ_begin());
2286   }
2287 }
2288 
2289 template<class PassT>
recordSccnum(BlockT * srcBlk,int sccNum)2290 void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
2291   BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2292 
2293   if (srcBlkInfo == NULL) {
2294     srcBlkInfo = new BlockInfo();
2295   }
2296 
2297   srcBlkInfo->sccNum = sccNum;
2298 }
2299 
2300 template<class PassT>
getSCCNum(BlockT * srcBlk)2301 int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
2302   BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2303   return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
2304 }
2305 
2306 template<class PassT>
retireBlock(BlockT * dstBlk,BlockT * srcBlk)2307 void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
2308   if (DEBUGME) {
2309         errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
2310   }
2311 
2312   BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2313 
2314   if (srcBlkInfo == NULL) {
2315     srcBlkInfo = new BlockInfo();
2316   }
2317 
2318   srcBlkInfo->isRetired = true;
2319   //int i = srcBlk->succ_size();
2320   //int j = srcBlk->pred_size();
2321   assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
2322          && "can't retire block yet");
2323 }
2324 
2325 template<class PassT>
isRetiredBlock(BlockT * srcBlk)2326 bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
2327   BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2328   return (srcBlkInfo && srcBlkInfo->isRetired);
2329 }
2330 
2331 template<class PassT>
isActiveLoophead(BlockT * curBlk)2332 bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
2333   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2334   while (loopRep && loopRep->getHeader() == curBlk) {
2335     LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
2336 
2337     if(loopLand == NULL)
2338       return true;
2339 
2340     BlockT *landBlk = loopLand->landBlk;
2341     assert(landBlk);
2342     if (!isRetiredBlock(landBlk)) {
2343       return true;
2344     }
2345 
2346     loopRep = loopRep->getParentLoop();
2347   }
2348 
2349   return false;
2350 } //isActiveLoophead
2351 
2352 template<class PassT>
needMigrateBlock(BlockT * blk)2353 bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
2354   const unsigned blockSizeThreshold = 30;
2355   const unsigned cloneInstrThreshold = 100;
2356 
2357   bool multiplePreds = blk && (blk->pred_size() > 1);
2358 
2359   if(!multiplePreds)
2360     return false;
2361 
2362   unsigned blkSize = blk->size();
2363   return ((blkSize > blockSizeThreshold)
2364           && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
2365 } //needMigrateBlock
2366 
2367 template<class PassT>
2368 typename CFGStructurizer<PassT>::BlockT *
recordLoopLandBlock(LoopT * loopRep,BlockT * landBlk,BlockTSmallerVector & exitBlks,std::set<BlockT * > & exitBlkSet)2369 CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
2370                                             BlockTSmallerVector &exitBlks,
2371                                             std::set<BlockT *> &exitBlkSet) {
2372   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks;  //in exit path blocks
2373 
2374   for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
2375        predIterEnd = landBlk->pred_end();
2376        predIter != predIterEnd; ++predIter) {
2377     BlockT *curBlk = *predIter;
2378     if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
2379       inpathBlks.push_back(curBlk);
2380     }
2381   } //for
2382 
2383   //if landBlk has predecessors that are not in the given loop,
2384   //create a new block
2385   BlockT *newLandBlk = landBlk;
2386   if (inpathBlks.size() != landBlk->pred_size()) {
2387     newLandBlk = funcRep->CreateMachineBasicBlock();
2388     funcRep->push_back(newLandBlk);  //insert to function
2389     newLandBlk->addSuccessor(landBlk);
2390     for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
2391          inpathBlks.begin(),
2392          iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
2393       BlockT *curBlk = *iter;
2394       CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
2395       //srcBlk, oldBlk, newBlk
2396       curBlk->removeSuccessor(landBlk);
2397       curBlk->addSuccessor(newLandBlk);
2398     }
2399     for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
2400       if (exitBlks[i] == landBlk) {
2401         exitBlks[i] = newLandBlk;
2402       }
2403     }
2404     SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
2405   }
2406 
2407   setLoopLandBlock(loopRep, newLandBlk);
2408 
2409   return newLandBlk;
2410 } // recordLoopbreakLand
2411 
2412 template<class PassT>
setLoopLandBlock(LoopT * loopRep,BlockT * blk)2413 void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
2414   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2415 
2416   if (theEntry == NULL) {
2417     theEntry = new LoopLandInfo();
2418   }
2419   assert(theEntry->landBlk == NULL);
2420 
2421   if (blk == NULL) {
2422     blk = funcRep->CreateMachineBasicBlock();
2423     funcRep->push_back(blk);  //insert to function
2424     SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
2425   }
2426 
2427   theEntry->landBlk = blk;
2428 
2429   if (DEBUGME) {
2430     errs() << "setLoopLandBlock loop-header = BB"
2431            << loopRep->getHeader()->getNumber()
2432            << "  landing-block = BB" << blk->getNumber() << "\n";
2433   }
2434 } // setLoopLandBlock
2435 
2436 template<class PassT>
addLoopBreakOnReg(LoopT * loopRep,RegiT regNum)2437 void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
2438   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2439 
2440   if (theEntry == NULL) {
2441     theEntry = new LoopLandInfo();
2442   }
2443 
2444   theEntry->breakOnRegs.insert(regNum);
2445 
2446   if (DEBUGME) {
2447     errs() << "addLoopBreakOnReg loop-header = BB"
2448            << loopRep->getHeader()->getNumber()
2449            << "  regNum = " << regNum << "\n";
2450   }
2451 } // addLoopBreakOnReg
2452 
2453 template<class PassT>
addLoopContOnReg(LoopT * loopRep,RegiT regNum)2454 void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
2455   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2456 
2457   if (theEntry == NULL) {
2458     theEntry = new LoopLandInfo();
2459   }
2460   theEntry->contOnRegs.insert(regNum);
2461 
2462   if (DEBUGME) {
2463     errs() << "addLoopContOnReg loop-header = BB"
2464            << loopRep->getHeader()->getNumber()
2465            << "  regNum = " << regNum << "\n";
2466   }
2467 } // addLoopContOnReg
2468 
2469 template<class PassT>
addLoopBreakInitReg(LoopT * loopRep,RegiT regNum)2470 void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
2471   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2472 
2473   if (theEntry == NULL) {
2474     theEntry = new LoopLandInfo();
2475   }
2476   theEntry->breakInitRegs.insert(regNum);
2477 
2478   if (DEBUGME) {
2479     errs() << "addLoopBreakInitReg loop-header = BB"
2480            << loopRep->getHeader()->getNumber()
2481            << "  regNum = " << regNum << "\n";
2482   }
2483 } // addLoopBreakInitReg
2484 
2485 template<class PassT>
addLoopContInitReg(LoopT * loopRep,RegiT regNum)2486 void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
2487   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2488 
2489   if (theEntry == NULL) {
2490     theEntry = new LoopLandInfo();
2491   }
2492   theEntry->contInitRegs.insert(regNum);
2493 
2494   if (DEBUGME) {
2495     errs() << "addLoopContInitReg loop-header = BB"
2496            << loopRep->getHeader()->getNumber()
2497            << "  regNum = " << regNum << "\n";
2498   }
2499 } // addLoopContInitReg
2500 
2501 template<class PassT>
addLoopEndbranchInitReg(LoopT * loopRep,RegiT regNum)2502 void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
2503                                                      RegiT regNum) {
2504   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2505 
2506   if (theEntry == NULL) {
2507     theEntry = new LoopLandInfo();
2508   }
2509   theEntry->endbranchInitRegs.insert(regNum);
2510 
2511   if (DEBUGME)
2512   {
2513         errs() << "addLoopEndbranchInitReg loop-header = BB"
2514       << loopRep->getHeader()->getNumber()
2515       << "  regNum = " << regNum << "\n";
2516   }
2517 } // addLoopEndbranchInitReg
2518 
2519 template<class PassT>
2520 typename CFGStructurizer<PassT>::LoopLandInfo *
getLoopLandInfo(LoopT * loopRep)2521 CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
2522   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2523 
2524   return theEntry;
2525 } // getLoopLandInfo
2526 
2527 template<class PassT>
2528 typename CFGStructurizer<PassT>::BlockT *
getLoopLandBlock(LoopT * loopRep)2529 CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
2530   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2531 
2532   return theEntry ? theEntry->landBlk : NULL;
2533 } // getLoopLandBlock
2534 
2535 
2536 template<class PassT>
hasBackEdge(BlockT * curBlk)2537 bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
2538   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2539   if (loopRep == NULL)
2540     return false;
2541 
2542   BlockT *loopHeader = loopRep->getHeader();
2543 
2544   return curBlk->isSuccessor(loopHeader);
2545 
2546 } //hasBackEdge
2547 
2548 template<class PassT>
getLoopDepth(LoopT * loopRep)2549 unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
2550   return loopRep ? loopRep->getLoopDepth() : 0;
2551 } //getLoopDepth
2552 
2553 template<class PassT>
countActiveBlock(typename SmallVector<BlockT *,DEFAULT_VEC_SLOTS>::const_iterator iterStart,typename SmallVector<BlockT *,DEFAULT_VEC_SLOTS>::const_iterator iterEnd)2554 int CFGStructurizer<PassT>::countActiveBlock
2555 (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
2556  typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
2557   int count = 0;
2558   while (iterStart != iterEnd) {
2559     if (!isRetiredBlock(*iterStart)) {
2560       ++count;
2561     }
2562     ++iterStart;
2563   }
2564 
2565   return count;
2566 } //countActiveBlock
2567 
2568 // This is work around solution for findNearestCommonDominator not avaiable to
2569 // post dom a proper fix should go to Dominators.h.
2570 
2571 template<class PassT>
2572 typename CFGStructurizer<PassT>::BlockT*
findNearestCommonPostDom(BlockT * blk1,BlockT * blk2)2573 CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
2574 
2575   if (postDomTree->dominates(blk1, blk2)) {
2576     return blk1;
2577   }
2578   if (postDomTree->dominates(blk2, blk1)) {
2579     return blk2;
2580   }
2581 
2582   DomTreeNodeT *node1 = postDomTree->getNode(blk1);
2583   DomTreeNodeT *node2 = postDomTree->getNode(blk2);
2584 
2585   // Handle newly cloned node.
2586   if (node1 == NULL && blk1->succ_size() == 1) {
2587     return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
2588   }
2589   if (node2 == NULL && blk2->succ_size() == 1) {
2590     return findNearestCommonPostDom(blk1, *blk2->succ_begin());
2591   }
2592 
2593   if (node1 == NULL || node2 == NULL) {
2594     return NULL;
2595   }
2596 
2597   node1 = node1->getIDom();
2598   while (node1) {
2599     if (postDomTree->dominates(node1, node2)) {
2600       return node1->getBlock();
2601     }
2602     node1 = node1->getIDom();
2603   }
2604 
2605   return NULL;
2606 }
2607 
2608 template<class PassT>
2609 typename CFGStructurizer<PassT>::BlockT *
findNearestCommonPostDom(typename std::set<BlockT * > & blks)2610 CFGStructurizer<PassT>::findNearestCommonPostDom
2611 (typename std::set<BlockT *> &blks) {
2612   BlockT *commonDom;
2613   typename std::set<BlockT *>::const_iterator iter = blks.begin();
2614   typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
2615   for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
2616     BlockT *curBlk = *iter;
2617     if (curBlk != commonDom) {
2618       commonDom = findNearestCommonPostDom(curBlk, commonDom);
2619     }
2620   }
2621 
2622   if (DEBUGME) {
2623     errs() << "Common post dominator for exit blocks is ";
2624     if (commonDom) {
2625           errs() << "BB" << commonDom->getNumber() << "\n";
2626     } else {
2627       errs() << "NULL\n";
2628     }
2629   }
2630 
2631   return commonDom;
2632 } //findNearestCommonPostDom
2633 
2634 } //end namespace llvm
2635 
2636 //todo: move-end
2637 
2638 
2639 //===----------------------------------------------------------------------===//
2640 //
2641 // CFGStructurizer for AMDGPU
2642 //
2643 //===----------------------------------------------------------------------===//
2644 
2645 
2646 using namespace llvmCFGStruct;
2647 
2648 namespace llvm
2649 {
2650 class AMDGPUCFGStructurizer : public MachineFunctionPass
2651 {
2652 public:
2653   typedef MachineInstr              InstructionType;
2654   typedef MachineFunction           FunctionType;
2655   typedef MachineBasicBlock         BlockType;
2656   typedef MachineLoopInfo           LoopinfoType;
2657   typedef MachineDominatorTree      DominatortreeType;
2658   typedef MachinePostDominatorTree  PostDominatortreeType;
2659   typedef MachineDomTreeNode        DomTreeNodeType;
2660   typedef MachineLoop               LoopType;
2661 
2662 protected:
2663   TargetMachine &TM;
2664   const TargetInstrInfo *TII;
2665   const AMDGPURegisterInfo *TRI;
2666 
2667 public:
2668   AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
2669   const TargetInstrInfo *getTargetInstrInfo() const;
2670   //bool runOnMachineFunction(MachineFunction &F);
2671 
2672 private:
2673 
2674 };   //end of class AMDGPUCFGStructurizer
2675 
2676 //char AMDGPUCFGStructurizer::ID = 0;
2677 } //end of namespace llvm
AMDGPUCFGStructurizer(char & pid,TargetMachine & tm)2678 AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm
2679                                           )
2680 : MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
2681   TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())
2682   ) {
2683 }
2684 
getTargetInstrInfo() const2685 const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
2686   return TII;
2687 }
2688 //===----------------------------------------------------------------------===//
2689 //
2690 // CFGPrepare
2691 //
2692 //===----------------------------------------------------------------------===//
2693 
2694 
2695 using namespace llvmCFGStruct;
2696 
2697 namespace llvm
2698 {
2699 class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer
2700 {
2701 public:
2702   static char ID;
2703 
2704 public:
2705   AMDGPUCFGPrepare(TargetMachine &tm);
2706 
2707   virtual const char *getPassName() const;
2708   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2709 
2710   bool runOnMachineFunction(MachineFunction &F);
2711 
2712 private:
2713 
2714 };   //end of class AMDGPUCFGPrepare
2715 
2716 char AMDGPUCFGPrepare::ID = 0;
2717 } //end of namespace llvm
2718 
AMDGPUCFGPrepare(TargetMachine & tm)2719 AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
2720   : AMDGPUCFGStructurizer(ID, tm )
2721 {
2722 }
getPassName() const2723 const char *AMDGPUCFGPrepare::getPassName() const {
2724   return "AMD IL Control Flow Graph Preparation Pass";
2725 }
2726 
getAnalysisUsage(AnalysisUsage & AU) const2727 void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
2728   AU.addPreserved<MachineFunctionAnalysis>();
2729   AU.addRequired<MachineFunctionAnalysis>();
2730   AU.addRequired<MachineDominatorTree>();
2731   AU.addRequired<MachinePostDominatorTree>();
2732   AU.addRequired<MachineLoopInfo>();
2733 }
2734 
2735 //===----------------------------------------------------------------------===//
2736 //
2737 // CFGPerform
2738 //
2739 //===----------------------------------------------------------------------===//
2740 
2741 
2742 using namespace llvmCFGStruct;
2743 
2744 namespace llvm
2745 {
2746 class AMDGPUCFGPerform : public AMDGPUCFGStructurizer
2747 {
2748 public:
2749   static char ID;
2750 
2751 public:
2752   AMDGPUCFGPerform(TargetMachine &tm);
2753   virtual const char *getPassName() const;
2754   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2755   bool runOnMachineFunction(MachineFunction &F);
2756 
2757 private:
2758 
2759 };   //end of class AMDGPUCFGPerform
2760 
2761 char AMDGPUCFGPerform::ID = 0;
2762 } //end of namespace llvm
2763 
AMDGPUCFGPerform(TargetMachine & tm)2764   AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
2765 : AMDGPUCFGStructurizer(ID, tm)
2766 {
2767 }
2768 
getPassName() const2769 const char *AMDGPUCFGPerform::getPassName() const {
2770   return "AMD IL Control Flow Graph structurizer Pass";
2771 }
2772 
getAnalysisUsage(AnalysisUsage & AU) const2773 void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
2774   AU.addPreserved<MachineFunctionAnalysis>();
2775   AU.addRequired<MachineFunctionAnalysis>();
2776   AU.addRequired<MachineDominatorTree>();
2777   AU.addRequired<MachinePostDominatorTree>();
2778   AU.addRequired<MachineLoopInfo>();
2779 }
2780 
2781 //===----------------------------------------------------------------------===//
2782 //
2783 // CFGStructTraits<AMDGPUCFGStructurizer>
2784 //
2785 //===----------------------------------------------------------------------===//
2786 
2787 namespace llvmCFGStruct
2788 {
2789 // this class is tailor to the AMDGPU backend
2790 template<>
2791 struct CFGStructTraits<AMDGPUCFGStructurizer>
2792 {
2793   typedef int RegiT;
2794 
getBreakNzeroOpcodellvmCFGStruct::CFGStructTraits2795   static int getBreakNzeroOpcode(int oldOpcode) {
2796     switch(oldOpcode) {
2797       case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32;
2798     default:
2799       assert(0 && "internal error");
2800     };
2801     return -1;
2802   }
2803 
getBreakZeroOpcodellvmCFGStruct::CFGStructTraits2804   static int getBreakZeroOpcode(int oldOpcode) {
2805     switch(oldOpcode) {
2806       case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32;
2807     default:
2808       assert(0 && "internal error");
2809     };
2810     return -1;
2811   }
2812 
getBranchNzeroOpcodellvmCFGStruct::CFGStructTraits2813   static int getBranchNzeroOpcode(int oldOpcode) {
2814     switch(oldOpcode) {
2815     case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32;
2816       ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
2817       case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
2818     default:
2819       assert(0 && "internal error");
2820     };
2821     return -1;
2822   }
2823 
getBranchZeroOpcodellvmCFGStruct::CFGStructTraits2824   static int getBranchZeroOpcode(int oldOpcode) {
2825     switch(oldOpcode) {
2826     case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32;
2827       ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
2828       case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
2829     default:
2830       assert(0 && "internal error");
2831     };
2832     return -1;
2833   }
2834 
getContinueNzeroOpcodellvmCFGStruct::CFGStructTraits2835   static int getContinueNzeroOpcode(int oldOpcode)
2836   {
2837     switch(oldOpcode) {
2838       case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
2839       default:
2840         assert(0 && "internal error");
2841     };
2842     return -1;
2843   }
2844 
getContinueZeroOpcodellvmCFGStruct::CFGStructTraits2845   static int getContinueZeroOpcode(int oldOpcode) {
2846     switch(oldOpcode) {
2847       case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
2848     default:
2849       assert(0 && "internal error");
2850     };
2851     return -1;
2852   }
2853 
2854 // the explicitly represented branch target is the true branch target
2855 #define getExplicitBranch getTrueBranch
2856 #define setExplicitBranch setTrueBranch
2857 
getTrueBranchllvmCFGStruct::CFGStructTraits2858   static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
2859     return instr->getOperand(0).getMBB();
2860   }
2861 
setTrueBranchllvmCFGStruct::CFGStructTraits2862   static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
2863     instr->getOperand(0).setMBB(blk);
2864   }
2865 
2866   static MachineBasicBlock *
getFalseBranchllvmCFGStruct::CFGStructTraits2867   getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
2868     assert(blk->succ_size() == 2);
2869     MachineBasicBlock *trueBranch = getTrueBranch(instr);
2870     MachineBasicBlock::succ_iterator iter = blk->succ_begin();
2871     MachineBasicBlock::succ_iterator iterNext = iter;
2872     ++iterNext;
2873 
2874     return (*iter == trueBranch) ? *iterNext : *iter;
2875   }
2876 
isCondBranchllvmCFGStruct::CFGStructTraits2877   static bool isCondBranch(MachineInstr *instr) {
2878     switch (instr->getOpcode()) {
2879       case AMDGPU::JUMP:
2880         return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
2881       ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
2882       case AMDGPU::SI_IF_NZ:
2883       case AMDGPU::SI_IF_Z:
2884       break;
2885     default:
2886       return false;
2887     }
2888     return true;
2889   }
2890 
isUncondBranchllvmCFGStruct::CFGStructTraits2891   static bool isUncondBranch(MachineInstr *instr) {
2892     switch (instr->getOpcode()) {
2893     case AMDGPU::JUMP:
2894       return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
2895     default:
2896       return false;
2897     }
2898     return true;
2899   }
2900 
getLastDebugLocInBBllvmCFGStruct::CFGStructTraits2901   static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
2902     //get DebugLoc from the first MachineBasicBlock instruction with debug info
2903     DebugLoc DL;
2904 	for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
2905 	  MachineInstr *instr = &(*iter);
2906 	  if (instr->getDebugLoc().isUnknown() == false) {
2907 	    DL = instr->getDebugLoc();
2908 	  }
2909     }
2910     return DL;
2911   }
2912 
getNormalBlockBranchInstrllvmCFGStruct::CFGStructTraits2913   static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
2914     MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2915     MachineInstr *instr = &*iter;
2916     if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
2917       return instr;
2918     }
2919     return NULL;
2920   }
2921 
2922   // The correct naming for this is getPossibleLoopendBlockBranchInstr.
2923   //
2924   // BB with backward-edge could have move instructions after the branch
2925   // instruction.  Such move instruction "belong to" the loop backward-edge.
2926   //
getLoopendBlockBranchInstrllvmCFGStruct::CFGStructTraits2927   static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
2928     const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
2929                                   blk->getParent()->getTarget().getInstrInfo());
2930 
2931     for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
2932          iterEnd = blk->rend(); iter != iterEnd; ++iter) {
2933       // FIXME: Simplify
2934       MachineInstr *instr = &*iter;
2935       if (instr) {
2936         if (isCondBranch(instr) || isUncondBranch(instr)) {
2937           return instr;
2938         } else if (!TII->isMov(instr->getOpcode())) {
2939           break;
2940         }
2941       }
2942     }
2943     return NULL;
2944   }
2945 
getReturnInstrllvmCFGStruct::CFGStructTraits2946   static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
2947     MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2948     if (iter != blk->rend()) {
2949       MachineInstr *instr = &(*iter);
2950       if (instr->getOpcode() == AMDGPU::RETURN) {
2951         return instr;
2952       }
2953     }
2954     return NULL;
2955   }
2956 
getContinueInstrllvmCFGStruct::CFGStructTraits2957   static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
2958     MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2959     if (iter != blk->rend()) {
2960       MachineInstr *instr = &(*iter);
2961       if (instr->getOpcode() == AMDGPU::CONTINUE) {
2962         return instr;
2963       }
2964     }
2965     return NULL;
2966   }
2967 
getLoopBreakInstrllvmCFGStruct::CFGStructTraits2968   static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
2969     for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
2970       MachineInstr *instr = &(*iter);
2971       if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) {
2972         return instr;
2973       }
2974     }
2975     return NULL;
2976   }
2977 
isReturnBlockllvmCFGStruct::CFGStructTraits2978   static bool isReturnBlock(MachineBasicBlock *blk) {
2979     MachineInstr *instr = getReturnInstr(blk);
2980     bool isReturn = (blk->succ_size() == 0);
2981     if (instr) {
2982       assert(isReturn);
2983     } else if (isReturn) {
2984       if (DEBUGME) {
2985         errs() << "BB" << blk->getNumber()
2986                <<" is return block without RETURN instr\n";
2987       }
2988     }
2989 
2990     return  isReturn;
2991   }
2992 
2993   static MachineBasicBlock::iterator
getInstrPosllvmCFGStruct::CFGStructTraits2994   getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
2995     assert(instr->getParent() == blk && "instruction doesn't belong to block");
2996     MachineBasicBlock::iterator iter = blk->begin();
2997     MachineBasicBlock::iterator iterEnd = blk->end();
2998     while (&(*iter) != instr && iter != iterEnd) {
2999       ++iter;
3000     }
3001 
3002     assert(iter != iterEnd);
3003     return iter;
3004   }//getInstrPos
3005 
insertInstrBeforellvmCFGStruct::CFGStructTraits3006   static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
3007                                          AMDGPUCFGStructurizer *passRep) {
3008     return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
3009   } //insertInstrBefore
3010 
insertInstrBeforellvmCFGStruct::CFGStructTraits3011   static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
3012                                          AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
3013     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3014     MachineInstr *newInstr =
3015       blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
3016 
3017     MachineBasicBlock::iterator res;
3018     if (blk->begin() != blk->end()) {
3019       blk->insert(blk->begin(), newInstr);
3020     } else {
3021       blk->push_back(newInstr);
3022     }
3023 
3024     SHOWNEWINSTR(newInstr);
3025 
3026     return newInstr;
3027   } //insertInstrBefore
3028 
insertInstrEndllvmCFGStruct::CFGStructTraits3029   static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
3030                              AMDGPUCFGStructurizer *passRep) {
3031     insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
3032   } //insertInstrEnd
3033 
insertInstrEndllvmCFGStruct::CFGStructTraits3034   static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
3035                              AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
3036     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3037    MachineInstr *newInstr = blk->getParent()
3038       ->CreateMachineInstr(tii->get(newOpcode), DL);
3039 
3040     blk->push_back(newInstr);
3041     //assume the instruction doesn't take any reg operand ...
3042 
3043     SHOWNEWINSTR(newInstr);
3044   } //insertInstrEnd
3045 
insertInstrBeforellvmCFGStruct::CFGStructTraits3046   static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
3047                                          int newOpcode,
3048                                          AMDGPUCFGStructurizer *passRep) {
3049     MachineInstr *oldInstr = &(*instrPos);
3050     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3051     MachineBasicBlock *blk = oldInstr->getParent();
3052     MachineInstr *newInstr =
3053       blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
3054                                            DebugLoc());
3055 
3056     blk->insert(instrPos, newInstr);
3057     //assume the instruction doesn't take any reg operand ...
3058 
3059     SHOWNEWINSTR(newInstr);
3060     return newInstr;
3061   } //insertInstrBefore
3062 
insertCondBranchBeforellvmCFGStruct::CFGStructTraits3063   static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
3064                                      int newOpcode,
3065                                      AMDGPUCFGStructurizer *passRep,
3066 									 DebugLoc DL) {
3067     MachineInstr *oldInstr = &(*instrPos);
3068     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3069     MachineBasicBlock *blk = oldInstr->getParent();
3070     MachineInstr *newInstr =
3071       blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
3072                                            DL);
3073 
3074     blk->insert(instrPos, newInstr);
3075     MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
3076                                          false);
3077 
3078     SHOWNEWINSTR(newInstr);
3079     //erase later oldInstr->eraseFromParent();
3080   } //insertCondBranchBefore
3081 
insertCondBranchBeforellvmCFGStruct::CFGStructTraits3082   static void insertCondBranchBefore(MachineBasicBlock *blk,
3083                                      MachineBasicBlock::iterator insertPos,
3084                                      int newOpcode,
3085                                      AMDGPUCFGStructurizer *passRep,
3086                                      RegiT regNum,
3087 									 DebugLoc DL) {
3088     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3089 
3090     MachineInstr *newInstr =
3091       blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
3092 
3093     //insert before
3094     blk->insert(insertPos, newInstr);
3095     MachineInstrBuilder(newInstr).addReg(regNum, false);
3096 
3097     SHOWNEWINSTR(newInstr);
3098   } //insertCondBranchBefore
3099 
insertCondBranchEndllvmCFGStruct::CFGStructTraits3100   static void insertCondBranchEnd(MachineBasicBlock *blk,
3101                                   int newOpcode,
3102                                   AMDGPUCFGStructurizer *passRep,
3103                                   RegiT regNum) {
3104     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3105     MachineInstr *newInstr =
3106       blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
3107 
3108     blk->push_back(newInstr);
3109     MachineInstrBuilder(newInstr).addReg(regNum, false);
3110 
3111     SHOWNEWINSTR(newInstr);
3112   } //insertCondBranchEnd
3113 
3114 
insertAssignInstrBeforellvmCFGStruct::CFGStructTraits3115   static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
3116                                       AMDGPUCFGStructurizer *passRep,
3117                                       RegiT regNum, int regVal) {
3118     MachineInstr *oldInstr = &(*instrPos);
3119     const AMDGPUInstrInfo *tii =
3120              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
3121     MachineBasicBlock *blk = oldInstr->getParent();
3122     MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
3123                                                  regVal);
3124     blk->insert(instrPos, newInstr);
3125 
3126     SHOWNEWINSTR(newInstr);
3127   } //insertAssignInstrBefore
3128 
insertAssignInstrBeforellvmCFGStruct::CFGStructTraits3129   static void insertAssignInstrBefore(MachineBasicBlock *blk,
3130                                       AMDGPUCFGStructurizer *passRep,
3131                                       RegiT regNum, int regVal) {
3132     const AMDGPUInstrInfo *tii =
3133              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
3134 
3135     MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
3136                                                  regVal);
3137     if (blk->begin() != blk->end()) {
3138       blk->insert(blk->begin(), newInstr);
3139     } else {
3140       blk->push_back(newInstr);
3141     }
3142 
3143     SHOWNEWINSTR(newInstr);
3144 
3145   } //insertInstrBefore
3146 
insertCompareInstrBeforellvmCFGStruct::CFGStructTraits3147   static void insertCompareInstrBefore(MachineBasicBlock *blk,
3148                                        MachineBasicBlock::iterator instrPos,
3149                                        AMDGPUCFGStructurizer *passRep,
3150                                        RegiT dstReg, RegiT src1Reg,
3151                                        RegiT src2Reg) {
3152     const AMDGPUInstrInfo *tii =
3153              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
3154     MachineInstr *newInstr =
3155       blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
3156 
3157     MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
3158     MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
3159     MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
3160 
3161     blk->insert(instrPos, newInstr);
3162     SHOWNEWINSTR(newInstr);
3163 
3164   } //insertCompareInstrBefore
3165 
cloneSuccessorListllvmCFGStruct::CFGStructTraits3166   static void cloneSuccessorList(MachineBasicBlock *dstBlk,
3167                                  MachineBasicBlock *srcBlk) {
3168     for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
3169          iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
3170       dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of
3171     }
3172   } //cloneSuccessorList
3173 
clonellvmCFGStruct::CFGStructTraits3174   static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
3175     MachineFunction *func = srcBlk->getParent();
3176     MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
3177     func->push_back(newBlk);  //insert to function
3178     //newBlk->setNumber(srcBlk->getNumber());
3179     for (MachineBasicBlock::iterator iter = srcBlk->begin(),
3180          iterEnd = srcBlk->end();
3181          iter != iterEnd; ++iter) {
3182       MachineInstr *instr = func->CloneMachineInstr(iter);
3183       newBlk->push_back(instr);
3184     }
3185     return newBlk;
3186   }
3187 
3188   //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
3189   //the AMDGPU instruction is not recognized as terminator fix this and retire
3190   //this routine
replaceInstrUseOfBlockWithllvmCFGStruct::CFGStructTraits3191   static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
3192                                          MachineBasicBlock *oldBlk,
3193                                          MachineBasicBlock *newBlk) {
3194     MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
3195     if (branchInstr && isCondBranch(branchInstr) &&
3196         getExplicitBranch(branchInstr) == oldBlk) {
3197       setExplicitBranch(branchInstr, newBlk);
3198     }
3199   }
3200 
wrapupllvmCFGStruct::CFGStructTraits3201   static void wrapup(MachineBasicBlock *entryBlk) {
3202     assert((!entryBlk->getParent()->getJumpTableInfo()
3203             || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
3204            && "found a jump table");
3205 
3206      //collect continue right before endloop
3207      SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
3208      MachineBasicBlock::iterator pre = entryBlk->begin();
3209      MachineBasicBlock::iterator iterEnd = entryBlk->end();
3210      MachineBasicBlock::iterator iter = pre;
3211      while (iter != iterEnd) {
3212        if (pre->getOpcode() == AMDGPU::CONTINUE
3213            && iter->getOpcode() == AMDGPU::ENDLOOP) {
3214          contInstr.push_back(pre);
3215        }
3216        pre = iter;
3217        ++iter;
3218      } //end while
3219 
3220      //delete continue right before endloop
3221      for (unsigned i = 0; i < contInstr.size(); ++i) {
3222         contInstr[i]->eraseFromParent();
3223      }
3224 
3225      // TODO to fix up jump table so later phase won't be confused.  if
3226      // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
3227      // there isn't such an interface yet.  alternatively, replace all the other
3228      // blocks in the jump table with the entryBlk //}
3229 
3230   } //wrapup
3231 
getDominatorTreellvmCFGStruct::CFGStructTraits3232   static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
3233     return &pass.getAnalysis<MachineDominatorTree>();
3234   }
3235 
3236   static MachinePostDominatorTree*
getPostDominatorTreellvmCFGStruct::CFGStructTraits3237   getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
3238     return &pass.getAnalysis<MachinePostDominatorTree>();
3239   }
3240 
getLoopInfollvmCFGStruct::CFGStructTraits3241   static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
3242     return &pass.getAnalysis<MachineLoopInfo>();
3243   }
3244 }; // template class CFGStructTraits
3245 } //end of namespace llvm
3246 
3247 // createAMDGPUCFGPreparationPass- Returns a pass
createAMDGPUCFGPreparationPass(TargetMachine & tm)3248 FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm
3249                                                  ) {
3250   return new AMDGPUCFGPrepare(tm );
3251 }
3252 
runOnMachineFunction(MachineFunction & func)3253 bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
3254   return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
3255                                                                         *this,
3256                                                                         TRI);
3257 }
3258 
3259 // createAMDGPUCFGStructurizerPass- Returns a pass
createAMDGPUCFGStructurizerPass(TargetMachine & tm)3260 FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm
3261                                                   ) {
3262   return new AMDGPUCFGPerform(tm );
3263 }
3264 
runOnMachineFunction(MachineFunction & func)3265 bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
3266   return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
3267                                                                     *this,
3268                                                                     TRI);
3269 }
3270 
3271 //end of file newline goes below
3272 
3273