1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9
10 #define DEBUGME 0
11 #define DEBUG_TYPE "structcfg"
12
13 #include "AMDGPUInstrInfo.h"
14 #include "AMDIL.h"
15 #include "AMDILUtilityFunctions.h"
16 #include "llvm/ADT/SCCIterator.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/DominatorInternals.h"
20 #include "llvm/Analysis/Dominators.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineJumpTableInfo.h"
29 #include "llvm/CodeGen/MachineLoopInfo.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32
33 #define FirstNonDebugInstr(A) A->begin()
34 using namespace llvm;
35
36 // TODO: move-begin.
37
38 //===----------------------------------------------------------------------===//
39 //
40 // Statistics for CFGStructurizer.
41 //
42 //===----------------------------------------------------------------------===//
43
44 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
45 "matched");
46 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
47 "matched");
48 STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
49 "pattern matched");
50 STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
51 "pattern matched");
52 STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
53 "matched");
54 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
55 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
56
57 //===----------------------------------------------------------------------===//
58 //
59 // Miscellaneous utility for CFGStructurizer.
60 //
61 //===----------------------------------------------------------------------===//
62 namespace llvmCFGStruct
63 {
64 #define SHOWNEWINSTR(i) \
65 if (DEBUGME) errs() << "New instr: " << *i << "\n"
66
67 #define SHOWNEWBLK(b, msg) \
68 if (DEBUGME) { \
69 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
70 errs() << "\n"; \
71 }
72
73 #define SHOWBLK_DETAIL(b, msg) \
74 if (DEBUGME) { \
75 if (b) { \
76 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
77 b->print(errs()); \
78 errs() << "\n"; \
79 } \
80 }
81
82 #define INVALIDSCCNUM -1
83 #define INVALIDREGNUM 0
84
85 template<class LoopinfoT>
PrintLoopinfo(const LoopinfoT & LoopInfo,llvm::raw_ostream & OS)86 void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
87 for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
88 iterEnd = LoopInfo.end();
89 iter != iterEnd; ++iter) {
90 (*iter)->print(OS, 0);
91 }
92 }
93
94 template<class NodeT>
ReverseVector(SmallVector<NodeT *,DEFAULT_VEC_SLOTS> & Src)95 void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
96 size_t sz = Src.size();
97 for (size_t i = 0; i < sz/2; ++i) {
98 NodeT *t = Src[i];
99 Src[i] = Src[sz - i - 1];
100 Src[sz - i - 1] = t;
101 }
102 }
103
104 } //end namespace llvmCFGStruct
105
106
107 //===----------------------------------------------------------------------===//
108 //
109 // MachinePostDominatorTree
110 //
111 //===----------------------------------------------------------------------===//
112
113 namespace llvm {
114
115 /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
116 /// to compute the a post-dominator tree.
117 ///
118 struct MachinePostDominatorTree : public MachineFunctionPass {
119 static char ID; // Pass identification, replacement for typeid
120 DominatorTreeBase<MachineBasicBlock> *DT;
MachinePostDominatorTreellvm::MachinePostDominatorTree121 MachinePostDominatorTree() : MachineFunctionPass(ID)
122 {
123 DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
124 // postdominator
125 }
126
127 ~MachinePostDominatorTree();
128
129 virtual bool runOnMachineFunction(MachineFunction &MF);
130
getAnalysisUsagellvm::MachinePostDominatorTree131 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
132 AU.setPreservesAll();
133 MachineFunctionPass::getAnalysisUsage(AU);
134 }
135
getRootsllvm::MachinePostDominatorTree136 inline const std::vector<MachineBasicBlock *> &getRoots() const {
137 return DT->getRoots();
138 }
139
getRootNodellvm::MachinePostDominatorTree140 inline MachineDomTreeNode *getRootNode() const {
141 return DT->getRootNode();
142 }
143
operator []llvm::MachinePostDominatorTree144 inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
145 return DT->getNode(BB);
146 }
147
getNodellvm::MachinePostDominatorTree148 inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
149 return DT->getNode(BB);
150 }
151
dominatesllvm::MachinePostDominatorTree152 inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
153 return DT->dominates(A, B);
154 }
155
dominatesllvm::MachinePostDominatorTree156 inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
157 return DT->dominates(A, B);
158 }
159
160 inline bool
properlyDominatesllvm::MachinePostDominatorTree161 properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
162 return DT->properlyDominates(A, B);
163 }
164
165 inline bool
properlyDominatesllvm::MachinePostDominatorTree166 properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
167 return DT->properlyDominates(A, B);
168 }
169
170 inline MachineBasicBlock *
findNearestCommonDominatorllvm::MachinePostDominatorTree171 findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
172 return DT->findNearestCommonDominator(A, B);
173 }
174
printllvm::MachinePostDominatorTree175 virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
176 DT->print(OS);
177 }
178 };
179 } //end of namespace llvm
180
181 char MachinePostDominatorTree::ID = 0;
182 static RegisterPass<MachinePostDominatorTree>
183 machinePostDominatorTreePass("machinepostdomtree",
184 "MachinePostDominator Tree Construction",
185 true, true);
186
187 //const PassInfo *const llvm::MachinePostDominatorsID
188 //= &machinePostDominatorTreePass;
189
runOnMachineFunction(MachineFunction & F)190 bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
191 DT->recalculate(F);
192 //DEBUG(DT->dump());
193 return false;
194 }
195
~MachinePostDominatorTree()196 MachinePostDominatorTree::~MachinePostDominatorTree() {
197 delete DT;
198 }
199
200 //===----------------------------------------------------------------------===//
201 //
202 // supporting data structure for CFGStructurizer
203 //
204 //===----------------------------------------------------------------------===//
205
206 namespace llvmCFGStruct
207 {
208 template<class PassT>
209 struct CFGStructTraits {
210 };
211
212 template <class InstrT>
213 class BlockInformation {
214 public:
215 bool isRetired;
216 int sccNum;
217 //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
218 //Instructions defining the corresponding successor.
BlockInformation()219 BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
220 };
221
222 template <class BlockT, class InstrT, class RegiT>
223 class LandInformation {
224 public:
225 BlockT *landBlk;
226 std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
227 //WHILELOOP(thisloop) init before entering
228 //thisloop.
229 std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
230 //WHILELOOP(thisloop) init after entering
231 //thisloop.
232 std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
233 //land block, branch cond on this reg.
234 std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
235 //endif" after ENDLOOP(thisloop) break
236 //outerLoopOf(thisLoop).
237 std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
238 //endif" after ENDLOOP(thisloop) continue on
239 //outerLoopOf(thisLoop).
LandInformation()240 LandInformation() : landBlk(NULL) {}
241 };
242
243 } //end of namespace llvmCFGStruct
244
245 //===----------------------------------------------------------------------===//
246 //
247 // CFGStructurizer
248 //
249 //===----------------------------------------------------------------------===//
250
251 namespace llvmCFGStruct
252 {
253 // bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
254 template<class PassT>
255 class CFGStructurizer
256 {
257 public:
258 typedef enum {
259 Not_SinglePath = 0,
260 SinglePath_InPath = 1,
261 SinglePath_NotInPath = 2
262 } PathToKind;
263
264 public:
265 typedef typename PassT::InstructionType InstrT;
266 typedef typename PassT::FunctionType FuncT;
267 typedef typename PassT::DominatortreeType DomTreeT;
268 typedef typename PassT::PostDominatortreeType PostDomTreeT;
269 typedef typename PassT::DomTreeNodeType DomTreeNodeT;
270 typedef typename PassT::LoopinfoType LoopInfoT;
271
272 typedef GraphTraits<FuncT *> FuncGTraits;
273 //typedef FuncGTraits::nodes_iterator BlockIterator;
274 typedef typename FuncT::iterator BlockIterator;
275
276 typedef typename FuncGTraits::NodeType BlockT;
277 typedef GraphTraits<BlockT *> BlockGTraits;
278 typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
279 //typedef BlockGTraits::succ_iterator InstructionIterator;
280 typedef typename BlockT::iterator InstrIterator;
281
282 typedef CFGStructTraits<PassT> CFGTraits;
283 typedef BlockInformation<InstrT> BlockInfo;
284 typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
285
286 typedef int RegiT;
287 typedef typename PassT::LoopType LoopT;
288 typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
289 typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
290 //landing info for loop break
291 typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
292
293 public:
294 CFGStructurizer();
295 ~CFGStructurizer();
296
297 /// Perform the CFG structurization
298 bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
299
300 /// Perform the CFG preparation
301 bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
302
303 private:
304 void reversePredicateSetter(typename BlockT::iterator);
305 void orderBlocks();
306 void printOrderedBlocks(llvm::raw_ostream &OS);
307 int patternMatch(BlockT *CurBlock);
308 int patternMatchGroup(BlockT *CurBlock);
309
310 int serialPatternMatch(BlockT *CurBlock);
311 int ifPatternMatch(BlockT *CurBlock);
312 int switchPatternMatch(BlockT *CurBlock);
313 int loopendPatternMatch(BlockT *CurBlock);
314 int loopPatternMatch(BlockT *CurBlock);
315
316 int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
317 int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
318 //int loopWithoutBreak(BlockT *);
319
320 void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
321 BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
322 void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
323 BlockT *ContBlock, LoopT *contLoop);
324 bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
325 int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
326 BlockT *FalseBlock);
327 int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
328 BlockT *FalseBlock);
329 int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
330 BlockT *FalseBlock, BlockT **LandBlockPtr);
331 void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
332 BlockT *FalseBlock, BlockT *LandBlock,
333 bool Detail = false);
334 PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
335 bool AllowSideEntry = true);
336 BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
337 bool AllowSideEntry = true);
338 int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
339 void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
340
341 void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
342 BlockT *TrueBlock, BlockT *FalseBlock,
343 BlockT *LandBlock);
344 void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
345 void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
346 BlockT *ExitLandBlock, RegiT SetReg);
347 void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
348 RegiT SetReg);
349 BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
350 std::set<BlockT*> &ExitBlockSet,
351 BlockT *ExitLandBlk);
352 BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
353 BlockTSmallerVector &ExitingBlocks,
354 BlockTSmallerVector &ExitBlocks);
355 BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
356 void removeUnconditionalBranch(BlockT *SrcBlock);
357 void removeRedundantConditionalBranch(BlockT *SrcBlock);
358 void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
359
360 void removeSuccessor(BlockT *SrcBlock);
361 BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
362 BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
363
364 void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
365 InstrIterator InsertPos);
366
367 void recordSccnum(BlockT *SrcBlock, int SCCNum);
368 int getSCCNum(BlockT *srcBlk);
369
370 void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
371 bool isRetiredBlock(BlockT *SrcBlock);
372 bool isActiveLoophead(BlockT *CurBlock);
373 bool needMigrateBlock(BlockT *Block);
374
375 BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
376 BlockTSmallerVector &exitBlocks,
377 std::set<BlockT*> &ExitBlockSet);
378 void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
379 BlockT *getLoopLandBlock(LoopT *LoopRep);
380 LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
381
382 void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
383 void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
384 void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
385 void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
386 void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
387
388 bool hasBackEdge(BlockT *curBlock);
389 unsigned getLoopDepth (LoopT *LoopRep);
390 int countActiveBlock(
391 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
392 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
393 BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
394 BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
395
396 private:
397 DomTreeT *domTree;
398 PostDomTreeT *postDomTree;
399 LoopInfoT *loopInfo;
400 PassT *passRep;
401 FuncT *funcRep;
402
403 BlockInfoMap blockInfoMap;
404 LoopLandInfoMap loopLandInfoMap;
405 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
406 const AMDGPURegisterInfo *TRI;
407
408 }; //template class CFGStructurizer
409
CFGStructurizer()410 template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
411 : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
412 }
413
~CFGStructurizer()414 template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
415 for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
416 E = blockInfoMap.end(); I != E; ++I) {
417 delete I->second;
418 }
419 }
420
421 template<class PassT>
prepare(FuncT & func,PassT & pass,const AMDGPURegisterInfo * tri)422 bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
423 const AMDGPURegisterInfo * tri) {
424 passRep = &pass;
425 funcRep = &func;
426 TRI = tri;
427
428 bool changed = false;
429 //func.RenumberBlocks();
430
431 //to do, if not reducible flow graph, make it so ???
432
433 if (DEBUGME) {
434 errs() << "AMDGPUCFGStructurizer::prepare\n";
435 //func.viewCFG();
436 //func.viewCFGOnly();
437 //func.dump();
438 }
439
440 //FIXME: gcc complains on this.
441 //domTree = &pass.getAnalysis<DomTreeT>();
442 //domTree = CFGTraits::getDominatorTree(pass);
443 //if (DEBUGME) {
444 // domTree->print(errs());
445 //}
446
447 //FIXME: gcc complains on this.
448 //domTree = &pass.getAnalysis<DomTreeT>();
449 //postDomTree = CFGTraits::getPostDominatorTree(pass);
450 //if (DEBUGME) {
451 // postDomTree->print(errs());
452 //}
453
454 //FIXME: gcc complains on this.
455 //loopInfo = &pass.getAnalysis<LoopInfoT>();
456 loopInfo = CFGTraits::getLoopInfo(pass);
457 if (DEBUGME) {
458 errs() << "LoopInfo:\n";
459 PrintLoopinfo(*loopInfo, errs());
460 }
461
462 orderBlocks();
463 if (DEBUGME) {
464 errs() << "Ordered blocks:\n";
465 printOrderedBlocks(errs());
466 }
467
468 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
469
470 for (typename LoopInfoT::iterator iter = loopInfo->begin(),
471 iterEnd = loopInfo->end();
472 iter != iterEnd; ++iter) {
473 LoopT* loopRep = (*iter);
474 BlockTSmallerVector exitingBlks;
475 loopRep->getExitingBlocks(exitingBlks);
476
477 if (exitingBlks.size() == 0) {
478 BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
479 if (dummyExitBlk != NULL)
480 retBlks.push_back(dummyExitBlk);
481 }
482 }
483
484 // Remove unconditional branch instr.
485 // Add dummy exit block iff there are multiple returns.
486
487 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
488 iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
489 iterBlk != iterEndBlk;
490 ++iterBlk) {
491 BlockT *curBlk = *iterBlk;
492 removeUnconditionalBranch(curBlk);
493 removeRedundantConditionalBranch(curBlk);
494 if (CFGTraits::isReturnBlock(curBlk)) {
495 retBlks.push_back(curBlk);
496 }
497 assert(curBlk->succ_size() <= 2);
498 //assert(curBlk->size() > 0);
499 //removeEmptyBlock(curBlk) ??
500 } //for
501
502 if (retBlks.size() >= 2) {
503 addDummyExitBlock(retBlks);
504 changed = true;
505 }
506
507 return changed;
508 } //CFGStructurizer::prepare
509
510 template<class PassT>
run(FuncT & func,PassT & pass,const AMDGPURegisterInfo * tri)511 bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
512 const AMDGPURegisterInfo * tri) {
513 passRep = &pass;
514 funcRep = &func;
515 TRI = tri;
516
517 //func.RenumberBlocks();
518
519 //Assume reducible CFG...
520 if (DEBUGME) {
521 errs() << "AMDGPUCFGStructurizer::run\n";
522 //errs() << func.getFunction()->getNameStr() << "\n";
523 func.viewCFG();
524 //func.viewCFGOnly();
525 //func.dump();
526 }
527
528 #if 1
529 //FIXME: gcc complains on this.
530 //domTree = &pass.getAnalysis<DomTreeT>();
531 domTree = CFGTraits::getDominatorTree(pass);
532 if (DEBUGME) {
533 domTree->print(errs(), (const llvm::Module*)0);
534 }
535 #endif
536
537 //FIXME: gcc complains on this.
538 //domTree = &pass.getAnalysis<DomTreeT>();
539 postDomTree = CFGTraits::getPostDominatorTree(pass);
540 if (DEBUGME) {
541 postDomTree->print(errs());
542 }
543
544 //FIXME: gcc complains on this.
545 //loopInfo = &pass.getAnalysis<LoopInfoT>();
546 loopInfo = CFGTraits::getLoopInfo(pass);
547 if (DEBUGME) {
548 errs() << "LoopInfo:\n";
549 PrintLoopinfo(*loopInfo, errs());
550 }
551
552 orderBlocks();
553 //#define STRESSTEST
554 #ifdef STRESSTEST
555 //Use the worse block ordering to test the algorithm.
556 ReverseVector(orderedBlks);
557 #endif
558
559 if (DEBUGME) {
560 errs() << "Ordered blocks:\n";
561 printOrderedBlocks(errs());
562 }
563 int numIter = 0;
564 bool finish = false;
565 BlockT *curBlk;
566 bool makeProgress = false;
567 int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
568 orderedBlks.end());
569
570 do {
571 ++numIter;
572 if (DEBUGME) {
573 errs() << "numIter = " << numIter
574 << ", numRemaintedBlk = " << numRemainedBlk << "\n";
575 }
576
577 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
578 iterBlk = orderedBlks.begin();
579 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
580 iterBlkEnd = orderedBlks.end();
581
582 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
583 sccBeginIter = iterBlk;
584 BlockT *sccBeginBlk = NULL;
585 int sccNumBlk = 0; // The number of active blocks, init to a
586 // maximum possible number.
587 int sccNumIter; // Number of iteration in this SCC.
588
589 while (iterBlk != iterBlkEnd) {
590 curBlk = *iterBlk;
591
592 if (sccBeginBlk == NULL) {
593 sccBeginIter = iterBlk;
594 sccBeginBlk = curBlk;
595 sccNumIter = 0;
596 sccNumBlk = numRemainedBlk; // Init to maximum possible number.
597 if (DEBUGME) {
598 errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
599 errs() << "\n";
600 }
601 }
602
603 if (!isRetiredBlock(curBlk)) {
604 patternMatch(curBlk);
605 }
606
607 ++iterBlk;
608
609 bool contNextScc = true;
610 if (iterBlk == iterBlkEnd
611 || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
612 // Just finish one scc.
613 ++sccNumIter;
614 int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
615 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
616 if (DEBUGME) {
617 errs() << "Can't reduce SCC " << getSCCNum(curBlk)
618 << ", sccNumIter = " << sccNumIter;
619 errs() << "doesn't make any progress\n";
620 }
621 contNextScc = true;
622 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
623 sccNumBlk = sccRemainedNumBlk;
624 iterBlk = sccBeginIter;
625 contNextScc = false;
626 if (DEBUGME) {
627 errs() << "repeat processing SCC" << getSCCNum(curBlk)
628 << "sccNumIter = " << sccNumIter << "\n";
629 func.viewCFG();
630 //func.viewCFGOnly();
631 }
632 } else {
633 // Finish the current scc.
634 contNextScc = true;
635 }
636 } else {
637 // Continue on next component in the current scc.
638 contNextScc = false;
639 }
640
641 if (contNextScc) {
642 sccBeginBlk = NULL;
643 }
644 } //while, "one iteration" over the function.
645
646 BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
647 if (entryBlk->succ_size() == 0) {
648 finish = true;
649 if (DEBUGME) {
650 errs() << "Reduce to one block\n";
651 }
652 } else {
653 int newnumRemainedBlk
654 = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
655 // consider cloned blocks ??
656 if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
657 makeProgress = true;
658 numRemainedBlk = newnumRemainedBlk;
659 } else {
660 makeProgress = false;
661 if (DEBUGME) {
662 errs() << "No progress\n";
663 }
664 }
665 }
666 } while (!finish && makeProgress);
667
668 // Misc wrap up to maintain the consistency of the Function representation.
669 CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
670
671 // Detach retired Block, release memory.
672 for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
673 iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
674 if ((*iterMap).second && (*iterMap).second->isRetired) {
675 assert(((*iterMap).first)->getNumber() != -1);
676 if (DEBUGME) {
677 errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
678 }
679 (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
680 }
681 delete (*iterMap).second;
682 }
683 blockInfoMap.clear();
684
685 // clear loopLandInfoMap
686 for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
687 iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
688 delete (*iterMap).second;
689 }
690 loopLandInfoMap.clear();
691
692 if (DEBUGME) {
693 func.viewCFG();
694 //func.dump();
695 }
696
697 if (!finish) {
698 assert(!"IRREDUCIBL_CF");
699 }
700
701 return true;
702 } //CFGStructurizer::run
703
704 /// Print the ordered Blocks.
705 ///
706 template<class PassT>
printOrderedBlocks(llvm::raw_ostream & os)707 void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
708 size_t i = 0;
709 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
710 iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
711 iterBlk != iterBlkEnd;
712 ++iterBlk, ++i) {
713 os << "BB" << (*iterBlk)->getNumber();
714 os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
715 if (i != 0 && i % 10 == 0) {
716 os << "\n";
717 } else {
718 os << " ";
719 }
720 }
721 } //printOrderedBlocks
722
723 /// Compute the reversed DFS post order of Blocks
724 ///
orderBlocks()725 template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
726 int sccNum = 0;
727 BlockT *bb;
728 for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
729 sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
730 std::vector<BlockT *> &sccNext = *sccIter;
731 for (typename std::vector<BlockT *>::const_iterator
732 blockIter = sccNext.begin(), blockEnd = sccNext.end();
733 blockIter != blockEnd; ++blockIter) {
734 bb = *blockIter;
735 orderedBlks.push_back(bb);
736 recordSccnum(bb, sccNum);
737 }
738 }
739
740 //walk through all the block in func to check for unreachable
741 for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
742 blockEnd1 = FuncGTraits::nodes_end(funcRep);
743 blockIter1 != blockEnd1; ++blockIter1) {
744 BlockT *bb = &(*blockIter1);
745 sccNum = getSCCNum(bb);
746 if (sccNum == INVALIDSCCNUM) {
747 errs() << "unreachable block BB" << bb->getNumber() << "\n";
748 }
749 } //end of for
750 } //orderBlocks
751
patternMatch(BlockT * curBlk)752 template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
753 int numMatch = 0;
754 int curMatch;
755
756 if (DEBUGME) {
757 errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
758 }
759
760 while ((curMatch = patternMatchGroup(curBlk)) > 0) {
761 numMatch += curMatch;
762 }
763
764 if (DEBUGME) {
765 errs() << "End patternMatch BB" << curBlk->getNumber()
766 << ", numMatch = " << numMatch << "\n";
767 }
768
769 return numMatch;
770 } //patternMatch
771
772 template<class PassT>
patternMatchGroup(BlockT * curBlk)773 int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
774 int numMatch = 0;
775 numMatch += serialPatternMatch(curBlk);
776 numMatch += ifPatternMatch(curBlk);
777 //numMatch += switchPatternMatch(curBlk);
778 numMatch += loopendPatternMatch(curBlk);
779 numMatch += loopPatternMatch(curBlk);
780 return numMatch;
781 }//patternMatchGroup
782
783 template<class PassT>
serialPatternMatch(BlockT * curBlk)784 int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
785 if (curBlk->succ_size() != 1) {
786 return 0;
787 }
788
789 BlockT *childBlk = *curBlk->succ_begin();
790 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
791 return 0;
792 }
793
794 mergeSerialBlock(curBlk, childBlk);
795 ++numSerialPatternMatch;
796 return 1;
797 } //serialPatternMatch
798
799 template<class PassT>
ifPatternMatch(BlockT * curBlk)800 int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
801 //two edges
802 if (curBlk->succ_size() != 2) {
803 return 0;
804 }
805
806 if (hasBackEdge(curBlk)) {
807 return 0;
808 }
809
810 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
811 if (branchInstr == NULL) {
812 return 0;
813 }
814
815 assert(CFGTraits::isCondBranch(branchInstr));
816
817 BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
818 BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
819 BlockT *landBlk;
820 int cloned = 0;
821
822 // TODO: Simplify
823 if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
824 && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
825 landBlk = *trueBlk->succ_begin();
826 } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
827 landBlk = NULL;
828 } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
829 landBlk = falseBlk;
830 falseBlk = NULL;
831 } else if (falseBlk->succ_size() == 1
832 && *falseBlk->succ_begin() == trueBlk) {
833 landBlk = trueBlk;
834 trueBlk = NULL;
835 } else if (falseBlk->succ_size() == 1
836 && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
837 landBlk = *falseBlk->succ_begin();
838 } else if (trueBlk->succ_size() == 1
839 && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
840 landBlk = *trueBlk->succ_begin();
841 } else {
842 return handleJumpintoIf(curBlk, trueBlk, falseBlk);
843 }
844
845 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
846 // new BB created for landBlk==NULL may introduce new challenge to the
847 // reduction process.
848 if (landBlk != NULL &&
849 ((trueBlk && trueBlk->pred_size() > 1)
850 || (falseBlk && falseBlk->pred_size() > 1))) {
851 cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
852 }
853
854 if (trueBlk && trueBlk->pred_size() > 1) {
855 trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
856 ++cloned;
857 }
858
859 if (falseBlk && falseBlk->pred_size() > 1) {
860 falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
861 ++cloned;
862 }
863
864 mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
865
866 ++numIfPatternMatch;
867
868 numClonedBlock += cloned;
869
870 return 1 + cloned;
871 } //ifPatternMatch
872
873 template<class PassT>
switchPatternMatch(BlockT * curBlk)874 int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
875 return 0;
876 } //switchPatternMatch
877
878 template<class PassT>
loopendPatternMatch(BlockT * curBlk)879 int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
880 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
881 typename std::vector<LoopT *> nestedLoops;
882 while (loopRep) {
883 nestedLoops.push_back(loopRep);
884 loopRep = loopRep->getParentLoop();
885 }
886
887 if (nestedLoops.size() == 0) {
888 return 0;
889 }
890
891 // Process nested loop outside->inside, so "continue" to a outside loop won't
892 // be mistaken as "break" of the current loop.
893 int num = 0;
894 for (typename std::vector<LoopT *>::reverse_iterator
895 iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
896 iter != iterEnd; ++iter) {
897 loopRep = *iter;
898
899 if (getLoopLandBlock(loopRep) != NULL) {
900 continue;
901 }
902
903 BlockT *loopHeader = loopRep->getHeader();
904
905 int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
906
907 if (numBreak == -1) {
908 break;
909 }
910
911 int numCont = loopcontPatternMatch(loopRep, loopHeader);
912 num += numBreak + numCont;
913 }
914
915 return num;
916 } //loopendPatternMatch
917
918 template<class PassT>
loopPatternMatch(BlockT * curBlk)919 int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
920 if (curBlk->succ_size() != 0) {
921 return 0;
922 }
923
924 int numLoop = 0;
925 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
926 while (loopRep && loopRep->getHeader() == curBlk) {
927 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
928 if (loopLand) {
929 BlockT *landBlk = loopLand->landBlk;
930 assert(landBlk);
931 if (!isRetiredBlock(landBlk)) {
932 mergeLooplandBlock(curBlk, loopLand);
933 ++numLoop;
934 }
935 }
936 loopRep = loopRep->getParentLoop();
937 }
938
939 numLoopPatternMatch += numLoop;
940
941 return numLoop;
942 } //loopPatternMatch
943
944 template<class PassT>
loopbreakPatternMatch(LoopT * loopRep,BlockT * loopHeader)945 int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
946 BlockT *loopHeader) {
947 BlockTSmallerVector exitingBlks;
948 loopRep->getExitingBlocks(exitingBlks);
949
950 if (DEBUGME) {
951 errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
952 }
953
954 if (exitingBlks.size() == 0) {
955 setLoopLandBlock(loopRep);
956 return 0;
957 }
958
959 // Compute the corresponding exitBlks and exit block set.
960 BlockTSmallerVector exitBlks;
961 std::set<BlockT *> exitBlkSet;
962 for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
963 iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
964 BlockT *exitingBlk = *iter;
965 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
966 exitBlks.push_back(exitBlk);
967 exitBlkSet.insert(exitBlk); //non-duplicate insert
968 }
969
970 assert(exitBlkSet.size() > 0);
971 assert(exitBlks.size() == exitingBlks.size());
972
973 if (DEBUGME) {
974 errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
975 }
976
977 // Find exitLandBlk.
978 BlockT *exitLandBlk = NULL;
979 int numCloned = 0;
980 int numSerial = 0;
981
982 if (exitBlkSet.size() == 1)
983 {
984 exitLandBlk = *exitBlkSet.begin();
985 } else {
986 exitLandBlk = findNearestCommonPostDom(exitBlkSet);
987
988 if (exitLandBlk == NULL) {
989 return -1;
990 }
991
992 bool allInPath = true;
993 bool allNotInPath = true;
994 for (typename std::set<BlockT*>::const_iterator
995 iter = exitBlkSet.begin(),
996 iterEnd = exitBlkSet.end();
997 iter != iterEnd; ++iter) {
998 BlockT *exitBlk = *iter;
999
1000 PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
1001 if (DEBUGME) {
1002 errs() << "BB" << exitBlk->getNumber()
1003 << " to BB" << exitLandBlk->getNumber() << " PathToKind="
1004 << pathKind << "\n";
1005 }
1006
1007 allInPath = allInPath && (pathKind == SinglePath_InPath);
1008 allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
1009
1010 if (!allInPath && !allNotInPath) {
1011 if (DEBUGME) {
1012 errs() << "singlePath check fail\n";
1013 }
1014 return -1;
1015 }
1016 } // check all exit blocks
1017
1018 if (allNotInPath) {
1019 #if 1
1020
1021 // TODO: Simplify, maybe separate function?
1022 //funcRep->viewCFG();
1023 LoopT *parentLoopRep = loopRep->getParentLoop();
1024 BlockT *parentLoopHeader = NULL;
1025 if (parentLoopRep)
1026 parentLoopHeader = parentLoopRep->getHeader();
1027
1028 if (exitLandBlk == parentLoopHeader &&
1029 (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
1030 loopRep,
1031 exitBlkSet,
1032 exitLandBlk)) != NULL) {
1033 if (DEBUGME) {
1034 errs() << "relocateLoopcontBlock success\n";
1035 }
1036 } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
1037 exitingBlks,
1038 exitBlks)) != NULL) {
1039 if (DEBUGME) {
1040 errs() << "insertEndbranchBlock success\n";
1041 }
1042 } else {
1043 if (DEBUGME) {
1044 errs() << "loop exit fail\n";
1045 }
1046 return -1;
1047 }
1048 #else
1049 return -1;
1050 #endif
1051 }
1052
1053 // Handle side entry to exit path.
1054 exitBlks.clear();
1055 exitBlkSet.clear();
1056 for (typename BlockTSmallerVector::iterator iterExiting =
1057 exitingBlks.begin(),
1058 iterExitingEnd = exitingBlks.end();
1059 iterExiting != iterExitingEnd; ++iterExiting) {
1060 BlockT *exitingBlk = *iterExiting;
1061 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
1062 BlockT *newExitBlk = exitBlk;
1063
1064 if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
1065 newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
1066 ++numCloned;
1067 }
1068
1069 numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
1070
1071 exitBlks.push_back(newExitBlk);
1072 exitBlkSet.insert(newExitBlk);
1073 }
1074
1075 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
1076 iterExitEnd = exitBlks.end();
1077 iterExit != iterExitEnd; ++iterExit) {
1078 BlockT *exitBlk = *iterExit;
1079 numSerial += serialPatternMatch(exitBlk);
1080 }
1081
1082 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
1083 iterExitEnd = exitBlks.end();
1084 iterExit != iterExitEnd; ++iterExit) {
1085 BlockT *exitBlk = *iterExit;
1086 if (exitBlk->pred_size() > 1) {
1087 if (exitBlk != exitLandBlk) {
1088 return -1;
1089 }
1090 } else {
1091 if (exitBlk != exitLandBlk &&
1092 (exitBlk->succ_size() != 1 ||
1093 *exitBlk->succ_begin() != exitLandBlk)) {
1094 return -1;
1095 }
1096 }
1097 }
1098 } // else
1099
1100 // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
1101 exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
1102
1103 // Fold break into the breaking block. Leverage across level breaks.
1104 assert(exitingBlks.size() == exitBlks.size());
1105 for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
1106 iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
1107 iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
1108 BlockT *exitBlk = *iterExit;
1109 BlockT *exitingBlk = *iterExiting;
1110 assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
1111 LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
1112 handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
1113 }
1114
1115 int numBreak = static_cast<int>(exitingBlks.size());
1116 numLoopbreakPatternMatch += numBreak;
1117 numClonedBlock += numCloned;
1118 return numBreak + numSerial + numCloned;
1119 } //loopbreakPatternMatch
1120
1121 template<class PassT>
loopcontPatternMatch(LoopT * loopRep,BlockT * loopHeader)1122 int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
1123 BlockT *loopHeader) {
1124 int numCont = 0;
1125 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
1126 for (typename InvBlockGTraits::ChildIteratorType iter =
1127 InvBlockGTraits::child_begin(loopHeader),
1128 iterEnd = InvBlockGTraits::child_end(loopHeader);
1129 iter != iterEnd; ++iter) {
1130 BlockT *curBlk = *iter;
1131 if (loopRep->contains(curBlk)) {
1132 handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
1133 loopHeader, loopRep);
1134 contBlk.push_back(curBlk);
1135 ++numCont;
1136 }
1137 }
1138
1139 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
1140 iter = contBlk.begin(), iterEnd = contBlk.end();
1141 iter != iterEnd; ++iter) {
1142 (*iter)->removeSuccessor(loopHeader);
1143 }
1144
1145 numLoopcontPatternMatch += numCont;
1146
1147 return numCont;
1148 } //loopcontPatternMatch
1149
1150
1151 template<class PassT>
isSameloopDetachedContbreak(BlockT * src1Blk,BlockT * src2Blk)1152 bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
1153 BlockT *src2Blk) {
1154 // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
1155 // same loop with LoopLandInfo without explicitly keeping track of
1156 // loopContBlks and loopBreakBlks, this is a method to get the information.
1157 //
1158 if (src1Blk->succ_size() == 0) {
1159 LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
1160 if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
1161 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
1162 if (theEntry != NULL) {
1163 if (DEBUGME) {
1164 errs() << "isLoopContBreakBlock yes src1 = BB"
1165 << src1Blk->getNumber()
1166 << " src2 = BB" << src2Blk->getNumber() << "\n";
1167 }
1168 return true;
1169 }
1170 }
1171 }
1172 return false;
1173 } //isSameloopDetachedContbreak
1174
1175 template<class PassT>
handleJumpintoIf(BlockT * headBlk,BlockT * trueBlk,BlockT * falseBlk)1176 int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
1177 BlockT *trueBlk,
1178 BlockT *falseBlk) {
1179 int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
1180 if (num == 0) {
1181 if (DEBUGME) {
1182 errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1183 }
1184 num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
1185 }
1186 return num;
1187 }
1188
1189 template<class PassT>
handleJumpintoIfImp(BlockT * headBlk,BlockT * trueBlk,BlockT * falseBlk)1190 int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
1191 BlockT *trueBlk,
1192 BlockT *falseBlk) {
1193 int num = 0;
1194 BlockT *downBlk;
1195
1196 //trueBlk could be the common post dominator
1197 downBlk = trueBlk;
1198
1199 if (DEBUGME) {
1200 errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
1201 << " true = BB" << trueBlk->getNumber()
1202 << ", numSucc=" << trueBlk->succ_size()
1203 << " false = BB" << falseBlk->getNumber() << "\n";
1204 }
1205
1206 while (downBlk) {
1207 if (DEBUGME) {
1208 errs() << "check down = BB" << downBlk->getNumber();
1209 }
1210
1211 if (//postDomTree->dominates(downBlk, falseBlk) &&
1212 singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
1213 if (DEBUGME) {
1214 errs() << " working\n";
1215 }
1216
1217 num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
1218 num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
1219
1220 numClonedBlock += num;
1221 num += serialPatternMatch(*headBlk->succ_begin());
1222 num += serialPatternMatch(*(++headBlk->succ_begin()));
1223 num += ifPatternMatch(headBlk);
1224 assert(num > 0); //
1225
1226 break;
1227 }
1228 if (DEBUGME) {
1229 errs() << " not working\n";
1230 }
1231 downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
1232 } // walk down the postDomTree
1233
1234 return num;
1235 } //handleJumpintoIf
1236
1237 template<class PassT>
showImproveSimpleJumpintoIf(BlockT * headBlk,BlockT * trueBlk,BlockT * falseBlk,BlockT * landBlk,bool detail)1238 void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
1239 BlockT *trueBlk,
1240 BlockT *falseBlk,
1241 BlockT *landBlk,
1242 bool detail) {
1243 errs() << "head = BB" << headBlk->getNumber()
1244 << " size = " << headBlk->size();
1245 if (detail) {
1246 errs() << "\n";
1247 headBlk->print(errs());
1248 errs() << "\n";
1249 }
1250
1251 if (trueBlk) {
1252 errs() << ", true = BB" << trueBlk->getNumber() << " size = "
1253 << trueBlk->size() << " numPred = " << trueBlk->pred_size();
1254 if (detail) {
1255 errs() << "\n";
1256 trueBlk->print(errs());
1257 errs() << "\n";
1258 }
1259 }
1260 if (falseBlk) {
1261 errs() << ", false = BB" << falseBlk->getNumber() << " size = "
1262 << falseBlk->size() << " numPred = " << falseBlk->pred_size();
1263 if (detail) {
1264 errs() << "\n";
1265 falseBlk->print(errs());
1266 errs() << "\n";
1267 }
1268 }
1269 if (landBlk) {
1270 errs() << ", land = BB" << landBlk->getNumber() << " size = "
1271 << landBlk->size() << " numPred = " << landBlk->pred_size();
1272 if (detail) {
1273 errs() << "\n";
1274 landBlk->print(errs());
1275 errs() << "\n";
1276 }
1277 }
1278
1279 errs() << "\n";
1280 } //showImproveSimpleJumpintoIf
1281
1282 template<class PassT>
improveSimpleJumpintoIf(BlockT * headBlk,BlockT * trueBlk,BlockT * falseBlk,BlockT ** plandBlk)1283 int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
1284 BlockT *trueBlk,
1285 BlockT *falseBlk,
1286 BlockT **plandBlk) {
1287 bool migrateTrue = false;
1288 bool migrateFalse = false;
1289
1290 BlockT *landBlk = *plandBlk;
1291
1292 assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
1293 && (falseBlk == NULL || falseBlk->succ_size() <= 1));
1294
1295 if (trueBlk == falseBlk) {
1296 return 0;
1297 }
1298
1299 #if 0
1300 if (DEBUGME) {
1301 errs() << "improveSimpleJumpintoIf: ";
1302 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1303 }
1304 #endif
1305
1306 // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
1307 // May consider the # landBlk->pred_size() as it represents the number of
1308 // assignment initReg = .. needed to insert.
1309 migrateTrue = needMigrateBlock(trueBlk);
1310 migrateFalse = needMigrateBlock(falseBlk);
1311
1312 if (!migrateTrue && !migrateFalse) {
1313 return 0;
1314 }
1315
1316 // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1317 // have more than one predecessors. without doing this, its predecessor
1318 // rather than headBlk will have undefined value in initReg.
1319 if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
1320 migrateTrue = true;
1321 }
1322 if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
1323 migrateFalse = true;
1324 }
1325
1326 if (DEBUGME) {
1327 errs() << "before improveSimpleJumpintoIf: ";
1328 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1329 //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
1330 }
1331
1332 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1333 //
1334 // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1335 // {initReg = 0; org falseBlk branch }
1336 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1337 // => org landBlk
1338 // if landBlk->pred_size() > 2, put the about if-else inside
1339 // if (initReg !=2) {...}
1340 //
1341 // add initReg = initVal to headBlk
1342
1343 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1344 unsigned initReg =
1345 funcRep->getRegInfo().createVirtualRegister(I32RC);
1346 if (!migrateTrue || !migrateFalse) {
1347 int initVal = migrateTrue ? 0 : 1;
1348 CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
1349 }
1350
1351 int numNewBlk = 0;
1352
1353 if (landBlk == NULL) {
1354 landBlk = funcRep->CreateMachineBasicBlock();
1355 funcRep->push_back(landBlk); //insert to function
1356
1357 if (trueBlk) {
1358 trueBlk->addSuccessor(landBlk);
1359 } else {
1360 headBlk->addSuccessor(landBlk);
1361 }
1362
1363 if (falseBlk) {
1364 falseBlk->addSuccessor(landBlk);
1365 } else {
1366 headBlk->addSuccessor(landBlk);
1367 }
1368
1369 numNewBlk ++;
1370 }
1371
1372 bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
1373
1374 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1375 typename BlockT::iterator insertPos =
1376 CFGTraits::getInstrPos
1377 (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
1378
1379 if (landBlkHasOtherPred) {
1380 unsigned immReg =
1381 funcRep->getRegInfo().createVirtualRegister(I32RC);
1382 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
1383 unsigned cmpResReg =
1384 funcRep->getRegInfo().createVirtualRegister(I32RC);
1385
1386 CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
1387 initReg, immReg);
1388 CFGTraits::insertCondBranchBefore(landBlk, insertPos,
1389 AMDGPU::IF_LOGICALZ_i32, passRep,
1390 cmpResReg, DebugLoc());
1391 }
1392
1393 CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32,
1394 passRep, initReg, DebugLoc());
1395
1396 if (migrateTrue) {
1397 migrateInstruction(trueBlk, landBlk, insertPos);
1398 // need to uncondionally insert the assignment to ensure a path from its
1399 // predecessor rather than headBlk has valid value in initReg if
1400 // (initVal != 1).
1401 CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
1402 }
1403 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
1404
1405 if (migrateFalse) {
1406 migrateInstruction(falseBlk, landBlk, insertPos);
1407 // need to uncondionally insert the assignment to ensure a path from its
1408 // predecessor rather than headBlk has valid value in initReg if
1409 // (initVal != 0)
1410 CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
1411 }
1412 //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1413
1414 if (landBlkHasOtherPred) {
1415 // add endif
1416 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1417
1418 // put initReg = 2 to other predecessors of landBlk
1419 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
1420 predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
1421 ++predIter) {
1422 BlockT *curBlk = *predIter;
1423 if (curBlk != trueBlk && curBlk != falseBlk) {
1424 CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
1425 }
1426 } //for
1427 }
1428 if (DEBUGME) {
1429 errs() << "result from improveSimpleJumpintoIf: ";
1430 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1431 //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
1432 }
1433
1434 // update landBlk
1435 *plandBlk = landBlk;
1436
1437 return numNewBlk;
1438 } //improveSimpleJumpintoIf
1439
1440 template<class PassT>
handleLoopbreak(BlockT * exitingBlk,LoopT * exitingLoop,BlockT * exitBlk,LoopT * exitLoop,BlockT * landBlk)1441 void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
1442 LoopT *exitingLoop,
1443 BlockT *exitBlk,
1444 LoopT *exitLoop,
1445 BlockT *landBlk) {
1446 if (DEBUGME) {
1447 errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
1448 << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
1449 }
1450 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1451
1452 RegiT initReg = INVALIDREGNUM;
1453 if (exitingLoop != exitLoop) {
1454 initReg = static_cast<int>
1455 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1456 assert(initReg != INVALIDREGNUM);
1457 addLoopBreakInitReg(exitLoop, initReg);
1458 while (exitingLoop != exitLoop && exitingLoop) {
1459 addLoopBreakOnReg(exitingLoop, initReg);
1460 exitingLoop = exitingLoop->getParentLoop();
1461 }
1462 assert(exitingLoop == exitLoop);
1463 }
1464
1465 mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
1466
1467 } //handleLoopbreak
1468
1469 template<class PassT>
handleLoopcontBlock(BlockT * contingBlk,LoopT * contingLoop,BlockT * contBlk,LoopT * contLoop)1470 void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
1471 LoopT *contingLoop,
1472 BlockT *contBlk,
1473 LoopT *contLoop) {
1474 if (DEBUGME) {
1475 errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
1476 << " header = BB" << contBlk->getNumber() << "\n";
1477
1478 errs() << "Trying to continue loop-depth = "
1479 << getLoopDepth(contLoop)
1480 << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
1481 }
1482
1483 RegiT initReg = INVALIDREGNUM;
1484 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1485 if (contingLoop != contLoop) {
1486 initReg = static_cast<int>
1487 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1488 assert(initReg != INVALIDREGNUM);
1489 addLoopContInitReg(contLoop, initReg);
1490 while (contingLoop && contingLoop->getParentLoop() != contLoop) {
1491 addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
1492 contingLoop = contingLoop->getParentLoop();
1493 }
1494 assert(contingLoop && contingLoop->getParentLoop() == contLoop);
1495 addLoopContOnReg(contingLoop, initReg);
1496 }
1497
1498 settleLoopcontBlock(contingBlk, contBlk, initReg);
1499 //contingBlk->removeSuccessor(loopHeader);
1500 } //handleLoopcontBlock
1501
1502 template<class PassT>
mergeSerialBlock(BlockT * dstBlk,BlockT * srcBlk)1503 void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
1504 if (DEBUGME) {
1505 errs() << "serialPattern BB" << dstBlk->getNumber()
1506 << " <= BB" << srcBlk->getNumber() << "\n";
1507 }
1508 //removeUnconditionalBranch(dstBlk);
1509 dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());
1510
1511 dstBlk->removeSuccessor(srcBlk);
1512 CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
1513
1514 removeSuccessor(srcBlk);
1515 retireBlock(dstBlk, srcBlk);
1516 } //mergeSerialBlock
1517
1518 template<class PassT>
mergeIfthenelseBlock(InstrT * branchInstr,BlockT * curBlk,BlockT * trueBlk,BlockT * falseBlk,BlockT * landBlk)1519 void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
1520 BlockT *curBlk,
1521 BlockT *trueBlk,
1522 BlockT *falseBlk,
1523 BlockT *landBlk) {
1524 if (DEBUGME) {
1525 errs() << "ifPattern BB" << curBlk->getNumber();
1526 errs() << "{ ";
1527 if (trueBlk) {
1528 errs() << "BB" << trueBlk->getNumber();
1529 }
1530 errs() << " } else ";
1531 errs() << "{ ";
1532 if (falseBlk) {
1533 errs() << "BB" << falseBlk->getNumber();
1534 }
1535 errs() << " }\n ";
1536 errs() << "landBlock: ";
1537 if (landBlk == NULL) {
1538 errs() << "NULL";
1539 } else {
1540 errs() << "BB" << landBlk->getNumber();
1541 }
1542 errs() << "\n";
1543 }
1544
1545 int oldOpcode = branchInstr->getOpcode();
1546 DebugLoc branchDL = branchInstr->getDebugLoc();
1547
1548 // transform to
1549 // if cond
1550 // trueBlk
1551 // else
1552 // falseBlk
1553 // endif
1554 // landBlk
1555
1556 typename BlockT::iterator branchInstrPos =
1557 CFGTraits::getInstrPos(curBlk, branchInstr);
1558 CFGTraits::insertCondBranchBefore(branchInstrPos,
1559 CFGTraits::getBranchNzeroOpcode(oldOpcode),
1560 passRep,
1561 branchDL);
1562
1563 if (trueBlk) {
1564 curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
1565 curBlk->removeSuccessor(trueBlk);
1566 if (landBlk && trueBlk->succ_size()!=0) {
1567 trueBlk->removeSuccessor(landBlk);
1568 }
1569 retireBlock(curBlk, trueBlk);
1570 }
1571 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
1572
1573 if (falseBlk) {
1574 curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
1575 falseBlk->end());
1576 curBlk->removeSuccessor(falseBlk);
1577 if (landBlk && falseBlk->succ_size() != 0) {
1578 falseBlk->removeSuccessor(landBlk);
1579 }
1580 retireBlock(curBlk, falseBlk);
1581 }
1582 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1583
1584 //curBlk->remove(branchInstrPos);
1585 branchInstr->eraseFromParent();
1586
1587 if (landBlk && trueBlk && falseBlk) {
1588 curBlk->addSuccessor(landBlk);
1589 }
1590
1591 } //mergeIfthenelseBlock
1592
1593 template<class PassT>
mergeLooplandBlock(BlockT * dstBlk,LoopLandInfo * loopLand)1594 void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
1595 LoopLandInfo *loopLand) {
1596 BlockT *landBlk = loopLand->landBlk;
1597
1598 if (DEBUGME) {
1599 errs() << "loopPattern header = BB" << dstBlk->getNumber()
1600 << " land = BB" << landBlk->getNumber() << "\n";
1601 }
1602
1603 // Loop contInitRegs are init at the beginning of the loop.
1604 for (typename std::set<RegiT>::const_iterator iter =
1605 loopLand->contInitRegs.begin(),
1606 iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
1607 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1608 }
1609
1610 /* we last inserterd the DebugLoc in the
1611 * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
1612 * search for the DebugLoc in the that statement.
1613 * if not found, we have to insert the empty/default DebugLoc */
1614 InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
1615 DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
1616
1617 CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
1618 // Loop breakInitRegs are init before entering the loop.
1619 for (typename std::set<RegiT>::const_iterator iter =
1620 loopLand->breakInitRegs.begin(),
1621 iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter)
1622 {
1623 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1624 }
1625 // Loop endbranchInitRegs are init before entering the loop.
1626 for (typename std::set<RegiT>::const_iterator iter =
1627 loopLand->endbranchInitRegs.begin(),
1628 iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
1629 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1630 }
1631
1632 /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
1633 * search for the DebugLoc in the continue statement.
1634 * if not found, we have to insert the empty/default DebugLoc */
1635 InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
1636 DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
1637
1638 CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
1639 // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
1640 // loop.
1641 for (typename std::set<RegiT>::const_iterator iter =
1642 loopLand->breakOnRegs.begin(),
1643 iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
1644 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep,
1645 *iter);
1646 }
1647
1648 // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
1649 // loop.
1650 for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
1651 iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
1652 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
1653 passRep, *iter);
1654 }
1655
1656 dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
1657
1658 for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
1659 iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
1660 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
1661 }
1662
1663 removeSuccessor(landBlk);
1664 retireBlock(dstBlk, landBlk);
1665 } //mergeLooplandBlock
1666
1667 template<class PassT>
reversePredicateSetter(typename BlockT::iterator I)1668 void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I)
1669 {
1670 while (I--) {
1671 if (I->getOpcode() == AMDGPU::PRED_X) {
1672 switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
1673 case OPCODE_IS_ZERO_INT:
1674 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
1675 return;
1676 case OPCODE_IS_NOT_ZERO_INT:
1677 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
1678 return;
1679 case OPCODE_IS_ZERO:
1680 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
1681 return;
1682 case OPCODE_IS_NOT_ZERO:
1683 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
1684 return;
1685 default:
1686 assert(0 && "PRED_X Opcode invalid!");
1687 }
1688 }
1689 }
1690 }
1691
1692 template<class PassT>
mergeLoopbreakBlock(BlockT * exitingBlk,BlockT * exitBlk,BlockT * exitLandBlk,RegiT setReg)1693 void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
1694 BlockT *exitBlk,
1695 BlockT *exitLandBlk,
1696 RegiT setReg) {
1697 if (DEBUGME) {
1698 errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
1699 << " exit = BB" << exitBlk->getNumber()
1700 << " land = BB" << exitLandBlk->getNumber() << "\n";
1701 }
1702
1703 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
1704 assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
1705
1706 DebugLoc DL = branchInstr->getDebugLoc();
1707
1708 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1709 int oldOpcode = branchInstr->getOpcode();
1710
1711 // transform exitingBlk to
1712 // if ( ) {
1713 // exitBlk (if exitBlk != exitLandBlk)
1714 // setReg = 1
1715 // break
1716 // }endif
1717 // successor = {orgSuccessor(exitingBlk) - exitBlk}
1718
1719 typename BlockT::iterator branchInstrPos =
1720 CFGTraits::getInstrPos(exitingBlk, branchInstr);
1721
1722 if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
1723 //break_logical
1724
1725 if (trueBranch != exitBlk) {
1726 reversePredicateSetter(branchInstrPos);
1727 }
1728 int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
1729 CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
1730 } else {
1731 if (trueBranch != exitBlk) {
1732 reversePredicateSetter(branchInstr);
1733 }
1734 int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
1735 CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
1736 if (exitBlk != exitLandBlk) {
1737 //splice is insert-before ...
1738 exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
1739 exitBlk->end());
1740 }
1741 if (setReg != INVALIDREGNUM) {
1742 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1743 }
1744 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
1745 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1746 } //if_logical
1747
1748 //now branchInst can be erase safely
1749 //exitingBlk->eraseFromParent(branchInstr);
1750 branchInstr->eraseFromParent();
1751
1752 //now take care of successors, retire blocks
1753 exitingBlk->removeSuccessor(exitBlk);
1754 if (exitBlk != exitLandBlk) {
1755 //splice is insert-before ...
1756 exitBlk->removeSuccessor(exitLandBlk);
1757 retireBlock(exitingBlk, exitBlk);
1758 }
1759
1760 } //mergeLoopbreakBlock
1761
1762 template<class PassT>
settleLoopcontBlock(BlockT * contingBlk,BlockT * contBlk,RegiT setReg)1763 void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
1764 BlockT *contBlk,
1765 RegiT setReg) {
1766 if (DEBUGME) {
1767 errs() << "settleLoopcontBlock conting = BB"
1768 << contingBlk->getNumber()
1769 << ", cont = BB" << contBlk->getNumber() << "\n";
1770 }
1771
1772 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
1773 if (branchInstr) {
1774 assert(CFGTraits::isCondBranch(branchInstr));
1775 typename BlockT::iterator branchInstrPos =
1776 CFGTraits::getInstrPos(contingBlk, branchInstr);
1777 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1778 int oldOpcode = branchInstr->getOpcode();
1779 DebugLoc DL = branchInstr->getDebugLoc();
1780
1781 // transform contingBlk to
1782 // if () {
1783 // move instr after branchInstr
1784 // continue
1785 // or
1786 // setReg = 1
1787 // break
1788 // }endif
1789 // successor = {orgSuccessor(contingBlk) - loopHeader}
1790
1791 bool useContinueLogical =
1792 (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
1793
1794 if (useContinueLogical == false)
1795 {
1796 int branchOpcode =
1797 trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
1798 : CFGTraits::getBranchZeroOpcode(oldOpcode);
1799
1800 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1801
1802 if (setReg != INVALIDREGNUM) {
1803 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1804 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1805 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
1806 } else {
1807 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1808 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
1809 }
1810
1811 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
1812 } else {
1813 int branchOpcode =
1814 trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
1815 : CFGTraits::getContinueZeroOpcode(oldOpcode);
1816
1817 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1818 }
1819
1820 //contingBlk->eraseFromParent(branchInstr);
1821 branchInstr->eraseFromParent();
1822 } else {
1823 /* if we've arrived here then we've already erased the branch instruction
1824 * travel back up the basic block to see the last reference of our debug location
1825 * we've just inserted that reference here so it should be representative */
1826 if (setReg != INVALIDREGNUM) {
1827 CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
1828 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1829 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1830 } else {
1831 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1832 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1833 }
1834 } //else
1835
1836 } //settleLoopcontBlock
1837
1838 // BBs in exitBlkSet are determined as in break-path for loopRep,
1839 // before we can put code for BBs as inside loop-body for loopRep
1840 // check whether those BBs are determined as cont-BB for parentLoopRep
1841 // earlier.
1842 // If so, generate a new BB newBlk
1843 // (1) set newBlk common successor of BBs in exitBlkSet
1844 // (2) change the continue-instr in BBs in exitBlkSet to break-instr
1845 // (3) generate continue-instr in newBlk
1846 //
1847 template<class PassT>
1848 typename CFGStructurizer<PassT>::BlockT *
relocateLoopcontBlock(LoopT * parentLoopRep,LoopT * loopRep,std::set<BlockT * > & exitBlkSet,BlockT * exitLandBlk)1849 CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
1850 LoopT *loopRep,
1851 std::set<BlockT *> &exitBlkSet,
1852 BlockT *exitLandBlk) {
1853 std::set<BlockT *> endBlkSet;
1854
1855 // BlockT *parentLoopHead = parentLoopRep->getHeader();
1856
1857
1858 for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
1859 iterEnd = exitBlkSet.end();
1860 iter != iterEnd; ++iter) {
1861 BlockT *exitBlk = *iter;
1862 BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
1863
1864 if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
1865 return NULL;
1866
1867 endBlkSet.insert(endBlk);
1868 }
1869
1870 BlockT *newBlk = funcRep->CreateMachineBasicBlock();
1871 funcRep->push_back(newBlk); //insert to function
1872 CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
1873 SHOWNEWBLK(newBlk, "New continue block: ");
1874
1875 for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
1876 iterEnd = endBlkSet.end();
1877 iter != iterEnd; ++iter) {
1878 BlockT *endBlk = *iter;
1879 InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
1880 if (contInstr) {
1881 contInstr->eraseFromParent();
1882 }
1883 endBlk->addSuccessor(newBlk);
1884 if (DEBUGME) {
1885 errs() << "Add new continue Block to BB"
1886 << endBlk->getNumber() << " successors\n";
1887 }
1888 }
1889
1890 return newBlk;
1891 } //relocateLoopcontBlock
1892
1893
1894 // LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
1895 // LoopLandBlock. This BB branch on the loop endBranchInit register to the
1896 // pathes corresponding to the loop exiting branches.
1897
1898 template<class PassT>
1899 typename CFGStructurizer<PassT>::BlockT *
addLoopEndbranchBlock(LoopT * loopRep,BlockTSmallerVector & exitingBlks,BlockTSmallerVector & exitBlks)1900 CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
1901 BlockTSmallerVector &exitingBlks,
1902 BlockTSmallerVector &exitBlks) {
1903 const AMDGPUInstrInfo *tii =
1904 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
1905 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1906
1907 RegiT endBranchReg = static_cast<int>
1908 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1909 assert(endBranchReg >= 0);
1910
1911 // reg = 0 before entering the loop
1912 addLoopEndbranchInitReg(loopRep, endBranchReg);
1913
1914 uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
1915 assert(numBlks >=2 && numBlks == exitBlks.size());
1916
1917 BlockT *preExitingBlk = exitingBlks[0];
1918 BlockT *preExitBlk = exitBlks[0];
1919 BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
1920 funcRep->push_back(preBranchBlk); //insert to function
1921 SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
1922
1923 BlockT *newLandBlk = preBranchBlk;
1924
1925 CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
1926 newLandBlk);
1927 preExitingBlk->removeSuccessor(preExitBlk);
1928 preExitingBlk->addSuccessor(newLandBlk);
1929
1930 //it is redundant to add reg = 0 to exitingBlks[0]
1931
1932 // For 1..n th exiting path (the last iteration handles two pathes) create the
1933 // branch to the previous path and the current path.
1934 for (uint32_t i = 1; i < numBlks; ++i) {
1935 BlockT *curExitingBlk = exitingBlks[i];
1936 BlockT *curExitBlk = exitBlks[i];
1937 BlockT *curBranchBlk;
1938
1939 if (i == numBlks - 1) {
1940 curBranchBlk = curExitBlk;
1941 } else {
1942 curBranchBlk = funcRep->CreateMachineBasicBlock();
1943 funcRep->push_back(curBranchBlk); //insert to function
1944 SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
1945 }
1946
1947 // Add reg = i to exitingBlks[i].
1948 CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
1949 endBranchReg, i);
1950
1951 // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
1952 // (exitingBlks[i], newLandBlk).
1953 CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
1954 newLandBlk);
1955 curExitingBlk->removeSuccessor(curExitBlk);
1956 curExitingBlk->addSuccessor(newLandBlk);
1957
1958 // add to preBranchBlk the branch instruction:
1959 // if (endBranchReg == preVal)
1960 // preExitBlk
1961 // else
1962 // curBranchBlk
1963 //
1964 // preValReg = i - 1
1965
1966 DebugLoc DL;
1967 RegiT preValReg = static_cast<int>
1968 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1969
1970 preBranchBlk->insert(preBranchBlk->begin(),
1971 tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
1972 i - 1));
1973
1974 // condResReg = (endBranchReg == preValReg)
1975 RegiT condResReg = static_cast<int>
1976 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1977 BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
1978 .addReg(endBranchReg).addReg(preValReg);
1979
1980 BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
1981 .addMBB(preExitBlk).addReg(condResReg);
1982
1983 preBranchBlk->addSuccessor(preExitBlk);
1984 preBranchBlk->addSuccessor(curBranchBlk);
1985
1986 // Update preExitingBlk, preExitBlk, preBranchBlk.
1987 preExitingBlk = curExitingBlk;
1988 preExitBlk = curExitBlk;
1989 preBranchBlk = curBranchBlk;
1990
1991 } //end for 1 .. n blocks
1992
1993 return newLandBlk;
1994 } //addLoopEndbranchBlock
1995
1996 template<class PassT>
1997 typename CFGStructurizer<PassT>::PathToKind
singlePathTo(BlockT * srcBlk,BlockT * dstBlk,bool allowSideEntry)1998 CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
1999 bool allowSideEntry) {
2000 assert(dstBlk);
2001
2002 if (srcBlk == dstBlk) {
2003 return SinglePath_InPath;
2004 }
2005
2006 while (srcBlk && srcBlk->succ_size() == 1) {
2007 srcBlk = *srcBlk->succ_begin();
2008 if (srcBlk == dstBlk) {
2009 return SinglePath_InPath;
2010 }
2011
2012 if (!allowSideEntry && srcBlk->pred_size() > 1) {
2013 return Not_SinglePath;
2014 }
2015 }
2016
2017 if (srcBlk && srcBlk->succ_size()==0) {
2018 return SinglePath_NotInPath;
2019 }
2020
2021 return Not_SinglePath;
2022 } //singlePathTo
2023
2024 // If there is a single path from srcBlk to dstBlk, return the last block before
2025 // dstBlk If there is a single path from srcBlk->end without dstBlk, return the
2026 // last block in the path Otherwise, return NULL
2027 template<class PassT>
2028 typename CFGStructurizer<PassT>::BlockT *
singlePathEnd(BlockT * srcBlk,BlockT * dstBlk,bool allowSideEntry)2029 CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
2030 bool allowSideEntry) {
2031 assert(dstBlk);
2032
2033 if (srcBlk == dstBlk) {
2034 return srcBlk;
2035 }
2036
2037 if (srcBlk->succ_size() == 0) {
2038 return srcBlk;
2039 }
2040
2041 while (srcBlk && srcBlk->succ_size() == 1) {
2042 BlockT *preBlk = srcBlk;
2043
2044 srcBlk = *srcBlk->succ_begin();
2045 if (srcBlk == NULL) {
2046 return preBlk;
2047 }
2048
2049 if (!allowSideEntry && srcBlk->pred_size() > 1) {
2050 return NULL;
2051 }
2052 }
2053
2054 if (srcBlk && srcBlk->succ_size()==0) {
2055 return srcBlk;
2056 }
2057
2058 return NULL;
2059
2060 } //singlePathEnd
2061
2062 template<class PassT>
cloneOnSideEntryTo(BlockT * preBlk,BlockT * srcBlk,BlockT * dstBlk)2063 int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
2064 BlockT *dstBlk) {
2065 int cloned = 0;
2066 assert(preBlk->isSuccessor(srcBlk));
2067 while (srcBlk && srcBlk != dstBlk) {
2068 assert(srcBlk->succ_size() == 1);
2069 if (srcBlk->pred_size() > 1) {
2070 srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
2071 ++cloned;
2072 }
2073
2074 preBlk = srcBlk;
2075 srcBlk = *srcBlk->succ_begin();
2076 }
2077
2078 return cloned;
2079 } //cloneOnSideEntryTo
2080
2081 template<class PassT>
2082 typename CFGStructurizer<PassT>::BlockT *
cloneBlockForPredecessor(BlockT * curBlk,BlockT * predBlk)2083 CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
2084 BlockT *predBlk) {
2085 assert(predBlk->isSuccessor(curBlk) &&
2086 "succBlk is not a prececessor of curBlk");
2087
2088 BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
2089 CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
2090 //srcBlk, oldBlk, newBlk
2091
2092 predBlk->removeSuccessor(curBlk);
2093 predBlk->addSuccessor(cloneBlk);
2094
2095 // add all successor to cloneBlk
2096 CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
2097
2098 numClonedInstr += curBlk->size();
2099
2100 if (DEBUGME) {
2101 errs() << "Cloned block: " << "BB"
2102 << curBlk->getNumber() << "size " << curBlk->size() << "\n";
2103 }
2104
2105 SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
2106
2107 return cloneBlk;
2108 } //cloneBlockForPredecessor
2109
2110 template<class PassT>
2111 typename CFGStructurizer<PassT>::BlockT *
exitingBlock2ExitBlock(LoopT * loopRep,BlockT * exitingBlk)2112 CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
2113 BlockT *exitingBlk) {
2114 BlockT *exitBlk = NULL;
2115
2116 for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
2117 iterSuccEnd = exitingBlk->succ_end();
2118 iterSucc != iterSuccEnd; ++iterSucc) {
2119 BlockT *curBlk = *iterSucc;
2120 if (!loopRep->contains(curBlk)) {
2121 assert(exitBlk == NULL);
2122 exitBlk = curBlk;
2123 }
2124 }
2125
2126 assert(exitBlk != NULL);
2127
2128 return exitBlk;
2129 } //exitingBlock2ExitBlock
2130
2131 template<class PassT>
migrateInstruction(BlockT * srcBlk,BlockT * dstBlk,InstrIterator insertPos)2132 void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
2133 BlockT *dstBlk,
2134 InstrIterator insertPos) {
2135 InstrIterator spliceEnd;
2136 //look for the input branchinstr, not the AMDGPU branchinstr
2137 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2138 if (branchInstr == NULL) {
2139 if (DEBUGME) {
2140 errs() << "migrateInstruction don't see branch instr\n" ;
2141 }
2142 spliceEnd = srcBlk->end();
2143 } else {
2144 if (DEBUGME) {
2145 errs() << "migrateInstruction see branch instr\n" ;
2146 branchInstr->dump();
2147 }
2148 spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
2149 }
2150 if (DEBUGME) {
2151 errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
2152 << "srcSize = " << srcBlk->size() << "\n";
2153 }
2154
2155 //splice insert before insertPos
2156 dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
2157
2158 if (DEBUGME) {
2159 errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
2160 << "srcSize = " << srcBlk->size() << "\n";
2161 }
2162 } //migrateInstruction
2163
2164 // normalizeInfiniteLoopExit change
2165 // B1:
2166 // uncond_br LoopHeader
2167 //
2168 // to
2169 // B1:
2170 // cond_br 1 LoopHeader dummyExit
2171 // and return the newly added dummy exit block
2172 //
2173 template<class PassT>
2174 typename CFGStructurizer<PassT>::BlockT *
normalizeInfiniteLoopExit(LoopT * LoopRep)2175 CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
2176 BlockT *loopHeader;
2177 BlockT *loopLatch;
2178 loopHeader = LoopRep->getHeader();
2179 loopLatch = LoopRep->getLoopLatch();
2180 BlockT *dummyExitBlk = NULL;
2181 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
2182 if (loopHeader!=NULL && loopLatch!=NULL) {
2183 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
2184 if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
2185 dummyExitBlk = funcRep->CreateMachineBasicBlock();
2186 funcRep->push_back(dummyExitBlk); //insert to function
2187 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
2188
2189 if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
2190
2191 typename BlockT::iterator insertPos =
2192 CFGTraits::getInstrPos(loopLatch, branchInstr);
2193 unsigned immReg =
2194 funcRep->getRegInfo().createVirtualRegister(I32RC);
2195 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
2196 InstrT *newInstr =
2197 CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
2198 MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
2199
2200 SHOWNEWINSTR(newInstr);
2201
2202 branchInstr->eraseFromParent();
2203 loopLatch->addSuccessor(dummyExitBlk);
2204 }
2205 }
2206
2207 return dummyExitBlk;
2208 } //normalizeInfiniteLoopExit
2209
2210 template<class PassT>
removeUnconditionalBranch(BlockT * srcBlk)2211 void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
2212 InstrT *branchInstr;
2213
2214 // I saw two unconditional branch in one basic block in example
2215 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
2216 while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
2217 && CFGTraits::isUncondBranch(branchInstr)) {
2218 if (DEBUGME) {
2219 errs() << "Removing unconditional branch instruction" ;
2220 branchInstr->dump();
2221 }
2222 branchInstr->eraseFromParent();
2223 }
2224 } //removeUnconditionalBranch
2225
2226 template<class PassT>
removeRedundantConditionalBranch(BlockT * srcBlk)2227 void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
2228 if (srcBlk->succ_size() == 2) {
2229 BlockT *blk1 = *srcBlk->succ_begin();
2230 BlockT *blk2 = *(++srcBlk->succ_begin());
2231
2232 if (blk1 == blk2) {
2233 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2234 assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
2235 if (DEBUGME) {
2236 errs() << "Removing unneeded conditional branch instruction" ;
2237 branchInstr->dump();
2238 }
2239 branchInstr->eraseFromParent();
2240 SHOWNEWBLK(blk1, "Removing redundant successor");
2241 srcBlk->removeSuccessor(blk1);
2242 }
2243 }
2244 } //removeRedundantConditionalBranch
2245
2246 template<class PassT>
addDummyExitBlock(SmallVector<BlockT *,DEFAULT_VEC_SLOTS> & retBlks)2247 void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
2248 DEFAULT_VEC_SLOTS> &retBlks) {
2249 BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
2250 funcRep->push_back(dummyExitBlk); //insert to function
2251 CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
2252
2253 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
2254 retBlks.begin(),
2255 iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
2256 BlockT *curBlk = *iter;
2257 InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
2258 if (curInstr) {
2259 curInstr->eraseFromParent();
2260 }
2261 #if 0
2262 if (curBlk->size()==0 && curBlk->pred_size() == 1) {
2263 if (DEBUGME) {
2264 errs() << "Replace empty block BB" << curBlk->getNumber()
2265 << " with dummyExitBlock\n";
2266 }
2267 BlockT *predb = *curBlk->pred_begin();
2268 predb->removeSuccessor(curBlk);
2269 curBlk = predb;
2270 } //handle empty curBlk
2271 #endif
2272 curBlk->addSuccessor(dummyExitBlk);
2273 if (DEBUGME) {
2274 errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
2275 << " successors\n";
2276 }
2277 } //for
2278
2279 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
2280 } //addDummyExitBlock
2281
2282 template<class PassT>
removeSuccessor(BlockT * srcBlk)2283 void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
2284 while (srcBlk->succ_size()) {
2285 srcBlk->removeSuccessor(*srcBlk->succ_begin());
2286 }
2287 }
2288
2289 template<class PassT>
recordSccnum(BlockT * srcBlk,int sccNum)2290 void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
2291 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2292
2293 if (srcBlkInfo == NULL) {
2294 srcBlkInfo = new BlockInfo();
2295 }
2296
2297 srcBlkInfo->sccNum = sccNum;
2298 }
2299
2300 template<class PassT>
getSCCNum(BlockT * srcBlk)2301 int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
2302 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2303 return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
2304 }
2305
2306 template<class PassT>
retireBlock(BlockT * dstBlk,BlockT * srcBlk)2307 void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
2308 if (DEBUGME) {
2309 errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
2310 }
2311
2312 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2313
2314 if (srcBlkInfo == NULL) {
2315 srcBlkInfo = new BlockInfo();
2316 }
2317
2318 srcBlkInfo->isRetired = true;
2319 //int i = srcBlk->succ_size();
2320 //int j = srcBlk->pred_size();
2321 assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
2322 && "can't retire block yet");
2323 }
2324
2325 template<class PassT>
isRetiredBlock(BlockT * srcBlk)2326 bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
2327 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2328 return (srcBlkInfo && srcBlkInfo->isRetired);
2329 }
2330
2331 template<class PassT>
isActiveLoophead(BlockT * curBlk)2332 bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
2333 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2334 while (loopRep && loopRep->getHeader() == curBlk) {
2335 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
2336
2337 if(loopLand == NULL)
2338 return true;
2339
2340 BlockT *landBlk = loopLand->landBlk;
2341 assert(landBlk);
2342 if (!isRetiredBlock(landBlk)) {
2343 return true;
2344 }
2345
2346 loopRep = loopRep->getParentLoop();
2347 }
2348
2349 return false;
2350 } //isActiveLoophead
2351
2352 template<class PassT>
needMigrateBlock(BlockT * blk)2353 bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
2354 const unsigned blockSizeThreshold = 30;
2355 const unsigned cloneInstrThreshold = 100;
2356
2357 bool multiplePreds = blk && (blk->pred_size() > 1);
2358
2359 if(!multiplePreds)
2360 return false;
2361
2362 unsigned blkSize = blk->size();
2363 return ((blkSize > blockSizeThreshold)
2364 && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
2365 } //needMigrateBlock
2366
2367 template<class PassT>
2368 typename CFGStructurizer<PassT>::BlockT *
recordLoopLandBlock(LoopT * loopRep,BlockT * landBlk,BlockTSmallerVector & exitBlks,std::set<BlockT * > & exitBlkSet)2369 CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
2370 BlockTSmallerVector &exitBlks,
2371 std::set<BlockT *> &exitBlkSet) {
2372 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
2373
2374 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
2375 predIterEnd = landBlk->pred_end();
2376 predIter != predIterEnd; ++predIter) {
2377 BlockT *curBlk = *predIter;
2378 if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
2379 inpathBlks.push_back(curBlk);
2380 }
2381 } //for
2382
2383 //if landBlk has predecessors that are not in the given loop,
2384 //create a new block
2385 BlockT *newLandBlk = landBlk;
2386 if (inpathBlks.size() != landBlk->pred_size()) {
2387 newLandBlk = funcRep->CreateMachineBasicBlock();
2388 funcRep->push_back(newLandBlk); //insert to function
2389 newLandBlk->addSuccessor(landBlk);
2390 for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
2391 inpathBlks.begin(),
2392 iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
2393 BlockT *curBlk = *iter;
2394 CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
2395 //srcBlk, oldBlk, newBlk
2396 curBlk->removeSuccessor(landBlk);
2397 curBlk->addSuccessor(newLandBlk);
2398 }
2399 for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
2400 if (exitBlks[i] == landBlk) {
2401 exitBlks[i] = newLandBlk;
2402 }
2403 }
2404 SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
2405 }
2406
2407 setLoopLandBlock(loopRep, newLandBlk);
2408
2409 return newLandBlk;
2410 } // recordLoopbreakLand
2411
2412 template<class PassT>
setLoopLandBlock(LoopT * loopRep,BlockT * blk)2413 void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
2414 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2415
2416 if (theEntry == NULL) {
2417 theEntry = new LoopLandInfo();
2418 }
2419 assert(theEntry->landBlk == NULL);
2420
2421 if (blk == NULL) {
2422 blk = funcRep->CreateMachineBasicBlock();
2423 funcRep->push_back(blk); //insert to function
2424 SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
2425 }
2426
2427 theEntry->landBlk = blk;
2428
2429 if (DEBUGME) {
2430 errs() << "setLoopLandBlock loop-header = BB"
2431 << loopRep->getHeader()->getNumber()
2432 << " landing-block = BB" << blk->getNumber() << "\n";
2433 }
2434 } // setLoopLandBlock
2435
2436 template<class PassT>
addLoopBreakOnReg(LoopT * loopRep,RegiT regNum)2437 void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
2438 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2439
2440 if (theEntry == NULL) {
2441 theEntry = new LoopLandInfo();
2442 }
2443
2444 theEntry->breakOnRegs.insert(regNum);
2445
2446 if (DEBUGME) {
2447 errs() << "addLoopBreakOnReg loop-header = BB"
2448 << loopRep->getHeader()->getNumber()
2449 << " regNum = " << regNum << "\n";
2450 }
2451 } // addLoopBreakOnReg
2452
2453 template<class PassT>
addLoopContOnReg(LoopT * loopRep,RegiT regNum)2454 void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
2455 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2456
2457 if (theEntry == NULL) {
2458 theEntry = new LoopLandInfo();
2459 }
2460 theEntry->contOnRegs.insert(regNum);
2461
2462 if (DEBUGME) {
2463 errs() << "addLoopContOnReg loop-header = BB"
2464 << loopRep->getHeader()->getNumber()
2465 << " regNum = " << regNum << "\n";
2466 }
2467 } // addLoopContOnReg
2468
2469 template<class PassT>
addLoopBreakInitReg(LoopT * loopRep,RegiT regNum)2470 void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
2471 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2472
2473 if (theEntry == NULL) {
2474 theEntry = new LoopLandInfo();
2475 }
2476 theEntry->breakInitRegs.insert(regNum);
2477
2478 if (DEBUGME) {
2479 errs() << "addLoopBreakInitReg loop-header = BB"
2480 << loopRep->getHeader()->getNumber()
2481 << " regNum = " << regNum << "\n";
2482 }
2483 } // addLoopBreakInitReg
2484
2485 template<class PassT>
addLoopContInitReg(LoopT * loopRep,RegiT regNum)2486 void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
2487 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2488
2489 if (theEntry == NULL) {
2490 theEntry = new LoopLandInfo();
2491 }
2492 theEntry->contInitRegs.insert(regNum);
2493
2494 if (DEBUGME) {
2495 errs() << "addLoopContInitReg loop-header = BB"
2496 << loopRep->getHeader()->getNumber()
2497 << " regNum = " << regNum << "\n";
2498 }
2499 } // addLoopContInitReg
2500
2501 template<class PassT>
addLoopEndbranchInitReg(LoopT * loopRep,RegiT regNum)2502 void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
2503 RegiT regNum) {
2504 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2505
2506 if (theEntry == NULL) {
2507 theEntry = new LoopLandInfo();
2508 }
2509 theEntry->endbranchInitRegs.insert(regNum);
2510
2511 if (DEBUGME)
2512 {
2513 errs() << "addLoopEndbranchInitReg loop-header = BB"
2514 << loopRep->getHeader()->getNumber()
2515 << " regNum = " << regNum << "\n";
2516 }
2517 } // addLoopEndbranchInitReg
2518
2519 template<class PassT>
2520 typename CFGStructurizer<PassT>::LoopLandInfo *
getLoopLandInfo(LoopT * loopRep)2521 CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
2522 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2523
2524 return theEntry;
2525 } // getLoopLandInfo
2526
2527 template<class PassT>
2528 typename CFGStructurizer<PassT>::BlockT *
getLoopLandBlock(LoopT * loopRep)2529 CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
2530 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2531
2532 return theEntry ? theEntry->landBlk : NULL;
2533 } // getLoopLandBlock
2534
2535
2536 template<class PassT>
hasBackEdge(BlockT * curBlk)2537 bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
2538 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2539 if (loopRep == NULL)
2540 return false;
2541
2542 BlockT *loopHeader = loopRep->getHeader();
2543
2544 return curBlk->isSuccessor(loopHeader);
2545
2546 } //hasBackEdge
2547
2548 template<class PassT>
getLoopDepth(LoopT * loopRep)2549 unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
2550 return loopRep ? loopRep->getLoopDepth() : 0;
2551 } //getLoopDepth
2552
2553 template<class PassT>
countActiveBlock(typename SmallVector<BlockT *,DEFAULT_VEC_SLOTS>::const_iterator iterStart,typename SmallVector<BlockT *,DEFAULT_VEC_SLOTS>::const_iterator iterEnd)2554 int CFGStructurizer<PassT>::countActiveBlock
2555 (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
2556 typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
2557 int count = 0;
2558 while (iterStart != iterEnd) {
2559 if (!isRetiredBlock(*iterStart)) {
2560 ++count;
2561 }
2562 ++iterStart;
2563 }
2564
2565 return count;
2566 } //countActiveBlock
2567
2568 // This is work around solution for findNearestCommonDominator not avaiable to
2569 // post dom a proper fix should go to Dominators.h.
2570
2571 template<class PassT>
2572 typename CFGStructurizer<PassT>::BlockT*
findNearestCommonPostDom(BlockT * blk1,BlockT * blk2)2573 CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
2574
2575 if (postDomTree->dominates(blk1, blk2)) {
2576 return blk1;
2577 }
2578 if (postDomTree->dominates(blk2, blk1)) {
2579 return blk2;
2580 }
2581
2582 DomTreeNodeT *node1 = postDomTree->getNode(blk1);
2583 DomTreeNodeT *node2 = postDomTree->getNode(blk2);
2584
2585 // Handle newly cloned node.
2586 if (node1 == NULL && blk1->succ_size() == 1) {
2587 return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
2588 }
2589 if (node2 == NULL && blk2->succ_size() == 1) {
2590 return findNearestCommonPostDom(blk1, *blk2->succ_begin());
2591 }
2592
2593 if (node1 == NULL || node2 == NULL) {
2594 return NULL;
2595 }
2596
2597 node1 = node1->getIDom();
2598 while (node1) {
2599 if (postDomTree->dominates(node1, node2)) {
2600 return node1->getBlock();
2601 }
2602 node1 = node1->getIDom();
2603 }
2604
2605 return NULL;
2606 }
2607
2608 template<class PassT>
2609 typename CFGStructurizer<PassT>::BlockT *
findNearestCommonPostDom(typename std::set<BlockT * > & blks)2610 CFGStructurizer<PassT>::findNearestCommonPostDom
2611 (typename std::set<BlockT *> &blks) {
2612 BlockT *commonDom;
2613 typename std::set<BlockT *>::const_iterator iter = blks.begin();
2614 typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
2615 for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
2616 BlockT *curBlk = *iter;
2617 if (curBlk != commonDom) {
2618 commonDom = findNearestCommonPostDom(curBlk, commonDom);
2619 }
2620 }
2621
2622 if (DEBUGME) {
2623 errs() << "Common post dominator for exit blocks is ";
2624 if (commonDom) {
2625 errs() << "BB" << commonDom->getNumber() << "\n";
2626 } else {
2627 errs() << "NULL\n";
2628 }
2629 }
2630
2631 return commonDom;
2632 } //findNearestCommonPostDom
2633
2634 } //end namespace llvm
2635
2636 //todo: move-end
2637
2638
2639 //===----------------------------------------------------------------------===//
2640 //
2641 // CFGStructurizer for AMDGPU
2642 //
2643 //===----------------------------------------------------------------------===//
2644
2645
2646 using namespace llvmCFGStruct;
2647
2648 namespace llvm
2649 {
2650 class AMDGPUCFGStructurizer : public MachineFunctionPass
2651 {
2652 public:
2653 typedef MachineInstr InstructionType;
2654 typedef MachineFunction FunctionType;
2655 typedef MachineBasicBlock BlockType;
2656 typedef MachineLoopInfo LoopinfoType;
2657 typedef MachineDominatorTree DominatortreeType;
2658 typedef MachinePostDominatorTree PostDominatortreeType;
2659 typedef MachineDomTreeNode DomTreeNodeType;
2660 typedef MachineLoop LoopType;
2661
2662 protected:
2663 TargetMachine &TM;
2664 const TargetInstrInfo *TII;
2665 const AMDGPURegisterInfo *TRI;
2666
2667 public:
2668 AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
2669 const TargetInstrInfo *getTargetInstrInfo() const;
2670 //bool runOnMachineFunction(MachineFunction &F);
2671
2672 private:
2673
2674 }; //end of class AMDGPUCFGStructurizer
2675
2676 //char AMDGPUCFGStructurizer::ID = 0;
2677 } //end of namespace llvm
AMDGPUCFGStructurizer(char & pid,TargetMachine & tm)2678 AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm
2679 )
2680 : MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
2681 TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())
2682 ) {
2683 }
2684
getTargetInstrInfo() const2685 const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
2686 return TII;
2687 }
2688 //===----------------------------------------------------------------------===//
2689 //
2690 // CFGPrepare
2691 //
2692 //===----------------------------------------------------------------------===//
2693
2694
2695 using namespace llvmCFGStruct;
2696
2697 namespace llvm
2698 {
2699 class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer
2700 {
2701 public:
2702 static char ID;
2703
2704 public:
2705 AMDGPUCFGPrepare(TargetMachine &tm);
2706
2707 virtual const char *getPassName() const;
2708 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2709
2710 bool runOnMachineFunction(MachineFunction &F);
2711
2712 private:
2713
2714 }; //end of class AMDGPUCFGPrepare
2715
2716 char AMDGPUCFGPrepare::ID = 0;
2717 } //end of namespace llvm
2718
AMDGPUCFGPrepare(TargetMachine & tm)2719 AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
2720 : AMDGPUCFGStructurizer(ID, tm )
2721 {
2722 }
getPassName() const2723 const char *AMDGPUCFGPrepare::getPassName() const {
2724 return "AMD IL Control Flow Graph Preparation Pass";
2725 }
2726
getAnalysisUsage(AnalysisUsage & AU) const2727 void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
2728 AU.addPreserved<MachineFunctionAnalysis>();
2729 AU.addRequired<MachineFunctionAnalysis>();
2730 AU.addRequired<MachineDominatorTree>();
2731 AU.addRequired<MachinePostDominatorTree>();
2732 AU.addRequired<MachineLoopInfo>();
2733 }
2734
2735 //===----------------------------------------------------------------------===//
2736 //
2737 // CFGPerform
2738 //
2739 //===----------------------------------------------------------------------===//
2740
2741
2742 using namespace llvmCFGStruct;
2743
2744 namespace llvm
2745 {
2746 class AMDGPUCFGPerform : public AMDGPUCFGStructurizer
2747 {
2748 public:
2749 static char ID;
2750
2751 public:
2752 AMDGPUCFGPerform(TargetMachine &tm);
2753 virtual const char *getPassName() const;
2754 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2755 bool runOnMachineFunction(MachineFunction &F);
2756
2757 private:
2758
2759 }; //end of class AMDGPUCFGPerform
2760
2761 char AMDGPUCFGPerform::ID = 0;
2762 } //end of namespace llvm
2763
AMDGPUCFGPerform(TargetMachine & tm)2764 AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
2765 : AMDGPUCFGStructurizer(ID, tm)
2766 {
2767 }
2768
getPassName() const2769 const char *AMDGPUCFGPerform::getPassName() const {
2770 return "AMD IL Control Flow Graph structurizer Pass";
2771 }
2772
getAnalysisUsage(AnalysisUsage & AU) const2773 void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
2774 AU.addPreserved<MachineFunctionAnalysis>();
2775 AU.addRequired<MachineFunctionAnalysis>();
2776 AU.addRequired<MachineDominatorTree>();
2777 AU.addRequired<MachinePostDominatorTree>();
2778 AU.addRequired<MachineLoopInfo>();
2779 }
2780
2781 //===----------------------------------------------------------------------===//
2782 //
2783 // CFGStructTraits<AMDGPUCFGStructurizer>
2784 //
2785 //===----------------------------------------------------------------------===//
2786
2787 namespace llvmCFGStruct
2788 {
2789 // this class is tailor to the AMDGPU backend
2790 template<>
2791 struct CFGStructTraits<AMDGPUCFGStructurizer>
2792 {
2793 typedef int RegiT;
2794
getBreakNzeroOpcodellvmCFGStruct::CFGStructTraits2795 static int getBreakNzeroOpcode(int oldOpcode) {
2796 switch(oldOpcode) {
2797 case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32;
2798 default:
2799 assert(0 && "internal error");
2800 };
2801 return -1;
2802 }
2803
getBreakZeroOpcodellvmCFGStruct::CFGStructTraits2804 static int getBreakZeroOpcode(int oldOpcode) {
2805 switch(oldOpcode) {
2806 case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32;
2807 default:
2808 assert(0 && "internal error");
2809 };
2810 return -1;
2811 }
2812
getBranchNzeroOpcodellvmCFGStruct::CFGStructTraits2813 static int getBranchNzeroOpcode(int oldOpcode) {
2814 switch(oldOpcode) {
2815 case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32;
2816 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
2817 case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
2818 default:
2819 assert(0 && "internal error");
2820 };
2821 return -1;
2822 }
2823
getBranchZeroOpcodellvmCFGStruct::CFGStructTraits2824 static int getBranchZeroOpcode(int oldOpcode) {
2825 switch(oldOpcode) {
2826 case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32;
2827 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
2828 case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
2829 default:
2830 assert(0 && "internal error");
2831 };
2832 return -1;
2833 }
2834
getContinueNzeroOpcodellvmCFGStruct::CFGStructTraits2835 static int getContinueNzeroOpcode(int oldOpcode)
2836 {
2837 switch(oldOpcode) {
2838 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
2839 default:
2840 assert(0 && "internal error");
2841 };
2842 return -1;
2843 }
2844
getContinueZeroOpcodellvmCFGStruct::CFGStructTraits2845 static int getContinueZeroOpcode(int oldOpcode) {
2846 switch(oldOpcode) {
2847 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
2848 default:
2849 assert(0 && "internal error");
2850 };
2851 return -1;
2852 }
2853
2854 // the explicitly represented branch target is the true branch target
2855 #define getExplicitBranch getTrueBranch
2856 #define setExplicitBranch setTrueBranch
2857
getTrueBranchllvmCFGStruct::CFGStructTraits2858 static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
2859 return instr->getOperand(0).getMBB();
2860 }
2861
setTrueBranchllvmCFGStruct::CFGStructTraits2862 static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
2863 instr->getOperand(0).setMBB(blk);
2864 }
2865
2866 static MachineBasicBlock *
getFalseBranchllvmCFGStruct::CFGStructTraits2867 getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
2868 assert(blk->succ_size() == 2);
2869 MachineBasicBlock *trueBranch = getTrueBranch(instr);
2870 MachineBasicBlock::succ_iterator iter = blk->succ_begin();
2871 MachineBasicBlock::succ_iterator iterNext = iter;
2872 ++iterNext;
2873
2874 return (*iter == trueBranch) ? *iterNext : *iter;
2875 }
2876
isCondBranchllvmCFGStruct::CFGStructTraits2877 static bool isCondBranch(MachineInstr *instr) {
2878 switch (instr->getOpcode()) {
2879 case AMDGPU::JUMP:
2880 return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
2881 ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
2882 case AMDGPU::SI_IF_NZ:
2883 case AMDGPU::SI_IF_Z:
2884 break;
2885 default:
2886 return false;
2887 }
2888 return true;
2889 }
2890
isUncondBranchllvmCFGStruct::CFGStructTraits2891 static bool isUncondBranch(MachineInstr *instr) {
2892 switch (instr->getOpcode()) {
2893 case AMDGPU::JUMP:
2894 return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
2895 default:
2896 return false;
2897 }
2898 return true;
2899 }
2900
getLastDebugLocInBBllvmCFGStruct::CFGStructTraits2901 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
2902 //get DebugLoc from the first MachineBasicBlock instruction with debug info
2903 DebugLoc DL;
2904 for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
2905 MachineInstr *instr = &(*iter);
2906 if (instr->getDebugLoc().isUnknown() == false) {
2907 DL = instr->getDebugLoc();
2908 }
2909 }
2910 return DL;
2911 }
2912
getNormalBlockBranchInstrllvmCFGStruct::CFGStructTraits2913 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
2914 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2915 MachineInstr *instr = &*iter;
2916 if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
2917 return instr;
2918 }
2919 return NULL;
2920 }
2921
2922 // The correct naming for this is getPossibleLoopendBlockBranchInstr.
2923 //
2924 // BB with backward-edge could have move instructions after the branch
2925 // instruction. Such move instruction "belong to" the loop backward-edge.
2926 //
getLoopendBlockBranchInstrllvmCFGStruct::CFGStructTraits2927 static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
2928 const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
2929 blk->getParent()->getTarget().getInstrInfo());
2930
2931 for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
2932 iterEnd = blk->rend(); iter != iterEnd; ++iter) {
2933 // FIXME: Simplify
2934 MachineInstr *instr = &*iter;
2935 if (instr) {
2936 if (isCondBranch(instr) || isUncondBranch(instr)) {
2937 return instr;
2938 } else if (!TII->isMov(instr->getOpcode())) {
2939 break;
2940 }
2941 }
2942 }
2943 return NULL;
2944 }
2945
getReturnInstrllvmCFGStruct::CFGStructTraits2946 static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
2947 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2948 if (iter != blk->rend()) {
2949 MachineInstr *instr = &(*iter);
2950 if (instr->getOpcode() == AMDGPU::RETURN) {
2951 return instr;
2952 }
2953 }
2954 return NULL;
2955 }
2956
getContinueInstrllvmCFGStruct::CFGStructTraits2957 static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
2958 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2959 if (iter != blk->rend()) {
2960 MachineInstr *instr = &(*iter);
2961 if (instr->getOpcode() == AMDGPU::CONTINUE) {
2962 return instr;
2963 }
2964 }
2965 return NULL;
2966 }
2967
getLoopBreakInstrllvmCFGStruct::CFGStructTraits2968 static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
2969 for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
2970 MachineInstr *instr = &(*iter);
2971 if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) {
2972 return instr;
2973 }
2974 }
2975 return NULL;
2976 }
2977
isReturnBlockllvmCFGStruct::CFGStructTraits2978 static bool isReturnBlock(MachineBasicBlock *blk) {
2979 MachineInstr *instr = getReturnInstr(blk);
2980 bool isReturn = (blk->succ_size() == 0);
2981 if (instr) {
2982 assert(isReturn);
2983 } else if (isReturn) {
2984 if (DEBUGME) {
2985 errs() << "BB" << blk->getNumber()
2986 <<" is return block without RETURN instr\n";
2987 }
2988 }
2989
2990 return isReturn;
2991 }
2992
2993 static MachineBasicBlock::iterator
getInstrPosllvmCFGStruct::CFGStructTraits2994 getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
2995 assert(instr->getParent() == blk && "instruction doesn't belong to block");
2996 MachineBasicBlock::iterator iter = blk->begin();
2997 MachineBasicBlock::iterator iterEnd = blk->end();
2998 while (&(*iter) != instr && iter != iterEnd) {
2999 ++iter;
3000 }
3001
3002 assert(iter != iterEnd);
3003 return iter;
3004 }//getInstrPos
3005
insertInstrBeforellvmCFGStruct::CFGStructTraits3006 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
3007 AMDGPUCFGStructurizer *passRep) {
3008 return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
3009 } //insertInstrBefore
3010
insertInstrBeforellvmCFGStruct::CFGStructTraits3011 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
3012 AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
3013 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3014 MachineInstr *newInstr =
3015 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
3016
3017 MachineBasicBlock::iterator res;
3018 if (blk->begin() != blk->end()) {
3019 blk->insert(blk->begin(), newInstr);
3020 } else {
3021 blk->push_back(newInstr);
3022 }
3023
3024 SHOWNEWINSTR(newInstr);
3025
3026 return newInstr;
3027 } //insertInstrBefore
3028
insertInstrEndllvmCFGStruct::CFGStructTraits3029 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
3030 AMDGPUCFGStructurizer *passRep) {
3031 insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
3032 } //insertInstrEnd
3033
insertInstrEndllvmCFGStruct::CFGStructTraits3034 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
3035 AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
3036 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3037 MachineInstr *newInstr = blk->getParent()
3038 ->CreateMachineInstr(tii->get(newOpcode), DL);
3039
3040 blk->push_back(newInstr);
3041 //assume the instruction doesn't take any reg operand ...
3042
3043 SHOWNEWINSTR(newInstr);
3044 } //insertInstrEnd
3045
insertInstrBeforellvmCFGStruct::CFGStructTraits3046 static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
3047 int newOpcode,
3048 AMDGPUCFGStructurizer *passRep) {
3049 MachineInstr *oldInstr = &(*instrPos);
3050 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3051 MachineBasicBlock *blk = oldInstr->getParent();
3052 MachineInstr *newInstr =
3053 blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
3054 DebugLoc());
3055
3056 blk->insert(instrPos, newInstr);
3057 //assume the instruction doesn't take any reg operand ...
3058
3059 SHOWNEWINSTR(newInstr);
3060 return newInstr;
3061 } //insertInstrBefore
3062
insertCondBranchBeforellvmCFGStruct::CFGStructTraits3063 static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
3064 int newOpcode,
3065 AMDGPUCFGStructurizer *passRep,
3066 DebugLoc DL) {
3067 MachineInstr *oldInstr = &(*instrPos);
3068 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3069 MachineBasicBlock *blk = oldInstr->getParent();
3070 MachineInstr *newInstr =
3071 blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
3072 DL);
3073
3074 blk->insert(instrPos, newInstr);
3075 MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
3076 false);
3077
3078 SHOWNEWINSTR(newInstr);
3079 //erase later oldInstr->eraseFromParent();
3080 } //insertCondBranchBefore
3081
insertCondBranchBeforellvmCFGStruct::CFGStructTraits3082 static void insertCondBranchBefore(MachineBasicBlock *blk,
3083 MachineBasicBlock::iterator insertPos,
3084 int newOpcode,
3085 AMDGPUCFGStructurizer *passRep,
3086 RegiT regNum,
3087 DebugLoc DL) {
3088 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3089
3090 MachineInstr *newInstr =
3091 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
3092
3093 //insert before
3094 blk->insert(insertPos, newInstr);
3095 MachineInstrBuilder(newInstr).addReg(regNum, false);
3096
3097 SHOWNEWINSTR(newInstr);
3098 } //insertCondBranchBefore
3099
insertCondBranchEndllvmCFGStruct::CFGStructTraits3100 static void insertCondBranchEnd(MachineBasicBlock *blk,
3101 int newOpcode,
3102 AMDGPUCFGStructurizer *passRep,
3103 RegiT regNum) {
3104 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3105 MachineInstr *newInstr =
3106 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
3107
3108 blk->push_back(newInstr);
3109 MachineInstrBuilder(newInstr).addReg(regNum, false);
3110
3111 SHOWNEWINSTR(newInstr);
3112 } //insertCondBranchEnd
3113
3114
insertAssignInstrBeforellvmCFGStruct::CFGStructTraits3115 static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
3116 AMDGPUCFGStructurizer *passRep,
3117 RegiT regNum, int regVal) {
3118 MachineInstr *oldInstr = &(*instrPos);
3119 const AMDGPUInstrInfo *tii =
3120 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
3121 MachineBasicBlock *blk = oldInstr->getParent();
3122 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
3123 regVal);
3124 blk->insert(instrPos, newInstr);
3125
3126 SHOWNEWINSTR(newInstr);
3127 } //insertAssignInstrBefore
3128
insertAssignInstrBeforellvmCFGStruct::CFGStructTraits3129 static void insertAssignInstrBefore(MachineBasicBlock *blk,
3130 AMDGPUCFGStructurizer *passRep,
3131 RegiT regNum, int regVal) {
3132 const AMDGPUInstrInfo *tii =
3133 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
3134
3135 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
3136 regVal);
3137 if (blk->begin() != blk->end()) {
3138 blk->insert(blk->begin(), newInstr);
3139 } else {
3140 blk->push_back(newInstr);
3141 }
3142
3143 SHOWNEWINSTR(newInstr);
3144
3145 } //insertInstrBefore
3146
insertCompareInstrBeforellvmCFGStruct::CFGStructTraits3147 static void insertCompareInstrBefore(MachineBasicBlock *blk,
3148 MachineBasicBlock::iterator instrPos,
3149 AMDGPUCFGStructurizer *passRep,
3150 RegiT dstReg, RegiT src1Reg,
3151 RegiT src2Reg) {
3152 const AMDGPUInstrInfo *tii =
3153 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
3154 MachineInstr *newInstr =
3155 blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
3156
3157 MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
3158 MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
3159 MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
3160
3161 blk->insert(instrPos, newInstr);
3162 SHOWNEWINSTR(newInstr);
3163
3164 } //insertCompareInstrBefore
3165
cloneSuccessorListllvmCFGStruct::CFGStructTraits3166 static void cloneSuccessorList(MachineBasicBlock *dstBlk,
3167 MachineBasicBlock *srcBlk) {
3168 for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
3169 iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
3170 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
3171 }
3172 } //cloneSuccessorList
3173
clonellvmCFGStruct::CFGStructTraits3174 static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
3175 MachineFunction *func = srcBlk->getParent();
3176 MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
3177 func->push_back(newBlk); //insert to function
3178 //newBlk->setNumber(srcBlk->getNumber());
3179 for (MachineBasicBlock::iterator iter = srcBlk->begin(),
3180 iterEnd = srcBlk->end();
3181 iter != iterEnd; ++iter) {
3182 MachineInstr *instr = func->CloneMachineInstr(iter);
3183 newBlk->push_back(instr);
3184 }
3185 return newBlk;
3186 }
3187
3188 //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
3189 //the AMDGPU instruction is not recognized as terminator fix this and retire
3190 //this routine
replaceInstrUseOfBlockWithllvmCFGStruct::CFGStructTraits3191 static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
3192 MachineBasicBlock *oldBlk,
3193 MachineBasicBlock *newBlk) {
3194 MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
3195 if (branchInstr && isCondBranch(branchInstr) &&
3196 getExplicitBranch(branchInstr) == oldBlk) {
3197 setExplicitBranch(branchInstr, newBlk);
3198 }
3199 }
3200
wrapupllvmCFGStruct::CFGStructTraits3201 static void wrapup(MachineBasicBlock *entryBlk) {
3202 assert((!entryBlk->getParent()->getJumpTableInfo()
3203 || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
3204 && "found a jump table");
3205
3206 //collect continue right before endloop
3207 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
3208 MachineBasicBlock::iterator pre = entryBlk->begin();
3209 MachineBasicBlock::iterator iterEnd = entryBlk->end();
3210 MachineBasicBlock::iterator iter = pre;
3211 while (iter != iterEnd) {
3212 if (pre->getOpcode() == AMDGPU::CONTINUE
3213 && iter->getOpcode() == AMDGPU::ENDLOOP) {
3214 contInstr.push_back(pre);
3215 }
3216 pre = iter;
3217 ++iter;
3218 } //end while
3219
3220 //delete continue right before endloop
3221 for (unsigned i = 0; i < contInstr.size(); ++i) {
3222 contInstr[i]->eraseFromParent();
3223 }
3224
3225 // TODO to fix up jump table so later phase won't be confused. if
3226 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
3227 // there isn't such an interface yet. alternatively, replace all the other
3228 // blocks in the jump table with the entryBlk //}
3229
3230 } //wrapup
3231
getDominatorTreellvmCFGStruct::CFGStructTraits3232 static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
3233 return &pass.getAnalysis<MachineDominatorTree>();
3234 }
3235
3236 static MachinePostDominatorTree*
getPostDominatorTreellvmCFGStruct::CFGStructTraits3237 getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
3238 return &pass.getAnalysis<MachinePostDominatorTree>();
3239 }
3240
getLoopInfollvmCFGStruct::CFGStructTraits3241 static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
3242 return &pass.getAnalysis<MachineLoopInfo>();
3243 }
3244 }; // template class CFGStructTraits
3245 } //end of namespace llvm
3246
3247 // createAMDGPUCFGPreparationPass- Returns a pass
createAMDGPUCFGPreparationPass(TargetMachine & tm)3248 FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm
3249 ) {
3250 return new AMDGPUCFGPrepare(tm );
3251 }
3252
runOnMachineFunction(MachineFunction & func)3253 bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
3254 return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
3255 *this,
3256 TRI);
3257 }
3258
3259 // createAMDGPUCFGStructurizerPass- Returns a pass
createAMDGPUCFGStructurizerPass(TargetMachine & tm)3260 FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm
3261 ) {
3262 return new AMDGPUCFGPerform(tm );
3263 }
3264
runOnMachineFunction(MachineFunction & func)3265 bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
3266 return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
3267 *this,
3268 TRI);
3269 }
3270
3271 //end of file newline goes below
3272
3273