1 //===- subzero/src/IceLoopAnalyzer.cpp - Loop Analysis --------------------===//
2 //
3 //                        The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the loop analysis on the CFG.
12 ///
13 //===----------------------------------------------------------------------===//
14 #include "IceLoopAnalyzer.h"
15 
16 #include "IceCfg.h"
17 #include "IceCfgNode.h"
18 
19 #include <algorithm>
20 
21 namespace Ice {
22 class LoopAnalyzer {
23 public:
24   explicit LoopAnalyzer(Cfg *Func);
25 
26   /// Use Tarjan's strongly connected components algorithm to identify outermost
27   /// to innermost loops. By deleting the head of the loop from the graph, inner
28   /// loops can be found. This assumes that the head node is not shared between
29   /// loops but instead all paths to the head come from 'continue' constructs.
30   ///
31   /// This only computes the loop nest depth within the function and does not
32   /// take into account whether the function was called from within a loop.
33   // TODO(ascull): this currently uses a extension of Tarjan's algorithm with
34   // is bounded linear. ncbray suggests another algorithm which is linear in
35   // practice but not bounded linear. I think it also finds dominators.
36   // http://lenx.100871.net/papers/loop-SAS.pdf
37 
getLoopBodies()38   CfgVector<CfgUnorderedSet<SizeT>> getLoopBodies() { return Loops; }
39 
40 private:
41   LoopAnalyzer() = delete;
42   LoopAnalyzer(const LoopAnalyzer &) = delete;
43   LoopAnalyzer &operator=(const LoopAnalyzer &) = delete;
44   void computeLoopNestDepth();
45 
46   using IndexT = uint32_t;
47   static constexpr IndexT UndefinedIndex = 0;
48   static constexpr IndexT FirstDefinedIndex = 1;
49 
50   // TODO(ascull): classify the other fields
51   class LoopNode {
52     LoopNode() = delete;
53     LoopNode operator=(const LoopNode &) = delete;
54 
55   public:
LoopNode(CfgNode * BB)56     explicit LoopNode(CfgNode *BB) : BB(BB) { reset(); }
57     LoopNode(const LoopNode &) = default;
58 
59     void reset();
60 
61     NodeList::const_iterator successorsEnd() const;
currentSuccessor() const62     NodeList::const_iterator currentSuccessor() const { return Succ; }
nextSuccessor()63     void nextSuccessor() { ++Succ; }
64 
visit(IndexT VisitIndex)65     void visit(IndexT VisitIndex) { Index = LowLink = VisitIndex; }
isVisited() const66     bool isVisited() const { return Index != UndefinedIndex; }
getIndex() const67     IndexT getIndex() const { return Index; }
68 
tryLink(IndexT NewLink)69     void tryLink(IndexT NewLink) {
70       if (NewLink < LowLink)
71         LowLink = NewLink;
72     }
getLowLink() const73     IndexT getLowLink() const { return LowLink; }
74 
setOnStack(bool NewValue=true)75     void setOnStack(bool NewValue = true) { OnStack = NewValue; }
isOnStack() const76     bool isOnStack() const { return OnStack; }
77 
setDeleted()78     void setDeleted() { Deleted = true; }
isDeleted() const79     bool isDeleted() const { return Deleted; }
80 
81     void incrementLoopNestDepth();
82     bool hasSelfEdge() const;
83 
getNode()84     CfgNode *getNode() { return BB; }
85 
86   private:
87     CfgNode *BB;
88     NodeList::const_iterator Succ;
89     IndexT Index;
90     IndexT LowLink;
91     bool OnStack;
92     bool Deleted = false;
93   };
94 
95   using LoopNodeList = CfgVector<LoopNode>;
96   using LoopNodePtrList = CfgVector<LoopNode *>;
97 
98   /// Process the node as part as part of Tarjan's algorithm and return either a
99   /// node to recurse into or nullptr when the node has been fully processed.
100   LoopNode *processNode(LoopNode &Node);
101 
102   /// The function to analyze for loops.
103   Cfg *const Func;
104   /// A list of decorated nodes in the same order as Func->getNodes() which
105   /// means the node's index will also be valid in this list.
106   LoopNodeList AllNodes;
107   /// This is used as a replacement for the call stack.
108   LoopNodePtrList WorkStack;
109   /// Track which loop a node belongs to.
110   LoopNodePtrList LoopStack;
111   /// The index to assign to the next visited node.
112   IndexT NextIndex = FirstDefinedIndex;
113   /// The number of nodes which have been marked deleted. This is used to track
114   /// when the iteration should end.
115   LoopNodePtrList::size_type NumDeletedNodes = 0;
116 
117   /// All the Loops, in descending order of size
118   CfgVector<CfgUnorderedSet<SizeT>> Loops;
119 };
reset()120 void LoopAnalyzer::LoopNode::reset() {
121   if (Deleted)
122     return;
123   Succ = BB->getOutEdges().begin();
124   Index = LowLink = UndefinedIndex;
125   OnStack = false;
126 }
127 
successorsEnd() const128 NodeList::const_iterator LoopAnalyzer::LoopNode::successorsEnd() const {
129   return BB->getOutEdges().end();
130 }
131 
incrementLoopNestDepth()132 void LoopAnalyzer::LoopNode::incrementLoopNestDepth() {
133   BB->incrementLoopNestDepth();
134 }
135 
hasSelfEdge() const136 bool LoopAnalyzer::LoopNode::hasSelfEdge() const {
137   for (CfgNode *Succ : BB->getOutEdges()) {
138     if (Succ == BB)
139       return true;
140   }
141   return false;
142 }
143 
LoopAnalyzer(Cfg * Fn)144 LoopAnalyzer::LoopAnalyzer(Cfg *Fn) : Func(Fn) {
145   const NodeList &Nodes = Func->getNodes();
146 
147   // Allocate memory ahead of time. This is why a vector is used instead of a
148   // stack which doesn't support reserving (or bulk erasure used below).
149   AllNodes.reserve(Nodes.size());
150   WorkStack.reserve(Nodes.size());
151   LoopStack.reserve(Nodes.size());
152 
153   // Create the LoopNodes from the function's CFG
154   for (CfgNode *Node : Nodes)
155     AllNodes.emplace_back(Node);
156   computeLoopNestDepth();
157 }
158 
computeLoopNestDepth()159 void LoopAnalyzer::computeLoopNestDepth() {
160   assert(AllNodes.size() == Func->getNodes().size());
161   assert(NextIndex == FirstDefinedIndex);
162   assert(NumDeletedNodes == 0);
163 
164   while (NumDeletedNodes < AllNodes.size()) {
165     // Prepare to run Tarjan's
166     for (LoopNode &Node : AllNodes)
167       Node.reset();
168 
169     assert(WorkStack.empty());
170     assert(LoopStack.empty());
171 
172     for (LoopNode &Node : AllNodes) {
173       if (Node.isDeleted() || Node.isVisited())
174         continue;
175 
176       WorkStack.push_back(&Node);
177 
178       while (!WorkStack.empty()) {
179         LoopNode &WorkNode = *WorkStack.back();
180         if (LoopNode *Succ = processNode(WorkNode))
181           WorkStack.push_back(Succ);
182         else
183           WorkStack.pop_back();
184       }
185     }
186   }
187 }
188 
189 LoopAnalyzer::LoopNode *
processNode(LoopAnalyzer::LoopNode & Node)190 LoopAnalyzer::processNode(LoopAnalyzer::LoopNode &Node) {
191   if (!Node.isVisited()) {
192     Node.visit(NextIndex++);
193     LoopStack.push_back(&Node);
194     Node.setOnStack();
195   } else {
196     // Returning to a node after having recursed into Succ so continue
197     // iterating through successors after using the Succ.LowLink value that was
198     // computed in the recursion.
199     LoopNode &Succ = AllNodes[(*Node.currentSuccessor())->getIndex()];
200     Node.tryLink(Succ.getLowLink());
201     Node.nextSuccessor();
202   }
203 
204   // Visit the successors and recurse into unvisited nodes. The recursion could
205   // cause the iteration to be suspended but it will resume as the stack is
206   // unwound.
207   auto SuccEnd = Node.successorsEnd();
208   for (; Node.currentSuccessor() != SuccEnd; Node.nextSuccessor()) {
209     LoopNode &Succ = AllNodes[(*Node.currentSuccessor())->getIndex()];
210 
211     if (Succ.isDeleted())
212       continue;
213 
214     if (!Succ.isVisited())
215       return &Succ;
216     else if (Succ.isOnStack())
217       Node.tryLink(Succ.getIndex());
218   }
219 
220   if (Node.getLowLink() != Node.getIndex())
221     return nullptr;
222 
223   // Single node means no loop in the CFG
224   if (LoopStack.back() == &Node) {
225     LoopStack.back()->setOnStack(false);
226     if (Node.hasSelfEdge())
227       LoopStack.back()->incrementLoopNestDepth();
228     LoopStack.back()->setDeleted();
229     ++NumDeletedNodes;
230     LoopStack.pop_back();
231     return nullptr;
232   }
233 
234   // Reaching here means a loop has been found! It consists of the nodes on the
235   // top of the stack, down until the current node being processed, Node, is
236   // found.
237   for (auto It = LoopStack.rbegin(); It != LoopStack.rend(); ++It) {
238     (*It)->setOnStack(false);
239     (*It)->incrementLoopNestDepth();
240     // Remove the loop from the stack and delete the head node
241     if (*It == &Node) {
242       (*It)->setDeleted();
243       ++NumDeletedNodes;
244       CfgUnorderedSet<SizeT> LoopNodes;
245       for (auto LoopIter = It.base() - 1; LoopIter != LoopStack.end();
246            ++LoopIter) {
247         LoopNodes.insert((*LoopIter)->getNode()->getIndex());
248       }
249       Loops.push_back(LoopNodes);
250       LoopStack.erase(It.base() - 1, LoopStack.end());
251       break;
252     }
253   }
254 
255   return nullptr;
256 }
ComputeLoopInfo(Cfg * Func)257 CfgVector<Loop> ComputeLoopInfo(Cfg *Func) {
258   auto LoopBodies = LoopAnalyzer(Func).getLoopBodies();
259 
260   CfgVector<Loop> Loops;
261   Loops.reserve(LoopBodies.size());
262   std::sort(
263       LoopBodies.begin(), LoopBodies.end(),
264       [](const CfgUnorderedSet<SizeT> &A, const CfgUnorderedSet<SizeT> &B) {
265         return A.size() > B.size();
266       });
267   for (auto &LoopBody : LoopBodies) {
268     CfgNode *Header = nullptr;
269     bool IsSimpleLoop = true;
270     for (auto NodeIndex : LoopBody) {
271       CfgNode *Cur = Func->getNodes()[NodeIndex];
272       for (auto *Prev : Cur->getInEdges()) {
273         if (LoopBody.find(Prev->getIndex()) ==
274             LoopBody.end()) { // coming from outside
275           if (Header == nullptr) {
276             Header = Cur;
277           } else {
278             Header = nullptr;
279             IsSimpleLoop = false;
280             break;
281           }
282         }
283       }
284       if (!IsSimpleLoop) {
285         break;
286       }
287     }
288     if (!IsSimpleLoop)
289       continue; // To next potential loop
290 
291     CfgNode *PreHeader = nullptr;
292     for (auto *Prev : Header->getInEdges()) {
293       if (LoopBody.find(Prev->getIndex()) == LoopBody.end()) {
294         if (PreHeader == nullptr) {
295           PreHeader = Prev;
296         } else {
297           PreHeader = nullptr;
298           break;
299         }
300       }
301     }
302 
303     Loops.emplace_back(Header, PreHeader, LoopBody);
304   }
305   return Loops;
306 }
307 
308 } // end of namespace Ice
309