1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
18 #define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
19 
20 #include "base/arena_bit_vector.h"
21 #include "base/arena_containers.h"
22 #include "base/bit_vector-inl.h"
23 #include "nodes.h"
24 
25 namespace art {
26 
27 class InductionVarRange;
28 
29 static const bool kSuperblockClonerLogging = false;
30 
31 // Represents an edge between two HBasicBlocks.
32 //
33 // Note: objects of this class are small - pass them by value.
34 class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> {
35  public:
HEdge(HBasicBlock * from,HBasicBlock * to)36   HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) {
37     DCHECK_NE(to_, kInvalidBlockId);
38     DCHECK_NE(from_, kInvalidBlockId);
39   }
HEdge(uint32_t from,uint32_t to)40   HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) {
41     DCHECK_NE(to_, kInvalidBlockId);
42     DCHECK_NE(from_, kInvalidBlockId);
43   }
HEdge()44   HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {}
45 
GetFrom()46   uint32_t GetFrom() const { return from_; }
GetTo()47   uint32_t GetTo() const { return to_; }
48 
49   bool operator==(const HEdge& other) const {
50     return this->from_ == other.from_ && this->to_ == other.to_;
51   }
52 
53   bool operator!=(const HEdge& other) const { return !operator==(other); }
54   void Dump(std::ostream& stream) const;
55 
56   // Returns whether an edge represents a valid edge in CF graph: whether the from_ block
57   // has to_ block as a successor.
IsValid()58   bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; }
59 
60  private:
61   // Predecessor block id.
62   uint32_t from_;
63   // Successor block id.
64   uint32_t to_;
65 };
66 
67 // Returns whether a HEdge edge corresponds to an existing edge in the graph.
IsEdgeValid(HEdge edge,HGraph * graph)68 inline bool IsEdgeValid(HEdge edge, HGraph* graph) {
69   if (!edge.IsValid()) {
70     return false;
71   }
72   uint32_t from = edge.GetFrom();
73   uint32_t to = edge.GetTo();
74   if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) {
75     return false;
76   }
77 
78   HBasicBlock* block_from = graph->GetBlocks()[from];
79   HBasicBlock* block_to = graph->GetBlocks()[to];
80   if (block_from == nullptr || block_to == nullptr) {
81     return false;
82   }
83 
84   return block_from->HasSuccessor(block_to, 0);
85 }
86 
87 // SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without
88 // fine grain manipulation with IR; data flow and graph properties are resolved/adjusted
89 // automatically. The clone transformation is defined by specifying a set of basic blocks to copy
90 // and a set of rules how to treat edges, remap their successors. By using this approach such
91 // optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented.
92 //
93 // The idea of the transformation is based on "Superblock cloning" technique described in the book
94 // "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University
95 // Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective
96 // Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al.
97 // J Supercomput (1993) 7: 229. doi:10.1007/BF01205185.
98 //
99 // There are two states of the IR graph: original graph (before the transformation) and
100 // copy graph (after).
101 //
102 // Before the transformation:
103 // Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original
104 // graph into 4 categories/sets (use the following notation for edges: "(pred, succ)",
105 // where pred, succ - basic blocks):
106 //  - internal - pred, succ are members of ‘orig_bb_set’.
107 //  - outside  - pred, succ are not members of ‘orig_bb_set’.
108 //  - incoming - pred is not a member of ‘orig_bb_set’, succ is.
109 //  - outgoing - pred is a member of ‘orig_bb_set’, succ is not.
110 //
111 // Transformation:
112 //
113 // 1. Initial cloning:
114 //   1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks
115 //        form ‘copy_bb_set’.
116 //   1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the
117 //        copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge
118 //        set.
119 //   1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the
120 //        copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge
121 //        set.
122 // 2. Successors remapping.
123 //   2.1. 'remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should
124 //        be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)).
125 //   2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors
126 //        should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)).
127 //   2.3. 'remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph
128 //        whose successors should be remapped to copies nodes: ((X, Y) will be transformed into
129 //        (X, Y_1)).
130 // 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc).
131 // 4. Fix/resolve data flow.
132 // 5. Do cleanups (DCE, critical edges splitting, etc).
133 //
134 class SuperblockCloner : public ValueObject {
135  public:
136   // TODO: Investigate optimal types for the containers.
137   using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>;
138   using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>;
139   using HBasicBlockSet = ArenaBitVector;
140   using HEdgeSet = ArenaHashSet<HEdge>;
141 
142   SuperblockCloner(HGraph* graph,
143                    const HBasicBlockSet* orig_bb_set,
144                    HBasicBlockMap* bb_map,
145                    HInstructionMap* hir_map,
146                    InductionVarRange* induction_range);
147 
148   // Sets edge successor remapping info specified by corresponding edge sets.
149   void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
150                                  const HEdgeSet* remap_copy_internal,
151                                  const HEdgeSet* remap_incoming);
152 
153   // Returns whether the specified subgraph is copyable.
154   // TODO: Start from small range of graph patterns then extend it.
155   bool IsSubgraphClonable() const;
156 
157   // Returns whether selected subgraph satisfies the criteria for fast data flow resolution
158   // when iterative DF algorithm is not required and dominators/instructions inputs can be
159   // trivially adjusted.
160   //
161   // TODO: formally describe the criteria.
162   //
163   // Loop peeling and unrolling satisfy the criteria.
164   bool IsFastCase() const;
165 
166   // Runs the copy algorithm according to the description.
167   void Run();
168 
169   // Cleans up the graph after transformation: splits critical edges, recalculates control flow
170   // information (back-edges, dominators, loop info, etc), eliminates redundant phis.
171   void CleanUp();
172 
173   // Returns a clone of a basic block (orig_block).
174   //
175   //  - The copy block will have no successors/predecessors; they should be set up manually.
176   //  - For each instruction in the orig_block a copy is created and inserted into the copy block;
177   //    this correspondence is recorded in the map (old instruction, new instruction).
178   //  - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the
179   //    same, as in the original block, PHIs do not reflect a correct correspondence between the
180   //    value and predecessors (as the copy block has no predecessors by now), etc.
181   HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block);
182 
183   // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_
184   // and hir_map_.
185   void CloneBasicBlocks();
186 
GetInstrCopy(HInstruction * orig_instr)187   HInstruction* GetInstrCopy(HInstruction* orig_instr) const {
188     auto copy_input_iter = hir_map_->find(orig_instr);
189     DCHECK(copy_input_iter != hir_map_->end());
190     return copy_input_iter->second;
191   }
192 
GetBlockCopy(HBasicBlock * orig_block)193   HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const {
194     HBasicBlock* block = bb_map_->Get(orig_block);
195     DCHECK(block != nullptr);
196     return block;
197   }
198 
GetInstrOrig(HInstruction * copy_instr)199   HInstruction* GetInstrOrig(HInstruction* copy_instr) const {
200     for (auto it : *hir_map_) {
201       if (it.second == copy_instr) {
202         return it.first;
203       }
204     }
205     return nullptr;
206   }
207 
IsInOrigBBSet(uint32_t block_id)208   bool IsInOrigBBSet(uint32_t block_id) const {
209     return orig_bb_set_.IsBitSet(block_id);
210   }
211 
IsInOrigBBSet(const HBasicBlock * block)212   bool IsInOrigBBSet(const HBasicBlock* block) const {
213     return IsInOrigBBSet(block->GetBlockId());
214   }
215 
216   // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops,
217   // dominators) needs to be adjusted.
GetRegionToBeAdjusted()218   HLoopInformation* GetRegionToBeAdjusted() const {
219     return outer_loop_;
220   }
221 
222  private:
223   // Fills the 'exits' vector with the subgraph exits.
224   void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const;
225 
226   // Finds and records information about the area in the graph for which control flow (back edges,
227   // loops, dominators) needs to be adjusted.
228   void FindAndSetLocalAreaForAdjustments();
229 
230   // Remaps edges' successors according to the info specified in the edges sets.
231   //
232   // Only edge successors/predecessors and phis' input records (to have a correspondence between
233   // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither
234   // phis' nor instructions' inputs values are resolved.
235   void RemapEdgesSuccessors();
236 
237   // Adjusts control flow (back edges, loops, dominators) for the local area defined by
238   // FindAndSetLocalAreaForAdjustments.
239   void AdjustControlFlowInfo();
240 
241   // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in
242   // the SSA form.
243   void ResolveDataFlow();
244 
245   //
246   // Helpers for live-outs processing and Subgraph-closed SSA.
247   //
248   //  - live-outs - values which are defined inside the subgraph and have uses outside.
249   //  - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph
250   //    have no outside uses except for the phi-nodes in the subgraph exits.
251   //
252   // Note: now if the subgraph has live-outs it is only clonable if it has a single exit; this
253   // makes the subgraph-closed SSA form construction much easier.
254   //
255   // TODO: Support subgraphs with live-outs and multiple exits.
256   //
257 
258   // For each live-out value 'val' in the region puts a record <val, val> into the map.
259   // Returns whether all of the instructions in the subgraph are clonable.
260   bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const;
261 
262   // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit.
263   //
264   // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates
265   // the record in the map to <val, phi> and replaces all outside uses with this phi.
266   void ConstructSubgraphClosedSSA();
267 
268   // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding
269   // (<val, phi>) phi after the cloning is done.
270   void FixSubgraphClosedSSAAfterCloning();
271 
272   //
273   // Helpers for CloneBasicBlock.
274   //
275 
276   // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the
277   // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original.
278   void ReplaceInputsWithCopies(HInstruction* copy_instr);
279 
280   // Recursively clones the environment for the copy instruction. If the input of the original
281   // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise
282   // leaves it the same as original.
283   void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env);
284 
285   //
286   // Helpers for RemapEdgesSuccessors.
287   //
288 
289   // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and
290   // copy_succ.
291   void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
292 
293   // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ.
294   void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
295 
296   // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ.
297   void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
298 
299   //
300   // Local versions of control flow calculation/adjustment routines.
301   //
302 
303   void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set);
304   void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set);
305   GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set);
306   void CleanUpControlFlow();
307 
308   //
309   // Helpers for ResolveDataFlow
310   //
311 
312   // Resolves the inputs of the phi.
313   void ResolvePhi(HPhi* phi);
314 
315   // Update induction range after when fixing SSA.
316   void UpdateInductionRangeInfoOf(
317       HInstruction* user, HInstruction* old_instruction, HInstruction* replacement);
318 
319   //
320   // Debug and logging methods.
321   //
322   void CheckInstructionInputsRemapping(HInstruction* orig_instr);
323   bool CheckRemappingInfoIsValid();
324   void VerifyGraph();
325   void DumpInputSets();
326 
GetBlockById(uint32_t block_id)327   HBasicBlock* GetBlockById(uint32_t block_id) const {
328     DCHECK(block_id < graph_->GetBlocks().size());
329     HBasicBlock* block = graph_->GetBlocks()[block_id];
330     DCHECK(block != nullptr);
331     return block;
332   }
333 
334   HGraph* const graph_;
335   ArenaAllocator* const arena_;
336 
337   // Set of basic block in the original graph to be copied.
338   HBasicBlockSet orig_bb_set_;
339 
340   // Sets of edges which require successors remapping.
341   const HEdgeSet* remap_orig_internal_;
342   const HEdgeSet* remap_copy_internal_;
343   const HEdgeSet* remap_incoming_;
344 
345   // Correspondence map for blocks: (original block, copy block).
346   HBasicBlockMap* bb_map_;
347   // Correspondence map for instructions: (original HInstruction, copy HInstruction).
348   HInstructionMap* hir_map_;
349   // As a result of cloning, the induction range analysis information can be invalidated
350   // and must be updated. If not null, the cloner updates it for changed instructions.
351   InductionVarRange* induction_range_;
352   // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted.
353   HLoopInformation* outer_loop_;
354   HBasicBlockSet outer_loop_bb_set_;
355 
356   HInstructionMap live_outs_;
357 
358   ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
359   ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected);
360 
361   DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
362 };
363 
364 // Helper class to perform loop peeling/unrolling.
365 //
366 // This helper should be used when correspondence map between original and copied
367 // basic blocks/instructions are demanded.
368 class PeelUnrollHelper : public ValueObject {
369  public:
PeelUnrollHelper(HLoopInformation * info,SuperblockCloner::HBasicBlockMap * bb_map,SuperblockCloner::HInstructionMap * hir_map,InductionVarRange * induction_range)370   PeelUnrollHelper(HLoopInformation* info,
371                    SuperblockCloner::HBasicBlockMap* bb_map,
372                    SuperblockCloner::HInstructionMap* hir_map,
373                    InductionVarRange* induction_range) :
374       loop_info_(info),
375       cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map, induction_range) {
376     // For now do peeling/unrolling only for natural loops.
377     DCHECK(!info->IsIrreducible());
378   }
379 
380   // Returns whether the loop can be peeled/unrolled (static function).
381   static bool IsLoopClonable(HLoopInformation* loop_info);
382 
383   // Returns whether the loop can be peeled/unrolled.
IsLoopClonable()384   bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); }
385 
DoPeeling()386   HBasicBlock* DoPeeling() { return DoPeelUnrollImpl(/* to_unroll= */ false); }
DoUnrolling()387   HBasicBlock* DoUnrolling() { return DoPeelUnrollImpl(/* to_unroll= */ true); }
GetRegionToBeAdjusted()388   HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); }
389 
390  protected:
391   // Applies loop peeling/unrolling for the loop specified by 'loop_info'.
392   //
393   // Depending on 'do_unroll' either unrolls loop by 2 or peels one iteration from it.
394   HBasicBlock* DoPeelUnrollImpl(bool to_unroll);
395 
396  private:
397   HLoopInformation* loop_info_;
398   SuperblockCloner cloner_;
399 
400   DISALLOW_COPY_AND_ASSIGN(PeelUnrollHelper);
401 };
402 
403 // Helper class to perform loop peeling/unrolling.
404 //
405 // This helper should be used when there is no need to get correspondence information between
406 // original and copied basic blocks/instructions.
407 class PeelUnrollSimpleHelper : public ValueObject {
408  public:
409   PeelUnrollSimpleHelper(HLoopInformation* info, InductionVarRange* induction_range);
IsLoopClonable()410   bool IsLoopClonable() const { return helper_.IsLoopClonable(); }
DoPeeling()411   HBasicBlock* DoPeeling() { return helper_.DoPeeling(); }
DoUnrolling()412   HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); }
GetRegionToBeAdjusted()413   HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); }
414 
GetBasicBlockMap()415   const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; }
GetInstructionMap()416   const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; }
417 
418  private:
419   SuperblockCloner::HBasicBlockMap bb_map_;
420   SuperblockCloner::HInstructionMap hir_map_;
421   PeelUnrollHelper helper_;
422 
423   DISALLOW_COPY_AND_ASSIGN(PeelUnrollSimpleHelper);
424 };
425 
426 // Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info.
427 void CollectRemappingInfoForPeelUnroll(bool to_unroll,
428                                        HLoopInformation* loop_info,
429                                        SuperblockCloner::HEdgeSet* remap_orig_internal,
430                                        SuperblockCloner::HEdgeSet* remap_copy_internal,
431                                        SuperblockCloner::HEdgeSet* remap_incoming);
432 
433 // Returns whether blocks from 'work_set' are reachable from the rest of the graph.
434 //
435 // Returns whether such a set 'outer_entries' of basic blocks exists that:
436 // - each block from 'outer_entries' is not from 'work_set'.
437 // - each block from 'work_set' is reachable from at least one block from 'outer_entries'.
438 //
439 // After the function returns work_set contains only blocks from the original 'work_set'
440 // which are unreachable from the rest of the graph.
441 bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph);
442 
443 // Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
444 // graph.
445 HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2);
446 }  // namespace art
447 
448 namespace std {
449 
450 template <>
451 struct hash<art::HEdge> {
452   size_t operator()(art::HEdge const& x) const noexcept  {
453     // Use Cantor pairing function as the hash function.
454     size_t a = x.GetFrom();
455     size_t b = x.GetTo();
456     return (a + b) * (a + b + 1) / 2 + b;
457   }
458 };
459 ostream& operator<<(ostream& os, const art::HEdge& e);
460 
461 }  // namespace std
462 
463 #endif  // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
464