/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_
#define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_

#include <algorithm>
#include <bitset>
#include <optional>
#include <sstream>

#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/arena_containers.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
#include "base/globals.h"
#include "base/iteration_range.h"
#include "base/mutex.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
#include "base/transform_iterator.h"
#include "nodes.h"

namespace art {

// Helper for transforming blocks to block_ids.
class BlockToBlockIdTransformer {
 public:
  BlockToBlockIdTransformer(BlockToBlockIdTransformer&&) = default;
  BlockToBlockIdTransformer(const BlockToBlockIdTransformer&) = default;
  BlockToBlockIdTransformer() {}

  inline uint32_t operator()(const HBasicBlock* b) const {
    return b->GetBlockId();
  }
};

// Helper for transforming block ids to blocks.
class BlockIdToBlockTransformer {
 public:
  BlockIdToBlockTransformer(BlockIdToBlockTransformer&&) = default;
  BlockIdToBlockTransformer(const BlockIdToBlockTransformer&) = default;
  explicit BlockIdToBlockTransformer(const HGraph* graph) : graph_(graph) {}

  inline const HGraph* GetGraph() const {
    return graph_;
  }

  inline HBasicBlock* GetBlock(uint32_t id) const {
    DCHECK_LT(id, graph_->GetBlocks().size()) << graph_->PrettyMethod();
    HBasicBlock* blk = graph_->GetBlocks()[id];
    DCHECK(blk != nullptr);
    return blk;
  }

  inline HBasicBlock* operator()(uint32_t id) const {
    return GetBlock(id);
  }

 private:
  const HGraph* const graph_;
};

class BlockIdFilterThunk {
 public:
  explicit BlockIdFilterThunk(const BitVector& i) : inner_(i) {}
  BlockIdFilterThunk(BlockIdFilterThunk&& other) noexcept = default;
  BlockIdFilterThunk(const BlockIdFilterThunk&) = default;

  bool operator()(const HBasicBlock* b) const {
    return inner_.IsBitSet(b->GetBlockId());
  }

 private:
  const BitVector& inner_;
};

// A representation of a particular section of the graph. The graph is split
// into an excluded and an included area, and the subgraph is used to track
// escapes.
//
// This object is a view of the graph and is not updated as the graph is
// changed.
//
// This is implemented by removing various escape points from the subgraph
// using the 'RemoveBlock' function. Once all required blocks have been
// removed, 'Finalize' the subgraph. This will extend the removed area to
// include:
// (1) Any block which inevitably leads to (post-dominates) a removed block
// (2) Any block which lies between two removed blocks
//
// This allows us to create a set of 'ExcludedCohorts' which are the
// well-connected subsets of the graph made up of removed blocks. These cohorts
// have a set of entry and exit blocks which act as the boundary of the cohort.
// Since we remove blocks between two excluded blocks it is impossible for any
// cohort-exit block to reach any cohort-entry block. This means we can use the
// boundary between the cohort and the rest of the graph to insert
// materialization blocks for partial LSE.
//
// TODO We really should expand this to take into account where the object
// allocation takes place directly. Currently we always act as though it were
// allocated in the entry block. This is a massively simplifying assumption but
// means we can't partially remove objects that are repeatedly allocated in a
// loop.
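//
// A minimal usage sketch (illustrative only; 'graph', 'allocator' and
// 'escape_block' are hypothetical values owned by the caller):
//
//   if (ExecutionSubgraph::CanAnalyse(graph)) {
//     ExecutionSubgraph subgraph(graph, allocator);
//     subgraph.RemoveBlock(escape_block);  // Exclude each escaping block.
//     subgraph.Finalize();                 // Prune and remove concavity.
//     if (subgraph.IsValid()) {
//       for (const ExecutionSubgraph::ExcludedCohort& cohort :
//            subgraph.GetExcludedCohorts()) {
//         // cohort.EntryBlocks()/ExitBlocks() form the boundary at which
//         // materialization blocks can be inserted.
//       }
//     }
//   }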
class ExecutionSubgraph : public DeletableArenaObject<kArenaAllocLSA> {
 public:
  using BitVecBlockRange =
      IterationRange<TransformIterator<BitVector::IndexIterator, BlockIdToBlockTransformer>>;
  using FilteredBitVecBlockRange = IterationRange<
      FilterIterator<ArenaVector<HBasicBlock*>::const_iterator, BlockIdFilterThunk>>;

  // A set of connected blocks which have been removed from the
  // ExecutionSubgraph. See above comment for explanation.
  class ExcludedCohort : public ArenaObject<kArenaAllocLSA> {
   public:
    ExcludedCohort(ExcludedCohort&&) = default;
    ExcludedCohort(const ExcludedCohort&) = delete;
    explicit ExcludedCohort(ScopedArenaAllocator* allocator, HGraph* graph)
        : graph_(graph),
          entry_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
          exit_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
          blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA) {}

    ~ExcludedCohort() = default;

    // All blocks in the cohort.
    BitVecBlockRange Blocks() const {
      return BlockIterRange(blocks_);
    }

    // Blocks that have predecessors outside of the cohort. These blocks will
    // need to have PHIs/control-flow added to create the escaping value.
    BitVecBlockRange EntryBlocks() const {
      return BlockIterRange(entry_blocks_);
    }

    FilteredBitVecBlockRange EntryBlocksReversePostOrder() const {
      return Filter(MakeIterationRange(graph_->GetReversePostOrder()),
                    BlockIdFilterThunk(entry_blocks_));
    }

    bool IsEntryBlock(const HBasicBlock* blk) const {
      return entry_blocks_.IsBitSet(blk->GetBlockId());
    }

    // Blocks that have successors outside of the cohort. The successors of
    // these blocks will need to have PHIs to restore state.
    BitVecBlockRange ExitBlocks() const {
      return BlockIterRange(exit_blocks_);
    }

    bool operator==(const ExcludedCohort& other) const {
      return blocks_.Equal(&other.blocks_);
    }

    bool ContainsBlock(const HBasicBlock* blk) const {
      return blocks_.IsBitSet(blk->GetBlockId());
    }

    // Returns true if there is a path from 'blk' to any block in this cohort.
    // NB blocks contained within the cohort are not considered to be succeeded
    // by the cohort (i.e. this function will return false).
    bool SucceedsBlock(const HBasicBlock* blk) const {
      if (ContainsBlock(blk)) {
        return false;
      }
      auto idxs = entry_blocks_.Indexes();
      return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t entry) -> bool {
        return blk->GetGraph()->PathBetween(blk->GetBlockId(), entry);
      });
    }

    // Returns true if there is a path from any block in this cohort to 'blk'.
    // NB blocks contained within the cohort are not considered to be preceded
    // by the cohort (i.e. this function will return false).
    bool PrecedesBlock(const HBasicBlock* blk) const {
      if (ContainsBlock(blk)) {
        return false;
      }
      auto idxs = exit_blocks_.Indexes();
      return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t exit) -> bool {
        return blk->GetGraph()->PathBetween(exit, blk->GetBlockId());
      });
    }

    void Dump(std::ostream& os) const;

   private:
    BitVecBlockRange BlockIterRange(const ArenaBitVector& bv) const {
      auto indexes = bv.Indexes();
      BitVecBlockRange res = MakeTransformRange(indexes, BlockIdToBlockTransformer(graph_));
      return res;
    }

    ExcludedCohort() = delete;

    HGraph* graph_;
    ArenaBitVector entry_blocks_;
    ArenaBitVector exit_blocks_;
    ArenaBitVector blocks_;

    friend class ExecutionSubgraph;
    friend class LoadStoreAnalysisTest;
  };

  // The number of successors we can track on a single block. Graphs which
  // contain a block with a branching factor greater than this will not be
  // analysed. This is used both to limit the memory usage of the analysis to
  // reasonable levels and to ensure that the analysis will complete in a
  // reasonable amount of time. It also simplifies the implementation somewhat
  // to have a constant branching factor.
  static constexpr uint32_t kMaxFilterableSuccessors = 8;

  // Instantiate a subgraph. The subgraph can be instantiated only if partial-escape
  // analysis is desired (e.g. not when being used for instruction scheduling) and
  // when the branching factor in the graph is not too high. These conditions
  // are determined once and passed down for performance reasons.
  ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator);

  void Invalidate() {
    valid_ = false;
  }

  // A block is contained by the ExecutionSubgraph if it is reachable. This
  // means it has not been removed explicitly or via pruning/concavity removal.
  // Finalization is needed to call this function.
  // See RemoveConcavity and Prune for more information.
  bool ContainsBlock(const HBasicBlock* blk) const {
    DCHECK(!finalized_ || !needs_prune_) << "finalized: " << finalized_;
    if (!valid_) {
      return false;
    }
    return !unreachable_blocks_.IsBitSet(blk->GetBlockId());
  }

  // Mark the block as removed from the subgraph.
  void RemoveBlock(const HBasicBlock* to_remove);

  // Called when no more updates will be done to the subgraph. Calculates the
  // final subgraph.
  void Finalize() {
    Prune();
    RemoveConcavity();
    finalized_ = true;
  }

  BitVecBlockRange UnreachableBlocks() const {
    auto idxs = unreachable_blocks_.Indexes();
    return MakeTransformRange(idxs, BlockIdToBlockTransformer(graph_));
  }

  // Returns true if all allowed execution paths from start eventually reach the
  // graph's exit block (or diverge).
  bool IsValid() const {
    return valid_;
  }

  ArrayRef<const ExcludedCohort> GetExcludedCohorts() const {
    DCHECK(!valid_ || !needs_prune_);
    if (!valid_ || !unreachable_blocks_.IsAnyBitSet()) {
      return ArrayRef<const ExcludedCohort>();
    } else {
      return ArrayRef<const ExcludedCohort>(*excluded_list_);
    }
  }

  // Helper class to create reachable blocks iterator.
  class ContainsFunctor {
   public:
    bool operator()(HBasicBlock* blk) const {
      return subgraph_->ContainsBlock(blk);
    }

   private:
    explicit ContainsFunctor(const ExecutionSubgraph* subgraph) : subgraph_(subgraph) {}
    const ExecutionSubgraph* const subgraph_;
    friend class ExecutionSubgraph;
  };
  // Returns an iterator over reachable blocks (filtered as we go). This is primarily for testing.
  IterationRange<
      FilterIterator<typename ArenaVector<HBasicBlock*>::const_iterator, ContainsFunctor>>
  ReachableBlocks() const {
    return Filter(MakeIterationRange(graph_->GetBlocks()), ContainsFunctor(this));
  }

  static bool CanAnalyse(HGraph* graph) {
    // If there are any blocks with more than kMaxFilterableSuccessors we can't
    // analyse the graph. We avoid this case to prevent excessive memory and
    // time usage while allowing a simpler algorithm with a fixed-width
    // branching factor.
    return std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* blk) {
      return blk == nullptr || blk->GetSuccessors().size() <= kMaxFilterableSuccessors;
    });
  }

 private:
  std::bitset<kMaxFilterableSuccessors> GetAllowedSuccessors(const HBasicBlock* blk) const {
    DCHECK(valid_);
    return allowed_successors_[blk->GetBlockId()];
  }

  void LimitBlockSuccessors(const HBasicBlock* block,
                            std::bitset<kMaxFilterableSuccessors> allowed) {
    needs_prune_ = true;
    allowed_successors_[block->GetBlockId()] &= allowed;
  }

  // Remove nodes which both precede and follow any exclusions. This ensures we don't need to
  // deal with only conditionally materializing objects depending on whether we already
  // materialized them. Ensure that for all blocks A, B, C: Unreachable(A) && Unreachable(C) &&
  // PathBetween(A, B) && PathBetween(B, C) implies Unreachable(B). This simplifies later
  // transforms since it ensures that no execution can leave and then re-enter any exclusion.
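  // For example (an illustrative sketch): if blocks A and C have been removed
  // and the graph contains paths A -> B and B -> C, then B is removed as well,
  // since B both follows one exclusion and precedes another.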
  void RemoveConcavity();

  // Removes sink nodes. Sink nodes are blocks through which there is no
  // execution that avoids all removed blocks.
  void Prune();

  void RecalculateExcludedCohort();

  HGraph* graph_;
  ScopedArenaAllocator* allocator_;
  // The map from block_id -> allowed-successors.
  // This is the canonical representation of this subgraph. If a bit in the
  // bitset is not set then the corresponding outgoing edge of that block is not
  // considered traversable.
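  // For example (an illustrative sketch; the bit index is assumed to match the
  // successor's position in GetSuccessors()): if block 5 has successors
  // [B7, B9] and only bit 0 of allowed_successors_[5] is set, then only the
  // edge to B7 is treated as traversable.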
  ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> allowed_successors_;
  // Helper that holds which blocks we are unable to reach. Only valid if
  // 'needs_prune_ == false'.
  ArenaBitVector unreachable_blocks_;
  // A list of the excluded-cohorts of this subgraph. This is only valid when
  // 'needs_prune_ == false'.
  std::optional<ScopedArenaVector<ExcludedCohort>> excluded_list_;
  // Bool to hold if there is at least one known path from the start block to
  // the end in this graph. Used to short-circuit computation.
  bool valid_;
  // True if the subgraph has been modified since the last prune and is
  // therefore inconsistent. A prune is required before the subgraph can be
  // queried again.
  bool needs_prune_;
  // True if no more modification of the subgraph is permitted.
  bool finalized_;

  friend class ExecutionSubgraphTest;
  friend class LoadStoreAnalysisTest;

  DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraph);
};

std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex);

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_