/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "register_allocator_graph_color.h"

#include "code_generator.h"
#include "linear_order.h"
#include "register_allocation_resolver.h"
#include "ssa_liveness_analysis.h"
#include "thread-current-inl.h"

namespace art {

// Highest number of registers that we support for any platform. This can be used for std::bitset,
// for example, which needs to know its size at compile time.
static constexpr size_t kMaxNumRegs = 32;

// The maximum number of graph coloring attempts before triggering a DCHECK.
// This is meant to catch changes to the graph coloring algorithm that undermine its forward
// progress guarantees. Forward progress for the algorithm means splitting live intervals on
// every graph coloring attempt so that eventually the interference graph will be sparse enough
// to color. The main threat to forward progress is trying to split short intervals which cannot be
// split further; this could cause infinite looping because the interference graph would never
// change. This is avoided by prioritizing short intervals before long ones, so that long
// intervals are split when coloring fails.
static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;

// We always want to avoid spilling inside loops.
static constexpr size_t kLoopSpillWeightMultiplier = 10;

// If we avoid moves in single jump blocks, we can avoid jumps to jumps.
static constexpr size_t kSingleJumpBlockWeightMultiplier = 2;

// We avoid moves in blocks that dominate the exit block, since these blocks will
// be executed on every path through the method.
static constexpr size_t kDominatesExitBlockWeightMultiplier = 2;

enum class CoalesceKind {
  kAdjacentSibling,       // Prevents moves at interval split points.
  kFixedOutputSibling,    // Prevents moves from a fixed output location.
  kFixedInput,            // Prevents moves into a fixed input location.
  kNonlinearControlFlow,  // Prevents moves between blocks.
  kPhi,                   // Prevents phi resolution moves.
  kFirstInput,            // Prevents a single input move.
  kAnyInput,              // May lead to better instruction selection / smaller encodings.
};

std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) {
  return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind);
}

static size_t LoopDepthAt(HBasicBlock* block) {
  HLoopInformation* loop_info = block->GetLoopInformation();
  size_t depth = 0;
  while (loop_info != nullptr) {
    ++depth;
    loop_info = loop_info->GetPreHeader()->GetLoopInformation();
  }
  return depth;
}
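
// For example, a block inside a loop that is itself nested within another loop has depth 2,
// while a block outside any loop has depth 0; CostForMoveAt below applies one
// kLoopSpillWeightMultiplier factor per level of depth.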

// Return the runtime cost of inserting a move instruction at the specified location.
static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) {
  HBasicBlock* block = liveness.GetBlockFromPosition(position / 2);
  DCHECK(block != nullptr);
  size_t cost = 1;
  if (block->IsSingleJump()) {
    cost *= kSingleJumpBlockWeightMultiplier;
  }
  if (block->Dominates(block->GetGraph()->GetExitBlock())) {
    cost *= kDominatesExitBlockWeightMultiplier;
  }
  for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) {
    cost *= kLoopSpillWeightMultiplier;
  }
  return cost;
}
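
// As an illustration of how the multipliers above compose: a move in a block at loop depth 2
// that also dominates the exit block would cost 1 * 10 * 10 * 2 = 200, whereas a move in a
// straight-line block outside any loop costs just 1.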

// In general, we estimate coalesce priority by whether it will definitely avoid a move,
// and by how likely it is to create an interference graph that's harder to color.
static size_t ComputeCoalescePriority(CoalesceKind kind,
                                      size_t position,
                                      const SsaLivenessAnalysis& liveness) {
  if (kind == CoalesceKind::kAnyInput) {
    // This type of coalescing can affect instruction selection, but not moves, so we
    // give it the lowest priority.
    return 0;
  } else {
    return CostForMoveAt(position, liveness);
  }
}

enum class CoalesceStage {
  kWorklist,  // Currently in the iterative coalescing worklist.
  kActive,    // Not in a worklist, but could be considered again during iterative coalescing.
  kInactive,  // No longer considered until last-chance coalescing.
  kDefunct,   // Either the two nodes interfere, or have already been coalesced.
};

std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) {
  return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage);
}

// Represents a coalesce opportunity between two nodes.
struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> {
  CoalesceOpportunity(InterferenceNode* a,
                      InterferenceNode* b,
                      CoalesceKind kind,
                      size_t position,
                      const SsaLivenessAnalysis& liveness)
      : node_a(a),
        node_b(b),
        stage(CoalesceStage::kWorklist),
        priority(ComputeCoalescePriority(kind, position, liveness)) {}

  // Compare two coalesce opportunities based on their priority.
  // Return true if lhs has a lower priority than that of rhs.
  static bool CmpPriority(const CoalesceOpportunity* lhs,
                          const CoalesceOpportunity* rhs) {
    return lhs->priority < rhs->priority;
  }

  InterferenceNode* const node_a;
  InterferenceNode* const node_b;

  // The current stage of this coalesce opportunity, indicating whether it is in a worklist,
  // and whether it should still be considered.
  CoalesceStage stage;

  // The priority of this coalesce opportunity, based on heuristics.
  const size_t priority;
};

enum class NodeStage {
  kInitial,           // Uninitialized.
  kPrecolored,        // Marks fixed nodes.
  kSafepoint,         // Marks safepoint nodes.
  kPrunable,          // Marks uncolored nodes in the interference graph.
  kSimplifyWorklist,  // Marks non-move-related nodes with degree less than the number of registers.
  kFreezeWorklist,    // Marks move-related nodes with degree less than the number of registers.
  kSpillWorklist,     // Marks nodes with degree greater or equal to the number of registers.
  kPruned             // Marks nodes already pruned from the interference graph.
};

std::ostream& operator<<(std::ostream& os, const NodeStage& stage) {
  return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage);
}

// Returns the estimated cost of spilling a particular live interval.
static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) {
  if (interval->HasRegister()) {
    // Intervals with a fixed register cannot be spilled.
    return std::numeric_limits<float>::min();
  }

  size_t length = interval->GetLength();
  if (length == 1) {
    // Tiny intervals should have maximum priority, since they cannot be split any further.
    return std::numeric_limits<float>::max();
  }

  size_t use_weight = 0;
  if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) {
    // Cost for spilling at a register definition point.
    use_weight += CostForMoveAt(interval->GetStart() + 1, liveness);
  }

  // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e.
  // [interval->GetStart() + 1, interval->GetEnd() + 1)
  auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
                                                 interval->GetUses().end(),
                                                 interval->GetStart() + 1u,
                                                 interval->GetEnd() + 1u);
  for (const UsePosition& use : matching_use_range) {
    if (use.GetUser() != nullptr && use.RequiresRegister()) {
      // Cost for spilling at a register use point.
      use_weight += CostForMoveAt(use.GetUser()->GetLifetimePosition() - 1, liveness);
    }
  }

  // We divide by the length of the interval because we want to prioritize
  // short intervals; we do not benefit much if we split them further.
  return static_cast<float>(use_weight) / static_cast<float>(length);
}
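
// Illustrative example of the weight computed above (made-up numbers): an interval of length 20
// whose two register uses each have move cost 10 (e.g., inside a loop) gets spill weight
// (10 + 10) / 20 = 1.0, while the same two uses spread over a length-200 interval give only 0.1,
// so the denser, shorter interval is prioritized when assigning registers.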

// Interference nodes make up the interference graph, which is the primary data structure in
// graph coloring register allocation. Each node represents a single live interval, and contains
// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory,
// pre-colored nodes never contain outgoing edges (only incoming ones).
//
// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed,
// but outgoing edges remain in order to later color the node based on the colors of its neighbors.
//
// Note that a pair interval is represented by a single node in the interference graph, which
// essentially requires two colors. One consequence of this is that the degree of a node is not
// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum
// number of colors with which a node could interfere. We model this by giving edges different
// weights (1 or 2) to control how much they increase the degree of adjacent nodes.
// For example, the edge between two single nodes will have weight 1. On the other hand,
// the edge between a single node and a pair node will have weight 2. This is because the pair
// node could block up to two colors for the single node, and because the single node could
// block an entire two-register aligned slot for the pair node.
// The degree is defined this way because we use it to decide whether a node is guaranteed a color,
// and thus whether it is safe to prune it from the interference graph early on.
class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
 public:
  InterferenceNode(LiveInterval* interval,
                   const SsaLivenessAnalysis& liveness)
      : stage(NodeStage::kInitial),
        interval_(interval),
        adjacent_nodes_(nullptr),
        coalesce_opportunities_(nullptr),
        out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0),
        alias_(this),
        spill_weight_(ComputeSpillWeight(interval, liveness)),
        requires_color_(interval->RequiresRegister()),
        needs_spill_slot_(false) {
    DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval";
  }

  void AddInterference(InterferenceNode* other,
                       bool guaranteed_not_interfering_yet,
                       ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>>* storage) {
    DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences";
    DCHECK_NE(this, other) << "Should not create self loops in the interference graph";
    DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another";
    DCHECK_NE(stage, NodeStage::kPruned);
    DCHECK_NE(other->stage, NodeStage::kPruned);
    if (adjacent_nodes_ == nullptr) {
      ScopedArenaVector<InterferenceNode*>::allocator_type adapter(storage->get_allocator());
      storage->emplace_back(adapter);
      adjacent_nodes_ = &storage->back();
    }
    if (guaranteed_not_interfering_yet) {
      DCHECK(!ContainsElement(GetAdjacentNodes(), other));
      adjacent_nodes_->push_back(other);
      out_degree_ += EdgeWeightWith(other);
    } else {
      if (!ContainsElement(GetAdjacentNodes(), other)) {
        adjacent_nodes_->push_back(other);
        out_degree_ += EdgeWeightWith(other);
      }
    }
  }

  void RemoveInterference(InterferenceNode* other) {
    DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node";
    DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning";
    if (adjacent_nodes_ != nullptr) {
      auto it = std::find(adjacent_nodes_->begin(), adjacent_nodes_->end(), other);
      if (it != adjacent_nodes_->end()) {
        adjacent_nodes_->erase(it);
        out_degree_ -= EdgeWeightWith(other);
      }
    }
  }

  bool ContainsInterference(InterferenceNode* other) const {
    DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences";
    DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences";
    return ContainsElement(GetAdjacentNodes(), other);
  }

  LiveInterval* GetInterval() const {
    return interval_;
  }

  ArrayRef<InterferenceNode*> GetAdjacentNodes() const {
    return adjacent_nodes_ != nullptr
        ? ArrayRef<InterferenceNode*>(*adjacent_nodes_)
        : ArrayRef<InterferenceNode*>();
  }

  size_t GetOutDegree() const {
    // Pre-colored nodes have infinite degree.
    DCHECK(!IsPrecolored() || out_degree_ == std::numeric_limits<size_t>::max());
    return out_degree_;
  }

  void AddCoalesceOpportunity(CoalesceOpportunity* opportunity,
                              ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>>* storage) {
    if (coalesce_opportunities_ == nullptr) {
      ScopedArenaVector<CoalesceOpportunity*>::allocator_type adapter(storage->get_allocator());
      storage->emplace_back(adapter);
      coalesce_opportunities_ = &storage->back();
    }
    coalesce_opportunities_->push_back(opportunity);
  }

  void ClearCoalesceOpportunities() {
    coalesce_opportunities_ = nullptr;
  }

  bool IsMoveRelated() const {
    for (CoalesceOpportunity* opportunity : GetCoalesceOpportunities()) {
      if (opportunity->stage == CoalesceStage::kWorklist ||
          opportunity->stage == CoalesceStage::kActive) {
        return true;
      }
    }
    return false;
  }

  // Return whether this node already has a color.
  // Used to find fixed nodes in the interference graph before coloring.
  bool IsPrecolored() const {
    return interval_->HasRegister();
  }

  bool IsPair() const {
    return interval_->HasHighInterval();
  }

  void SetAlias(InterferenceNode* rep) {
    DCHECK_NE(rep->stage, NodeStage::kPruned);
    DCHECK_EQ(this, alias_) << "Should only set a node's alias once";
    alias_ = rep;
  }

  InterferenceNode* GetAlias() {
    if (alias_ != this) {
      // Recurse in order to flatten tree of alias pointers.
      alias_ = alias_->GetAlias();
    }
    return alias_;
  }

  ArrayRef<CoalesceOpportunity*> GetCoalesceOpportunities() const {
    return coalesce_opportunities_ != nullptr
        ? ArrayRef<CoalesceOpportunity*>(*coalesce_opportunities_)
        : ArrayRef<CoalesceOpportunity*>();
  }

  float GetSpillWeight() const {
    return spill_weight_;
  }

  bool RequiresColor() const {
    return requires_color_;
  }

  // We give extra weight to edges adjacent to pair nodes. See the general comment on the
  // interference graph above.
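  // For example, with these weights a pair node adjacent to two single nodes has out-degree
  // 2 + 2 = 4, while a single node adjacent to one pair node and one single node has
  // out-degree 2 + 1 = 3.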
  size_t EdgeWeightWith(const InterferenceNode* other) const {
    return (IsPair() || other->IsPair()) ? 2 : 1;
  }

  bool NeedsSpillSlot() const {
    return needs_spill_slot_;
  }

  void SetNeedsSpillSlot() {
    needs_spill_slot_ = true;
  }

  // The current stage of this node, indicating which worklist it belongs to.
  NodeStage stage;

 private:
  // The live interval that this node represents.
  LiveInterval* const interval_;

  // All nodes interfering with this one.
  // We use an unsorted vector as a set, since a tree or hash set is too heavy for the
  // set sizes that we encounter. Using a vector leads to much better performance.
  ScopedArenaVector<InterferenceNode*>* adjacent_nodes_;  // Owned by ColoringIteration.

  // Interference nodes that this node should be coalesced with to reduce moves.
  ScopedArenaVector<CoalesceOpportunity*>* coalesce_opportunities_;  // Owned by ColoringIteration.

  // The maximum number of colors with which this node could interfere. This could be more than
  // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes.
  // We use "out" degree because incoming edges come from nodes already pruned from the graph,
  // and do not affect the coloring of this node.
  // Pre-colored nodes are treated as having infinite degree.
  size_t out_degree_;

  // The node representing this node in the interference graph.
  // Initially set to `this`, and only changed if this node is coalesced into another.
  InterferenceNode* alias_;

  // The cost of splitting and spilling this interval to the stack.
  // Nodes with a higher spill weight should be prioritized when assigning registers.
  // This is essentially based on use density and location; short intervals with many uses inside
  // deeply nested loops have a high spill weight.
  const float spill_weight_;

  const bool requires_color_;

  bool needs_spill_slot_;

  DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
};

// The order in which we color nodes is important. To guarantee forward progress,
// we prioritize intervals that require registers, and after that we prioritize
// short intervals. That way, if we fail to color a node, it either won't require a
// register, or it will be a long interval that can be split in order to make the
// interference graph sparser.
// To improve code quality, we prioritize intervals used frequently in deeply nested loops.
// (This metric is secondary to the forward progress requirements above.)
// TODO: May also want to consider:
// - Constants (since they can be rematerialized)
// - Allocated spill slots
static bool HasGreaterNodePriority(const InterferenceNode* lhs,
                                   const InterferenceNode* rhs) {
  // (1) Prioritize the node that requires a color.
  if (lhs->RequiresColor() != rhs->RequiresColor()) {
    return lhs->RequiresColor();
  }

  // (2) Prioritize the interval that has a higher spill weight.
  return lhs->GetSpillWeight() > rhs->GetSpillWeight();
}

// A ColoringIteration holds the many data structures needed for a single graph coloring attempt,
// and provides methods for each phase of the attempt.
class ColoringIteration {
 public:
  ColoringIteration(RegisterAllocatorGraphColor* register_allocator,
                    ScopedArenaAllocator* allocator,
                    bool processing_core_regs,
                    size_t num_regs)
      : register_allocator_(register_allocator),
        allocator_(allocator),
        processing_core_regs_(processing_core_regs),
        num_regs_(num_regs),
        interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)),
        coalesce_worklist_(CoalesceOpportunity::CmpPriority,
                           allocator->Adapter(kArenaAllocRegisterAllocator)),
        adjacent_nodes_links_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        coalesce_opportunities_links_(allocator->Adapter(kArenaAllocRegisterAllocator)) {}

  // Use the intervals collected from instructions to construct an
  // interference graph mapping intervals to adjacency lists.
  // Also, collect synthesized safepoint nodes, used to keep
  // track of live intervals across safepoints.
  // TODO: Should build safepoints elsewhere.
  void BuildInterferenceGraph(const ScopedArenaVector<LiveInterval*>& intervals,
                              const ScopedArenaVector<InterferenceNode*>& physical_nodes);

  // Add coalesce opportunities to interference nodes.
  void FindCoalesceOpportunities();

  // Prune nodes from the interference graph to be colored later. Build
  // a stack (pruned_nodes) containing these intervals in an order determined
  // by various heuristics.
  void PruneInterferenceGraph();

  // Process pruned nodes to color the interference graph, spilling when
  // necessary. Returns true if successful. Else, some intervals have been
  // split, and the interference graph should be rebuilt for another attempt.
  bool ColorInterferenceGraph();

  // Return prunable nodes.
  // The register allocator will need to access prunable nodes after coloring
  // in order to tell the code generator which registers have been assigned.
  ArrayRef<InterferenceNode* const> GetPrunableNodes() const {
    return ArrayRef<InterferenceNode* const>(prunable_nodes_);
  }

 private:
  // Create a coalesce opportunity between two nodes.
  void CreateCoalesceOpportunity(InterferenceNode* a,
                                 InterferenceNode* b,
                                 CoalesceKind kind,
                                 size_t position);

  // Add an edge in the interference graph, if valid.
  // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
  // when possible.
  void AddPotentialInterference(InterferenceNode* from,
                                InterferenceNode* to,
                                bool guaranteed_not_interfering_yet,
                                bool both_directions = true);

  // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
  // may be pruned from the interference graph.
  void FreezeMoves(InterferenceNode* node);

  // Prune a node from the interference graph, updating worklists if necessary.
  void PruneNode(InterferenceNode* node);

  // Add coalesce opportunities associated with this node to the coalesce worklist.
  void EnableCoalesceOpportunities(InterferenceNode* node);

  // If needed, move `node` from the freeze worklist to the simplify worklist.
  void CheckTransitionFromFreezeWorklist(InterferenceNode* node);

  // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
  bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);

  // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
  bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);

  void Coalesce(CoalesceOpportunity* opportunity);

  // Merge `from` into `into` in the interference graph.
  void Combine(InterferenceNode* from, InterferenceNode* into);

  // A reference to the register allocator instance,
  // needed to split intervals and assign spill slots.
  RegisterAllocatorGraphColor* register_allocator_;

  // A scoped arena allocator used for a single graph coloring attempt.
  ScopedArenaAllocator* allocator_;

  const bool processing_core_regs_;

  const size_t num_regs_;

  // A map from live intervals to interference nodes.
  ScopedArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;

  // Uncolored nodes that should be pruned from the interference graph.
  ScopedArenaVector<InterferenceNode*> prunable_nodes_;

  // A stack of nodes pruned from the interference graph, waiting to be colored.
  ScopedArenaStdStack<InterferenceNode*> pruned_nodes_;

  // A queue containing low degree, non-move-related nodes that can be pruned immediately.
  ScopedArenaDeque<InterferenceNode*> simplify_worklist_;

  // A queue containing low degree, move-related nodes.
  ScopedArenaDeque<InterferenceNode*> freeze_worklist_;

  // A queue containing high degree nodes.
  // If we have to prune from the spill worklist, we cannot guarantee
  // the pruned node a color, so we order the worklist by priority.
  ScopedArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;

  // A queue containing coalesce opportunities.
  // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
  // inside of loops) are more important than others.
  ScopedArenaPriorityQueue<CoalesceOpportunity*,
                           decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_;

  // Storage for links to adjacent nodes for interference nodes.
  // Using std::deque so that elements do not move when adding new ones.
  ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>> adjacent_nodes_links_;

  // Storage for links to coalesce opportunities for interference nodes.
  // Using std::deque so that elements do not move when adding new ones.
  ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>> coalesce_opportunities_links_;

  DISALLOW_COPY_AND_ASSIGN(ColoringIteration);
};

static bool IsCoreInterval(LiveInterval* interval) {
  return !DataType::IsFloatingPointType(interval->GetType());
}

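// Number of stack slots reserved for the ArtMethod*, measured in vreg-sized slots. For example,
// on a 64-bit target the ArtMethod* takes 8 bytes, so with 4-byte vregs (kVRegSize) this
// computes 8 / 4 = 2 reserved slots; a 32-bit target gets 1.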
static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
  return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
}

RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator,
                                                         CodeGenerator* codegen,
                                                         const SsaLivenessAnalysis& liveness,
                                                         bool iterative_move_coalescing)
    : RegisterAllocator(allocator, codegen, liveness),
      iterative_move_coalescing_(iterative_move_coalescing),
      core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
      fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
      temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
      safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
      physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
      physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
      num_int_spill_slots_(0),
      num_double_spill_slots_(0),
      num_float_spill_slots_(0),
      num_long_spill_slots_(0),
      catch_phi_spill_slot_counter_(0),
      reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
      reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) {
  // Before we ask for blocked registers, set them up in the code generator.
  codegen->SetupBlockedRegisters();

  // Initialize physical core register live intervals and blocked registers.
  // This includes globally blocked registers, such as the stack pointer.
  physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr);
  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
    LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kInt32);
    physical_core_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness);
    physical_core_nodes_[i]->stage = NodeStage::kPrecolored;
    core_intervals_.push_back(interval);
    if (codegen_->IsBlockedCoreRegister(i)) {
      interval->AddRange(0, liveness.GetMaxLifetimePosition());
    }
  }
  // Initialize physical floating point register live intervals and blocked registers.
  physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr);
  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
    LiveInterval* interval =
        LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kFloat32);
    physical_fp_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness);
    physical_fp_nodes_[i]->stage = NodeStage::kPrecolored;
    fp_intervals_.push_back(interval);
    if (codegen_->IsBlockedFloatingPointRegister(i)) {
      interval->AddRange(0, liveness.GetMaxLifetimePosition());
    }
  }
}

RegisterAllocatorGraphColor::~RegisterAllocatorGraphColor() {}

void RegisterAllocatorGraphColor::AllocateRegisters() {
  // (1) Collect and prepare live intervals.
  ProcessInstructions();

  for (bool processing_core_regs : {true, false}) {
    ScopedArenaVector<LiveInterval*>& intervals = processing_core_regs
        ? core_intervals_
        : fp_intervals_;
    size_t num_registers = processing_core_regs
        ? codegen_->GetNumberOfCoreRegisters()
        : codegen_->GetNumberOfFloatingPointRegisters();

    size_t attempt = 0;
    while (true) {
      ++attempt;
      DCHECK(attempt <= kMaxGraphColoringAttemptsDebug)
          << "Exceeded debug max graph coloring register allocation attempts. "
          << "This could indicate that the register allocator is not making forward progress, "
          << "which could be caused by prioritizing the wrong live intervals. (Short intervals "
          << "should be prioritized over long ones, because they cannot be split further.)";

      // Many data structures are cleared between graph coloring attempts, so we reduce
      // total memory usage by using a new scoped arena allocator for each attempt.
      ScopedArenaAllocator coloring_attempt_allocator(allocator_->GetArenaStack());
      ColoringIteration iteration(this,
                                  &coloring_attempt_allocator,
                                  processing_core_regs,
                                  num_registers);

      // (2) Build the interference graph.
      ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
          ? physical_core_nodes_
          : physical_fp_nodes_;
      iteration.BuildInterferenceGraph(intervals, physical_nodes);

      // (3) Add coalesce opportunities.
      // If we have tried coloring the graph a suspiciously high number of times, give
      // up on move coalescing, just in case the coalescing heuristics are not conservative.
      // (This situation will be caught if DCHECKs are turned on.)
      if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) {
        iteration.FindCoalesceOpportunities();
      }

      // (4) Prune all uncolored nodes from interference graph.
      iteration.PruneInterferenceGraph();

      // (5) Color pruned nodes based on interferences.
      bool successful = iteration.ColorInterferenceGraph();

      // We manually clear coalesce opportunities for physical nodes,
      // since they persist across coloring attempts.
      for (InterferenceNode* node : physical_core_nodes_) {
        node->ClearCoalesceOpportunities();
      }
      for (InterferenceNode* node : physical_fp_nodes_) {
        node->ClearCoalesceOpportunities();
      }

      if (successful) {
        // Assign spill slots.
        AllocateSpillSlots(iteration.GetPrunableNodes());

        // Tell the code generator which registers were allocated.
        // We only look at prunable_nodes because we already told the code generator about
        // fixed intervals while processing instructions. We also ignore the fixed intervals
        // placed at the top of catch blocks.
        for (InterferenceNode* node : iteration.GetPrunableNodes()) {
          LiveInterval* interval = node->GetInterval();
          if (interval->HasRegister()) {
            Location low_reg = processing_core_regs
                ? Location::RegisterLocation(interval->GetRegister())
                : Location::FpuRegisterLocation(interval->GetRegister());
            codegen_->AddAllocatedRegister(low_reg);
            if (interval->HasHighInterval()) {
              LiveInterval* high = interval->GetHighInterval();
              DCHECK(high->HasRegister());
              Location high_reg = processing_core_regs
                  ? Location::RegisterLocation(high->GetRegister())
                  : Location::FpuRegisterLocation(high->GetRegister());
              codegen_->AddAllocatedRegister(high_reg);
            }
          } else {
            DCHECK(!interval->HasHighInterval() || !interval->GetHighInterval()->HasRegister());
          }
        }

        break;
      }
    }  // while unsuccessful
  }  // for processing_core_regs

  // (6) Resolve locations and deconstruct SSA form.
  RegisterAllocationResolver(codegen_, liveness_)
      .Resolve(ArrayRef<HInstruction* const>(safepoints_),
               reserved_art_method_slots_ + reserved_out_slots_,
               num_int_spill_slots_,
               num_long_spill_slots_,
               num_float_spill_slots_,
               num_double_spill_slots_,
               catch_phi_spill_slot_counter_,
               ArrayRef<LiveInterval* const>(temp_intervals_));

  if (kIsDebugBuild) {
    Validate(/*log_fatal_on_failure*/ true);
  }
}

bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
  for (bool processing_core_regs : {true, false}) {
    ScopedArenaAllocator allocator(allocator_->GetArenaStack());
    ScopedArenaVector<LiveInterval*> intervals(
        allocator.Adapter(kArenaAllocRegisterAllocatorValidate));
    for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
      HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
      LiveInterval* interval = instruction->GetLiveInterval();
      if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
        intervals.push_back(instruction->GetLiveInterval());
      }
    }

    ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
        ? physical_core_nodes_
        : physical_fp_nodes_;
    for (InterferenceNode* fixed : physical_nodes) {
      LiveInterval* interval = fixed->GetInterval();
      if (interval->GetFirstRange() != nullptr) {
        // Ideally we would check fixed ranges as well, but currently there are times when
        // two fixed intervals for the same register will overlap. For example, a fixed input
        // and a fixed output may sometimes share the same register, in which case there will be
        // two fixed intervals for the same place.
      }
    }

    for (LiveInterval* temp : temp_intervals_) {
      if (IsCoreInterval(temp) == processing_core_regs) {
        intervals.push_back(temp);
      }
    }

    size_t spill_slots = num_int_spill_slots_
                       + num_long_spill_slots_
                       + num_float_spill_slots_
                       + num_double_spill_slots_
                       + catch_phi_spill_slot_counter_;
    bool ok = ValidateIntervals(ArrayRef<LiveInterval* const>(intervals),
                                spill_slots,
                                reserved_art_method_slots_ + reserved_out_slots_,
                                *codegen_,
                                processing_core_regs,
                                log_fatal_on_failure);
    if (!ok) {
      return false;
    }
  }  // for processing_core_regs

  return true;
}

void RegisterAllocatorGraphColor::ProcessInstructions() {
  for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
    // Note that we currently depend on this ordering, since some helper
    // code is designed for linear scan register allocation.
    for (HBackwardInstructionIterator instr_it(block->GetInstructions());
         !instr_it.Done();
         instr_it.Advance()) {
      ProcessInstruction(instr_it.Current());
    }

    for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
      ProcessInstruction(phi_it.Current());
    }

    if (block->IsCatchBlock()
        || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
      // By blocking all registers at the top of each catch block or irreducible loop, we force
      // intervals belonging to the live-in set of the catch/header block to be spilled.
      // TODO(ngeoffray): Phis in this block could be allocated in register.
      size_t position = block->GetLifetimeStart();
      BlockRegisters(position, position + 1);
    }
  }
}

void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  if (locations == nullptr) {
    return;
  }
  if (locations->NeedsSafepoint() && codegen_->IsLeafMethod()) {
    // We do this here because we do not want the suspend check to artificially
    // create live registers.
    DCHECK(instruction->IsSuspendCheckEntry());
    DCHECK_EQ(locations->GetTempCount(), 0u);
    instruction->GetBlock()->RemoveInstruction(instruction);
    return;
  }

  CheckForTempLiveIntervals(instruction);
  CheckForSafepoint(instruction);
  if (instruction->GetLocations()->WillCall()) {
    // If a call will happen, create fixed intervals for caller-save registers.
    // TODO: Note that it may be beneficial to later split intervals at this point,
    // so that we allow last-minute moves from a caller-save register
    // to a callee-save register.
    BlockRegisters(instruction->GetLifetimePosition(),
                   instruction->GetLifetimePosition() + 1,
                   /*caller_save_only*/ true);
  }
  CheckForFixedInputs(instruction);

  LiveInterval* interval = instruction->GetLiveInterval();
  if (interval == nullptr) {
    // Instructions lacking a valid output location do not have a live interval.
    DCHECK(!locations->Out().IsValid());
    return;
  }

  // Low intervals act as representatives for their corresponding high interval.
  DCHECK(!interval->IsHighInterval());
  if (codegen_->NeedsTwoRegisters(interval->GetType())) {
    interval->AddHighInterval();
  }
  AddSafepointsFor(instruction);
  CheckForFixedOutput(instruction);
  AllocateSpillSlotForCatchPhi(instruction);

  ScopedArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval)
      ? core_intervals_
      : fp_intervals_;
  if (interval->HasSpillSlot() || instruction->IsConstant()) {
    // Note that if an interval already has a spill slot, then its value currently resides
    // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first
    // register use. This is also true for constants, which can be materialized at any point.
    size_t first_register_use = interval->FirstRegisterUse();
    if (first_register_use != kNoLifetime) {
      LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1);
      intervals.push_back(split);
    } else {
      // We won't allocate a register for this value.
    }
  } else {
    intervals.push_back(interval);
  }
}

void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) {
  // We simply block physical registers where necessary.
  // TODO: Ideally we would coalesce the physical register with the register
  // allocated to the input value, but this can be tricky if, e.g., there
  // could be multiple physical register uses of the same value at the
  // same instruction. Furthermore, there's currently no distinction between
  // fixed inputs to a call (which will be clobbered) and other fixed inputs (which
  // may not be clobbered).
  LocationSummary* locations = instruction->GetLocations();
  size_t position = instruction->GetLifetimePosition();
  for (size_t i = 0; i < locations->GetInputCount(); ++i) {
    Location input = locations->InAt(i);
    if (input.IsRegister() || input.IsFpuRegister()) {
      BlockRegister(input, position, position + 1);
      codegen_->AddAllocatedRegister(input);
    } else if (input.IsPair()) {
      BlockRegister(input.ToLow(), position, position + 1);
      BlockRegister(input.ToHigh(), position, position + 1);
      codegen_->AddAllocatedRegister(input.ToLow());
      codegen_->AddAllocatedRegister(input.ToHigh());
    }
  }
}

void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) {
  // If an instruction has a fixed output location, we give the live interval a register and then
  // proactively split it just after the definition point to avoid creating too many interferences
  // with a fixed node.
  LiveInterval* interval = instruction->GetLiveInterval();
  Location out = interval->GetDefinedBy()->GetLocations()->Out();
  size_t position = instruction->GetLifetimePosition();
  DCHECK_GE(interval->GetEnd() - position, 2u);

  if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
    out = instruction->GetLocations()->InAt(0);
  }

  if (out.IsRegister() || out.IsFpuRegister()) {
    interval->SetRegister(out.reg());
    codegen_->AddAllocatedRegister(out);
    Split(interval, position + 1);
  } else if (out.IsPair()) {
    interval->SetRegister(out.low());
    interval->GetHighInterval()->SetRegister(out.high());
    codegen_->AddAllocatedRegister(out.ToLow());
    codegen_->AddAllocatedRegister(out.ToHigh());
    Split(interval, position + 1);
  } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) {
    interval->SetSpillSlot(out.GetStackIndex());
  } else {
    DCHECK(out.IsUnallocated() || out.IsConstant());
  }
}

void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) {
  LiveInterval* interval = instruction->GetLiveInterval();
  for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
    HInstruction* safepoint = safepoints_[safepoint_index - 1u];
    size_t safepoint_position = safepoint->GetLifetimePosition();

    // Test that safepoints_ are ordered in the optimal way.
    DCHECK(safepoint_index == safepoints_.size() ||
           safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);

    if (safepoint_position == interval->GetStart()) {
      // The safepoint is for this instruction, so the location of the instruction
      // does not need to be saved.
      DCHECK_EQ(safepoint_index, safepoints_.size());
      DCHECK_EQ(safepoint, instruction);
      continue;
    } else if (interval->IsDeadAt(safepoint_position)) {
      break;
    } else if (!interval->Covers(safepoint_position)) {
      // Hole in the interval.
      continue;
    }
    interval->AddSafepoint(safepoint);
  }
}

void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t position = instruction->GetLifetimePosition();
  for (size_t i = 0; i < locations->GetTempCount(); ++i) {
    Location temp = locations->GetTemp(i);
    if (temp.IsRegister() || temp.IsFpuRegister()) {
      BlockRegister(temp, position, position + 1);
      codegen_->AddAllocatedRegister(temp);
    } else {
      DCHECK(temp.IsUnallocated());
      switch (temp.GetPolicy()) {
        case Location::kRequiresRegister: {
          LiveInterval* interval =
              LiveInterval::MakeTempInterval(allocator_, DataType::Type::kInt32);
          interval->AddTempUse(instruction, i);
          core_intervals_.push_back(interval);
          temp_intervals_.push_back(interval);
          break;
        }

        case Location::kRequiresFpuRegister: {
          LiveInterval* interval =
              LiveInterval::MakeTempInterval(allocator_, DataType::Type::kFloat64);
          interval->AddTempUse(instruction, i);
          fp_intervals_.push_back(interval);
          temp_intervals_.push_back(interval);
          if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) {
            interval->AddHighInterval(/*is_temp*/ true);
            temp_intervals_.push_back(interval->GetHighInterval());
          }
          break;
        }

        default:
          LOG(FATAL) << "Unexpected policy for temporary location "
                     << temp.GetPolicy();
      }
    }
  }
}

void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
  LocationSummary* locations = instruction->GetLocations();

  if (locations->NeedsSafepoint()) {
    safepoints_.push_back(instruction);
  }
}

LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) {
  if (interval->GetStart() < position && position < interval->GetEnd()) {
    return Split(interval, position);
  } else {
    return interval;
  }
}

void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) {
  DCHECK(!interval->IsHighInterval());

  // Split just after a register definition.
  if (interval->IsParent() && interval->DefinitionRequiresRegister()) {
    interval = TrySplit(interval, interval->GetStart() + 1);
  }

  // Process uses in the range [interval->GetStart(), interval->GetEnd()], i.e.
  // [interval->GetStart(), interval->GetEnd() + 1)
  auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
                                                 interval->GetUses().end(),
                                                 interval->GetStart(),
                                                 interval->GetEnd() + 1u);
  // Split around register uses.
  for (const UsePosition& use : matching_use_range) {
    if (use.RequiresRegister()) {
      size_t position = use.GetPosition();
      interval = TrySplit(interval, position - 1);
      if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
        // If we are at the very end of a basic block, we cannot split right
        // at the use. Split just after instead.
        interval = TrySplit(interval, position + 1);
      } else {
        interval = TrySplit(interval, position);
      }
    }
  }
}

void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) {
  if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
    HPhi* phi = instruction->AsPhi();
    LiveInterval* interval = phi->GetLiveInterval();

    HInstruction* previous_phi = phi->GetPrevious();
    DCHECK(previous_phi == nullptr ||
           previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
        << "Phis expected to be sorted by vreg number, "
        << "so that equivalent phis are adjacent.";

    if (phi->IsVRegEquivalentOf(previous_phi)) {
      // Assign the same spill slot.
      DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
      interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
    } else {
      interval->SetSpillSlot(catch_phi_spill_slot_counter_);
      catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
    }
  }
}

void RegisterAllocatorGraphColor::BlockRegister(Location location,
                                                size_t start,
                                                size_t end) {
  DCHECK(location.IsRegister() || location.IsFpuRegister());
  int reg = location.reg();
  LiveInterval* interval = location.IsRegister()
      ? physical_core_nodes_[reg]->GetInterval()
      : physical_fp_nodes_[reg]->GetInterval();
  DCHECK(interval->GetRegister() == reg);
  bool blocked_by_codegen = location.IsRegister()
      ? codegen_->IsBlockedCoreRegister(reg)
      : codegen_->IsBlockedFloatingPointRegister(reg);
  if (blocked_by_codegen) {
    // We've already blocked this register for the entire method. (And adding a
    // range inside another range violates the preconditions of AddRange).
  } else {
    interval->AddRange(start, end);
  }
}

void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
    if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
      BlockRegister(Location::RegisterLocation(i), start, end);
    }
  }
  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
    if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
      BlockRegister(Location::FpuRegisterLocation(i), start, end);
    }
  }
}

void ColoringIteration::AddPotentialInterference(InterferenceNode* from,
                                                 InterferenceNode* to,
                                                 bool guaranteed_not_interfering_yet,
                                                 bool both_directions) {
  if (from->IsPrecolored()) {
    // We save space by ignoring outgoing edges from fixed nodes.
  } else if (to->IsPrecolored()) {
    // It is important that only a single node represents a given fixed register in the
    // interference graph. We retrieve that node here.
    const ScopedArenaVector<InterferenceNode*>& physical_nodes =
        to->GetInterval()->IsFloatingPoint() ? register_allocator_->physical_fp_nodes_
                                             : register_allocator_->physical_core_nodes_;
    InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()];
    from->AddInterference(
        physical_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_);
    DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister());
    DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node";

    // If a node interferes with a fixed pair node, the weight of the edge may
    // be inaccurate after using the alias of the pair node, because the alias of the pair node
    // is a singular node.
    // We could make special pair fixed nodes, but that ends up being too conservative because
    // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of
    // three rather than two.
    // Instead, we explicitly add an interference with the high node of the fixed pair node.
    // TODO: This is too conservative at times for pair nodes, but the fact that fixed pair
    // intervals can be unaligned on x86 complicates things.
    if (to->IsPair()) {
      InterferenceNode* high_node =
          physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()];
      DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(),
                high_node->GetInterval()->GetRegister());
      from->AddInterference(
          high_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_);
    }
  } else {
    // Standard interference between two uncolored nodes.
    from->AddInterference(to, guaranteed_not_interfering_yet, &adjacent_nodes_links_);
  }

  if (both_directions) {
    AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false);
  }
}

// Returns true if `in_node` represents an input interval of `out_node`, and the output interval
// is allowed to have the same register as the input interval.
// TODO: Ideally we should just produce correct intervals in liveness analysis.
// We would need to refactor the current live interval layout to do so, which is
// no small task.
static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) {
  LiveInterval* output_interval = out_node->GetInterval();
  HInstruction* defined_by = output_interval->GetDefinedBy();
  if (defined_by == nullptr) {
    // This must not be a definition point.
    return false;
  }

  LocationSummary* locations = defined_by->GetLocations();
  if (locations->OutputCanOverlapWithInputs()) {
    // This instruction does not allow the output to reuse a register from an input.
    return false;
  }

  LiveInterval* input_interval = in_node->GetInterval();
  LiveInterval* next_sibling = input_interval->GetNextSibling();
  size_t def_position = defined_by->GetLifetimePosition();
  size_t use_position = def_position + 1;
  if (next_sibling != nullptr && next_sibling->GetStart() == use_position) {
    // The next sibling starts at the use position, so reusing the input register in the output
    // would clobber the input before it's moved into the sibling interval location.
    return false;
  }

  if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) {
    // The input interval is live after the use position.
    return false;
  }

  HInputsRef inputs = defined_by->GetInputs();
  for (size_t i = 0; i < inputs.size(); ++i) {
    if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) {
      DCHECK(input_interval->SameRegisterKind(*output_interval));
      return true;
    }
  }

  // The input interval was not an input for this instruction.
  return false;
}
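
// For instance, an instruction that defines its output with Location::kNoOutputOverlap may have
// its output share a register with one of its inputs, provided the input value is not live past
// the definition point; in that case BuildInterferenceGraph below skips the interference edge
// between the two intervals.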

void ColoringIteration::BuildInterferenceGraph(
    const ScopedArenaVector<LiveInterval*>& intervals,
    const ScopedArenaVector<InterferenceNode*>& physical_nodes) {
  DCHECK(interval_node_map_.empty() && prunable_nodes_.empty());
  // Build the interference graph efficiently by ordering range endpoints
  // by position and doing a linear sweep to find interferences. (That is, we
  // jump from endpoint to endpoint, maintaining a set of intervals live at each
  // point. If two nodes are ever in the live set at the same time, then they
  // interfere with each other.)
  //
  // We order by both position and (secondarily) by whether the endpoint
  // begins or ends a range; we want to process range endings before range
  // beginnings at the same position because they should not conflict.
  //
  // For simplicity, we create a tuple for each endpoint, and then sort the tuples.
  // Tuple contents: (position, is_range_beginning, node).
  ScopedArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints(
      allocator_->Adapter(kArenaAllocRegisterAllocator));

  // We reserve plenty of space to avoid excessive copying.
  range_endpoints.reserve(4 * prunable_nodes_.size());

  for (LiveInterval* parent : intervals) {
    for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) {
      LiveRange* range = sibling->GetFirstRange();
      if (range != nullptr) {
        InterferenceNode* node =
            new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_);
        interval_node_map_.insert(std::make_pair(sibling, node));

        if (sibling->HasRegister()) {
          // Fixed nodes should alias the canonical node for the corresponding register.
          node->stage = NodeStage::kPrecolored;
          InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()];
          node->SetAlias(physical_node);
          DCHECK_EQ(node->GetInterval()->GetRegister(),
                    physical_node->GetInterval()->GetRegister());
        } else {
          node->stage = NodeStage::kPrunable;
          prunable_nodes_.push_back(node);
        }

        while (range != nullptr) {
          range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node));
          range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node));
          range = range->GetNext();
        }
      }
    }
  }

  // Sort the endpoints.
  // We explicitly ignore the third entry of each tuple (the node pointer) in order
  // to maintain determinism.
  std::sort(range_endpoints.begin(), range_endpoints.end(),
            [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs,
                const std::tuple<size_t, bool, InterferenceNode*>& rhs) {
              return std::tie(std::get<0>(lhs), std::get<1>(lhs))
                  < std::tie(std::get<0>(rhs), std::get<1>(rhs));
            });
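
  // As an illustration of the ordering (hypothetical positions): for ranges A = [2, 10) and
  // B = [6, 14), the sorted endpoints are (2, begin, A), (6, begin, B), (10, end, A),
  // (14, end, B); B begins while A is still live, so the two interfere. If B instead began at
  // position 10, the tuple (10, end, A) would sort before (10, begin, B) since false < true,
  // so A would leave the live set first and no interference would be recorded.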
1243
1244 // Nodes live at the current position in the linear sweep.
1245 ScopedArenaVector<InterferenceNode*> live(allocator_->Adapter(kArenaAllocRegisterAllocator));
1246
1247 // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the
1248 // live set. When we encounter the end of a range, we remove the corresponding node
1249 // from the live set. Nodes interfere if they are in the live set at the same time.
1250 for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) {
1251 bool is_range_beginning;
1252 InterferenceNode* node;
1253 size_t position;
1254 // Extract information from the tuple, including the node this tuple represents.
1255 std::tie(position, is_range_beginning, node) = *it;
1256
1257 if (is_range_beginning) {
1258 bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart();
1259 for (InterferenceNode* conflicting : live) {
1260 DCHECK_NE(node, conflicting);
1261 if (CheckInputOutputCanOverlap(conflicting, node)) {
1262 // We do not add an interference, because the instruction represented by `node` allows
1263 // its output to share a register with an input, represented here by `conflicting`.
1264 } else {
1265 AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet);
1266 }
1267 }
1268 DCHECK(std::find(live.begin(), live.end(), node) == live.end());
1269 live.push_back(node);
1270 } else {
1271 // End of range.
1272 auto live_it = std::find(live.begin(), live.end(), node);
1273 DCHECK(live_it != live.end());
1274 live.erase(live_it);
1275 }
1276 }
1277 DCHECK(live.empty());
1278 }
1279
1280 void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a,
1281 InterferenceNode* b,
1282 CoalesceKind kind,
1283 size_t position) {
1284 DCHECK_EQ(a->IsPair(), b->IsPair())
1285 << "Nodes of different memory widths should never be coalesced";
1286 CoalesceOpportunity* opportunity =
1287 new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_);
1288 a->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
1289 b->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
1290 coalesce_worklist_.push(opportunity);
1291 }
1292
1293 // When looking for coalesce opportunities, we use the interval_node_map_ to find the node
1294 // corresponding to an interval. Note that not all intervals are in this map, notably the parents
1295 // of constants and stack arguments. (However, these intervals should not be involved in coalesce
1296 // opportunities anyway, because they're not going to be in registers.)
1297 void ColoringIteration::FindCoalesceOpportunities() {
1298 DCHECK(coalesce_worklist_.empty());
1299
1300 for (InterferenceNode* node : prunable_nodes_) {
1301 LiveInterval* interval = node->GetInterval();
1302
1303 // Coalesce siblings.
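    // Illustrative sketch (not part of the allocator): if the interval for some value v is split
    // at position p, the first sibling ends at p and the next sibling starts at p. Giving the two
    // sibling nodes a kAdjacentSibling coalesce opportunity biases them toward the same register,
    // so the resolver does not have to insert a move at p.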
1304 LiveInterval* next_sibling = interval->GetNextSibling();
1305 if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) {
1306 auto it = interval_node_map_.find(next_sibling);
1307 if (it != interval_node_map_.end()) {
1308 InterferenceNode* sibling_node = it->second;
1309 CreateCoalesceOpportunity(node,
1310 sibling_node,
1311 CoalesceKind::kAdjacentSibling,
1312 interval->GetEnd());
1313 }
1314 }
1315
1316 // Coalesce fixed outputs with this interval if this interval is an adjacent sibling.
1317 LiveInterval* parent = interval->GetParent();
1318 if (parent->HasRegister()
1319 && parent->GetNextSibling() == interval
1320 && parent->GetEnd() == interval->GetStart()) {
1321 auto it = interval_node_map_.find(parent);
1322 if (it != interval_node_map_.end()) {
1323 InterferenceNode* parent_node = it->second;
1324 CreateCoalesceOpportunity(node,
1325 parent_node,
1326 CoalesceKind::kFixedOutputSibling,
1327 parent->GetEnd());
1328 }
1329 }
1330
1331 // Try to prevent moves across blocks.
1332 // Note that this does not lead to many successful coalesce attempts, so it could be removed
1333 // if it is found to add to compile time.
1334 const SsaLivenessAnalysis& liveness = register_allocator_->liveness_;
1335 if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) {
1336 // If the start of this interval is at a block boundary, we look at the
1337 // location of the interval in blocks preceding the block this interval
1338 // starts at. This can avoid a move between the two blocks.
1339 HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2);
1340 for (HBasicBlock* predecessor : block->GetPredecessors()) {
1341 size_t position = predecessor->GetLifetimeEnd() - 1;
1342 LiveInterval* existing = interval->GetParent()->GetSiblingAt(position);
1343 if (existing != nullptr) {
1344 auto it = interval_node_map_.find(existing);
1345 if (it != interval_node_map_.end()) {
1346 InterferenceNode* existing_node = it->second;
1347 CreateCoalesceOpportunity(node,
1348 existing_node,
1349 CoalesceKind::kNonlinearControlFlow,
1350 position);
1351 }
1352 }
1353 }
1354 }
1355
1356 // Coalesce phi inputs with the corresponding output.
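    // Illustrative sketch (not part of the allocator): for a phi p = phi(x from B1, y from B2),
    // the sibling of x live at the end of B1 and the sibling of y live at the end of B2 each get
    // a kPhi coalesce opportunity with p's node, so that, when the coalesce succeeds, no phi
    // resolution move is needed on the corresponding control-flow edge.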
1357 HInstruction* defined_by = interval->GetDefinedBy();
1358 if (defined_by != nullptr && defined_by->IsPhi()) {
1359 ArrayRef<HBasicBlock* const> predecessors(defined_by->GetBlock()->GetPredecessors());
1360 HInputsRef inputs = defined_by->GetInputs();
1361
1362 for (size_t i = 0, e = inputs.size(); i < e; ++i) {
1363 // We want the sibling at the end of the appropriate predecessor block.
1364 size_t position = predecessors[i]->GetLifetimeEnd() - 1;
1365 LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position);
1366
1367 auto it = interval_node_map_.find(input_interval);
1368 if (it != interval_node_map_.end()) {
1369 InterferenceNode* input_node = it->second;
1370 CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position);
1371 }
1372 }
1373 }
1374
1375 // Coalesce output with first input when policy is kSameAsFirstInput.
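    // Illustrative sketch (not part of the allocator): on a two-address target, an instruction
    // such as an add may declare its output as Location::kSameAsFirstInput. Coalescing the output
    // interval with the first input's sibling that ends exactly where the output starts lets both
    // share a register, avoiding the copy that would otherwise precede the instruction.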
1376 if (defined_by != nullptr) {
1377 Location out = defined_by->GetLocations()->Out();
1378 if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
1379 LiveInterval* input_interval
1380 = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1);
1381 // TODO: Could we consider lifetime holes here?
1382 if (input_interval->GetEnd() == interval->GetStart()) {
1383 auto it = interval_node_map_.find(input_interval);
1384 if (it != interval_node_map_.end()) {
1385 InterferenceNode* input_node = it->second;
1386 CreateCoalesceOpportunity(node,
1387 input_node,
1388 CoalesceKind::kFirstInput,
1389 interval->GetStart());
1390 }
1391 }
1392 }
1393 }
1394
1395 // An interval that starts at an instruction (that is, it is not split) may
1396 // re-use the registers used by the inputs of that instruction, based on the
1397 // location summary.
1398 if (defined_by != nullptr) {
1399 DCHECK(!interval->IsSplit());
1400 LocationSummary* locations = defined_by->GetLocations();
1401 if (!locations->OutputCanOverlapWithInputs()) {
1402 HInputsRef inputs = defined_by->GetInputs();
1403 for (size_t i = 0; i < inputs.size(); ++i) {
1404 size_t def_point = defined_by->GetLifetimePosition();
1405 // TODO: Getting the sibling at the def_point might not be quite what we want
1406 // for fixed inputs, since the use will be *at* the def_point rather than after.
1407 LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point);
1408 if (input_interval != nullptr &&
1409 input_interval->HasHighInterval() == interval->HasHighInterval()) {
1410 auto it = interval_node_map_.find(input_interval);
1411 if (it != interval_node_map_.end()) {
1412 InterferenceNode* input_node = it->second;
1413 CreateCoalesceOpportunity(node,
1414 input_node,
1415 CoalesceKind::kAnyInput,
1416 interval->GetStart());
1417 }
1418 }
1419 }
1420 }
1421 }
1422
1423 // Try to prevent moves into fixed input locations.
1424 // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e.
1425 // [interval->GetStart() + 1, interval->GetEnd() + 1)
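    // Illustrative sketch (not part of the allocator): if a later use requires this value in a
    // specific register (for example an input constrained to a particular core register by its
    // location summary), coalescing this node with the corresponding pre-colored physical node
    // biases the value toward being computed in that register, so no move is needed at the use.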
1426 auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
1427 interval->GetUses().end(),
1428 interval->GetStart() + 1u,
1429 interval->GetEnd() + 1u);
1430 for (const UsePosition& use : matching_use_range) {
1431 HInstruction* user = use.GetUser();
1432 if (user == nullptr) {
1433 // User may be null for certain intervals, such as temp intervals.
1434 continue;
1435 }
1436 LocationSummary* locations = user->GetLocations();
1437 Location input = locations->InAt(use.GetInputIndex());
1438 if (input.IsRegister() || input.IsFpuRegister()) {
1439 // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes
1440 // is currently not supported.
1441 InterferenceNode* fixed_node = input.IsRegister()
1442 ? register_allocator_->physical_core_nodes_[input.reg()]
1443 : register_allocator_->physical_fp_nodes_[input.reg()];
1444 CreateCoalesceOpportunity(node,
1445 fixed_node,
1446 CoalesceKind::kFixedInput,
1447 user->GetLifetimePosition());
1448 }
1449 }
1450 } // for node in prunable_nodes
1451 }
1452
1453 static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) {
1454 return node->GetOutDegree() < num_regs;
1455 }
1456
1457 static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) {
1458 return !IsLowDegreeNode(node, num_regs);
1459 }
1460
1461 void ColoringIteration::PruneInterferenceGraph() {
1462 DCHECK(pruned_nodes_.empty()
1463 && simplify_worklist_.empty()
1464 && freeze_worklist_.empty()
1465 && spill_worklist_.empty());
1466 // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes,
1467 // and all others as high degree nodes. The distinction is important: low degree nodes are
1468 // guaranteed a color, while high degree nodes are not.
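  // For example, with num_regs_ == 4, a node with out-degree 3 is low degree: even if all three
  // neighbors end up in distinct registers, a fourth register remains for it. A node with
  // out-degree 4 has no such guarantee and therefore starts on the spill worklist.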
1469
1470 // Build worklists. Note that the coalesce worklist has already been
1471 // filled by FindCoalesceOpportunities().
1472 for (InterferenceNode* node : prunable_nodes_) {
1473 DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned";
1474 if (IsLowDegreeNode(node, num_regs_)) {
1475 if (node->GetCoalesceOpportunities().empty()) {
1476 // Simplify Worklist.
1477 node->stage = NodeStage::kSimplifyWorklist;
1478 simplify_worklist_.push_back(node);
1479 } else {
1480 // Freeze Worklist.
1481 node->stage = NodeStage::kFreezeWorklist;
1482 freeze_worklist_.push_back(node);
1483 }
1484 } else {
1485 // Spill worklist.
1486 node->stage = NodeStage::kSpillWorklist;
1487 spill_worklist_.push(node);
1488 }
1489 }
1490
1491 // Prune graph.
1492 // Note that we do not remove a node from its current worklist if it moves to another, so it may
1493 // be in multiple worklists at once; the node's `stage` says which worklist it is really in.
1494 while (true) {
1495 if (!simplify_worklist_.empty()) {
1496 // Prune low-degree nodes.
1497 // TODO: pop_back() should work as well, but it didn't; we get a
1498 // failed check while pruning. We should look into this.
1499 InterferenceNode* node = simplify_worklist_.front();
1500 simplify_worklist_.pop_front();
1501 DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list";
1502 DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree";
1503 DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related";
1504 PruneNode(node);
1505 } else if (!coalesce_worklist_.empty()) {
1506 // Coalesce.
1507 CoalesceOpportunity* opportunity = coalesce_worklist_.top();
1508 coalesce_worklist_.pop();
1509 if (opportunity->stage == CoalesceStage::kWorklist) {
1510 Coalesce(opportunity);
1511 }
1512 } else if (!freeze_worklist_.empty()) {
1513 // Freeze moves and prune a low-degree move-related node.
1514 InterferenceNode* node = freeze_worklist_.front();
1515 freeze_worklist_.pop_front();
1516 if (node->stage == NodeStage::kFreezeWorklist) {
1517 DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree";
1518 DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related";
1519 FreezeMoves(node);
1520 PruneNode(node);
1521 }
1522 } else if (!spill_worklist_.empty()) {
1523 // We pick the lowest-priority node as the spill candidate, because pruning a node earlier
1524 // means it is colored later, giving it a higher chance of actually being spilled.
1525 InterferenceNode* node = spill_worklist_.top();
1526 spill_worklist_.pop();
1527 if (node->stage == NodeStage::kSpillWorklist) {
1528 DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree";
1529 FreezeMoves(node);
1530 PruneNode(node);
1531 }
1532 } else {
1533 // Pruning complete.
1534 break;
1535 }
1536 }
1537 DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size());
1538 }
1539
1540 void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) {
1541 for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1542 if (opportunity->stage == CoalesceStage::kActive) {
1543 opportunity->stage = CoalesceStage::kWorklist;
1544 coalesce_worklist_.push(opportunity);
1545 }
1546 }
1547 }
1548
1549 void ColoringIteration::PruneNode(InterferenceNode* node) {
1550 DCHECK_NE(node->stage, NodeStage::kPruned);
1551 DCHECK(!node->IsPrecolored());
1552 node->stage = NodeStage::kPruned;
1553 pruned_nodes_.push(node);
1554
1555 for (InterferenceNode* adj : node->GetAdjacentNodes()) {
1556 DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes";
1557
1558 if (adj->IsPrecolored()) {
1559 // No effect on pre-colored nodes; they're never pruned.
1560 } else {
1561 // Remove the interference.
1562 bool was_high_degree = IsHighDegreeNode(adj, num_regs_);
1563 DCHECK(adj->ContainsInterference(node))
1564 << "Missing reflexive interference from non-fixed node";
1565 adj->RemoveInterference(node);
1566
1567 // Handle transitions from high degree to low degree.
1568 if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) {
1569 EnableCoalesceOpportunities(adj);
1570 for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) {
1571 EnableCoalesceOpportunities(adj_adj);
1572 }
1573
1574 DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist);
1575 if (adj->IsMoveRelated()) {
1576 adj->stage = NodeStage::kFreezeWorklist;
1577 freeze_worklist_.push_back(adj);
1578 } else {
1579 adj->stage = NodeStage::kSimplifyWorklist;
1580 simplify_worklist_.push_back(adj);
1581 }
1582 }
1583 }
1584 }
1585 }
1586
1587 void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) {
1588 if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) {
1589 DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist);
1590 node->stage = NodeStage::kSimplifyWorklist;
1591 simplify_worklist_.push_back(node);
1592 }
1593 }
1594
1595 void ColoringIteration::FreezeMoves(InterferenceNode* node) {
1596 for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1597 if (opportunity->stage == CoalesceStage::kDefunct) {
1598 // Constrained moves should remain constrained, since they will not be considered
1599 // during last-chance coalescing.
1600 } else {
1601 opportunity->stage = CoalesceStage::kInactive;
1602 }
1603 InterferenceNode* other = opportunity->node_a->GetAlias() == node
1604 ? opportunity->node_b->GetAlias()
1605 : opportunity->node_a->GetAlias();
1606 if (other != node && other->stage == NodeStage::kFreezeWorklist) {
1607 DCHECK(IsLowDegreeNode(node, num_regs_));
1608 CheckTransitionFromFreezeWorklist(other);
1609 }
1610 }
1611 }
1612
1613 bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from,
1614 InterferenceNode* into) {
1615 if (!into->IsPrecolored()) {
1616 // The uncolored heuristic will cover this case.
1617 return false;
1618 }
1619 if (from->IsPair() || into->IsPair()) {
1620 // TODO: Merging from a pair node is currently not supported, since fixed pair nodes
1621 // are currently represented as two single fixed nodes in the graph, and `into` is
1622 // only one of them. (We may lose the implicit connections to the second one in a merge.)
1623 return false;
1624 }
1625
1626 // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`.
1627 // Reasons an adjacent node `adj` can be "ok":
1628 // (1) If `adj` is low degree, interference with `into` will not affect its existing
1629 // colorable guarantee. (Notice that coalescing cannot increase its degree.)
1630 // (2) If `adj` is pre-colored, it already interferes with `into`. See (3).
1631 // (3) If there's already an interference with `into`, coalescing will not add interferences.
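  // Illustrative sketch (not part of the allocator), with num_regs_ == 4: if `from` has neighbors
  // {n1 with degree 2, n2 pre-colored, n3 already interfering with `into`}, every neighbor
  // satisfies one of (1)-(3), so merging `from` into `into` cannot make the graph harder to
  // color. This mirrors George's conservative coalescing test.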
1632 for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1633 if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) {
1634 // Ok.
1635 } else {
1636 return false;
1637 }
1638 }
1639 return true;
1640 }
1641
1642 bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from,
1643 InterferenceNode* into) {
1644 if (into->IsPrecolored()) {
1645 // The pre-colored heuristic will handle this case.
1646 return false;
1647 }
1648
1649 // Arbitrary cap to improve compile time. Tests show that this has a negligible effect
1650 // on generated code.
1651 if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) {
1652 return false;
1653 }
1654
1655 // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors
1656 // of high degree. (Low degree neighbors can be ignored, because they will eventually be
1657 // pruned from the interference graph in the simplify stage.)
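  // Illustrative sketch (not part of the allocator), with num_regs_ == 4 and unit edge weights:
  // if the merged node would have exactly three high-degree neighbors, then
  // high_degree_interferences == 3 < 4, so even after all low-degree neighbors are simplified
  // away, at most three registers can be blocked and a color is still available. This is the
  // Briggs conservative test, refined below to avoid double-counting shared neighbors.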
1658 size_t high_degree_interferences = 0;
1659 for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1660 if (IsHighDegreeNode(adj, num_regs_)) {
1661 high_degree_interferences += from->EdgeWeightWith(adj);
1662 }
1663 }
1664 for (InterferenceNode* adj : into->GetAdjacentNodes()) {
1665 if (IsHighDegreeNode(adj, num_regs_)) {
1666 if (from->ContainsInterference(adj)) {
1667 // We've already counted this adjacent node.
1668 // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that
1669 // we should not have counted it at all. (This extends the textbook Briggs coalescing test,
1670 // but remains conservative.)
1671 if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) {
1672 high_degree_interferences -= from->EdgeWeightWith(adj);
1673 }
1674 } else {
1675 high_degree_interferences += into->EdgeWeightWith(adj);
1676 }
1677 }
1678 }
1679
1680 return high_degree_interferences < num_regs_;
1681 }
1682
1683 void ColoringIteration::Combine(InterferenceNode* from,
1684 InterferenceNode* into) {
1685 from->SetAlias(into);
1686
1687 // Add interferences.
1688 for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1689 bool was_low_degree = IsLowDegreeNode(adj, num_regs_);
1690 AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false);
1691 if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) {
1692 // This is a (temporary) transition to a high degree node. Its degree will decrease again
1693 // when we prune `from`, but it's best to be consistent about the current worklist.
1694 adj->stage = NodeStage::kSpillWorklist;
1695 spill_worklist_.push(adj);
1696 }
1697 }
1698
1699 // Add coalesce opportunities.
1700 for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) {
1701 if (opportunity->stage != CoalesceStage::kDefunct) {
1702 into->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
1703 }
1704 }
1705 EnableCoalesceOpportunities(from);
1706
1707 // Prune and update worklists.
1708 PruneNode(from);
1709 if (IsLowDegreeNode(into, num_regs_)) {
1710 // Coalesce(...) takes care of checking for a transition to the simplify worklist.
1711 DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist);
1712 } else if (into->stage == NodeStage::kFreezeWorklist) {
1713 // This is a transition to a high degree node.
1714 into->stage = NodeStage::kSpillWorklist;
1715 spill_worklist_.push(into);
1716 } else {
1717 DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored);
1718 }
1719 }
1720
1721 void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) {
1722 InterferenceNode* from = opportunity->node_a->GetAlias();
1723 InterferenceNode* into = opportunity->node_b->GetAlias();
1724 DCHECK_NE(from->stage, NodeStage::kPruned);
1725 DCHECK_NE(into->stage, NodeStage::kPruned);
1726
1727 if (from->IsPrecolored()) {
1728 // If we have one pre-colored node, make sure it's the `into` node.
1729 std::swap(from, into);
1730 }
1731
1732 if (from == into) {
1733 // These nodes have already been coalesced.
1734 opportunity->stage = CoalesceStage::kDefunct;
1735 CheckTransitionFromFreezeWorklist(from);
1736 } else if (from->IsPrecolored() || from->ContainsInterference(into)) {
1737 // These nodes interfere.
1738 opportunity->stage = CoalesceStage::kDefunct;
1739 CheckTransitionFromFreezeWorklist(from);
1740 CheckTransitionFromFreezeWorklist(into);
1741 } else if (PrecoloredHeuristic(from, into)
1742 || UncoloredHeuristic(from, into)) {
1743 // We can coalesce these nodes.
1744 opportunity->stage = CoalesceStage::kDefunct;
1745 Combine(from, into);
1746 CheckTransitionFromFreezeWorklist(into);
1747 } else {
1748 // We cannot coalesce, but we may be able to later.
1749 opportunity->stage = CoalesceStage::kActive;
1750 }
1751 }
1752
1753 // Build a mask with a bit set for each register assigned to some
1754 // interval in `intervals`.
1755 template <typename Container>
1756 static std::bitset<kMaxNumRegs> BuildConflictMask(const Container& intervals) {
1757 std::bitset<kMaxNumRegs> conflict_mask;
1758 for (InterferenceNode* adjacent : intervals) {
1759 LiveInterval* conflicting = adjacent->GetInterval();
1760 if (conflicting->HasRegister()) {
1761 conflict_mask.set(conflicting->GetRegister());
1762 if (conflicting->HasHighInterval()) {
1763 DCHECK(conflicting->GetHighInterval()->HasRegister());
1764 conflict_mask.set(conflicting->GetHighInterval()->GetRegister());
1765 }
1766 } else {
1767 DCHECK(!conflicting->HasHighInterval()
1768 || !conflicting->GetHighInterval()->HasRegister());
1769 }
1770 }
1771 return conflict_mask;
1772 }
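// Illustrative sketch (not part of the allocator): if the adjacent nodes currently hold register
// 0 and a pair in registers {3, 4}, the returned mask has bits {0, 3, 4} set, so the lowest free
// register is 1.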
1773
1774 bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
1775 return processing_core_regs
1776 ? !codegen_->IsCoreCalleeSaveRegister(reg)
1777 : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
1778 }
1779
1780 static bool RegisterIsAligned(size_t reg) {
1781 return reg % 2 == 0;
1782 }
1783
1784 static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) {
1785 // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit.
1786 // Note that CTZ is undefined if all bits are 0, so we special-case it.
1787 return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
1788 }
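// Illustrative usage sketch (not part of the allocator):
//   std::bitset<kMaxNumRegs> mask;
//   mask.set(0);
//   mask.set(1);
//   size_t reg = FindFirstZeroInConflictMask(mask);  // reg == 2
// If every bit is set, the function returns conflict_mask.size() (i.e. kMaxNumRegs), which the
// caller below treats as "no free register".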
1789
1790 bool ColoringIteration::ColorInterferenceGraph() {
1791 DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small";
1792 ScopedArenaVector<LiveInterval*> colored_intervals(
1793 allocator_->Adapter(kArenaAllocRegisterAllocator));
1794 bool successful = true;
1795
1796 while (!pruned_nodes_.empty()) {
1797 InterferenceNode* node = pruned_nodes_.top();
1798 pruned_nodes_.pop();
1799 LiveInterval* interval = node->GetInterval();
1800 size_t reg = 0;
1801
1802 InterferenceNode* alias = node->GetAlias();
1803 if (alias != node) {
1804 // This node was coalesced with another.
1805 LiveInterval* alias_interval = alias->GetInterval();
1806 if (alias_interval->HasRegister()) {
1807 reg = alias_interval->GetRegister();
1808 DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg])
1809 << "This node conflicts with the register it was coalesced with";
1810 } else {
1811 DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " "
1812 << "Move coalescing was not conservative, causing a node to be coalesced "
1813 << "with another node that could not be colored";
1814 if (interval->RequiresRegister()) {
1815 successful = false;
1816 }
1817 }
1818 } else {
1819 // Search for free register(s).
1820 std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes());
1821 if (interval->HasHighInterval()) {
1822 // Note that the graph coloring allocator assumes that pair intervals are aligned here,
1823 // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we
1824 // change the alignment requirements here, we will have to update the algorithm (e.g.,
1825 // be more conservative about the weight of edges adjacent to pair nodes).
1826 while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) {
1827 reg += 2;
1828 }
1829
1830 // Try to use a caller-save register first.
1831 for (size_t i = 0; i < num_regs_ - 1; i += 2) {
1832 bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_);
1833 bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_);
1834 if (!conflict_mask[i] && !conflict_mask[i + 1]) {
1835 if (low_caller_save && high_caller_save) {
1836 reg = i;
1837 break;
1838 } else if (low_caller_save || high_caller_save) {
1839 reg = i;
1840 // Keep looking to try to get both parts in caller-save registers.
1841 }
1842 }
1843 }
1844 } else {
1845 // Not a pair interval.
1846 reg = FindFirstZeroInConflictMask(conflict_mask);
1847
1848 // Try to use caller-save registers first.
1849 for (size_t i = 0; i < num_regs_; ++i) {
1850 if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) {
1851 reg = i;
1852 break;
1853 }
1854 }
1855 }
1856
1857 // Last-chance coalescing.
1858 for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1859 if (opportunity->stage == CoalesceStage::kDefunct) {
1860 continue;
1861 }
1862 LiveInterval* other_interval = opportunity->node_a->GetAlias() == node
1863 ? opportunity->node_b->GetAlias()->GetInterval()
1864 : opportunity->node_a->GetAlias()->GetInterval();
1865 if (other_interval->HasRegister()) {
1866 size_t coalesce_register = other_interval->GetRegister();
1867 if (interval->HasHighInterval()) {
1868 if (!conflict_mask[coalesce_register] &&
1869 !conflict_mask[coalesce_register + 1] &&
1870 RegisterIsAligned(coalesce_register)) {
1871 reg = coalesce_register;
1872 break;
1873 }
1874 } else if (!conflict_mask[coalesce_register]) {
1875 reg = coalesce_register;
1876 break;
1877 }
1878 }
1879 }
1880 }
1881
1882 if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) {
1883 // Assign register.
1884 DCHECK(!interval->HasRegister());
1885 interval->SetRegister(reg);
1886 colored_intervals.push_back(interval);
1887 if (interval->HasHighInterval()) {
1888 DCHECK(!interval->GetHighInterval()->HasRegister());
1889 interval->GetHighInterval()->SetRegister(reg + 1);
1890 colored_intervals.push_back(interval->GetHighInterval());
1891 }
1892 } else if (interval->RequiresRegister()) {
1893 // The interference graph is too dense to color. Make it sparser by
1894 // splitting this live interval.
1895 successful = false;
1896 register_allocator_->SplitAtRegisterUses(interval);
1897 // We continue coloring, because there may be additional intervals that cannot
1898 // be colored, and that we should split.
1899 } else {
1900 // Spill.
1901 node->SetNeedsSpillSlot();
1902 }
1903 }
1904
1905 // If unsuccessful, reset all register assignments.
1906 if (!successful) {
1907 for (LiveInterval* interval : colored_intervals) {
1908 interval->ClearRegister();
1909 }
1910 }
1911
1912 return successful;
1913 }
1914
1915 void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes) {
1916 // The register allocation resolver will organize the stack based on value type,
1917 // so we assign stack slots for each value type separately.
1918 ScopedArenaAllocator allocator(allocator_->GetArenaStack());
1919 ScopedArenaAllocatorAdapter<void> adapter = allocator.Adapter(kArenaAllocRegisterAllocator);
1920 ScopedArenaVector<LiveInterval*> double_intervals(adapter);
1921 ScopedArenaVector<LiveInterval*> long_intervals(adapter);
1922 ScopedArenaVector<LiveInterval*> float_intervals(adapter);
1923 ScopedArenaVector<LiveInterval*> int_intervals(adapter);
1924
1925 // The set of parent intervals already handled.
1926 ScopedArenaSet<LiveInterval*> seen(adapter);
1927
1928 // Find nodes that need spill slots.
1929 for (InterferenceNode* node : nodes) {
1930 if (!node->NeedsSpillSlot()) {
1931 continue;
1932 }
1933
1934 LiveInterval* parent = node->GetInterval()->GetParent();
1935 if (seen.find(parent) != seen.end()) {
1936 // We've already handled this interval.
1937 // This can happen if multiple siblings of the same interval request a stack slot.
1938 continue;
1939 }
1940 seen.insert(parent);
1941
1942 HInstruction* defined_by = parent->GetDefinedBy();
1943 if (parent->HasSpillSlot()) {
1944 // We already have a spill slot for this value that we can reuse.
1945 } else if (defined_by->IsParameterValue()) {
1946 // Parameters already have a stack slot.
1947 parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
1948 } else if (defined_by->IsCurrentMethod()) {
1949 // The current method is always at stack slot 0.
1950 parent->SetSpillSlot(0);
1951 } else if (defined_by->IsConstant()) {
1952 // Constants don't need a spill slot.
1953 } else {
1954 // We need to find a spill slot for this interval. Place it in the correct
1955 // worklist to be processed later.
1956 switch (node->GetInterval()->GetType()) {
1957 case DataType::Type::kFloat64:
1958 double_intervals.push_back(parent);
1959 break;
1960 case DataType::Type::kInt64:
1961 long_intervals.push_back(parent);
1962 break;
1963 case DataType::Type::kFloat32:
1964 float_intervals.push_back(parent);
1965 break;
1966 case DataType::Type::kReference:
1967 case DataType::Type::kInt32:
1968 case DataType::Type::kUint16:
1969 case DataType::Type::kUint8:
1970 case DataType::Type::kInt8:
1971 case DataType::Type::kBool:
1972 case DataType::Type::kInt16:
1973 int_intervals.push_back(parent);
1974 break;
1975 case DataType::Type::kUint32:
1976 case DataType::Type::kUint64:
1977 case DataType::Type::kVoid:
1978 LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType();
1979 UNREACHABLE();
1980 }
1981 }
1982 }
1983
1984 // Color spill slots for each value type.
1985 ColorSpillSlots(ArrayRef<LiveInterval* const>(double_intervals), &num_double_spill_slots_);
1986 ColorSpillSlots(ArrayRef<LiveInterval* const>(long_intervals), &num_long_spill_slots_);
1987 ColorSpillSlots(ArrayRef<LiveInterval* const>(float_intervals), &num_float_spill_slots_);
1988 ColorSpillSlots(ArrayRef<LiveInterval* const>(int_intervals), &num_int_spill_slots_);
1989 }
1990
1991 void RegisterAllocatorGraphColor::ColorSpillSlots(ArrayRef<LiveInterval* const> intervals,
1992 /* out */ size_t* num_stack_slots_used) {
1993 // We cannot use the original interference graph here because spill slots are assigned to
1994 // all of the siblings of an interval, whereas an interference node represents only a single
1995 // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints
1996 // by position, and assigning the lowest spill slot available when we encounter an interval
1997 // beginning. We ignore lifetime holes for simplicity.
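  // Illustrative sketch (not part of the allocator): with parent intervals A spanning [0, 10),
  // B spanning [4, 20) and C spanning [12, 30), each needing one slot, the sweep gives slot 0 to
  // A, slot 1 to B (slot 0 is taken at position 4), and then reuses slot 0 for C, since A has
  // already ended by position 12; only two stack slots are used in total.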
1998 ScopedArenaAllocator allocator(allocator_->GetArenaStack());
1999 ScopedArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
2000 allocator.Adapter(kArenaAllocRegisterAllocator));
2001
2002 for (LiveInterval* parent_interval : intervals) {
2003 DCHECK(parent_interval->IsParent());
2004 DCHECK(!parent_interval->HasSpillSlot());
2005 size_t start = parent_interval->GetStart();
2006 size_t end = parent_interval->GetLastSibling()->GetEnd();
2007 DCHECK_LT(start, end);
2008 interval_endpoints.push_back(std::make_tuple(start, true, parent_interval));
2009 interval_endpoints.push_back(std::make_tuple(end, false, parent_interval));
2010 }
2011
2012 // Sort by position.
2013 // We explicitly ignore the third entry of each tuple (the interval pointer) in order
2014 // to maintain determinism.
2015 std::sort(interval_endpoints.begin(), interval_endpoints.end(),
2016 [] (const std::tuple<size_t, bool, LiveInterval*>& lhs,
2017 const std::tuple<size_t, bool, LiveInterval*>& rhs) {
2018 return std::tie(std::get<0>(lhs), std::get<1>(lhs))
2019 < std::tie(std::get<0>(rhs), std::get<1>(rhs));
2020 });
2021
2022 ArenaBitVector taken(&allocator, 0, true, kArenaAllocRegisterAllocator);
2023 for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) {
2024 // Extract information from the current tuple.
2025 LiveInterval* parent_interval;
2026 bool is_interval_beginning;
2027 size_t position;
2028 std::tie(position, is_interval_beginning, parent_interval) = *it;
2029 size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
2030
2031 if (is_interval_beginning) {
2032 DCHECK(!parent_interval->HasSpillSlot());
2033 DCHECK_EQ(position, parent_interval->GetStart());
2034
2035 // Find first available free stack slot(s).
2036 size_t slot = 0;
2037 for (; ; ++slot) {
2038 bool found = true;
2039 for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2040 if (taken.IsBitSet(s)) {
2041 found = false;
2042 break; // failure
2043 }
2044 }
2045 if (found) {
2046 break; // success
2047 }
2048 }
2049
2050 parent_interval->SetSpillSlot(slot);
2051
2052 *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
2053 if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
2054 // The parallel move resolver requires that there be an even number of spill slots
2055 // allocated for pair value types.
2056 ++(*num_stack_slots_used);
2057 }
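      // For example, if a pair value lands in slots {3, 4}, *num_stack_slots_used becomes 5;
      // the increment above bumps it to 6 so the total stays even.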
2058
2059 for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2060 taken.SetBit(s);
2061 }
2062 } else {
2063 DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
2064 DCHECK(parent_interval->HasSpillSlot());
2065
2066 // Free up the stack slot(s) used by this interval.
2067 size_t slot = parent_interval->GetSpillSlot();
2068 for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2069 DCHECK(taken.IsBitSet(s));
2070 taken.ClearBit(s);
2071 }
2072 }
2073 }
2074 DCHECK_EQ(taken.NumSetBits(), 0u);
2075 }
2076
2077 } // namespace art
2078