1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "register_allocator_graph_color.h"
18 
19 #include "code_generator.h"
20 #include "linear_order.h"
21 #include "register_allocation_resolver.h"
22 #include "ssa_liveness_analysis.h"
23 #include "thread-current-inl.h"
24 
25 namespace art {
26 
27 // Highest number of registers that we support for any platform. This can be used for std::bitset,
28 // for example, which needs to know its size at compile time.
29 static constexpr size_t kMaxNumRegs = 32;
30 
31 // The maximum number of graph coloring attempts before triggering a DCHECK.
32 // This is meant to catch changes to the graph coloring algorithm that undermine its forward
33 // progress guarantees. Forward progress for the algorithm means splitting live intervals on
34 // every graph coloring attempt so that eventually the interference graph will be sparse enough
35 // to color. The main threat to forward progress is trying to split short intervals which cannot be
36 // split further; this could cause infinite looping because the interference graph would never
37 // change. This is avoided by prioritizing short intervals before long ones, so that long
38 // intervals are split when coloring fails.
39 static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;
40 
41 // We always want to avoid spilling inside loops.
42 static constexpr size_t kLoopSpillWeightMultiplier = 10;
43 
44 // If we avoid moves in single jump blocks, we can avoid jumps to jumps.
45 static constexpr size_t kSingleJumpBlockWeightMultiplier = 2;
46 
47 // We avoid moves in blocks that dominate the exit block, since these blocks will
48 // be executed on every path through the method.
49 static constexpr size_t kDominatesExitBlockWeightMultiplier = 2;
50 
51 enum class CoalesceKind {
52   kAdjacentSibling,       // Prevents moves at interval split points.
53   kFixedOutputSibling,    // Prevents moves from a fixed output location.
54   kFixedInput,            // Prevents moves into a fixed input location.
55   kNonlinearControlFlow,  // Prevents moves between blocks.
56   kPhi,                   // Prevents phi resolution moves.
57   kFirstInput,            // Prevents a single input move.
58   kAnyInput,              // May lead to better instruction selection / smaller encodings.
59 };
60 
61 std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) {
62   return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind);
63 }
64 
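// Returns the loop nesting depth of `block`, counting enclosing loops by walking out through
// the loop pre-headers.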
65 static size_t LoopDepthAt(HBasicBlock* block) {
66   HLoopInformation* loop_info = block->GetLoopInformation();
67   size_t depth = 0;
68   while (loop_info != nullptr) {
69     ++depth;
70     loop_info = loop_info->GetPreHeader()->GetLoopInformation();
71   }
72   return depth;
73 }
74 
75 // Return the runtime cost of inserting a move instruction at the specified location.
76 static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) {
77   HBasicBlock* block = liveness.GetBlockFromPosition(position / 2);
78   DCHECK(block != nullptr);
79   size_t cost = 1;
80   if (block->IsSingleJump()) {
81     cost *= kSingleJumpBlockWeightMultiplier;
82   }
83   if (block->Dominates(block->GetGraph()->GetExitBlock())) {
84     cost *= kDominatesExitBlockWeightMultiplier;
85   }
86   for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) {
87     cost *= kLoopSpillWeightMultiplier;
88   }
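  // Note that the multipliers compound: for example, a move in a single-jump block nested
  // inside two loops (and not dominating the exit block) costs 1 * 2 * 10 * 10 = 200.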
89   return cost;
90 }
91 
92 // In general, we estimate coalesce priority by whether it will definitely avoid a move,
93 // and by how likely it is to create an interference graph that's harder to color.
94 static size_t ComputeCoalescePriority(CoalesceKind kind,
95                                       size_t position,
96                                       const SsaLivenessAnalysis& liveness) {
97   if (kind == CoalesceKind::kAnyInput) {
98     // This type of coalescing can affect instruction selection, but not moves, so we
99     // give it the lowest priority.
100     return 0;
101   } else {
102     return CostForMoveAt(position, liveness);
103   }
104 }
105 
106 enum class CoalesceStage {
107   kWorklist,  // Currently in the iterative coalescing worklist.
108   kActive,    // Not in a worklist, but could be considered again during iterative coalescing.
109   kInactive,  // No longer considered until last-chance coalescing.
110   kDefunct,   // Either the two nodes interfere, or have already been coalesced.
111 };
112 
113 std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) {
114   return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage);
115 }
116 
117 // Represents a coalesce opportunity between two nodes.
118 struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> {
119   CoalesceOpportunity(InterferenceNode* a,
120                       InterferenceNode* b,
121                       CoalesceKind kind,
122                       size_t position,
123                       const SsaLivenessAnalysis& liveness)
124         : node_a(a),
125           node_b(b),
126           stage(CoalesceStage::kWorklist),
127           priority(ComputeCoalescePriority(kind, position, liveness)) {}
128 
129   // Compare two coalesce opportunities based on their priority.
130   // Return true if lhs has a lower priority than that of rhs.
131   static bool CmpPriority(const CoalesceOpportunity* lhs,
132                           const CoalesceOpportunity* rhs) {
133     return lhs->priority < rhs->priority;
134   }
135 
136   InterferenceNode* const node_a;
137   InterferenceNode* const node_b;
138 
139   // The current stage of this coalesce opportunity, indicating whether it is in a worklist,
140   // and whether it should still be considered.
141   CoalesceStage stage;
142 
143   // The priority of this coalesce opportunity, based on heuristics.
144   const size_t priority;
145 };
146 
147 enum class NodeStage {
148   kInitial,           // Uninitialized.
149   kPrecolored,        // Marks fixed nodes.
150   kSafepoint,         // Marks safepoint nodes.
151   kPrunable,          // Marks uncolored nodes in the interference graph.
152   kSimplifyWorklist,  // Marks non-move-related nodes with degree less than the number of registers.
153   kFreezeWorklist,    // Marks move-related nodes with degree less than the number of registers.
154   kSpillWorklist,     // Marks nodes with degree greater or equal to the number of registers.
155   kPruned             // Marks nodes already pruned from the interference graph.
156 };
157 
158 std::ostream& operator<<(std::ostream& os, const NodeStage& stage) {
159   return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage);
160 }
161 
162 // Returns the estimated cost of spilling a particular live interval.
163 static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) {
164   if (interval->HasRegister()) {
165     // Intervals with a fixed register cannot be spilled.
166     return std::numeric_limits<float>::min();
167   }
168 
169   size_t length = interval->GetLength();
170   if (length == 1) {
171     // Tiny intervals should have maximum priority, since they cannot be split any further.
172     return std::numeric_limits<float>::max();
173   }
174 
175   size_t use_weight = 0;
176   if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) {
177     // Cost for spilling at a register definition point.
178     use_weight += CostForMoveAt(interval->GetStart() + 1, liveness);
179   }
180 
181   // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e.
182   // [interval->GetStart() + 1, interval->GetEnd() + 1)
183   auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
184                                                  interval->GetUses().end(),
185                                                  interval->GetStart() + 1u,
186                                                  interval->GetEnd() + 1u);
187   for (const UsePosition& use : matching_use_range) {
188     if (use.GetUser() != nullptr && use.RequiresRegister()) {
189       // Cost for spilling at a register use point.
190       use_weight += CostForMoveAt(use.GetUser()->GetLifetimePosition() - 1, liveness);
191     }
192   }
193 
194   // We divide by the length of the interval because we want to prioritize
195   // short intervals; we do not benefit much if we split them further.
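  // For example, two register uses costing 10 each over an interval of length 20 give a spill
  // weight of (10 + 10) / 20 = 1.0; the same uses over a length of 100 give only 0.2.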
196   return static_cast<float>(use_weight) / static_cast<float>(length);
197 }
198 
199 // Interference nodes make up the interference graph, which is the primary data structure in
200 // graph coloring register allocation. Each node represents a single live interval, and contains
201 // a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory,
202 // pre-colored nodes never contain outgoing edges (only incoming ones).
203 //
204 // As nodes are pruned from the interference graph, incoming edges of the pruned node are removed,
205 // but outgoing edges remain in order to later color the node based on the colors of its neighbors.
206 //
207 // Note that a pair interval is represented by a single node in the interference graph, which
208 // essentially requires two colors. One consequence of this is that the degree of a node is not
209 // necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum
210 // number of colors with which a node could interfere. We model this by giving edges different
211 // weights (1 or 2) to control how much they increase the degree of adjacent nodes.
212 // For example, the edge between two single nodes will have weight 1. On the other hand,
213 // the edge between a single node and a pair node will have weight 2. This is because the pair
214 // node could block up to two colors for the single node, and because the single node could
215 // block an entire two-register aligned slot for the pair node.
216 // The degree is defined this way because we use it to decide whether a node is guaranteed a color,
217 // and thus whether it is safe to prune it from the interference graph early on.
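// For example, a single-register node adjacent to two other single nodes and one pair node has
// degree 1 + 1 + 2 = 4, even though it has only three neighbors.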
218 class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
219  public:
220   InterferenceNode(LiveInterval* interval,
221                    const SsaLivenessAnalysis& liveness)
222         : stage(NodeStage::kInitial),
223           interval_(interval),
224           adjacent_nodes_(nullptr),
225           coalesce_opportunities_(nullptr),
226           out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0),
227           alias_(this),
228           spill_weight_(ComputeSpillWeight(interval, liveness)),
229           requires_color_(interval->RequiresRegister()),
230           needs_spill_slot_(false) {
231     DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval";
232   }
233 
234   void AddInterference(InterferenceNode* other,
235                        bool guaranteed_not_interfering_yet,
236                        ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>>* storage) {
237     DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences";
238     DCHECK_NE(this, other) << "Should not create self loops in the interference graph";
239     DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another";
240     DCHECK_NE(stage, NodeStage::kPruned);
241     DCHECK_NE(other->stage, NodeStage::kPruned);
242     if (adjacent_nodes_ == nullptr) {
243       ScopedArenaVector<InterferenceNode*>::allocator_type adapter(storage->get_allocator());
244       storage->emplace_back(adapter);
245       adjacent_nodes_ = &storage->back();
246     }
247     if (guaranteed_not_interfering_yet) {
248       DCHECK(!ContainsElement(GetAdjacentNodes(), other));
249       adjacent_nodes_->push_back(other);
250       out_degree_ += EdgeWeightWith(other);
251     } else {
252       if (!ContainsElement(GetAdjacentNodes(), other)) {
253         adjacent_nodes_->push_back(other);
254         out_degree_ += EdgeWeightWith(other);
255       }
256     }
257   }
258 
259   void RemoveInterference(InterferenceNode* other) {
260     DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node";
261     DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning";
262     if (adjacent_nodes_ != nullptr) {
263       auto it = std::find(adjacent_nodes_->begin(), adjacent_nodes_->end(), other);
264       if (it != adjacent_nodes_->end()) {
265         adjacent_nodes_->erase(it);
266         out_degree_ -= EdgeWeightWith(other);
267       }
268     }
269   }
270 
271   bool ContainsInterference(InterferenceNode* other) const {
272     DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences";
273     DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences";
274     return ContainsElement(GetAdjacentNodes(), other);
275   }
276 
277   LiveInterval* GetInterval() const {
278     return interval_;
279   }
280 
281   ArrayRef<InterferenceNode*> GetAdjacentNodes() const {
282     return adjacent_nodes_ != nullptr
283         ? ArrayRef<InterferenceNode*>(*adjacent_nodes_)
284         : ArrayRef<InterferenceNode*>();
285   }
286 
287   size_t GetOutDegree() const {
288     // Pre-colored nodes have infinite degree.
289     DCHECK(!IsPrecolored() || out_degree_ == std::numeric_limits<size_t>::max());
290     return out_degree_;
291   }
292 
293   void AddCoalesceOpportunity(CoalesceOpportunity* opportunity,
294                               ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>>* storage) {
295     if (coalesce_opportunities_ == nullptr) {
296       ScopedArenaVector<CoalesceOpportunity*>::allocator_type adapter(storage->get_allocator());
297       storage->emplace_back(adapter);
298       coalesce_opportunities_ = &storage->back();
299     }
300     coalesce_opportunities_->push_back(opportunity);
301   }
302 
303   void ClearCoalesceOpportunities() {
304     coalesce_opportunities_ = nullptr;
305   }
306 
307   bool IsMoveRelated() const {
308     for (CoalesceOpportunity* opportunity : GetCoalesceOpportunities()) {
309       if (opportunity->stage == CoalesceStage::kWorklist ||
310           opportunity->stage == CoalesceStage::kActive) {
311         return true;
312       }
313     }
314     return false;
315   }
316 
317   // Return whether this node already has a color.
318   // Used to find fixed nodes in the interference graph before coloring.
319   bool IsPrecolored() const {
320     return interval_->HasRegister();
321   }
322 
323   bool IsPair() const {
324     return interval_->HasHighInterval();
325   }
326 
327   void SetAlias(InterferenceNode* rep) {
328     DCHECK_NE(rep->stage, NodeStage::kPruned);
329     DCHECK_EQ(this, alias_) << "Should only set a node's alias once";
330     alias_ = rep;
331   }
332 
333   InterferenceNode* GetAlias() {
334     if (alias_ != this) {
335       // Recurse in order to flatten tree of alias pointers.
336       alias_ = alias_->GetAlias();
337     }
338     return alias_;
339   }
340 
341   ArrayRef<CoalesceOpportunity*> GetCoalesceOpportunities() const {
342     return coalesce_opportunities_ != nullptr
343         ? ArrayRef<CoalesceOpportunity*>(*coalesce_opportunities_)
344         : ArrayRef<CoalesceOpportunity*>();
345   }
346 
347   float GetSpillWeight() const {
348     return spill_weight_;
349   }
350 
351   bool RequiresColor() const {
352     return requires_color_;
353   }
354 
355   // We give extra weight to edges adjacent to pair nodes. See the general comment on the
356   // interference graph above.
357   size_t EdgeWeightWith(const InterferenceNode* other) const {
358     return (IsPair() || other->IsPair()) ? 2 : 1;
359   }
360 
361   bool NeedsSpillSlot() const {
362     return needs_spill_slot_;
363   }
364 
365   void SetNeedsSpillSlot() {
366     needs_spill_slot_ = true;
367   }
368 
369   // The current stage of this node, indicating which worklist it belongs to.
370   NodeStage stage;
371 
372  private:
373   // The live interval that this node represents.
374   LiveInterval* const interval_;
375 
376   // All nodes interfering with this one.
377   // We use an unsorted vector as a set, since a tree or hash set is too heavy for the
378   // set sizes that we encounter. Using a vector leads to much better performance.
379   ScopedArenaVector<InterferenceNode*>* adjacent_nodes_;  // Owned by ColoringIteration.
380 
381   // Interference nodes that this node should be coalesced with to reduce moves.
382   ScopedArenaVector<CoalesceOpportunity*>* coalesce_opportunities_;  // Owned by ColoringIteration.
383 
384   // The maximum number of colors with which this node could interfere. This could be more than
385   // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes.
386   // We use "out" degree because incoming edges come from nodes already pruned from the graph,
387   // and do not affect the coloring of this node.
388   // Pre-colored nodes are treated as having infinite degree.
389   size_t out_degree_;
390 
391   // The node representing this node in the interference graph.
392   // Initially set to `this`, and only changed if this node is coalesced into another.
393   InterferenceNode* alias_;
394 
395   // The cost of splitting and spilling this interval to the stack.
396   // Nodes with a higher spill weight should be prioritized when assigning registers.
397   // This is essentially based on use density and location; short intervals with many uses inside
398   // deeply nested loops have a high spill weight.
399   const float spill_weight_;
400 
401   const bool requires_color_;
402 
403   bool needs_spill_slot_;
404 
405   DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
406 };
407 
408 // The order in which we color nodes is important. To guarantee forward progress,
409 // we prioritize intervals that require registers, and after that we prioritize
410 // short intervals. That way, if we fail to color a node, it either won't require a
411 // register, or it will be a long interval that can be split in order to make the
412 // interference graph sparser.
413 // To improve code quality, we prioritize intervals used frequently in deeply nested loops.
414 // (This metric is secondary to the forward progress requirements above.)
415 // TODO: May also want to consider:
416 // - Constants (since they can be rematerialized)
417 // - Allocated spill slots
418 static bool HasGreaterNodePriority(const InterferenceNode* lhs,
419                                    const InterferenceNode* rhs) {
420   // (1) Prioritize the node that requires a color.
421   if (lhs->RequiresColor() != rhs->RequiresColor()) {
422     return lhs->RequiresColor();
423   }
424 
425   // (2) Prioritize the interval that has a higher spill weight.
426   return lhs->GetSpillWeight() > rhs->GetSpillWeight();
427 }
428 
429 // A ColoringIteration holds the many data structures needed for a single graph coloring attempt,
430 // and provides methods for each phase of the attempt.
431 class ColoringIteration {
432  public:
433   ColoringIteration(RegisterAllocatorGraphColor* register_allocator,
434                     ScopedArenaAllocator* allocator,
435                     bool processing_core_regs,
436                     size_t num_regs)
437         : register_allocator_(register_allocator),
438           allocator_(allocator),
439           processing_core_regs_(processing_core_regs),
440           num_regs_(num_regs),
441           interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)),
442           prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
443           pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
444           simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
445           freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
446           spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)),
447           coalesce_worklist_(CoalesceOpportunity::CmpPriority,
448                              allocator->Adapter(kArenaAllocRegisterAllocator)),
449           adjacent_nodes_links_(allocator->Adapter(kArenaAllocRegisterAllocator)),
450           coalesce_opportunities_links_(allocator->Adapter(kArenaAllocRegisterAllocator)) {}
451 
452   // Use the intervals collected from instructions to construct an
453   // interference graph mapping intervals to adjacency lists.
454   // Also, collect synthesized safepoint nodes, used to keep
455   // track of live intervals across safepoints.
456   // TODO: Should build safepoints elsewhere.
457   void BuildInterferenceGraph(const ScopedArenaVector<LiveInterval*>& intervals,
458                               const ScopedArenaVector<InterferenceNode*>& physical_nodes);
459 
460   // Add coalesce opportunities to interference nodes.
461   void FindCoalesceOpportunities();
462 
463   // Prune nodes from the interference graph to be colored later. Build
464   // a stack (pruned_nodes) containing these intervals in an order determined
465   // by various heuristics.
466   void PruneInterferenceGraph();
467 
468   // Process the pruned_nodes_ stack to color the interference graph, spilling when
469   // necessary. Returns true if successful. Else, some intervals have been
470   // split, and the interference graph should be rebuilt for another attempt.
471   bool ColorInterferenceGraph();
472 
473   // Return prunable nodes.
474   // The register allocator will need to access prunable nodes after coloring
475   // in order to tell the code generator which registers have been assigned.
476   ArrayRef<InterferenceNode* const> GetPrunableNodes() const {
477     return ArrayRef<InterferenceNode* const>(prunable_nodes_);
478   }
479 
480  private:
481   // Create a coalesce opportunity between two nodes.
482   void CreateCoalesceOpportunity(InterferenceNode* a,
483                                  InterferenceNode* b,
484                                  CoalesceKind kind,
485                                  size_t position);
486 
487   // Add an edge in the interference graph, if valid.
488   // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
489   // when possible.
490   void AddPotentialInterference(InterferenceNode* from,
491                                 InterferenceNode* to,
492                                 bool guaranteed_not_interfering_yet,
493                                 bool both_directions = true);
494 
495   // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
496   // may be pruned from the interference graph.
497   void FreezeMoves(InterferenceNode* node);
498 
499   // Prune a node from the interference graph, updating worklists if necessary.
500   void PruneNode(InterferenceNode* node);
501 
502   // Add coalesce opportunities associated with this node to the coalesce worklist.
503   void EnableCoalesceOpportunities(InterferenceNode* node);
504 
505   // If needed, move `node` from the freeze worklist to the simplify worklist.
506   void CheckTransitionFromFreezeWorklist(InterferenceNode* node);
507 
508   // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
509   bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
510 
511   // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
512   bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
513 
514   void Coalesce(CoalesceOpportunity* opportunity);
515 
516   // Merge `from` into `into` in the interference graph.
517   void Combine(InterferenceNode* from, InterferenceNode* into);
518 
519   // A reference to the register allocator instance,
520   // needed to split intervals and assign spill slots.
521   RegisterAllocatorGraphColor* register_allocator_;
522 
523   // A scoped arena allocator used for a single graph coloring attempt.
524   ScopedArenaAllocator* allocator_;
525 
526   const bool processing_core_regs_;
527 
528   const size_t num_regs_;
529 
530   // A map from live intervals to interference nodes.
531   ScopedArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;
532 
533   // Uncolored nodes that should be pruned from the interference graph.
534   ScopedArenaVector<InterferenceNode*> prunable_nodes_;
535 
536   // A stack of nodes pruned from the interference graph, waiting to be colored.
537   ScopedArenaStdStack<InterferenceNode*> pruned_nodes_;
538 
539   // A queue containing low degree, non-move-related nodes that can be pruned immediately.
540   ScopedArenaDeque<InterferenceNode*> simplify_worklist_;
541 
542   // A queue containing low degree, move-related nodes.
543   ScopedArenaDeque<InterferenceNode*> freeze_worklist_;
544 
545   // A queue containing high degree nodes.
546   // If we have to prune from the spill worklist, we cannot guarantee
547   // the pruned node a color, so we order the worklist by priority.
548   ScopedArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;
549 
550   // A queue containing coalesce opportunities.
551   // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
552   // inside of loops) are more important than others.
553   ScopedArenaPriorityQueue<CoalesceOpportunity*,
554                            decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_;
555 
556   // Storage for links to adjacent nodes for interference nodes.
557   // Using std::deque so that elements do not move when adding new ones.
558   ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>> adjacent_nodes_links_;
559 
560   // Storage for links to coalesce opportunities for interference nodes.
561   // Using std::deque so that elements do not move when adding new ones.
562   ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>> coalesce_opportunities_links_;
563 
564   DISALLOW_COPY_AND_ASSIGN(ColoringIteration);
565 };
566 
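// Returns true if `interval` should be allocated a core (non-floating-point) register.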
567 static bool IsCoreInterval(LiveInterval* interval) {
568   return !DataType::IsFloatingPointType(interval->GetType());
569 }
570 
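// Returns the number of vreg-sized stack slots needed to hold an ArtMethod pointer for the
// target instruction set (e.g., two slots for a 64-bit pointer and 32-bit vregs).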
571 static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
572   return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
573 }
574 
575 RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator,
576                                                          CodeGenerator* codegen,
577                                                          const SsaLivenessAnalysis& liveness,
578                                                          bool iterative_move_coalescing)
579       : RegisterAllocator(allocator, codegen, liveness),
580         iterative_move_coalescing_(iterative_move_coalescing),
581         core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
582         fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
583         temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
584         safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
585         physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
586         physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
587         num_int_spill_slots_(0),
588         num_double_spill_slots_(0),
589         num_float_spill_slots_(0),
590         num_long_spill_slots_(0),
591         catch_phi_spill_slot_counter_(0),
592         reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
593         reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) {
594   // Before we ask for blocked registers, set them up in the code generator.
595   codegen->SetupBlockedRegisters();
596 
597   // Initialize physical core register live intervals and blocked registers.
598   // This includes globally blocked registers, such as the stack pointer.
599   physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr);
600   for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
601     LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kInt32);
602     physical_core_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness);
603     physical_core_nodes_[i]->stage = NodeStage::kPrecolored;
604     core_intervals_.push_back(interval);
605     if (codegen_->IsBlockedCoreRegister(i)) {
606       interval->AddRange(0, liveness.GetMaxLifetimePosition());
607     }
608   }
609   // Initialize physical floating point register live intervals and blocked registers.
610   physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr);
611   for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
612     LiveInterval* interval =
613         LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kFloat32);
614     physical_fp_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness);
615     physical_fp_nodes_[i]->stage = NodeStage::kPrecolored;
616     fp_intervals_.push_back(interval);
617     if (codegen_->IsBlockedFloatingPointRegister(i)) {
618       interval->AddRange(0, liveness.GetMaxLifetimePosition());
619     }
620   }
621 }
622 
623 RegisterAllocatorGraphColor::~RegisterAllocatorGraphColor() {}
624 
625 void RegisterAllocatorGraphColor::AllocateRegisters() {
626   // (1) Collect and prepare live intervals.
627   ProcessInstructions();
628 
629   for (bool processing_core_regs : {true, false}) {
630     ScopedArenaVector<LiveInterval*>& intervals = processing_core_regs
631         ? core_intervals_
632         : fp_intervals_;
633     size_t num_registers = processing_core_regs
634         ? codegen_->GetNumberOfCoreRegisters()
635         : codegen_->GetNumberOfFloatingPointRegisters();
636 
637     size_t attempt = 0;
638     while (true) {
639       ++attempt;
640       DCHECK(attempt <= kMaxGraphColoringAttemptsDebug)
641           << "Exceeded debug max graph coloring register allocation attempts. "
642           << "This could indicate that the register allocator is not making forward progress, "
643           << "which could be caused by prioritizing the wrong live intervals. (Short intervals "
644           << "should be prioritized over long ones, because they cannot be split further.)";
645 
646       // Many data structures are cleared between graph coloring attempts, so we reduce
647       // total memory usage by using a new scoped arena allocator for each attempt.
648       ScopedArenaAllocator coloring_attempt_allocator(allocator_->GetArenaStack());
649       ColoringIteration iteration(this,
650                                   &coloring_attempt_allocator,
651                                   processing_core_regs,
652                                   num_registers);
653 
654       // (2) Build the interference graph.
655       ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
656           ? physical_core_nodes_
657           : physical_fp_nodes_;
658       iteration.BuildInterferenceGraph(intervals, physical_nodes);
659 
660       // (3) Add coalesce opportunities.
661       //     If we have tried coloring the graph a suspiciously high number of times, give
662       //     up on move coalescing, just in case the coalescing heuristics are not conservative.
663       //     (This situation will be caught if DCHECKs are turned on.)
664       if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) {
665         iteration.FindCoalesceOpportunities();
666       }
667 
668       // (4) Prune all uncolored nodes from interference graph.
669       iteration.PruneInterferenceGraph();
670 
671       // (5) Color pruned nodes based on interferences.
672       bool successful = iteration.ColorInterferenceGraph();
673 
674       // We manually clear coalesce opportunities for physical nodes,
675       // since they persist across coloring attempts.
676       for (InterferenceNode* node : physical_core_nodes_) {
677         node->ClearCoalesceOpportunities();
678       }
679       for (InterferenceNode* node : physical_fp_nodes_) {
680         node->ClearCoalesceOpportunities();
681       }
682 
683       if (successful) {
684         // Assign spill slots.
685         AllocateSpillSlots(iteration.GetPrunableNodes());
686 
687         // Tell the code generator which registers were allocated.
688         // We only look at prunable_nodes because we already told the code generator about
689         // fixed intervals while processing instructions. We also ignore the fixed intervals
690         // placed at the top of catch blocks.
691         for (InterferenceNode* node : iteration.GetPrunableNodes()) {
692           LiveInterval* interval = node->GetInterval();
693           if (interval->HasRegister()) {
694             Location low_reg = processing_core_regs
695                 ? Location::RegisterLocation(interval->GetRegister())
696                 : Location::FpuRegisterLocation(interval->GetRegister());
697             codegen_->AddAllocatedRegister(low_reg);
698             if (interval->HasHighInterval()) {
699               LiveInterval* high = interval->GetHighInterval();
700               DCHECK(high->HasRegister());
701               Location high_reg = processing_core_regs
702                   ? Location::RegisterLocation(high->GetRegister())
703                   : Location::FpuRegisterLocation(high->GetRegister());
704               codegen_->AddAllocatedRegister(high_reg);
705             }
706           } else {
707             DCHECK(!interval->HasHighInterval() || !interval->GetHighInterval()->HasRegister());
708           }
709         }
710 
711         break;
712       }
713     }  // while unsuccessful
714   }  // for processing_core_regs
715 
716   // (6) Resolve locations and deconstruct SSA form.
717   RegisterAllocationResolver(codegen_, liveness_)
718       .Resolve(ArrayRef<HInstruction* const>(safepoints_),
719                reserved_art_method_slots_ + reserved_out_slots_,
720                num_int_spill_slots_,
721                num_long_spill_slots_,
722                num_float_spill_slots_,
723                num_double_spill_slots_,
724                catch_phi_spill_slot_counter_,
725                ArrayRef<LiveInterval* const>(temp_intervals_));
726 
727   if (kIsDebugBuild) {
728     Validate(/*log_fatal_on_failure*/ true);
729   }
730 }
731 
732 bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
733   for (bool processing_core_regs : {true, false}) {
734     ScopedArenaAllocator allocator(allocator_->GetArenaStack());
735     ScopedArenaVector<LiveInterval*> intervals(
736         allocator.Adapter(kArenaAllocRegisterAllocatorValidate));
737     for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
738       HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
739       LiveInterval* interval = instruction->GetLiveInterval();
740       if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
741         intervals.push_back(instruction->GetLiveInterval());
742       }
743     }
744 
745     ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
746         ? physical_core_nodes_
747         : physical_fp_nodes_;
748     for (InterferenceNode* fixed : physical_nodes) {
749       LiveInterval* interval = fixed->GetInterval();
750       if (interval->GetFirstRange() != nullptr) {
751         // Ideally we would check fixed ranges as well, but currently there are times when
752         // two fixed intervals for the same register will overlap. For example, a fixed input
753     // and a fixed output may sometimes share the same register, in which case there will be two
754         // fixed intervals for the same place.
755       }
756     }
757 
758     for (LiveInterval* temp : temp_intervals_) {
759       if (IsCoreInterval(temp) == processing_core_regs) {
760         intervals.push_back(temp);
761       }
762     }
763 
764     size_t spill_slots = num_int_spill_slots_
765                        + num_long_spill_slots_
766                        + num_float_spill_slots_
767                        + num_double_spill_slots_
768                        + catch_phi_spill_slot_counter_;
769     bool ok = ValidateIntervals(ArrayRef<LiveInterval* const>(intervals),
770                                 spill_slots,
771                                 reserved_art_method_slots_ + reserved_out_slots_,
772                                 *codegen_,
773                                 processing_core_regs,
774                                 log_fatal_on_failure);
775     if (!ok) {
776       return false;
777     }
778   }  // for processing_core_regs
779 
780   return true;
781 }
782 
783 void RegisterAllocatorGraphColor::ProcessInstructions() {
784   for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
785     // Note that we currently depend on this ordering, since some helper
786     // code is designed for linear scan register allocation.
787     for (HBackwardInstructionIterator instr_it(block->GetInstructions());
788           !instr_it.Done();
789           instr_it.Advance()) {
790       ProcessInstruction(instr_it.Current());
791     }
792 
793     for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
794       ProcessInstruction(phi_it.Current());
795     }
796 
797     if (block->IsCatchBlock()
798         || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
799       // By blocking all registers at the top of each catch block or irreducible loop, we force
800       // intervals belonging to the live-in set of the catch/header block to be spilled.
801       // TODO(ngeoffray): Phis in this block could be allocated in register.
802       size_t position = block->GetLifetimeStart();
803       BlockRegisters(position, position + 1);
804     }
805   }
806 }
807 
808 void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) {
809   LocationSummary* locations = instruction->GetLocations();
810   if (locations == nullptr) {
811     return;
812   }
813   if (locations->NeedsSafepoint() && codegen_->IsLeafMethod()) {
814     // We do this here because we do not want the suspend check to artificially
815     // create live registers.
816     DCHECK(instruction->IsSuspendCheckEntry());
817     DCHECK_EQ(locations->GetTempCount(), 0u);
818     instruction->GetBlock()->RemoveInstruction(instruction);
819     return;
820   }
821 
822   CheckForTempLiveIntervals(instruction);
823   CheckForSafepoint(instruction);
824   if (instruction->GetLocations()->WillCall()) {
825     // If a call will happen, create fixed intervals for caller-save registers.
826     // TODO: Note that it may be beneficial to later split intervals at this point,
827     //       so that we allow last-minute moves from a caller-save register
828     //       to a callee-save register.
829     BlockRegisters(instruction->GetLifetimePosition(),
830                    instruction->GetLifetimePosition() + 1,
831                    /*caller_save_only*/ true);
832   }
833   CheckForFixedInputs(instruction);
834 
835   LiveInterval* interval = instruction->GetLiveInterval();
836   if (interval == nullptr) {
837     // Instructions lacking a valid output location do not have a live interval.
838     DCHECK(!locations->Out().IsValid());
839     return;
840   }
841 
842   // Low intervals act as representatives for their corresponding high interval.
843   DCHECK(!interval->IsHighInterval());
844   if (codegen_->NeedsTwoRegisters(interval->GetType())) {
845     interval->AddHighInterval();
846   }
847   AddSafepointsFor(instruction);
848   CheckForFixedOutput(instruction);
849   AllocateSpillSlotForCatchPhi(instruction);
850 
851   ScopedArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval)
852       ? core_intervals_
853       : fp_intervals_;
854   if (interval->HasSpillSlot() || instruction->IsConstant()) {
855     // Note that if an interval already has a spill slot, then its value currently resides
856     // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first
857     // register use. This is also true for constants, which can be materialized at any point.
858     size_t first_register_use = interval->FirstRegisterUse();
859     if (first_register_use != kNoLifetime) {
860       LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1);
861       intervals.push_back(split);
862     } else {
863       // We won't allocate a register for this value.
864     }
865   } else {
866     intervals.push_back(interval);
867   }
868 }
869 
870 void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) {
871   // We simply block physical registers where necessary.
872   // TODO: Ideally we would coalesce the physical register with the register
873   //       allocated to the input value, but this can be tricky if, e.g., there
874   //       could be multiple physical register uses of the same value at the
875   //       same instruction. Furthermore, there's currently no distinction between
876   //       fixed inputs to a call (which will be clobbered) and other fixed inputs (which
877   //       may not be clobbered).
878   LocationSummary* locations = instruction->GetLocations();
879   size_t position = instruction->GetLifetimePosition();
880   for (size_t i = 0; i < locations->GetInputCount(); ++i) {
881     Location input = locations->InAt(i);
882     if (input.IsRegister() || input.IsFpuRegister()) {
883       BlockRegister(input, position, position + 1);
884       codegen_->AddAllocatedRegister(input);
885     } else if (input.IsPair()) {
886       BlockRegister(input.ToLow(), position, position + 1);
887       BlockRegister(input.ToHigh(), position, position + 1);
888       codegen_->AddAllocatedRegister(input.ToLow());
889       codegen_->AddAllocatedRegister(input.ToHigh());
890     }
891   }
892 }
893 
894 void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) {
895   // If an instruction has a fixed output location, we give the live interval a register and then
896   // proactively split it just after the definition point to avoid creating too many interferences
897   // with a fixed node.
898   LiveInterval* interval = instruction->GetLiveInterval();
899   Location out = interval->GetDefinedBy()->GetLocations()->Out();
900   size_t position = instruction->GetLifetimePosition();
901   DCHECK_GE(interval->GetEnd() - position, 2u);
902 
903   if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
904     out = instruction->GetLocations()->InAt(0);
905   }
906 
907   if (out.IsRegister() || out.IsFpuRegister()) {
908     interval->SetRegister(out.reg());
909     codegen_->AddAllocatedRegister(out);
910     Split(interval, position + 1);
911   } else if (out.IsPair()) {
912     interval->SetRegister(out.low());
913     interval->GetHighInterval()->SetRegister(out.high());
914     codegen_->AddAllocatedRegister(out.ToLow());
915     codegen_->AddAllocatedRegister(out.ToHigh());
916     Split(interval, position + 1);
917   } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) {
918     interval->SetSpillSlot(out.GetStackIndex());
919   } else {
920     DCHECK(out.IsUnallocated() || out.IsConstant());
921   }
922 }
923 
924 void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) {
925   LiveInterval* interval = instruction->GetLiveInterval();
926   for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
927     HInstruction* safepoint = safepoints_[safepoint_index - 1u];
928     size_t safepoint_position = safepoint->GetLifetimePosition();
929 
930     // Test that safepoints_ are ordered by decreasing lifetime position.
931     DCHECK(safepoint_index == safepoints_.size() ||
932            safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
933 
934     if (safepoint_position == interval->GetStart()) {
935       // The safepoint is for this instruction, so the location of the instruction
936       // does not need to be saved.
937       DCHECK_EQ(safepoint_index, safepoints_.size());
938       DCHECK_EQ(safepoint, instruction);
939       continue;
940     } else if (interval->IsDeadAt(safepoint_position)) {
941       break;
942     } else if (!interval->Covers(safepoint_position)) {
943       // Hole in the interval.
944       continue;
945     }
946     interval->AddSafepoint(safepoint);
947   }
948 }
949 
950 void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) {
951   LocationSummary* locations = instruction->GetLocations();
952   size_t position = instruction->GetLifetimePosition();
953   for (size_t i = 0; i < locations->GetTempCount(); ++i) {
954     Location temp = locations->GetTemp(i);
955     if (temp.IsRegister() || temp.IsFpuRegister()) {
956       BlockRegister(temp, position, position + 1);
957       codegen_->AddAllocatedRegister(temp);
958     } else {
959       DCHECK(temp.IsUnallocated());
960       switch (temp.GetPolicy()) {
961         case Location::kRequiresRegister: {
962           LiveInterval* interval =
963               LiveInterval::MakeTempInterval(allocator_, DataType::Type::kInt32);
964           interval->AddTempUse(instruction, i);
965           core_intervals_.push_back(interval);
966           temp_intervals_.push_back(interval);
967           break;
968         }
969 
970         case Location::kRequiresFpuRegister: {
971           LiveInterval* interval =
972               LiveInterval::MakeTempInterval(allocator_, DataType::Type::kFloat64);
973           interval->AddTempUse(instruction, i);
974           fp_intervals_.push_back(interval);
975           temp_intervals_.push_back(interval);
976           if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) {
977             interval->AddHighInterval(/*is_temp*/ true);
978             temp_intervals_.push_back(interval->GetHighInterval());
979           }
980           break;
981         }
982 
983         default:
984           LOG(FATAL) << "Unexpected policy for temporary location "
985                      << temp.GetPolicy();
986       }
987     }
988   }
989 }
990 
991 void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
992   LocationSummary* locations = instruction->GetLocations();
993 
994   if (locations->NeedsSafepoint()) {
995     safepoints_.push_back(instruction);
996   }
997 }
998 
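// Splits `interval` at `position` if the position falls strictly inside the interval;
// otherwise, returns `interval` unchanged.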
999 LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) {
1000   if (interval->GetStart() < position && position < interval->GetEnd()) {
1001     return Split(interval, position);
1002   } else {
1003     return interval;
1004   }
1005 }
1006 
1007 void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) {
1008   DCHECK(!interval->IsHighInterval());
1009 
1010   // Split just after a register definition.
1011   if (interval->IsParent() && interval->DefinitionRequiresRegister()) {
1012     interval = TrySplit(interval, interval->GetStart() + 1);
1013   }
1014 
1015   // Process uses in the range [interval->GetStart(), interval->GetEnd()], i.e.
1016   // [interval->GetStart(), interval->GetEnd() + 1)
1017   auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
1018                                                  interval->GetUses().end(),
1019                                                  interval->GetStart(),
1020                                                  interval->GetEnd() + 1u);
1021   // Split around register uses.
1022   for (const UsePosition& use : matching_use_range) {
1023     if (use.RequiresRegister()) {
1024       size_t position = use.GetPosition();
1025       interval = TrySplit(interval, position - 1);
1026       if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
1027         // If we are at the very end of a basic block, we cannot split right
1028         // at the use. Split just after instead.
1029         interval = TrySplit(interval, position + 1);
1030       } else {
1031         interval = TrySplit(interval, position);
1032       }
1033     }
1034   }
1035 }
1036 
1037 void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) {
1038   if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
1039     HPhi* phi = instruction->AsPhi();
1040     LiveInterval* interval = phi->GetLiveInterval();
1041 
1042     HInstruction* previous_phi = phi->GetPrevious();
1043     DCHECK(previous_phi == nullptr ||
1044            previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
1045         << "Phis expected to be sorted by vreg number, "
1046         << "so that equivalent phis are adjacent.";
1047 
1048     if (phi->IsVRegEquivalentOf(previous_phi)) {
1049       // Assign the same spill slot.
1050       DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
1051       interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
1052     } else {
1053       interval->SetSpillSlot(catch_phi_spill_slot_counter_);
1054       catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
1055     }
1056   }
1057 }
1058 
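// Adds the range [start, end) to the fixed interval for the physical register in `location`,
// unless the register is already blocked by the code generator for the whole method.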
1059 void RegisterAllocatorGraphColor::BlockRegister(Location location,
1060                                                 size_t start,
1061                                                 size_t end) {
1062   DCHECK(location.IsRegister() || location.IsFpuRegister());
1063   int reg = location.reg();
1064   LiveInterval* interval = location.IsRegister()
1065       ? physical_core_nodes_[reg]->GetInterval()
1066       : physical_fp_nodes_[reg]->GetInterval();
1067   DCHECK(interval->GetRegister() == reg);
1068   bool blocked_by_codegen = location.IsRegister()
1069       ? codegen_->IsBlockedCoreRegister(reg)
1070       : codegen_->IsBlockedFloatingPointRegister(reg);
1071   if (blocked_by_codegen) {
1072     // We've already blocked this register for the entire method. (And adding a
1073     // range inside another range violates the preconditions of AddRange).
1074   } else {
1075     interval->AddRange(start, end);
1076   }
1077 }
1078 
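// Blocks all physical registers (or only caller-save registers, if requested) for the
// range [start, end).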
1079 void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
1080   for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
1081     if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
1082       BlockRegister(Location::RegisterLocation(i), start, end);
1083     }
1084   }
1085   for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
1086     if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
1087       BlockRegister(Location::FpuRegisterLocation(i), start, end);
1088     }
1089   }
1090 }
1091 
1092 void ColoringIteration::AddPotentialInterference(InterferenceNode* from,
1093                                                  InterferenceNode* to,
1094                                                  bool guaranteed_not_interfering_yet,
1095                                                  bool both_directions) {
1096   if (from->IsPrecolored()) {
1097     // We save space by ignoring outgoing edges from fixed nodes.
1098   } else if (to->IsPrecolored()) {
1099     // It is important that only a single node represents a given fixed register in the
1100     // interference graph. We retrieve that node here.
1101     const ScopedArenaVector<InterferenceNode*>& physical_nodes =
1102         to->GetInterval()->IsFloatingPoint() ? register_allocator_->physical_fp_nodes_
1103                                              : register_allocator_->physical_core_nodes_;
1104     InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()];
1105     from->AddInterference(
1106         physical_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_);
1107     DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister());
1108     DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node";
1109 
1110     // If a node interferes with a fixed pair node, the weight of the edge may
1111     // be inaccurate after using the alias of the pair node, because the alias of the pair node
1112     // is a singular node.
1113     // We could make special pair fixed nodes, but that ends up being too conservative because
1114     // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of
1115     // three rather than two.
1116     // Instead, we explicitly add an interference with the high node of the fixed pair node.
1117     // TODO: This is too conservative at times for pair nodes, but the fact that fixed pair intervals
1118     //       can be unaligned on x86 complicates things.
1119     if (to->IsPair()) {
1120       InterferenceNode* high_node =
1121           physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()];
1122       DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(),
1123                 high_node->GetInterval()->GetRegister());
1124       from->AddInterference(
1125           high_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_);
1126     }
1127   } else {
1128     // Standard interference between two uncolored nodes.
1129     from->AddInterference(to, guaranteed_not_interfering_yet, &adjacent_nodes_links_);
1130   }
1131 
1132   if (both_directions) {
1133     AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false);
1134   }
1135 }
1136 
1137 // Returns true if `in_node` represents an input interval of `out_node`, and the output interval
1138 // is allowed to have the same register as the input interval.
1139 // TODO: Ideally we should just produce correct intervals in liveness analysis.
1140 //       We would need to refactor the current live interval layout to do so, which is
1141 //       no small task.
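// For intuition, a hedged example (the instruction choice is hypothetical): for an add whose
// output is marked as not overlapping its inputs, if the first input's interval dies exactly at
// the use position (definition position + 1) and has no sibling starting there, the output may
// share that input's register; otherwise this function returns false and the caller records a
// normal interference.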
1142 static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) {
1143   LiveInterval* output_interval = out_node->GetInterval();
1144   HInstruction* defined_by = output_interval->GetDefinedBy();
1145   if (defined_by == nullptr) {
1146     // This must not be a definition point.
1147     return false;
1148   }
1149 
1150   LocationSummary* locations = defined_by->GetLocations();
1151   if (locations->OutputCanOverlapWithInputs()) {
1152     // This instruction does not allow the output to reuse a register from an input.
1153     return false;
1154   }
1155 
1156   LiveInterval* input_interval = in_node->GetInterval();
1157   LiveInterval* next_sibling = input_interval->GetNextSibling();
1158   size_t def_position = defined_by->GetLifetimePosition();
1159   size_t use_position = def_position + 1;
1160   if (next_sibling != nullptr && next_sibling->GetStart() == use_position) {
1161     // The next sibling starts at the use position, so reusing the input register in the output
1162     // would clobber the input before it's moved into the sibling interval location.
1163     return false;
1164   }
1165 
1166   if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) {
1167     // The input interval is live after the use position.
1168     return false;
1169   }
1170 
1171   HInputsRef inputs = defined_by->GetInputs();
1172   for (size_t i = 0; i < inputs.size(); ++i) {
1173     if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) {
1174       DCHECK(input_interval->SameRegisterKind(*output_interval));
1175       return true;
1176     }
1177   }
1178 
1179   // The input interval was not an input for this instruction.
1180   return false;
1181 }
1182 
1183 void ColoringIteration::BuildInterferenceGraph(
1184     const ScopedArenaVector<LiveInterval*>& intervals,
1185     const ScopedArenaVector<InterferenceNode*>& physical_nodes) {
1186   DCHECK(interval_node_map_.empty() && prunable_nodes_.empty());
1187   // Build the interference graph efficiently by ordering range endpoints
1188   // by position and doing a linear sweep to find interferences. (That is, we
1189   // jump from endpoint to endpoint, maintaining a set of intervals live at each
1190   // point. If two nodes are ever in the live set at the same time, then they
1191   // interfere with each other.)
1192   //
1193   // We order by both position and (secondarily) by whether the endpoint
1194   // begins or ends a range; we want to process range endings before range
1195   // beginnings at the same position because they should not conflict.
1196   //
1197   // For simplicity, we create a tuple for each endpoint, and then sort the tuples.
1198   // Tuple contents: (position, is_range_beginning, node).
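  // Small worked example (hypothetical positions): ranges A=[2,6) and B=[6,10) yield the
  // endpoints (2,true,A), (6,false,A), (6,true,B), (10,false,B). Since `false` sorts before
  // `true` at equal positions, A leaves the live set before B enters it, so A and B are
  // correctly treated as non-interfering.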
1199   ScopedArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints(
1200       allocator_->Adapter(kArenaAllocRegisterAllocator));
1201 
1202   // We reserve plenty of space to avoid excessive copying.
1203   range_endpoints.reserve(4 * prunable_nodes_.size());
1204 
1205   for (LiveInterval* parent : intervals) {
1206     for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) {
1207       LiveRange* range = sibling->GetFirstRange();
1208       if (range != nullptr) {
1209         InterferenceNode* node =
1210             new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_);
1211         interval_node_map_.insert(std::make_pair(sibling, node));
1212 
1213         if (sibling->HasRegister()) {
1214           // Fixed nodes should alias the canonical node for the corresponding register.
1215           node->stage = NodeStage::kPrecolored;
1216           InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()];
1217           node->SetAlias(physical_node);
1218           DCHECK_EQ(node->GetInterval()->GetRegister(),
1219                     physical_node->GetInterval()->GetRegister());
1220         } else {
1221           node->stage = NodeStage::kPrunable;
1222           prunable_nodes_.push_back(node);
1223         }
1224 
1225         while (range != nullptr) {
1226           range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node));
1227           range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node));
1228           range = range->GetNext();
1229         }
1230       }
1231     }
1232   }
1233 
1234   // Sort the endpoints.
1235   // We explicitly ignore the third entry of each tuple (the node pointer) in order
1236   // to maintain determinism.
1237   std::sort(range_endpoints.begin(), range_endpoints.end(),
1238             [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs,
1239                 const std::tuple<size_t, bool, InterferenceNode*>& rhs) {
1240     return std::tie(std::get<0>(lhs), std::get<1>(lhs))
1241          < std::tie(std::get<0>(rhs), std::get<1>(rhs));
1242   });
1243 
1244   // Nodes live at the current position in the linear sweep.
1245   ScopedArenaVector<InterferenceNode*> live(allocator_->Adapter(kArenaAllocRegisterAllocator));
1246 
1247   // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the
1248   // live set. When we encounter the end of a range, we remove the corresponding node
1249   // from the live set. Nodes interfere if they are in the live set at the same time.
1250   for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) {
1251     bool is_range_beginning;
1252     InterferenceNode* node;
1253     size_t position;
1254     // Extract information from the tuple, including the node this tuple represents.
1255     std::tie(position, is_range_beginning, node) = *it;
1256 
1257     if (is_range_beginning) {
1258       bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart();
1259       for (InterferenceNode* conflicting : live) {
1260         DCHECK_NE(node, conflicting);
1261         if (CheckInputOutputCanOverlap(conflicting, node)) {
1262           // We do not add an interference, because the instruction represented by `node` allows
1263           // its output to share a register with an input, represented here by `conflicting`.
1264         } else {
1265           AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet);
1266         }
1267       }
1268       DCHECK(std::find(live.begin(), live.end(), node) == live.end());
1269       live.push_back(node);
1270     } else {
1271       // End of range.
1272       auto live_it = std::find(live.begin(), live.end(), node);
1273       DCHECK(live_it != live.end());
1274       live.erase(live_it);
1275     }
1276   }
1277   DCHECK(live.empty());
1278 }
1279 
1280 void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a,
1281                                                   InterferenceNode* b,
1282                                                   CoalesceKind kind,
1283                                                   size_t position) {
1284   DCHECK_EQ(a->IsPair(), b->IsPair())
1285       << "Nodes of different memory widths should never be coalesced";
1286   CoalesceOpportunity* opportunity =
1287       new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_);
1288   a->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
1289   b->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
1290   coalesce_worklist_.push(opportunity);
1291 }
1292 
1293 // When looking for coalesce opportunities, we use the interval_node_map_ to find the node
1294 // corresponding to an interval. Note that not all intervals are in this map, notably the parents
1295 // of constants and stack arguments. (However, these intervals should not be involved in coalesce
1296 // opportunities anyway, because they're not going to be in registers.)
1297 void ColoringIteration::FindCoalesceOpportunities() {
1298   DCHECK(coalesce_worklist_.empty());
1299 
1300   for (InterferenceNode* node : prunable_nodes_) {
1301     LiveInterval* interval = node->GetInterval();
1302 
1303     // Coalesce siblings.
1304     LiveInterval* next_sibling = interval->GetNextSibling();
1305     if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) {
1306       auto it = interval_node_map_.find(next_sibling);
1307       if (it != interval_node_map_.end()) {
1308         InterferenceNode* sibling_node = it->second;
1309         CreateCoalesceOpportunity(node,
1310                                   sibling_node,
1311                                   CoalesceKind::kAdjacentSibling,
1312                                   interval->GetEnd());
1313       }
1314     }
1315 
1316     // Coalesce fixed outputs with this interval if this interval is an adjacent sibling.
1317     LiveInterval* parent = interval->GetParent();
1318     if (parent->HasRegister()
1319         && parent->GetNextSibling() == interval
1320         && parent->GetEnd() == interval->GetStart()) {
1321       auto it = interval_node_map_.find(parent);
1322       if (it != interval_node_map_.end()) {
1323         InterferenceNode* parent_node = it->second;
1324         CreateCoalesceOpportunity(node,
1325                                   parent_node,
1326                                   CoalesceKind::kFixedOutputSibling,
1327                                   parent->GetEnd());
1328       }
1329     }
1330 
1331     // Try to prevent moves across blocks.
1332     // Note that this does not lead to many successful coalesce attempts, so it could be removed
1333     // if found to add to compile time.
1334     const SsaLivenessAnalysis& liveness = register_allocator_->liveness_;
1335     if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) {
1336       // If the start of this interval is at a block boundary, we look at the
1337       // location of the interval in blocks preceding the block this interval
1338       // starts at. This can avoid a move between the two blocks.
1339       HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2);
1340       for (HBasicBlock* predecessor : block->GetPredecessors()) {
1341         size_t position = predecessor->GetLifetimeEnd() - 1;
1342         LiveInterval* existing = interval->GetParent()->GetSiblingAt(position);
1343         if (existing != nullptr) {
1344           auto it = interval_node_map_.find(existing);
1345           if (it != interval_node_map_.end()) {
1346             InterferenceNode* existing_node = it->second;
1347             CreateCoalesceOpportunity(node,
1348                                       existing_node,
1349                                       CoalesceKind::kNonlinearControlFlow,
1350                                       position);
1351           }
1352         }
1353       }
1354     }
1355 
1356     // Coalesce phi inputs with the corresponding output.
1357     HInstruction* defined_by = interval->GetDefinedBy();
1358     if (defined_by != nullptr && defined_by->IsPhi()) {
1359       ArrayRef<HBasicBlock* const> predecessors(defined_by->GetBlock()->GetPredecessors());
1360       HInputsRef inputs = defined_by->GetInputs();
1361 
1362       for (size_t i = 0, e = inputs.size(); i < e; ++i) {
1363         // We want the sibling at the end of the appropriate predecessor block.
1364         size_t position = predecessors[i]->GetLifetimeEnd() - 1;
1365         LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position);
1366 
1367         auto it = interval_node_map_.find(input_interval);
1368         if (it != interval_node_map_.end()) {
1369           InterferenceNode* input_node = it->second;
1370           CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position);
1371         }
1372       }
1373     }
1374 
1375     // Coalesce output with first input when policy is kSameAsFirstInput.
1376     if (defined_by != nullptr) {
1377       Location out = defined_by->GetLocations()->Out();
1378       if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
1379         LiveInterval* input_interval
1380             = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1);
1381         // TODO: Could we consider lifetime holes here?
1382         if (input_interval->GetEnd() == interval->GetStart()) {
1383           auto it = interval_node_map_.find(input_interval);
1384           if (it != interval_node_map_.end()) {
1385             InterferenceNode* input_node = it->second;
1386             CreateCoalesceOpportunity(node,
1387                                       input_node,
1388                                       CoalesceKind::kFirstInput,
1389                                       interval->GetStart());
1390           }
1391         }
1392       }
1393     }
1394 
1395     // An interval that starts at the instruction defining it (that is, it is not split) may
1396     // re-use the registers used by that instruction's inputs, depending on the
1397     // location summary.
1398     if (defined_by != nullptr) {
1399       DCHECK(!interval->IsSplit());
1400       LocationSummary* locations = defined_by->GetLocations();
1401       if (!locations->OutputCanOverlapWithInputs()) {
1402         HInputsRef inputs = defined_by->GetInputs();
1403         for (size_t i = 0; i < inputs.size(); ++i) {
1404           size_t def_point = defined_by->GetLifetimePosition();
1405           // TODO: Getting the sibling at the def_point might not be quite what we want
1406           //       for fixed inputs, since the use will be *at* the def_point rather than after.
1407           LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point);
1408           if (input_interval != nullptr &&
1409               input_interval->HasHighInterval() == interval->HasHighInterval()) {
1410             auto it = interval_node_map_.find(input_interval);
1411             if (it != interval_node_map_.end()) {
1412               InterferenceNode* input_node = it->second;
1413               CreateCoalesceOpportunity(node,
1414                                         input_node,
1415                                         CoalesceKind::kAnyInput,
1416                                         interval->GetStart());
1417             }
1418           }
1419         }
1420       }
1421     }
1422 
1423     // Try to prevent moves into fixed input locations.
1424     // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e.
1425     // [interval->GetStart() + 1, interval->GetEnd() + 1)
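    // For example (hypothetical positions), an interval with start 10 and end 20 considers
    // uses at positions 11 through 20 inclusive, which matches the half-open range [11, 21)
    // passed to FindMatchingUseRange below.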
1426     auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
1427                                                    interval->GetUses().end(),
1428                                                    interval->GetStart() + 1u,
1429                                                    interval->GetEnd() + 1u);
1430     for (const UsePosition& use : matching_use_range) {
1431       HInstruction* user = use.GetUser();
1432       if (user == nullptr) {
1433         // User may be null for certain intervals, such as temp intervals.
1434         continue;
1435       }
1436       LocationSummary* locations = user->GetLocations();
1437       Location input = locations->InAt(use.GetInputIndex());
1438       if (input.IsRegister() || input.IsFpuRegister()) {
1439         // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes
1440         //       is currently not supported.
1441         InterferenceNode* fixed_node = input.IsRegister()
1442             ? register_allocator_->physical_core_nodes_[input.reg()]
1443             : register_allocator_->physical_fp_nodes_[input.reg()];
1444         CreateCoalesceOpportunity(node,
1445                                   fixed_node,
1446                                   CoalesceKind::kFixedInput,
1447                                   user->GetLifetimePosition());
1448       }
1449     }
1450   }  // for node in prunable_nodes
1451 }
1452 
1453 static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) {
1454   return node->GetOutDegree() < num_regs;
1455 }
1456 
1457 static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) {
1458   return !IsLowDegreeNode(node, num_regs);
1459 }
1460 
1461 void ColoringIteration::PruneInterferenceGraph() {
1462   DCHECK(pruned_nodes_.empty()
1463       && simplify_worklist_.empty()
1464       && freeze_worklist_.empty()
1465       && spill_worklist_.empty());
1466   // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes,
1467   // and all others as high degree nodes. The distinction is important: low degree nodes are
1468   // guaranteed a color, while high degree nodes are not.
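  // For instance (register count is hypothetical), with num_regs_ == 4 a node with 3 neighbors
  // is low degree: even if all 3 neighbors end up in distinct registers, a 4th register is
  // still available, so the node can always be colored after its neighbors.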
1469 
1470   // Build worklists. Note that the coalesce worklist has already been
1471   // filled by FindCoalesceOpportunities().
1472   for (InterferenceNode* node : prunable_nodes_) {
1473     DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned";
1474     if (IsLowDegreeNode(node, num_regs_)) {
1475       if (node->GetCoalesceOpportunities().empty()) {
1476         // Simplify Worklist.
1477         node->stage = NodeStage::kSimplifyWorklist;
1478         simplify_worklist_.push_back(node);
1479       } else {
1480         // Freeze Worklist.
1481         node->stage = NodeStage::kFreezeWorklist;
1482         freeze_worklist_.push_back(node);
1483       }
1484     } else {
1485       // Spill worklist.
1486       node->stage = NodeStage::kSpillWorklist;
1487       spill_worklist_.push(node);
1488     }
1489   }
1490 
1491   // Prune graph.
1492   // Note that we do not remove a node from its current worklist if it moves to another, so it may
1493   // be in multiple worklists at once; the node's `stage` says which worklist it is really in.
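  // For example, a node pushed on the spill worklist whose degree later drops below num_regs_
  // is also pushed on the freeze (or simplify) worklist; the stale spill-worklist entry is
  // simply skipped when popped, because the node's `stage` no longer reads kSpillWorklist.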
1494   while (true) {
1495     if (!simplify_worklist_.empty()) {
1496       // Prune low-degree nodes.
1497       // TODO: pop_back() should work as well, but it didn't; we get a
1498       //       failed check while pruning. We should look into this.
1499       InterferenceNode* node = simplify_worklist_.front();
1500       simplify_worklist_.pop_front();
1501       DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list";
1502       DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree";
1503       DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related";
1504       PruneNode(node);
1505     } else if (!coalesce_worklist_.empty()) {
1506       // Coalesce.
1507       CoalesceOpportunity* opportunity = coalesce_worklist_.top();
1508       coalesce_worklist_.pop();
1509       if (opportunity->stage == CoalesceStage::kWorklist) {
1510         Coalesce(opportunity);
1511       }
1512     } else if (!freeze_worklist_.empty()) {
1513       // Freeze moves and prune a low-degree move-related node.
1514       InterferenceNode* node = freeze_worklist_.front();
1515       freeze_worklist_.pop_front();
1516       if (node->stage == NodeStage::kFreezeWorklist) {
1517         DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree";
1518         DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related";
1519         FreezeMoves(node);
1520         PruneNode(node);
1521       }
1522     } else if (!spill_worklist_.empty()) {
1523       // We spill the lowest-priority node, because pruning a node earlier
1524       // gives it a higher chance of being spilled.
1525       InterferenceNode* node = spill_worklist_.top();
1526       spill_worklist_.pop();
1527       if (node->stage == NodeStage::kSpillWorklist) {
1528         DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree";
1529         FreezeMoves(node);
1530         PruneNode(node);
1531       }
1532     } else {
1533       // Pruning complete.
1534       break;
1535     }
1536   }
1537   DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size());
1538 }
1539 
1540 void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) {
1541   for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1542     if (opportunity->stage == CoalesceStage::kActive) {
1543       opportunity->stage = CoalesceStage::kWorklist;
1544       coalesce_worklist_.push(opportunity);
1545     }
1546   }
1547 }
1548 
1549 void ColoringIteration::PruneNode(InterferenceNode* node) {
1550   DCHECK_NE(node->stage, NodeStage::kPruned);
1551   DCHECK(!node->IsPrecolored());
1552   node->stage = NodeStage::kPruned;
1553   pruned_nodes_.push(node);
1554 
1555   for (InterferenceNode* adj : node->GetAdjacentNodes()) {
1556     DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes";
1557 
1558     if (adj->IsPrecolored()) {
1559       // No effect on pre-colored nodes; they're never pruned.
1560     } else {
1561       // Remove the interference.
1562       bool was_high_degree = IsHighDegreeNode(adj, num_regs_);
1563       DCHECK(adj->ContainsInterference(node))
1564           << "Missing reflexive interference from non-fixed node";
1565       adj->RemoveInterference(node);
1566 
1567       // Handle transitions from high degree to low degree.
1568       if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) {
1569         EnableCoalesceOpportunities(adj);
1570         for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) {
1571           EnableCoalesceOpportunities(adj_adj);
1572         }
1573 
1574         DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist);
1575         if (adj->IsMoveRelated()) {
1576           adj->stage = NodeStage::kFreezeWorklist;
1577           freeze_worklist_.push_back(adj);
1578         } else {
1579           adj->stage = NodeStage::kSimplifyWorklist;
1580           simplify_worklist_.push_back(adj);
1581         }
1582       }
1583     }
1584   }
1585 }
1586 
1587 void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) {
1588   if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) {
1589     DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist);
1590     node->stage = NodeStage::kSimplifyWorklist;
1591     simplify_worklist_.push_back(node);
1592   }
1593 }
1594 
1595 void ColoringIteration::FreezeMoves(InterferenceNode* node) {
1596   for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1597     if (opportunity->stage == CoalesceStage::kDefunct) {
1598       // Constrained moves should remain constrained, since they will not be considered
1599       // during last-chance coalescing.
1600     } else {
1601       opportunity->stage = CoalesceStage::kInactive;
1602     }
1603     InterferenceNode* other = opportunity->node_a->GetAlias() == node
1604         ? opportunity->node_b->GetAlias()
1605         : opportunity->node_a->GetAlias();
1606     if (other != node && other->stage == NodeStage::kFreezeWorklist) {
1607       DCHECK(IsLowDegreeNode(node, num_regs_));
1608       CheckTransitionFromFreezeWorklist(other);
1609     }
1610   }
1611 }
1612 
1613 bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from,
1614                                             InterferenceNode* into) {
1615   if (!into->IsPrecolored()) {
1616     // The uncolored heuristic will cover this case.
1617     return false;
1618   }
1619   if (from->IsPair() || into->IsPair()) {
1620     // TODO: Merging from a pair node is currently not supported, since fixed pair nodes
1621     //       are currently represented as two single fixed nodes in the graph, and `into` is
1622     //       only one of them. (We may lose the implicit connections to the second one in a merge.)
1623     return false;
1624   }
1625 
1626   // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`.
1627   // Reasons an adjacent node `adj` can be "ok":
1628   // (1) If `adj` is low degree, interference with `into` will not affect its existing
1629   //     colorable guarantee. (Notice that coalescing cannot increase its degree.)
1630   // (2) If `adj` is pre-colored, it already interferes with `into`. See (3).
1631   // (3) If there's already an interference with `into`, coalescing will not add interferences.
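  // (This is essentially the George conservative-coalescing test.) Hypothetical example with
  // num_regs_ == 4: if `from` has neighbors {n1 with degree 2, a pre-colored r0, and n2 which
  // already interferes with `into`}, then every neighbor is "ok" and the merge is accepted,
  // since it cannot make any previously colorable neighbor uncolorable.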
1632   for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1633     if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) {
1634       // Ok.
1635     } else {
1636       return false;
1637     }
1638   }
1639   return true;
1640 }
1641 
1642 bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from,
1643                                            InterferenceNode* into) {
1644   if (into->IsPrecolored()) {
1645     // The pre-colored heuristic will handle this case.
1646     return false;
1647   }
1648 
1649   // Arbitrary cap to improve compile time. Tests show that this has negligible effect
1650   // on generated code.
1651   if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) {
1652     return false;
1653   }
1654 
1655   // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors
1656   // of high degree. (Low degree neighbors can be ignored, because they will eventually be
1657   // pruned from the interference graph in the simplify stage.)
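  // (This is essentially the Briggs conservative-coalescing test.) Hypothetical example with
  // num_regs_ == 4: if the merged node would have 3 high-degree neighbors plus any number of
  // low-degree ones, the low-degree neighbors will eventually be pruned in the simplify stage,
  // leaving the merged node with degree 3 < 4, so it can itself be pruned and colored.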
1658   size_t high_degree_interferences = 0;
1659   for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1660     if (IsHighDegreeNode(adj, num_regs_)) {
1661       high_degree_interferences += from->EdgeWeightWith(adj);
1662     }
1663   }
1664   for (InterferenceNode* adj : into->GetAdjacentNodes()) {
1665     if (IsHighDegreeNode(adj, num_regs_)) {
1666       if (from->ContainsInterference(adj)) {
1667         // We've already counted this adjacent node.
1668         // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that
1669         // we should not have counted it at all. (This extends the textbook Briggs coalescing test,
1670         // but remains conservative.)
1671         if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) {
1672           high_degree_interferences -= from->EdgeWeightWith(adj);
1673         }
1674       } else {
1675         high_degree_interferences += into->EdgeWeightWith(adj);
1676       }
1677     }
1678   }
1679 
1680   return high_degree_interferences < num_regs_;
1681 }
1682 
1683 void ColoringIteration::Combine(InterferenceNode* from,
1684                                 InterferenceNode* into) {
1685   from->SetAlias(into);
1686 
1687   // Add interferences.
1688   for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1689     bool was_low_degree = IsLowDegreeNode(adj, num_regs_);
1690     AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false);
1691     if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) {
1692       // This is a (temporary) transition to a high degree node. Its degree will decrease again
1693       // when we prune `from`, but it's best to be consistent about the current worklist.
1694       adj->stage = NodeStage::kSpillWorklist;
1695       spill_worklist_.push(adj);
1696     }
1697   }
1698 
1699   // Add coalesce opportunities.
1700   for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) {
1701     if (opportunity->stage != CoalesceStage::kDefunct) {
1702       into->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
1703     }
1704   }
1705   EnableCoalesceOpportunities(from);
1706 
1707   // Prune and update worklists.
1708   PruneNode(from);
1709   if (IsLowDegreeNode(into, num_regs_)) {
1710     // Coalesce(...) takes care of checking for a transition to the simplify worklist.
1711     DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist);
1712   } else if (into->stage == NodeStage::kFreezeWorklist) {
1713     // This is a transition to a high degree node.
1714     into->stage = NodeStage::kSpillWorklist;
1715     spill_worklist_.push(into);
1716   } else {
1717     DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored);
1718   }
1719 }
1720 
1721 void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) {
1722   InterferenceNode* from = opportunity->node_a->GetAlias();
1723   InterferenceNode* into = opportunity->node_b->GetAlias();
1724   DCHECK_NE(from->stage, NodeStage::kPruned);
1725   DCHECK_NE(into->stage, NodeStage::kPruned);
1726 
1727   if (from->IsPrecolored()) {
1728     // If we have one pre-colored node, make sure it's the `into` node.
1729     std::swap(from, into);
1730   }
1731 
1732   if (from == into) {
1733     // These nodes have already been coalesced.
1734     opportunity->stage = CoalesceStage::kDefunct;
1735     CheckTransitionFromFreezeWorklist(from);
1736   } else if (from->IsPrecolored() || from->ContainsInterference(into)) {
1737     // These nodes interfere.
1738     opportunity->stage = CoalesceStage::kDefunct;
1739     CheckTransitionFromFreezeWorklist(from);
1740     CheckTransitionFromFreezeWorklist(into);
1741   } else if (PrecoloredHeuristic(from, into)
1742           || UncoloredHeuristic(from, into)) {
1743     // We can coalesce these nodes.
1744     opportunity->stage = CoalesceStage::kDefunct;
1745     Combine(from, into);
1746     CheckTransitionFromFreezeWorklist(into);
1747   } else {
1748     // We cannot coalesce, but we may be able to later.
1749     opportunity->stage = CoalesceStage::kActive;
1750   }
1751 }
1752 
1753 // Build a mask with a bit set for each register assigned to some
1754 // interval in `intervals`.
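// For example (hypothetical assignment): if the adjacent intervals currently hold r1 and a pair
// in {r4, r5}, the returned mask has bits 1, 4 and 5 set, i.e. 0b110010.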
1755 template <typename Container>
1756 static std::bitset<kMaxNumRegs> BuildConflictMask(const Container& intervals) {
1757   std::bitset<kMaxNumRegs> conflict_mask;
1758   for (InterferenceNode* adjacent : intervals) {
1759     LiveInterval* conflicting = adjacent->GetInterval();
1760     if (conflicting->HasRegister()) {
1761       conflict_mask.set(conflicting->GetRegister());
1762       if (conflicting->HasHighInterval()) {
1763         DCHECK(conflicting->GetHighInterval()->HasRegister());
1764         conflict_mask.set(conflicting->GetHighInterval()->GetRegister());
1765       }
1766     } else {
1767       DCHECK(!conflicting->HasHighInterval()
1768           || !conflicting->GetHighInterval()->HasRegister());
1769     }
1770   }
1771   return conflict_mask;
1772 }
1773 
1774 bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
1775   return processing_core_regs
1776       ? !codegen_->IsCoreCalleeSaveRegister(reg)
1777       : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
1778 }
1779 
1780 static bool RegisterIsAligned(size_t reg) {
1781   return reg % 2 == 0;
1782 }
1783 
1784 static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) {
1785   // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit.
1786   // Note that CTZ is undefined if all bits are 0, so we special-case it.
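  // Worked example (hypothetical mask): conflict_mask == 0b0111 gives ~mask == ...11111000,
  // whose lowest set bit is at index 3, so CTZ returns 3, the first free register.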
1787   return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
1788 }
1789 
1790 bool ColoringIteration::ColorInterferenceGraph() {
1791   DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small";
1792   ScopedArenaVector<LiveInterval*> colored_intervals(
1793       allocator_->Adapter(kArenaAllocRegisterAllocator));
1794   bool successful = true;
1795 
1796   while (!pruned_nodes_.empty()) {
1797     InterferenceNode* node = pruned_nodes_.top();
1798     pruned_nodes_.pop();
1799     LiveInterval* interval = node->GetInterval();
1800     size_t reg = 0;
1801 
1802     InterferenceNode* alias = node->GetAlias();
1803     if (alias != node) {
1804       // This node was coalesced with another.
1805       LiveInterval* alias_interval = alias->GetInterval();
1806       if (alias_interval->HasRegister()) {
1807         reg = alias_interval->GetRegister();
1808         DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg])
1809             << "This node conflicts with the register it was coalesced with";
1810       } else {
1811         DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " "
1812             << "Move coalescing was not conservative, causing a node to be coalesced "
1813             << "with another node that could not be colored";
1814         if (interval->RequiresRegister()) {
1815           successful = false;
1816         }
1817       }
1818     } else {
1819       // Search for free register(s).
1820       std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes());
1821       if (interval->HasHighInterval()) {
1822         // Note that the graph coloring allocator assumes that pair intervals are aligned here,
1823         // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we
1824         // change the alignment requirements here, we will have to update the algorithm (e.g.,
1825         // be more conservative about the weight of edges adjacent to pair nodes).
1826         while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) {
1827           reg += 2;
1828         }
1829 
1830         // Try to use a caller-save register first.
1831         for (size_t i = 0; i < num_regs_ - 1; i += 2) {
1832           bool low_caller_save  = register_allocator_->IsCallerSave(i, processing_core_regs_);
1833           bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_);
1834           if (!conflict_mask[i] && !conflict_mask[i + 1]) {
1835             if (low_caller_save && high_caller_save) {
1836               reg = i;
1837               break;
1838             } else if (low_caller_save || high_caller_save) {
1839               reg = i;
1840               // Keep looking to try to get both parts in caller-save registers.
1841             }
1842           }
1843         }
1844       } else {
1845         // Not a pair interval.
1846         reg = FindFirstZeroInConflictMask(conflict_mask);
1847 
1848         // Try to use caller-save registers first.
1849         for (size_t i = 0; i < num_regs_; ++i) {
1850           if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) {
1851             reg = i;
1852             break;
1853           }
1854         }
1855       }
1856 
1857       // Last-chance coalescing.
1858       for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1859         if (opportunity->stage == CoalesceStage::kDefunct) {
1860           continue;
1861         }
1862         LiveInterval* other_interval = opportunity->node_a->GetAlias() == node
1863             ? opportunity->node_b->GetAlias()->GetInterval()
1864             : opportunity->node_a->GetAlias()->GetInterval();
1865         if (other_interval->HasRegister()) {
1866           size_t coalesce_register = other_interval->GetRegister();
1867           if (interval->HasHighInterval()) {
1868             if (!conflict_mask[coalesce_register] &&
1869                 !conflict_mask[coalesce_register + 1] &&
1870                 RegisterIsAligned(coalesce_register)) {
1871               reg = coalesce_register;
1872               break;
1873             }
1874           } else if (!conflict_mask[coalesce_register]) {
1875             reg = coalesce_register;
1876             break;
1877           }
1878         }
1879       }
1880     }
1881 
1882     if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) {
1883       // Assign register.
1884       DCHECK(!interval->HasRegister());
1885       interval->SetRegister(reg);
1886       colored_intervals.push_back(interval);
1887       if (interval->HasHighInterval()) {
1888         DCHECK(!interval->GetHighInterval()->HasRegister());
1889         interval->GetHighInterval()->SetRegister(reg + 1);
1890         colored_intervals.push_back(interval->GetHighInterval());
1891       }
1892     } else if (interval->RequiresRegister()) {
1893       // The interference graph is too dense to color. Make it sparser by
1894       // splitting this live interval.
1895       successful = false;
1896       register_allocator_->SplitAtRegisterUses(interval);
1897       // We continue coloring, because there may be additional intervals that cannot
1898       // be colored, and that we should split.
1899     } else {
1900       // Spill.
1901       node->SetNeedsSpillSlot();
1902     }
1903   }
1904 
1905   // If unsuccessful, reset all register assignments.
1906   if (!successful) {
1907     for (LiveInterval* interval : colored_intervals) {
1908       interval->ClearRegister();
1909     }
1910   }
1911 
1912   return successful;
1913 }
1914 
1915 void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes) {
1916   // The register allocation resolver will organize the stack based on value type,
1917   // so we assign stack slots for each value type separately.
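  // In other words (a simplification of the actual frame layout), double, long, float, and int
  // parent intervals each receive slot indices starting from zero within their own region, and
  // the resolver later places those regions at distinct offsets in the frame.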
1918   ScopedArenaAllocator allocator(allocator_->GetArenaStack());
1919   ScopedArenaAllocatorAdapter<void> adapter = allocator.Adapter(kArenaAllocRegisterAllocator);
1920   ScopedArenaVector<LiveInterval*> double_intervals(adapter);
1921   ScopedArenaVector<LiveInterval*> long_intervals(adapter);
1922   ScopedArenaVector<LiveInterval*> float_intervals(adapter);
1923   ScopedArenaVector<LiveInterval*> int_intervals(adapter);
1924 
1925   // The set of parent intervals already handled.
1926   ScopedArenaSet<LiveInterval*> seen(adapter);
1927 
1928   // Find nodes that need spill slots.
1929   for (InterferenceNode* node : nodes) {
1930     if (!node->NeedsSpillSlot()) {
1931       continue;
1932     }
1933 
1934     LiveInterval* parent = node->GetInterval()->GetParent();
1935     if (seen.find(parent) != seen.end()) {
1936       // We've already handled this interval.
1937       // This can happen if multiple siblings of the same interval request a stack slot.
1938       continue;
1939     }
1940     seen.insert(parent);
1941 
1942     HInstruction* defined_by = parent->GetDefinedBy();
1943     if (parent->HasSpillSlot()) {
1944       // We already have a spill slot for this value that we can reuse.
1945     } else if (defined_by->IsParameterValue()) {
1946       // Parameters already have a stack slot.
1947       parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
1948     } else if (defined_by->IsCurrentMethod()) {
1949       // The current method is always at stack slot 0.
1950       parent->SetSpillSlot(0);
1951     } else if (defined_by->IsConstant()) {
1952       // Constants don't need a spill slot.
1953     } else {
1954       // We need to find a spill slot for this interval. Place it in the correct
1955       // worklist to be processed later.
1956       switch (node->GetInterval()->GetType()) {
1957         case DataType::Type::kFloat64:
1958           double_intervals.push_back(parent);
1959           break;
1960         case DataType::Type::kInt64:
1961           long_intervals.push_back(parent);
1962           break;
1963         case DataType::Type::kFloat32:
1964           float_intervals.push_back(parent);
1965           break;
1966         case DataType::Type::kReference:
1967         case DataType::Type::kInt32:
1968         case DataType::Type::kUint16:
1969         case DataType::Type::kUint8:
1970         case DataType::Type::kInt8:
1971         case DataType::Type::kBool:
1972         case DataType::Type::kInt16:
1973           int_intervals.push_back(parent);
1974           break;
1975         case DataType::Type::kUint32:
1976         case DataType::Type::kUint64:
1977         case DataType::Type::kVoid:
1978           LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType();
1979           UNREACHABLE();
1980       }
1981     }
1982   }
1983 
1984   // Color spill slots for each value type.
1985   ColorSpillSlots(ArrayRef<LiveInterval* const>(double_intervals), &num_double_spill_slots_);
1986   ColorSpillSlots(ArrayRef<LiveInterval* const>(long_intervals), &num_long_spill_slots_);
1987   ColorSpillSlots(ArrayRef<LiveInterval* const>(float_intervals), &num_float_spill_slots_);
1988   ColorSpillSlots(ArrayRef<LiveInterval* const>(int_intervals), &num_int_spill_slots_);
1989 }
1990 
1991 void RegisterAllocatorGraphColor::ColorSpillSlots(ArrayRef<LiveInterval* const> intervals,
1992                                                   /* out */ size_t* num_stack_slots_used) {
1993   // We cannot use the original interference graph here because spill slots are assigned to
1994   // all of the siblings of an interval, whereas an interference node represents only a single
1995   // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints
1996   // by position, and assigning the lowest spill slot available when we encounter an interval
1997   // beginning. We ignore lifetime holes for simplicity.
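  // Small worked example (hypothetical positions): with parent intervals A=[0,30) and B=[10,20)
  // each needing one slot, the sweep gives A slot 0 at position 0 and B slot 1 at position 10;
  // B's slot is freed at position 20, so a later interval C=[25,40) can reuse slot 1 while A
  // still occupies slot 0.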
1998   ScopedArenaAllocator allocator(allocator_->GetArenaStack());
1999   ScopedArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
2000       allocator.Adapter(kArenaAllocRegisterAllocator));
2001 
2002   for (LiveInterval* parent_interval : intervals) {
2003     DCHECK(parent_interval->IsParent());
2004     DCHECK(!parent_interval->HasSpillSlot());
2005     size_t start = parent_interval->GetStart();
2006     size_t end = parent_interval->GetLastSibling()->GetEnd();
2007     DCHECK_LT(start, end);
2008     interval_endpoints.push_back(std::make_tuple(start, true, parent_interval));
2009     interval_endpoints.push_back(std::make_tuple(end, false, parent_interval));
2010   }
2011 
2012   // Sort by position.
2013   // We explicitly ignore the third entry of each tuple (the interval pointer) in order
2014   // to maintain determinism.
2015   std::sort(interval_endpoints.begin(), interval_endpoints.end(),
2016             [] (const std::tuple<size_t, bool, LiveInterval*>& lhs,
2017                 const std::tuple<size_t, bool, LiveInterval*>& rhs) {
2018     return std::tie(std::get<0>(lhs), std::get<1>(lhs))
2019          < std::tie(std::get<0>(rhs), std::get<1>(rhs));
2020   });
2021 
2022   ArenaBitVector taken(&allocator, 0, true, kArenaAllocRegisterAllocator);
2023   for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) {
2024     // Extract information from the current tuple.
2025     LiveInterval* parent_interval;
2026     bool is_interval_beginning;
2027     size_t position;
2028     std::tie(position, is_interval_beginning, parent_interval) = *it;
2029     size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
2030 
2031     if (is_interval_beginning) {
2032       DCHECK(!parent_interval->HasSpillSlot());
2033       DCHECK_EQ(position, parent_interval->GetStart());
2034 
2035       // Find first available free stack slot(s).
2036       size_t slot = 0;
2037       for (; ; ++slot) {
2038         bool found = true;
2039         for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2040           if (taken.IsBitSet(s)) {
2041             found = false;
2042             break;  // failure
2043           }
2044         }
2045         if (found) {
2046           break;  // success
2047         }
2048       }
2049 
2050       parent_interval->SetSpillSlot(slot);
2051 
2052       *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
2053       if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
2054         // The parallel move resolver requires that there be an even number of spill slots
2055         // allocated for pair value types.
2056         ++(*num_stack_slots_used);
2057       }
2058 
2059       for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2060         taken.SetBit(s);
2061       }
2062     } else {
2063       DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
2064       DCHECK(parent_interval->HasSpillSlot());
2065 
2066       // Free up the stack slot(s) used by this interval.
2067       size_t slot = parent_interval->GetSpillSlot();
2068       for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2069         DCHECK(taken.IsBitSet(s));
2070         taken.ClearBit(s);
2071       }
2072     }
2073   }
2074   DCHECK_EQ(taken.NumSetBits(), 0u);
2075 }
2076 
2077 }  // namespace art
2078