/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "register_allocator.h"

#include <iostream>
#include <sstream>

22 #include "base/scoped_arena_allocator.h"
23 #include "base/scoped_arena_containers.h"
24 #include "base/bit_utils_iterator.h"
25 #include "base/bit_vector-inl.h"
26 #include "code_generator.h"
27 #include "register_allocator_linear_scan.h"
28 #include "ssa_liveness_analysis.h"
29 
30 namespace art HIDDEN {
31 
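// Helper to compute the mask of registers clobbered across a call: a bit is set
// for every register that `is_callee_save` does not report as callee-save.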
template <typename IsCalleeSave>
static uint32_t GetBlockedRegistersForCall(size_t num_registers, IsCalleeSave&& is_callee_save) {
  DCHECK_LE(num_registers, BitSizeOf<uint32_t>());
  uint32_t mask = 0u;
  for (size_t reg = 0; reg != num_registers; ++reg) {
    if (!is_callee_save(reg)) {
      mask |= 1u << reg;
    }
  }
  return mask;
}

static uint32_t GetBlockedCoreRegistersForCall(size_t num_registers, const CodeGenerator* codegen) {
  return GetBlockedRegistersForCall(
      num_registers, [&](size_t reg) { return codegen->IsCoreCalleeSaveRegister(reg); });
}

static uint32_t GetBlockedFpRegistersForCall(size_t num_registers, const CodeGenerator* codegen) {
  return GetBlockedRegistersForCall(
      num_registers, [&](size_t reg) { return codegen->IsFloatingPointCalleeSaveRegister(reg); });
}

RegisterAllocator::RegisterAllocator(ScopedArenaAllocator* allocator,
                                     CodeGenerator* codegen,
                                     const SsaLivenessAnalysis& liveness)
    : allocator_(allocator),
      codegen_(codegen),
      liveness_(liveness),
      num_core_registers_(codegen_->GetNumberOfCoreRegisters()),
      num_fp_registers_(codegen_->GetNumberOfFloatingPointRegisters()),
      core_registers_blocked_for_call_(
          GetBlockedCoreRegistersForCall(num_core_registers_, codegen)),
      fp_registers_blocked_for_call_(GetBlockedFpRegistersForCall(num_fp_registers_, codegen)) {}

std::unique_ptr<RegisterAllocator> RegisterAllocator::Create(ScopedArenaAllocator* allocator,
                                                             CodeGenerator* codegen,
                                                             const SsaLivenessAnalysis& analysis) {
  return std::unique_ptr<RegisterAllocator>(
      new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis));
}

RegisterAllocator::~RegisterAllocator() {
  if (kIsDebugBuild) {
    // Poison live interval pointers with "Error: BAD 71ve1nt3rval."
    LiveInterval* bad_live_interval = reinterpret_cast<LiveInterval*>(0xebad7113u);
    for (HBasicBlock* block : codegen_->GetGraph()->GetLinearOrder()) {
      for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
        it.Current()->SetLiveInterval(bad_live_interval);
      }
      for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
        it.Current()->SetLiveInterval(bad_live_interval);
      }
    }
  }
}

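// Iterates over all live ranges of an interval and of its split siblings:
// when the current sibling's ranges are exhausted, iteration moves on to
// the first range of the next sibling.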
class AllRangesIterator : public ValueObject {
 public:
  explicit AllRangesIterator(LiveInterval* interval)
      : current_interval_(interval),
        current_range_(interval->GetFirstRange()) {}

  bool Done() const { return current_interval_ == nullptr; }
  LiveRange* CurrentRange() const { return current_range_; }
  LiveInterval* CurrentInterval() const { return current_interval_; }

  void Advance() {
    current_range_ = current_range_->GetNext();
    if (current_range_ == nullptr) {
      current_interval_ = current_interval_->GetNextSibling();
      if (current_interval_ != nullptr) {
        current_range_ = current_interval_->GetFirstRange();
      }
    }
  }

 private:
  LiveInterval* current_interval_;
  LiveRange* current_range_;

  DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
};

void RegisterAllocator::DumpRegister(std::ostream& stream,
                                     int reg,
                                     RegisterType register_type,
                                     const CodeGenerator* codegen) {
  switch (register_type) {
    case RegisterType::kCoreRegister:
      codegen->DumpCoreRegister(stream, reg);
      break;
    case RegisterType::kFpRegister:
      codegen->DumpFloatingPointRegister(stream, reg);
      break;
  }
}

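// Returns the bit mask of registers occupied or blocked by `interval`: a single
// bit for an interval with an allocated register, the blocked-for-call mask or
// all registers of the given type for a fixed interval, and an empty mask otherwise.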
uint32_t RegisterAllocator::GetRegisterMask(LiveInterval* interval,
                                            RegisterType register_type) const {
  if (interval->HasRegister()) {
    DCHECK_EQ(register_type == RegisterType::kFpRegister,
              DataType::IsFloatingPointType(interval->GetType()));
    DCHECK_LT(static_cast<size_t>(interval->GetRegister()), BitSizeOf<uint32_t>());
    return 1u << interval->GetRegister();
  } else if (interval->IsFixed()) {
    DCHECK_EQ(interval->GetType(), DataType::Type::kVoid);
    DCHECK(interval->GetFirstRange() != nullptr);
    size_t start = interval->GetFirstRange()->GetStart();
    bool blocked_for_call = liveness_.GetInstructionFromPosition(start / 2u) != nullptr;
    switch (register_type) {
      case RegisterType::kCoreRegister:
        return blocked_for_call ? core_registers_blocked_for_call_
                                : MaxInt<uint32_t>(num_core_registers_);
      case RegisterType::kFpRegister:
        return blocked_for_call ? fp_registers_blocked_for_call_
                                : MaxInt<uint32_t>(num_fp_registers_);
    }
  } else {
    return 0u;
  }
}

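// Checks that no two of `intervals` overlap while sharing the same register or
// the same spill slot. On a conflict this either aborts with a diagnostic
// (`log_fatal_on_failure`) or returns false.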
bool RegisterAllocator::ValidateIntervals(ArrayRef<LiveInterval* const> intervals,
                                          size_t number_of_spill_slots,
                                          size_t number_of_out_slots,
                                          const CodeGenerator& codegen,
                                          const SsaLivenessAnalysis* liveness,
                                          RegisterType register_type,
                                          bool log_fatal_on_failure) {
  size_t number_of_registers = (register_type == RegisterType::kCoreRegister)
      ? codegen.GetNumberOfCoreRegisters()
      : codegen.GetNumberOfFloatingPointRegisters();
  uint32_t registers_blocked_for_call = (register_type == RegisterType::kCoreRegister)
      ? GetBlockedCoreRegistersForCall(number_of_registers, &codegen)
      : GetBlockedFpRegistersForCall(number_of_registers, &codegen);

  // A copy of `GetRegisterMask()` using local `number_of_registers` and
  // `registers_blocked_for_call` instead of the cached per-type members
  // that we cannot use in this static member function.
  auto get_register_mask = [&](LiveInterval* interval) {
    if (interval->HasRegister()) {
      DCHECK_EQ(register_type == RegisterType::kFpRegister,
                DataType::IsFloatingPointType(interval->GetType()));
      DCHECK_LT(static_cast<size_t>(interval->GetRegister()), BitSizeOf<uint32_t>());
      return 1u << interval->GetRegister();
    } else if (interval->IsFixed()) {
      DCHECK_EQ(interval->GetType(), DataType::Type::kVoid);
      DCHECK(interval->GetFirstRange() != nullptr);
      size_t start = interval->GetFirstRange()->GetStart();
      CHECK(liveness != nullptr);
      bool blocked_for_call = liveness->GetInstructionFromPosition(start / 2u) != nullptr;
      return blocked_for_call ? registers_blocked_for_call
                              : MaxInt<uint32_t>(number_of_registers);
    } else {
      return 0u;
    }
  };

  ScopedArenaAllocator allocator(codegen.GetGraph()->GetArenaStack());
  ScopedArenaVector<ArenaBitVector*> liveness_of_values(
      allocator.Adapter(kArenaAllocRegisterAllocatorValidate));
  liveness_of_values.reserve(number_of_registers + number_of_spill_slots);

  size_t max_end = 0u;
  for (LiveInterval* start_interval : intervals) {
    for (AllRangesIterator it(start_interval); !it.Done(); it.Advance()) {
      max_end = std::max(max_end, it.CurrentRange()->GetEnd());
    }
  }

  // Allocate a bit vector per register. A live interval that has a register
  // allocated will populate the associated bit vector based on its live ranges.
  for (size_t i = 0; i < number_of_registers + number_of_spill_slots; ++i) {
    liveness_of_values.push_back(
        ArenaBitVector::Create(&allocator, max_end, false, kArenaAllocRegisterAllocatorValidate));
  }

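  // Walk every live range of every interval and mark, for each lifetime position,
  // the register or spill slot the value occupies; a position marked twice
  // indicates a conflict.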
  for (LiveInterval* start_interval : intervals) {
    for (AllRangesIterator it(start_interval); !it.Done(); it.Advance()) {
      LiveInterval* current = it.CurrentInterval();
      HInstruction* defined_by = current->GetParent()->GetDefinedBy();
      if (current->GetParent()->HasSpillSlot()
           // Parameters and current method have their own stack slot.
           && !(defined_by != nullptr && (defined_by->IsParameterValue()
                                          || defined_by->IsCurrentMethod()))) {
        BitVector* liveness_of_spill_slot = liveness_of_values[number_of_registers
            + current->GetParent()->GetSpillSlot() / kVRegSize
            - number_of_out_slots];
        for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
          if (liveness_of_spill_slot->IsBitSet(j)) {
            if (log_fatal_on_failure) {
              std::ostringstream message;
              message << "Spill slot conflict at " << j;
              LOG(FATAL) << message.str();
            } else {
              return false;
            }
          } else {
            liveness_of_spill_slot->SetBit(j);
          }
        }
      }

      for (uint32_t reg : LowToHighBits(get_register_mask(current))) {
        if (kIsDebugBuild && log_fatal_on_failure && !current->IsFixed()) {
          // Only check when an error is fatal. Only test code asks for non-fatal failures,
          // and test code may not properly fill in the right information for the code generator.
          CHECK(codegen.HasAllocatedRegister(register_type == RegisterType::kCoreRegister, reg));
        }
        BitVector* liveness_of_register = liveness_of_values[reg];
        for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
          if (liveness_of_register->IsBitSet(j)) {
            if (current->IsUsingInputRegister() && current->CanUseInputRegister()) {
              continue;
            }
            if (log_fatal_on_failure) {
              std::ostringstream message;
              message << "Register conflict at " << j << " ";
              if (defined_by != nullptr) {
                message << "(" << defined_by->DebugName() << ") ";
              }
              message << "for ";
              DumpRegister(message, reg, register_type, &codegen);
              for (LiveInterval* interval : intervals) {
                if ((get_register_mask(interval) & (1u << reg)) != 0u && interval->CoversSlow(j)) {
                  message << std::endl;
                  if (interval->GetDefinedBy() != nullptr) {
                    message << interval->GetDefinedBy()->GetKind() << " ";
                  } else {
                    message << "physical ";
                  }
                  interval->Dump(message);
                }
              }
              LOG(FATAL) << message.str();
            } else {
              return false;
            }
          } else {
            liveness_of_register->SetBit(j);
          }
        }
      }
    }
  }
  return true;
}

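// Splits `interval` at `position` and returns the interval that starts at `position`.
// A split at the interval's start does not create a new sibling; it only clears the
// assigned register (also on the paired high/low interval) so that the interval is
// handled again.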
LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
  DCHECK_GE(position, interval->GetStart());
  DCHECK(!interval->IsDeadAt(position));
  if (position == interval->GetStart()) {
    // Spill slot will be allocated when handling `interval` again.
    interval->ClearRegister();
    if (interval->HasHighInterval()) {
      interval->GetHighInterval()->ClearRegister();
    } else if (interval->HasLowInterval()) {
      interval->GetLowInterval()->ClearRegister();
    }
    return interval;
  } else {
    LiveInterval* new_interval = interval->SplitAt(position);
    if (interval->HasHighInterval()) {
      LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
      new_interval->SetHighInterval(high);
      high->SetLowInterval(new_interval);
    } else if (interval->HasLowInterval()) {
      LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
      new_interval->SetLowInterval(low);
      low->SetHighInterval(new_interval);
    }
    return new_interval;
  }
}

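// Splits `interval` at a position between `from` and `to`, picking a block boundary
// that lets the resolution phase reuse moves already required by non-linear control
// flow instead of inserting new ones (see the block comment below).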
LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t from, size_t to) {
  HBasicBlock* block_from = liveness_.GetBlockFromPosition(from / 2);
  HBasicBlock* block_to = liveness_.GetBlockFromPosition(to / 2);
  DCHECK(block_from != nullptr);
  DCHECK(block_to != nullptr);

  // Both locations are in the same block. We split at the given location.
  if (block_from == block_to) {
    return Split(interval, to);
  }

  /*
   * Non-linear control flow will force moves at every branch instruction to the new location.
   * To avoid having all branches doing the moves, we find the next non-linear position and
   * split the interval at this position. Take the following example (block number is the linear
   * order position):
   *
   *     B1
   *    /  \
   *   B2  B3
   *    \  /
   *     B4
   *
   * B2 needs to split an interval, whose next use is in B4. If we were to split at the
   * beginning of B4, B3 would need to do a move between B3 and B4 to ensure the interval
   * is now in the correct location. It makes performance worse if the interval is spilled
   * and both B2 and B3 need to reload it before entering B4.
   *
   * By splitting at B3, we give a chance to the register allocator to allocate the
   * interval to the same register as in B1, and therefore avoid doing any
   * moves in B3.
   */
  if (block_from->GetDominator() != nullptr) {
    for (HBasicBlock* dominated : block_from->GetDominator()->GetDominatedBlocks()) {
      size_t position = dominated->GetLifetimeStart();
      if ((position > from) && (block_to->GetLifetimeStart() > position)) {
        // Even if we found a better block, we continue iterating in case
        // a dominated block is closer.
        // Note that dominated blocks are not sorted in liveness order.
        block_to = dominated;
        DCHECK_NE(block_to, block_from);
      }
    }
  }

  // If `to` is in a loop, find the outermost loop header which does not contain `from`.
  for (HLoopInformationOutwardIterator it(*block_to); !it.Done(); it.Advance()) {
    HBasicBlock* header = it.Current()->GetHeader();
    if (block_from->GetLifetimeStart() >= header->GetLifetimeStart()) {
      break;
    }
    block_to = header;
  }

  // Split at the start of the found block, to piggyback on existing moves
  // due to resolution of non-linear control flow (see `ConnectSplitSiblings`).
  return Split(interval, block_to->GetLifetimeStart());
}

}  // namespace art