/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/bit_field.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "stack_map_stream.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
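// (In IEEE-754 terms: sign bit 0, biased exponent 0x41F = 1055, zero mantissa,
// giving 1.0 * 2^(1055 - 1023) = 2^32; likewise 0x41E encodes 2^31.)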

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class ParallelMoveResolver;
class SrcMapElem;
template <class Alloc>
class SrcMap;
using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};
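
// A minimal CodeAllocator might, for instance, back Allocate() with a
// std::vector (illustrative sketch only; the real allocators are provided by
// the compiler driver):
//
//   class VectorCodeAllocator FINAL : public CodeAllocator {
//    public:
//     uint8_t* Allocate(size_t size) OVERRIDE {
//       memory_.resize(size);
//       return memory_.data();
//     }
//
//    private:
//     std::vector<uint8_t> memory_;
//   };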

struct PcInfo {
  uint32_t dex_pc;
  uintptr_t native_pc;
};

class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  SlowPathCode() {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  void RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

 private:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
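
// A concrete slow path typically saves the live registers, calls into the
// runtime, records the PC, and restores the registers. Roughly (hypothetical
// sketch; `instruction_` and `dex_pc_` would be fields of the subclass, and
// the runtime call itself is architecture specific):
//
//   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
//     SaveLiveRegisters(codegen, instruction_->GetLocations());
//     // ... move arguments and invoke the runtime entrypoint ...
//     RecordPcInfo(codegen, instruction_, dex_pc_);
//     RestoreLiveRegisters(codegen, instruction_->GetLocations());
//     // ... jump back to the fast path ...
//   }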

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};
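
// Backends walk a call's arguments with GetNextLocation(), which hands out
// registers and then stack slots while advancing gp_index_ / float_index_ /
// stack_index_. Illustrative use (the surrounding names are hypothetical):
//
//   for (size_t i = 0; i < invoke->GetNumberOfArguments(); ++i) {
//     locations->SetInAt(i, visitor->GetNextLocation(invoke->InputAt(i)->GetType()));
//   }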

class CodeGenerator {
 public:
  // Compiles the graph to executable instructions.
  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
  void CompileOptimized(CodeAllocator* allocator);
  static CodeGenerator* Create(HGraph* graph,
                               InstructionSet instruction_set,
                               const InstructionSetFeatures& isa_features,
                               const CompilerOptions& compiler_options);
  virtual ~CodeGenerator() {}

  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + InstructionSetPointerSize(GetInstructionSet())  // ArtMethod*
        + parameter->GetIndex() * kVRegSize;
  }
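
  // For example, on a 32-bit target (pointer size 4, kVRegSize 4) with a
  // 32-byte frame, the parameter at index 1 lives at SP + 32 + 4 + 1 * 4 = 40.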

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
  virtual Assembler* GetAssembler() = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fp_registers,
                                size_t number_of_out_slots,
                                const GrowableArray<HBasicBlock*>& block_order);
  int32_t GetStackSlot(HLocal* local) const;
  Location GetTemporaryLocation(HTemporary* temp) const;

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
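
  // For instance, registers {0, 1, 5} yield
  // (1 << 0) | (1 << 1) | (1 << 5) = 0b100011 = 0x23.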

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register on the stack. Returns the size taken on the stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on the stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);

  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.Add(slow_path);
  }

  void BuildSourceMap(DefaultSrcMap* src_map) const;
  void BuildMappingTable(std::vector<uint8_t>* vector) const;
  void BuildVMapTable(std::vector<uint8_t>* vector) const;
  void BuildNativeGCMap(
      std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
  void BuildStackMaps(std::vector<uint8_t>* vector);

  bool IsBaseline() const {
    return is_baseline_;
  }

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is, either it actually has a size of zero,
  // or it only contains the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }
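
  // On x86/x86-64 the call instruction itself pushes the return address, so
  // an "empty" frame still has the size of one word; on targets where calls
  // do not push the PC, it is zero.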

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }
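
  // E.g. spilling three core registers on a target with a 4-byte word size
  // and one FP register with an 8-byte spill slot gives 3 * 4 + 1 * 8 = 20.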

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

 protected:
  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        is_baseline_(false),
        graph_(graph),
        compiler_options_(compiler_options),
        pc_infos_(graph->GetArena(), 32),
        slow_paths_(graph->GetArena(), 8),
        block_order_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false),
        stack_map_stream_(graph->GetArena()) {}

  // Register allocation logic.
  void AllocateRegistersLocally(HInstruction* instruction) const;

  // Backend specific implementation for allocating a register.
  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;

  static size_t FindFreeEntry(bool* array, size_t length);
  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);

  virtual Location GetStackLocation(HLoadLocal* load) const = 0;

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  bool HasAllocatedCalleeSaveRegisters() const {
    // We compare the core register count against 1 because the allocated
    // callee-save core registers always comprise the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize this method
  // to share the logic.
  template <typename T>
  T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }
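
  // A backend with its own label type would typically wrap this as, e.g.
  // (hypothetical `Label` and `block_labels_`):
  //
  //   Label* GetLabelOf(HBasicBlock* block) const {
  //     return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
  //   }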

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  // Whether we are using baseline.
  bool is_baseline_;

 private:
  void InitLocationsBaseline(HInstruction* instruction);
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
  void BlockIfInRegister(Location location, bool is_out = false) const;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  GrowableArray<PcInfo> pc_infos_;
  GrowableArray<SlowPathCode*> slow_paths_;

  // The order to use for code generation.
  const GrowableArray<HBasicBlock*>* block_order_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  StackMapStream stack_map_stream_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    size_t pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return pointer_size_ + index * kVRegSize;
  }
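
  // For example, with a 4-byte method pointer, the argument at stack index 2
  // sits at offset 4 + 2 * kVRegSize = 12 from the stack pointer.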

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const size_t pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_