/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
#include "compiled_method.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "stack_map_stream.h"
#include "utils/label.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

class Assembler;
class CodeGenerator;
class CompilerDriver;
class LinkerPatch;
class ParallelMoveResolver;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};
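
// A minimal CodeAllocator (a sketch; the names below are illustrative only) simply hands
// out a buffer that outlives code emission, e.g.:
//
//   class VectorCodeAllocator : public CodeAllocator {
//    public:
//     uint8_t* Allocate(size_t size) override {
//       memory_.resize(size);
//       return memory_.data();
//     }
//
//    private:
//     std::vector<uint8_t> memory_;
//   };
//
// CodeGenerator::Compile()/Finalize() then emit the finished code into that buffer.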

class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
 public:
  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

  Label* GetEntryLabel() { return &entry_label_; }
  Label* GetExitLabel() { return &exit_label_; }

  HInstruction* GetInstruction() const {
    return instruction_;
  }

  uint32_t GetDexPc() const {
    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
  }

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  // The instruction where this slow path is happening.
  HInstruction* instruction_;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  Label entry_label_;
  Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

class FieldAccessCallingConvention {
 public:
  virtual Location GetObjectLocation() const = 0;
  virtual Location GetFieldIndexLocation() const = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
  virtual Location GetFpuLocation(Primitive::Type type) const = 0;
  virtual ~FieldAccessCallingConvention() {}

 protected:
  FieldAccessCallingConvention() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
};

class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
 public:
  // Compiles the graph to executable instructions.
  void Compile(CodeAllocator* allocator);
  static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
                                               InstructionSet instruction_set,
                                               const InstructionSetFeatures& isa_features,
                                               const CompilerOptions& compiler_options,
                                               OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGenerator() {}

  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + InstructionSetPointerSize(GetInstructionSet())  // Art method
        + parameter->GetIndex() * kVRegSize;
  }
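
  // For example, on a 64-bit target with a 64-byte frame, the formula above places the
  // parameter at offset 64 + 8 (the ArtMethod* slot) + GetIndex() * kVRegSize.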

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void MoveConstant(Location destination, int32_t value) = 0;
  virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;

  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fpu_registers,
                                size_t number_of_out_slots,
                                const ArenaVector<HBasicBlock*>& block_order);

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters() const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
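
  // For example, ComputeRegisterMask() over the registers {0, 1, 5} produces
  // (1 << 0) | (1 << 1) | (1 << 5) = 0b100011 = 0x23.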

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;

  // Saves the register in the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  size_t GetNumberOfCoreCalleeSaveRegisters() const {
    return POPCOUNT(core_callee_save_mask_);
  }

  size_t GetNumberOfCoreCallerSaveRegisters() const {
    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
  }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  // Record native to dex mapping for a suspend point.  Required by runtime.
  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  // Check whether we have already recorded mapping at this PC.
  bool HasStackMapAtCurrentPc();
  // Record extra stack maps if we support native debugging.
  void MaybeRecordNativeDebugInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path = nullptr);

  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);
  void GenerateNullCheck(HNullCheck* null_check);
  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;

  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();

  // Returns true if implicit null checks are allowed in the compiler options
  // and if the null check is not inside a try block. We currently cannot do
  // implicit null checks in that case because we need the NullCheckSlowPath to
  // save live registers, which may be needed by the runtime to set catch phis.
  bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;

  // TODO: Avoid creating the `std::unique_ptr` here.
  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
  }

  void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
  size_t ComputeStackMapsSize();

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }
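
  // In other words, only stores of a possibly non-null reference (kPrimNot) need a GC
  // write barrier; storing the null constant or a primitive value never does.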

  void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is either actually having a size of zero,
  // or just containing the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }
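
  // For example, on x86/x86-64 the call instruction itself pushes the return PC, so a frame
  // of exactly GetWordSize() bytes still counts as empty (see CallPushesPC()).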

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

  void CreateUnresolvedFieldLocationSummary(
      HInstruction* field_access,
      Primitive::Type field_type,
      const FieldAccessCallingConvention& calling_convention);

  void GenerateUnresolvedFieldAccess(
      HInstruction* field_access,
      Primitive::Type field_type,
      uint32_t field_index,
      uint32_t dex_pc,
      const FieldAccessCallingConvention& calling_convention);

  // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design.
  static void CreateLoadClassLocationSummary(HLoadClass* cls,
                                             Location runtime_type_index_location,
                                             Location runtime_return_location,
                                             bool code_generator_supports_read_barrier = false);

  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             uint32_t dex_pc,
                             SlowPathCode* slow_path) = 0;

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) = 0;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      MethodReference target_method) = 0;

  // Generate a call to a static or direct method.
  virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
  // Generate a call to a virtual method.
  virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;

  // Copy the result of a call into the given target.
  virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;

  virtual void GenerateNop() = 0;

 protected:
  // Method patch info used for recording locations of required linker patches and
  // target methods. The target method can be used for various purposes, whether for
  // patching the address of the method or the code pointer or a PC-relative call.
  template <typename LabelType>
  struct MethodPatchInfo {
    explicit MethodPatchInfo(MethodReference m) : target_method(m), label() { }

    MethodReference target_method;
    LabelType label;
  };

  // String patch info used for recording locations of required linker patches and
  // target strings. The actual string address can be absolute or PC-relative.
  template <typename LabelType>
  struct StringPatchInfo {
    StringPatchInfo(const DexFile& df, uint32_t index)
        : dex_file(df), string_index(index), label() { }

    const DexFile& dex_file;
    uint32_t string_index;
    LabelType label;
  };

  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
                                                                    kArenaAllocCodeGenerator)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
                                                                   kArenaAllocCodeGenerator)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs,
                                                                    kArenaAllocCodeGenerator)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena()),
        block_order_(nullptr),
        disasm_info_(nullptr),
        stats_(stats),
        graph_(graph),
        compiler_options_(compiler_options),
        slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        current_slow_path_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {
    slow_paths_.reserve(8);
  }

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize these methods
  // to share the logic.

  template <typename LabelType>
  LabelType* CommonInitializeLabels() {
    // We use raw array allocations instead of ArenaVector<> because Labels are
    // non-constructible and non-movable and as such cannot be held in a vector.
    size_t size = GetGraph()->GetBlocks().size();
    LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
                                                                      kArenaAllocCodeGenerator);
    for (size_t i = 0; i != size; ++i) {
      new(labels + i) LabelType();
    }
    return labels;
  }

  template <typename LabelType>
  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  SlowPathCode* GetCurrentSlowPath() {
    return current_slow_path_;
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  StackMapStream stack_map_stream_;

  // The order to use for code generation.
  const ArenaVector<HBasicBlock*>* block_order_;

  DisassemblyInformation* disasm_info_;

 private:
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);

  OptimizingCompilerStats* stats_;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;

  // The current slow-path that we're generating code for.
  SlowPathCode* current_slow_path_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    size_t pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return pointer_size_ + index * kVRegSize;
  }
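
  // For example, with a pointer_size_ of 8, the argument at index 2 sits at stack offset
  // 8 + 2 * kVRegSize, even when the first arguments are actually passed in registers.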

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const size_t pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

/**
 * A templated class SlowPathGenerator with a templated method NewSlowPath()
 * that can be used by any code generator to share equivalent slow-paths with
 * the objective of reducing generated code size.
 *
 * InstructionType:  instruction that requires SlowPathCodeType
 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
 */
template <typename InstructionType>
class SlowPathGenerator {
  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
                "InstructionType is not a subclass of art::HInstruction");

 public:
  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
      : graph_(graph),
        codegen_(codegen),
        slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}

  // Creates and adds a new slow-path, if needed, or returns existing one otherwise.
  // Templating the method (rather than the whole class) on the slow-path type enables
  // keeping this code at a generic, non architecture-specific place.
  //
  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
  //       or template the class as a whole on SlowPathType.
  template <typename SlowPathCodeType>
  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
                  "SlowPathCodeType is not constructible from InstructionType*");
    // Iterate over potential candidates for sharing. Currently, only same-typed
    // slow-paths with exactly the same dex-pc are viable candidates.
    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
    const uint32_t dex_pc = instruction->GetDexPc();
    auto iter = slow_path_map_.find(dex_pc);
    if (iter != slow_path_map_.end()) {
      auto candidates = iter->second;
      for (const auto& it : candidates) {
        InstructionType* other_instruction = it.first;
        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
        // Determine if the instructions allow for slow-path sharing.
        if (HaveSameLiveRegisters(instruction, other_instruction) &&
            HaveSameStackMap(instruction, other_instruction)) {
          // Can share: reuse existing one.
          return other_slow_path;
        }
      }
    } else {
      // First time this dex-pc is seen.
      iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
    }
    // Cannot share: create and add new slow-path for this particular dex-pc.
    SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
    iter->second.emplace_back(std::make_pair(instruction, slow_path));
    codegen_->AddSlowPath(slow_path);
    return slow_path;
  }

 private:
  // Tests if both instructions have same set of live physical registers. This ensures
  // the slow-path has exactly the same preamble on saving these registers to stack.
  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
    return (((live1->GetCoreRegisters() & core_spill) ==
             (live2->GetCoreRegisters() & core_spill)) &&
            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
             (live2->GetFloatingPointRegisters() & fpu_spill)));
  }

  // Tests if both instructions have the same stack map. This ensures the interpreter
  // will find exactly the same dex-registers at the same entries.
  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
    DCHECK(i1->HasEnvironment());
    DCHECK(i2->HasEnvironment());
    // We conservatively test if the two instructions find exactly the same instructions
    // and location in each dex-register. This guarantees they will have the same stack map.
    HEnvironment* e1 = i1->GetEnvironment();
    HEnvironment* e2 = i2->GetEnvironment();
    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
      return false;
    }
    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
        return false;
      }
    }
    return true;
  }

  HGraph* const graph_;
  CodeGenerator* const codegen_;

  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
};
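
// Usage sketch (illustrative names, not declared in this header): an architecture-specific
// instruction visitor that owns a SlowPathGenerator<HDeoptimize> can request a shared
// slow path instead of unconditionally allocating a new one, e.g.:
//
//   SlowPathCode* slow_path =
//       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathForMyArch>(deoptimize);
//   // ... branch to slow_path->GetEntryLabel() when the deopt condition holds ...
//
// Equivalent deoptimizations at the same dex pc then share a single slow-path stub.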

class InstructionCodeGenerator : public HGraphVisitor {
 public:
  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
      : HGraphVisitor(graph),
        deopt_slow_paths_(graph, codegen) {}

 protected:
  // Add slow-path generator for each instruction/slow-path combination that desires sharing.
  // TODO: under current regime, only deopt sharing makes sense; extend later.
  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_