1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
18 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
19 
20 #include "arch/instruction_set.h"
21 #include "arch/instruction_set_features.h"
22 #include "base/arena_containers.h"
23 #include "base/arena_object.h"
24 #include "base/bit_field.h"
25 #include "base/bit_utils.h"
26 #include "base/enums.h"
27 #include "globals.h"
28 #include "graph_visualizer.h"
29 #include "locations.h"
30 #include "memory_region.h"
31 #include "nodes.h"
32 #include "optimizing_compiler_stats.h"
33 #include "read_barrier_option.h"
34 #include "stack_map_stream.h"
35 #include "string_reference.h"
36 #include "utils/label.h"
37 #include "utils/type_reference.h"
38 
39 namespace art {
40 
// IEEE-754 bit pattern of the double value 2^32.
static constexpr int64_t k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// IEEE-754 bit pattern of the double value 2^31.
static constexpr int64_t k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Smallest and largest values of a primitive integer (32-bit signed).
static constexpr int32_t kPrimIntMin = INT32_MIN;
static constexpr int32_t kPrimIntMax = INT32_MAX;

// Smallest and largest values of a primitive long (64-bit signed).
static constexpr int64_t kPrimLongMin = INT64_MIN;
static constexpr int64_t kPrimLongMax = INT64_MAX;
55 
// Read barrier option selected at compile time: `kWithReadBarrier` when
// `kEmitCompilerReadBarrier` is true, `kWithoutReadBarrier` otherwise.
static constexpr ReadBarrierOption kCompilerReadBarrierOption =
    kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
58 
// Forward declarations; these classes are only named (not defined) at this
// point in the header.
class Assembler;
class CodeGenerator;
class CompilerDriver;
class CompilerOptions;
class LinkerPatch;
class ParallelMoveResolver;
65 
66 class CodeAllocator {
67  public:
CodeAllocator()68   CodeAllocator() {}
~CodeAllocator()69   virtual ~CodeAllocator() {}
70 
71   virtual uint8_t* Allocate(size_t size) = 0;
72 
73  private:
74   DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
75 };
76 
77 class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
78  public:
SlowPathCode(HInstruction * instruction)79   explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
80     for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
81       saved_core_stack_offsets_[i] = kRegisterNotSaved;
82       saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
83     }
84   }
85 
~SlowPathCode()86   virtual ~SlowPathCode() {}
87 
88   virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
89 
90   // Save live core and floating-point caller-save registers and
91   // update the stack mask in `locations` for registers holding object
92   // references.
93   virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
94   // Restore live core and floating-point caller-save registers.
95   virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
96 
IsCoreRegisterSaved(int reg)97   bool IsCoreRegisterSaved(int reg) const {
98     return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
99   }
100 
IsFpuRegisterSaved(int reg)101   bool IsFpuRegisterSaved(int reg) const {
102     return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
103   }
104 
GetStackOffsetOfCoreRegister(int reg)105   uint32_t GetStackOffsetOfCoreRegister(int reg) const {
106     return saved_core_stack_offsets_[reg];
107   }
108 
GetStackOffsetOfFpuRegister(int reg)109   uint32_t GetStackOffsetOfFpuRegister(int reg) const {
110     return saved_fpu_stack_offsets_[reg];
111   }
112 
IsFatal()113   virtual bool IsFatal() const { return false; }
114 
115   virtual const char* GetDescription() const = 0;
116 
GetEntryLabel()117   Label* GetEntryLabel() { return &entry_label_; }
GetExitLabel()118   Label* GetExitLabel() { return &exit_label_; }
119 
GetInstruction()120   HInstruction* GetInstruction() const {
121     return instruction_;
122   }
123 
GetDexPc()124   uint32_t GetDexPc() const {
125     return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
126   }
127 
128  protected:
129   static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
130   static constexpr uint32_t kRegisterNotSaved = -1;
131   // The instruction where this slow path is happening.
132   HInstruction* instruction_;
133   uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
134   uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
135 
136  private:
137   Label entry_label_;
138   Label exit_label_;
139 
140   DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
141 };
142 
143 class InvokeDexCallingConventionVisitor {
144  public:
145   virtual Location GetNextLocation(Primitive::Type type) = 0;
146   virtual Location GetReturnLocation(Primitive::Type type) const = 0;
147   virtual Location GetMethodLocation() const = 0;
148 
149  protected:
InvokeDexCallingConventionVisitor()150   InvokeDexCallingConventionVisitor() {}
~InvokeDexCallingConventionVisitor()151   virtual ~InvokeDexCallingConventionVisitor() {}
152 
153   // The current index for core registers.
154   uint32_t gp_index_ = 0u;
155   // The current index for floating-point registers.
156   uint32_t float_index_ = 0u;
157   // The current stack index.
158   uint32_t stack_index_ = 0u;
159 
160  private:
161   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
162 };
163 
164 class FieldAccessCallingConvention {
165  public:
166   virtual Location GetObjectLocation() const = 0;
167   virtual Location GetFieldIndexLocation() const = 0;
168   virtual Location GetReturnLocation(Primitive::Type type) const = 0;
169   virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
170   virtual Location GetFpuLocation(Primitive::Type type) const = 0;
~FieldAccessCallingConvention()171   virtual ~FieldAccessCallingConvention() {}
172 
173  protected:
FieldAccessCallingConvention()174   FieldAccessCallingConvention() {}
175 
176  private:
177   DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
178 };
179 
180 class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
181  public:
  // Compiles the graph to executable instructions.
  void Compile(CodeAllocator* allocator);
  // Factory returning an owning pointer to a code generator for `graph`.
  // `stats` is optional and defaults to nullptr.
  // NOTE(review): presumably the concrete generator is chosen from
  // `instruction_set` - confirm against the out-of-line definition.
  static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
                                               InstructionSet instruction_set,
                                               const InstructionSetFeatures& isa_features,
                                               const CompilerOptions& compiler_options,
                                               OptimizingCompilerStats* stats = nullptr);
  // Virtual so that subclasses are destroyed correctly through a base pointer.
  virtual ~CodeGenerator() {}
190 
191   // Get the graph. This is the outermost graph, never the graph of a method being inlined.
GetGraph()192   HGraph* GetGraph() const { return graph_; }
193 
  // Block queries defined out of line. FirstNonEmptyBlock() is used by
  // CommonGetLabelOf() to pick the block whose label is returned.
  // NOTE(review): by their names these relate to the emission order held in
  // `block_order_` - confirm against the definitions.
  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;
197 
GetStackSlotOfParameter(HParameterValue * parameter)198   size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
199     // Note that this follows the current calling convention.
200     return GetFrameSize()
201         + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()))  // Art method
202         + parameter->GetIndex() * kVRegSize;
203   }
204 
  // Hooks for concrete code generators. Those marked `= 0` must be
  // implemented by subclasses; Finalize() and EmitLinkerPatches() have a
  // definition in this class that subclasses may override.
  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void MoveConstant(Location destination, int32_t value) = 0;
  virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;

  // Access to the assembler, in mutable and read-only form.
  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  // Sizes reported by the concrete generator. In this class GetWordSize() is
  // the per-register size in GetCoreSpillSize(), and
  // GetFloatingPointSpillSlotSize() the per-register size in GetFpuSpillSize().
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
  // Non-virtual; defined out of line.
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_safepoint_spill_size,
                                size_t number_of_out_slots,
                                const ArenaVector<HBasicBlock*>& block_order);
224   // Backends can override this as necessary. For most, no special alignment is required.
GetPreferredSlotsAlignment()225   virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }
226 
GetFrameSize()227   uint32_t GetFrameSize() const { return frame_size_; }
SetFrameSize(uint32_t size)228   void SetFrameSize(uint32_t size) { frame_size_ = size; }
GetCoreSpillMask()229   uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
GetFpuSpillMask()230   uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }
231 
GetNumberOfCoreRegisters()232   size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
GetNumberOfFloatingPointRegisters()233   size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
234   virtual void SetupBlockedRegisters() const = 0;
235 
ComputeSpillMask()236   virtual void ComputeSpillMask() {
237     core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
238     DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
239     fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
240   }
241 
ComputeRegisterMask(const int * registers,size_t length)242   static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
243     uint32_t mask = 0;
244     for (size_t i = 0, e = length; i < e; ++i) {
245       mask |= (1 << registers[i]);
246     }
247     return mask;
248   }
249 
  // Pure virtual; each takes an output stream and a register number.
  // NOTE(review): presumably they print the register's name - confirm in a backend.
  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  // Pure virtual; the instruction set of the concrete code generator.
  virtual InstructionSet GetInstructionSet() const = 0;

  // The compiler options reference passed to the constructor.
  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Defined out of line; `count` defaults to 1.
  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;

  // Saves the register in the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  // Floating-point counterparts of the two methods above (same signatures).
  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  // Pure virtual; whether a value of `type` needs two registers.
  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
267   // Returns whether we should split long moves in parallel moves.
ShouldSplitLongMoves()268   virtual bool ShouldSplitLongMoves() const { return false; }
269 
GetNumberOfCoreCalleeSaveRegisters()270   size_t GetNumberOfCoreCalleeSaveRegisters() const {
271     return POPCOUNT(core_callee_save_mask_);
272   }
273 
GetNumberOfCoreCallerSaveRegisters()274   size_t GetNumberOfCoreCallerSaveRegisters() const {
275     DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
276     return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
277   }
278 
IsCoreCalleeSaveRegister(int reg)279   bool IsCoreCalleeSaveRegister(int reg) const {
280     return (core_callee_save_mask_ & (1 << reg)) != 0;
281   }
282 
IsFloatingPointCalleeSaveRegister(int reg)283   bool IsFloatingPointCalleeSaveRegister(int reg) const {
284     return (fpu_callee_save_mask_ & (1 << reg)) != 0;
285   }
286 
GetSlowPathSpills(LocationSummary * locations,bool core_registers)287   uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
288     DCHECK(locations->OnlyCallsOnSlowPath() ||
289            (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
290                !locations->HasCustomSlowPathCallingConvention()));
291     uint32_t live_registers = core_registers
292         ? locations->GetLiveRegisters()->GetCoreRegisters()
293         : locations->GetLiveRegisters()->GetFloatingPointRegisters();
294     if (locations->HasCustomSlowPathCallingConvention()) {
295       // Save only the live registers that the custom calling convention wants us to save.
296       uint32_t caller_saves = core_registers
297           ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
298           : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
299       return live_registers & caller_saves;
300     } else {
301       // Default ABI, we need to spill non-callee-save live registers.
302       uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
303       return live_registers & ~callee_saves;
304     }
305   }
306 
GetNumberOfSlowPathSpills(LocationSummary * locations,bool core_registers)307   size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
308     return POPCOUNT(GetSlowPathSpills(locations, core_registers));
309   }
310 
GetStackOffsetOfShouldDeoptimizeFlag()311   size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
312     DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
313     DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
314     return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
315   }
316 
  // Record native to dex mapping for a suspend point.  Required by runtime.
  // `slow_path` is optional and defaults to nullptr.
  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  // Check whether we have already recorded mapping at this PC.
  bool HasStackMapAtCurrentPc();
  // Record extra stack maps if we support native debugging.
  // `slow_path` is optional and defaults to nullptr.
  void MaybeRecordNativeDebugInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path = nullptr);

  // Null check support. The implicit and explicit generators are pure virtual
  // and implemented by subclasses; the other methods are defined out of line.
  // NOTE(review): presumably GenerateNullCheck() chooses between the implicit
  // and explicit variants - confirm against its definition.
  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);
  // `caller_saves` defaults to the empty register set.
  LocationSummary* CreateThrowingSlowPathLocations(
      HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
  void GenerateNullCheck(HNullCheck* null_check);
  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;

  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();
338 
339   // TODO: Avoid creating the `std::unique_ptr` here.
AddSlowPath(SlowPathCode * slow_path)340   void AddSlowPath(SlowPathCode* slow_path) {
341     slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
342   }
343 
  // Stack map and method info emission; both are defined out of line.
  // ComputeStackMapAndMethodInfoSize() reports its results through the two
  // output pointers.
  // NOTE(review): presumably the regions passed to BuildStackMaps() must be
  // sized with the values reported by ComputeStackMapAndMethodInfoSize() -
  // confirm against the callers.
  void BuildStackMaps(MemoryRegion stack_map_region,
                      MemoryRegion method_info_region,
                      const DexFile::CodeItem& code_item);
  void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size);
GetNumberOfJitRoots()348   size_t GetNumberOfJitRoots() const {
349     return jit_string_roots_.size() + jit_class_roots_.size();
350   }
351 
  // Fills the `literals` array with literals collected during code generation.
  // Also emits literal patches.
  // NOTE(review): no parameter is named `literals`; the comment presumably
  // refers to `roots` - confirm against the definition.
  // Requires the mutator lock to be held at least in shared mode.
  void EmitJitRoots(uint8_t* code,
                    Handle<mirror::ObjectArray<mirror::Object>> roots,
                    const uint8_t* roots_data)
      REQUIRES_SHARED(Locks::mutator_lock_);
358 
IsLeafMethod()359   bool IsLeafMethod() const {
360     return is_leaf_;
361   }
362 
MarkNotLeaf()363   void MarkNotLeaf() {
364     is_leaf_ = false;
365     requires_current_method_ = true;
366   }
367 
SetRequiresCurrentMethod()368   void SetRequiresCurrentMethod() {
369     requires_current_method_ = true;
370   }
371 
RequiresCurrentMethod()372   bool RequiresCurrentMethod() const {
373     return requires_current_method_;
374   }
375 
  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  // Declared const; defined out of line.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;
382 
GetBlockedCoreRegisters()383   bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
GetBlockedFloatingPointRegisters()384   bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
385 
IsBlockedCoreRegister(size_t i)386   bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
IsBlockedFloatingPointRegister(size_t i)387   bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }
388 
  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays. Unlike GetCacheOffset(),
  // this is a non-static member function.
  size_t GetCachePointerOffset(uint32_t index);

  // Helper that returns the offset of the array's length field.
  // Note: Besides the normal arrays, we also use the HArrayLength for
  // accessing the String's `count` field in String intrinsics.
  static uint32_t GetArrayLengthOffset(HArrayLength* array_length);

  // Helper that returns the offset of the array's data.
  // Note: Besides the normal arrays, we also use the HArrayGet for
  // accessing the String's `value` field in String intrinsics.
  static uint32_t GetArrayDataOffset(HArrayGet* array_get);
405 
406   // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`.
407   template <PointerSize pointer_size>
GetReadBarrierMarkEntryPointsOffset(size_t reg)408   static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
409     // The entry point list defines 30 ReadBarrierMarkRegX entry points.
410     DCHECK_LT(reg, 30u);
411     // The ReadBarrierMarkRegX entry points are ordered by increasing
412     // register number in Thread::tls_Ptr_.quick_entrypoints.
413     return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value()
414         + static_cast<size_t>(pointer_size) * reg;
415   }
416 
  // Takes two (source, destination, type) triples; defined out of line.
  // NOTE(review): presumably emits `from1` -> `to1` and `from2` -> `to2` as a
  // single parallel move - confirm against the definition.
  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);
423 
StoreNeedsWriteBarrier(Primitive::Type type,HInstruction * value)424   static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
425     // Check that null value is not represented as an integer constant.
426     DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
427     return type == Primitive::kPrimNot && !value->IsNullConstant();
428   }
429 
430 
  // Performs checks pertaining to an InvokeRuntime call.
  // Non-static member function; defined out of line.
  void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             SlowPathCode* slow_path);

  // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
  // Static; defined out of line.
  static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
                                                          SlowPathCode* slow_path);
439 
AddAllocatedRegister(Location location)440   void AddAllocatedRegister(Location location) {
441     allocated_registers_.Add(location);
442   }
443 
HasAllocatedRegister(bool is_core,int reg)444   bool HasAllocatedRegister(bool is_core, int reg) const {
445     return is_core
446         ? allocated_registers_.ContainsCoreRegister(reg)
447         : allocated_registers_.ContainsFloatingPointRegister(reg);
448   }
449 
450   void AllocateLocations(HInstruction* instruction);
451 
452   // Tells whether the stack frame of the compiled method is
453   // considered "empty", that is either actually having a size of zero,
454   // or just containing the saved return address register.
HasEmptyFrame()455   bool HasEmptyFrame() const {
456     return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
457   }
458 
GetInt32ValueOf(HConstant * constant)459   static int32_t GetInt32ValueOf(HConstant* constant) {
460     if (constant->IsIntConstant()) {
461       return constant->AsIntConstant()->GetValue();
462     } else if (constant->IsNullConstant()) {
463       return 0;
464     } else {
465       DCHECK(constant->IsFloatConstant());
466       return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
467     }
468   }
469 
GetInt64ValueOf(HConstant * constant)470   static int64_t GetInt64ValueOf(HConstant* constant) {
471     if (constant->IsIntConstant()) {
472       return constant->AsIntConstant()->GetValue();
473     } else if (constant->IsNullConstant()) {
474       return 0;
475     } else if (constant->IsFloatConstant()) {
476       return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
477     } else if (constant->IsLongConstant()) {
478       return constant->AsLongConstant()->GetValue();
479     } else {
480       DCHECK(constant->IsDoubleConstant());
481       return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
482     }
483   }
484 
GetFirstRegisterSlotInSlowPath()485   size_t GetFirstRegisterSlotInSlowPath() const {
486     return first_register_slot_in_slow_path_;
487   }
488 
FrameEntrySpillSize()489   uint32_t FrameEntrySpillSize() const {
490     return GetFpuSpillSize() + GetCoreSpillSize();
491   }
492 
  // Pure virtual; the parallel move resolver of the concrete code generator.
  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  // Static helper taking an invoke and a calling convention visitor; defined
  // out of line.
  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  // Code generation helpers for unresolved and polymorphic invokes; defined
  // out of line.
  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

  void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke);

  // Unresolved field accesses: the first method takes the access, its field
  // type and a calling convention; the second additionally takes the field
  // index and dex pc. Both are defined out of line.
  void CreateUnresolvedFieldLocationSummary(
      HInstruction* field_access,
      Primitive::Type field_type,
      const FieldAccessCallingConvention& calling_convention);

  void GenerateUnresolvedFieldAccess(
      HInstruction* field_access,
      Primitive::Type field_type,
      uint32_t field_index,
      uint32_t dex_pc,
      const FieldAccessCallingConvention& calling_convention);

  // HLoadClass helpers for the runtime-call case; defined out of line.
  static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
                                                        Location runtime_type_index_location,
                                                        Location runtime_return_location);
  void GenerateLoadClassRuntimeCall(HLoadClass* cls);

  // Static helper taking the invoke; defined out of line.
  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);

  // Setter and getter for the disassembly information pointer, which is
  // nullptr after construction.
  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

  // Pure virtual; implemented by subclasses. `slow_path` is optional and
  // defaults to nullptr.
  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             uint32_t dex_pc,
                             SlowPathCode* slow_path = nullptr) = 0;

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) = 0;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) = 0;
538 
GetLoadStringCallKind(HLoadString * load)539   static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
540     switch (load->GetLoadKind()) {
541       case HLoadString::LoadKind::kBssEntry:
542         DCHECK(load->NeedsEnvironment());
543         return LocationSummary::kCallOnSlowPath;
544       case HLoadString::LoadKind::kDexCacheViaMethod:
545         DCHECK(load->NeedsEnvironment());
546         return LocationSummary::kCallOnMainOnly;
547       case HLoadString::LoadKind::kJitTableAddress:
548         DCHECK(!load->NeedsEnvironment());
549         return kEmitCompilerReadBarrier
550             ? LocationSummary::kCallOnSlowPath
551             : LocationSummary::kNoCall;
552         break;
553       default:
554         DCHECK(!load->NeedsEnvironment());
555         return LocationSummary::kNoCall;
556     }
557   }
558 
  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      HInvokeStaticOrDirect* invoke) = 0;

  // Generate a call to a static or direct method.
  virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
  // Generate a call to a virtual method.
  virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;

  // Copy the result of a call into the given target.
  virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;

  // Pure virtual; implemented by subclasses.
  // NOTE(review): presumably emits a single no-op instruction - confirm in a backend.
  virtual void GenerateNop() = 0;

  // Offsets defined out of line.
  // NOTE(review): by name, offsets of "slow" and "disable" flags related to
  // references - confirm what object they are relative to.
  uint32_t GetReferenceSlowFlagOffset() const;
  uint32_t GetReferenceDisableFlagOffset() const;

  // Static helper returning the entrypoint for `array_klass`; defined out of line.
  static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass);
579 
580  protected:
581   // Patch info used for recording locations of required linker patches and their targets,
582   // i.e. target method, string, type or code identified by their dex file and index.
583   template <typename LabelType>
584   struct PatchInfo {
PatchInfoPatchInfo585     PatchInfo(const DexFile& target_dex_file, uint32_t target_index)
586         : dex_file(target_dex_file), index(target_index) { }
587 
588     const DexFile& dex_file;
589     uint32_t index;
590     LabelType label;
591   };
592 
  // Protected constructor for use by concrete code generators.
  // Stores the register counts, the callee-save masks, the compiler options
  // and the stats pointer, and allocates the two blocked-register arrays (one
  // bool per core / floating-point register) from the graph's arena.
  // The generator starts with a zero frame size, empty spill masks, no
  // allocated registers, no block order, no disassembly information, no
  // current slow path, and with the method considered a leaf that does not
  // require the current method.
  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        allocated_registers_(RegisterSet::Empty()),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
                                                                    kArenaAllocCodeGenerator)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
                                                                   kArenaAllocCodeGenerator)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena(), graph->GetInstructionSet()),
        block_order_(nullptr),
        jit_string_roots_(StringReferenceValueComparator(),
                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        jit_class_roots_(TypeReferenceValueComparator(),
                         graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        disasm_info_(nullptr),
        stats_(stats),
        graph_(graph),
        compiler_options_(compiler_options),
        slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        current_slow_path_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {
    // Reserve room for a few slow paths up front.
    slow_paths_.reserve(8);
  }
632 
  // Pure virtual; each returns a graph visitor supplied by the subclass.
  // NOTE(review): presumably one visitor builds locations and the other emits
  // instructions - confirm in a backend.
  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;
635 
636   // Returns the location of the first spilled entry for floating point registers,
637   // relative to the stack pointer.
GetFpuSpillStart()638   uint32_t GetFpuSpillStart() const {
639     return GetFrameSize() - FrameEntrySpillSize();
640   }
641 
GetFpuSpillSize()642   uint32_t GetFpuSpillSize() const {
643     return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
644   }
645 
GetCoreSpillSize()646   uint32_t GetCoreSpillSize() const {
647     return POPCOUNT(core_spill_mask_) * GetWordSize();
648   }
649 
HasAllocatedCalleeSaveRegisters()650   virtual bool HasAllocatedCalleeSaveRegisters() const {
651     // We check the core registers against 1 because it always comprises the return PC.
652     return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
653       || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
654   }
655 
CallPushesPC()656   bool CallPushesPC() const {
657     InstructionSet instruction_set = GetInstructionSet();
658     return instruction_set == kX86 || instruction_set == kX86_64;
659   }
660 
661   // Arm64 has its own type for a label, so we need to templatize these methods
662   // to share the logic.
663 
664   template <typename LabelType>
CommonInitializeLabels()665   LabelType* CommonInitializeLabels() {
666     // We use raw array allocations instead of ArenaVector<> because Labels are
667     // non-constructible and non-movable and as such cannot be held in a vector.
668     size_t size = GetGraph()->GetBlocks().size();
669     LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
670                                                                       kArenaAllocCodeGenerator);
671     for (size_t i = 0; i != size; ++i) {
672       new(labels + i) LabelType();
673     }
674     return labels;
675   }
676 
677   template <typename LabelType>
CommonGetLabelOf(LabelType * raw_pointer_to_labels_array,HBasicBlock * block)678   LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
679     block = FirstNonEmptyBlock(block);
680     return raw_pointer_to_labels_array + block->GetBlockId();
681   }
682 
GetCurrentSlowPath()683   SlowPathCode* GetCurrentSlowPath() {
684     return current_slow_path_;
685   }
686 
687   // Emit the patches associated with JIT roots. Only applies to JIT compiled code.
EmitJitRootPatches(uint8_t * code ATTRIBUTE_UNUSED,const uint8_t * roots_data ATTRIBUTE_UNUSED)688   virtual void EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED,
689                                   const uint8_t* roots_data ATTRIBUTE_UNUSED) {
690     DCHECK_EQ(jit_string_roots_.size(), 0u);
691     DCHECK_EQ(jit_class_roots_.size(), 0u);
692   }
693 
694   // Frame size required for this method.
695   uint32_t frame_size_;
696   uint32_t core_spill_mask_;
697   uint32_t fpu_spill_mask_;
698   uint32_t first_register_slot_in_slow_path_;
699 
700   // Registers that were allocated during linear scan.
701   RegisterSet allocated_registers_;
702 
703   // Arrays used when doing register allocation to know which
704   // registers we can allocate. `SetupBlockedRegisters` updates the
705   // arrays.
706   bool* const blocked_core_registers_;
707   bool* const blocked_fpu_registers_;
708   size_t number_of_core_registers_;
709   size_t number_of_fpu_registers_;
710   size_t number_of_register_pairs_;
711   const uint32_t core_callee_save_mask_;
712   const uint32_t fpu_callee_save_mask_;
713 
714   StackMapStream stack_map_stream_;
715 
716   // The order to use for code generation.
717   const ArenaVector<HBasicBlock*>* block_order_;
718 
719   // Maps a StringReference (dex_file, string_index) to the index in the literal table.
720   // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
721   // will compute all the indices.
722   ArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_;
723 
724   // Maps a ClassReference (dex_file, type_index) to the index in the literal table.
725   // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
726   // will compute all the indices.
727   ArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_;
728 
729   DisassemblyInformation* disasm_info_;
730 
731  private:
732   size_t GetStackOffsetOfSavedRegister(size_t index);
733   void GenerateSlowPaths();
734   void BlockIfInRegister(Location location, bool is_out = false) const;
735   void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
736 
737   OptimizingCompilerStats* stats_;
738 
739   HGraph* const graph_;
740   const CompilerOptions& compiler_options_;
741 
742   ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;
743 
744   // The current slow-path that we're generating code for.
745   SlowPathCode* current_slow_path_;
746 
747   // The current block index in `block_order_` of the block
748   // we are generating code for.
749   size_t current_block_index_;
750 
751   // Whether the method is a leaf method.
752   bool is_leaf_;
753 
754   // Whether an instruction in the graph accesses the current method.
755   // TODO: Rename: this actually indicates that some instruction in the method
756   // needs the environment including a valid stack frame.
757   bool requires_current_method_;
758 
759   friend class OptimizingCFITest;
760 
761   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
762 };
763 
764 template <typename C, typename F>
765 class CallingConvention {
766  public:
CallingConvention(const C * registers,size_t number_of_registers,const F * fpu_registers,size_t number_of_fpu_registers,PointerSize pointer_size)767   CallingConvention(const C* registers,
768                     size_t number_of_registers,
769                     const F* fpu_registers,
770                     size_t number_of_fpu_registers,
771                     PointerSize pointer_size)
772       : registers_(registers),
773         number_of_registers_(number_of_registers),
774         fpu_registers_(fpu_registers),
775         number_of_fpu_registers_(number_of_fpu_registers),
776         pointer_size_(pointer_size) {}
777 
GetNumberOfRegisters()778   size_t GetNumberOfRegisters() const { return number_of_registers_; }
GetNumberOfFpuRegisters()779   size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }
780 
GetRegisterAt(size_t index)781   C GetRegisterAt(size_t index) const {
782     DCHECK_LT(index, number_of_registers_);
783     return registers_[index];
784   }
785 
GetFpuRegisterAt(size_t index)786   F GetFpuRegisterAt(size_t index) const {
787     DCHECK_LT(index, number_of_fpu_registers_);
788     return fpu_registers_[index];
789   }
790 
GetStackOffsetOf(size_t index)791   size_t GetStackOffsetOf(size_t index) const {
792     // We still reserve the space for parameters passed by registers.
793     // Add space for the method pointer.
794     return static_cast<size_t>(pointer_size_) + index * kVRegSize;
795   }
796 
797  private:
798   const C* registers_;
799   const size_t number_of_registers_;
800   const F* fpu_registers_;
801   const size_t number_of_fpu_registers_;
802   const PointerSize pointer_size_;
803 
804   DISALLOW_COPY_AND_ASSIGN(CallingConvention);
805 };
806 
807 /**
808  * A templated class SlowPathGenerator with a templated method NewSlowPath()
809  * that can be used by any code generator to share equivalent slow-paths with
810  * the objective of reducing generated code size.
811  *
812  * InstructionType:  instruction that requires SlowPathCodeType
813  * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
814  */
815 template <typename InstructionType>
816 class SlowPathGenerator {
817   static_assert(std::is_base_of<HInstruction, InstructionType>::value,
818                 "InstructionType is not a subclass of art::HInstruction");
819 
820  public:
SlowPathGenerator(HGraph * graph,CodeGenerator * codegen)821   SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
822       : graph_(graph),
823         codegen_(codegen),
824         slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}
825 
826   // Creates and adds a new slow-path, if needed, or returns existing one otherwise.
827   // Templating the method (rather than the whole class) on the slow-path type enables
828   // keeping this code at a generic, non architecture-specific place.
829   //
830   // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
831   //       To relax this requirement, we would need some RTTI on the stored slow-paths,
832   //       or template the class as a whole on SlowPathType.
833   template <typename SlowPathCodeType>
NewSlowPath(InstructionType * instruction)834   SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
835     static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
836                   "SlowPathCodeType is not a subclass of art::SlowPathCode");
837     static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
838                   "SlowPathCodeType is not constructible from InstructionType*");
839     // Iterate over potential candidates for sharing. Currently, only same-typed
840     // slow-paths with exactly the same dex-pc are viable candidates.
841     // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
842     const uint32_t dex_pc = instruction->GetDexPc();
843     auto iter = slow_path_map_.find(dex_pc);
844     if (iter != slow_path_map_.end()) {
845       auto candidates = iter->second;
846       for (const auto& it : candidates) {
847         InstructionType* other_instruction = it.first;
848         SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
849         // Determine if the instructions allow for slow-path sharing.
850         if (HaveSameLiveRegisters(instruction, other_instruction) &&
851             HaveSameStackMap(instruction, other_instruction)) {
852           // Can share: reuse existing one.
853           return other_slow_path;
854         }
855       }
856     } else {
857       // First time this dex-pc is seen.
858       iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
859     }
860     // Cannot share: create and add new slow-path for this particular dex-pc.
861     SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
862     iter->second.emplace_back(std::make_pair(instruction, slow_path));
863     codegen_->AddSlowPath(slow_path);
864     return slow_path;
865   }
866 
867  private:
868   // Tests if both instructions have same set of live physical registers. This ensures
869   // the slow-path has exactly the same preamble on saving these registers to stack.
HaveSameLiveRegisters(const InstructionType * i1,const InstructionType * i2)870   bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
871     const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
872     const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
873     RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
874     RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
875     return (((live1->GetCoreRegisters() & core_spill) ==
876              (live2->GetCoreRegisters() & core_spill)) &&
877             ((live1->GetFloatingPointRegisters() & fpu_spill) ==
878              (live2->GetFloatingPointRegisters() & fpu_spill)));
879   }
880 
881   // Tests if both instructions have the same stack map. This ensures the interpreter
882   // will find exactly the same dex-registers at the same entries.
HaveSameStackMap(const InstructionType * i1,const InstructionType * i2)883   bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
884     DCHECK(i1->HasEnvironment());
885     DCHECK(i2->HasEnvironment());
886     // We conservatively test if the two instructions find exactly the same instructions
887     // and location in each dex-register. This guarantees they will have the same stack map.
888     HEnvironment* e1 = i1->GetEnvironment();
889     HEnvironment* e2 = i2->GetEnvironment();
890     if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
891       return false;
892     }
893     for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
894       if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
895           !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
896         return false;
897       }
898     }
899     return true;
900   }
901 
902   HGraph* const graph_;
903   CodeGenerator* const codegen_;
904 
905   // Map from dex-pc to vector of already existing instruction/slow-path pairs.
906   ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;
907 
908   DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
909 };
910 
911 class InstructionCodeGenerator : public HGraphVisitor {
912  public:
InstructionCodeGenerator(HGraph * graph,CodeGenerator * codegen)913   InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
914       : HGraphVisitor(graph),
915         deopt_slow_paths_(graph, codegen) {}
916 
917  protected:
918   // Add slow-path generator for each instruction/slow-path combination that desires sharing.
919   // TODO: under current regime, only deopt sharing make sense; extend later.
920   SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
921 };
922 
923 }  // namespace art
924 
925 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
926