1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
18 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
19 
20 #include "arch/instruction_set.h"
21 #include "arch/instruction_set_features.h"
22 #include "base/arena_containers.h"
23 #include "base/arena_object.h"
24 #include "base/array_ref.h"
25 #include "base/bit_field.h"
26 #include "base/bit_utils.h"
27 #include "base/globals.h"
28 #include "base/macros.h"
29 #include "base/memory_region.h"
30 #include "base/pointer_size.h"
31 #include "class_root.h"
32 #include "dex/string_reference.h"
33 #include "dex/type_reference.h"
34 #include "graph_visualizer.h"
35 #include "locations.h"
36 #include "nodes.h"
37 #include "oat/oat_quick_method_header.h"
38 #include "optimizing_compiler_stats.h"
39 #include "read_barrier_option.h"
40 #include "stack.h"
41 #include "subtype_check.h"
42 #include "utils/assembler.h"
43 #include "utils/label.h"
44 
45 namespace art HIDDEN {
46 
// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
// Spelled with the <cstdint> limit macro instead of the hex literal 0x80000000: that literal has
// an unsigned type, so using it as an initializer relies on an out-of-range unsigned-to-signed
// conversion.
static int32_t constexpr kPrimIntMin = INT32_MIN;
// Minimum value for a primitive long (limit macro used for the same reason as above).
static int64_t constexpr kPrimLongMin = INT64_MIN;

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = INT32_MAX;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_MAX;
61 
62 constexpr size_t kClassStatusLsbPosition = SubtypeCheckBits::BitStructSizeOf();
63 constexpr size_t kClassStatusByteOffset =
64     mirror::Class::StatusOffset().SizeValue() + (kClassStatusLsbPosition / kBitsPerByte);
65 constexpr uint32_t kShiftedVisiblyInitializedValue = enum_cast<uint32_t>(
66     ClassStatus::kVisiblyInitialized) << (kClassStatusLsbPosition % kBitsPerByte);
67 constexpr uint32_t kShiftedInitializingValue =
68     enum_cast<uint32_t>(ClassStatus::kInitializing) << (kClassStatusLsbPosition % kBitsPerByte);
69 constexpr uint32_t kShiftedInitializedValue =
70     enum_cast<uint32_t>(ClassStatus::kInitialized) << (kClassStatusLsbPosition % kBitsPerByte);
71 
72 class Assembler;
73 class CodeGenerationData;
74 class CodeGenerator;
75 class CompilerOptions;
76 class StackMapStream;
77 class ParallelMoveResolver;
78 
79 namespace linker {
80 class LinkerPatch;
81 }  // namespace linker
82 
83 class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
84  public:
SlowPathCode(HInstruction * instruction)85   explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
86     for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
87       saved_core_stack_offsets_[i] = kRegisterNotSaved;
88       saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
89     }
90   }
91 
~SlowPathCode()92   virtual ~SlowPathCode() {}
93 
94   virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
95 
96   // Save live core and floating-point caller-save registers and
97   // update the stack mask in `locations` for registers holding object
98   // references.
99   virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
100   // Restore live core and floating-point caller-save registers.
101   virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
102 
IsCoreRegisterSaved(int reg)103   bool IsCoreRegisterSaved(int reg) const {
104     return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
105   }
106 
IsFpuRegisterSaved(int reg)107   bool IsFpuRegisterSaved(int reg) const {
108     return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
109   }
110 
GetStackOffsetOfCoreRegister(int reg)111   uint32_t GetStackOffsetOfCoreRegister(int reg) const {
112     return saved_core_stack_offsets_[reg];
113   }
114 
GetStackOffsetOfFpuRegister(int reg)115   uint32_t GetStackOffsetOfFpuRegister(int reg) const {
116     return saved_fpu_stack_offsets_[reg];
117   }
118 
IsFatal()119   virtual bool IsFatal() const { return false; }
120 
121   virtual const char* GetDescription() const = 0;
122 
GetEntryLabel()123   Label* GetEntryLabel() { return &entry_label_; }
GetExitLabel()124   Label* GetExitLabel() { return &exit_label_; }
125 
GetInstruction()126   HInstruction* GetInstruction() const {
127     return instruction_;
128   }
129 
GetDexPc()130   uint32_t GetDexPc() const {
131     return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
132   }
133 
134  protected:
135   static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
136   static constexpr uint32_t kRegisterNotSaved = -1;
137   // The instruction where this slow path is happening.
138   HInstruction* instruction_;
139   uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
140   uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
141 
142  private:
143   Label entry_label_;
144   Label exit_label_;
145 
146   DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
147 };
148 
149 class InvokeDexCallingConventionVisitor {
150  public:
151   virtual Location GetNextLocation(DataType::Type type) = 0;
152   virtual Location GetReturnLocation(DataType::Type type) const = 0;
153   virtual Location GetMethodLocation() const = 0;
154 
155  protected:
InvokeDexCallingConventionVisitor()156   InvokeDexCallingConventionVisitor() {}
~InvokeDexCallingConventionVisitor()157   virtual ~InvokeDexCallingConventionVisitor() {}
158 
159   // The current index for core registers.
160   uint32_t gp_index_ = 0u;
161   // The current index for floating-point registers.
162   uint32_t float_index_ = 0u;
163   // The current stack index.
164   uint32_t stack_index_ = 0u;
165 
166  private:
167   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
168 };
169 
170 class FieldAccessCallingConvention {
171  public:
172   virtual Location GetObjectLocation() const = 0;
173   virtual Location GetFieldIndexLocation() const = 0;
174   virtual Location GetReturnLocation(DataType::Type type) const = 0;
175   virtual Location GetSetValueLocation(DataType::Type type, bool is_instance) const = 0;
176   virtual Location GetFpuLocation(DataType::Type type) const = 0;
~FieldAccessCallingConvention()177   virtual ~FieldAccessCallingConvention() {}
178 
179  protected:
FieldAccessCallingConvention()180   FieldAccessCallingConvention() {}
181 
182  private:
183   DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
184 };
185 
186 class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
187  public:
188   // Compiles the graph to executable instructions.
189   void Compile();
190   static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
191                                                const CompilerOptions& compiler_options,
192                                                OptimizingCompilerStats* stats = nullptr);
193   virtual ~CodeGenerator();
194 
195   // Get the graph. This is the outermost graph, never the graph of a method being inlined.
GetGraph()196   HGraph* GetGraph() const { return graph_; }
197 
198   HBasicBlock* GetNextBlockToEmit() const;
199   HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
200   bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;
201 
GetStackSlotOfParameter(HParameterValue * parameter)202   size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
203     // Note that this follows the current calling convention.
204     return GetFrameSize()
205         + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()))  // Art method
206         + parameter->GetIndex() * kVRegSize;
207   }
208 
209   virtual void Initialize() = 0;
210   virtual void Finalize();
211   virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches);
212   virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const;
213   virtual void EmitThunkCode(const linker::LinkerPatch& patch,
214                              /*out*/ ArenaVector<uint8_t>* code,
215                              /*out*/ std::string* debug_name);
216   virtual void GenerateFrameEntry() = 0;
217   virtual void GenerateFrameExit() = 0;
218   virtual void Bind(HBasicBlock* block) = 0;
219   virtual void MoveConstant(Location destination, int32_t value) = 0;
220   virtual void MoveLocation(Location dst, Location src, DataType::Type dst_type) = 0;
221   virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;
222 
223   virtual Assembler* GetAssembler() = 0;
224   virtual const Assembler& GetAssembler() const = 0;
225   virtual size_t GetWordSize() const = 0;
226 
227   // Returns whether the target supports predicated SIMD instructions.
SupportsPredicatedSIMD()228   virtual bool SupportsPredicatedSIMD() const { return false; }
229 
230   // Get FP register width in bytes for spilling/restoring in the slow paths.
231   //
232   // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers
233   // alias and live SIMD registers are forced to be spilled in full size in the slow paths.
GetSlowPathFPWidth()234   virtual size_t GetSlowPathFPWidth() const {
235     // Default implementation.
236     return GetCalleePreservedFPWidth();
237   }
238 
239   // Get FP register width required to be preserved by the target ABI.
240   virtual size_t GetCalleePreservedFPWidth() const  = 0;
241 
242   // Get the size of the target SIMD register in bytes.
243   virtual size_t GetSIMDRegisterWidth() const = 0;
244   virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
245   void InitializeCodeGeneration(size_t number_of_spill_slots,
246                                 size_t maximum_safepoint_spill_size,
247                                 size_t number_of_out_slots,
248                                 const ArenaVector<HBasicBlock*>& block_order);
249   // Backends can override this as necessary. For most, no special alignment is required.
GetPreferredSlotsAlignment()250   virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }
251 
GetFrameSize()252   uint32_t GetFrameSize() const { return frame_size_; }
SetFrameSize(uint32_t size)253   void SetFrameSize(uint32_t size) { frame_size_ = size; }
GetMaximumFrameSize()254   uint32_t GetMaximumFrameSize() const {
255     return GetStackOverflowReservedBytes(GetInstructionSet());
256   }
257 
GetCoreSpillMask()258   uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
GetFpuSpillMask()259   uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }
260 
GetNumberOfCoreRegisters()261   size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
GetNumberOfFloatingPointRegisters()262   size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
263   virtual void SetupBlockedRegisters() const = 0;
264 
ComputeSpillMask()265   virtual void ComputeSpillMask() {
266     core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
267     DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
268     fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
269   }
270 
271   virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
272   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
273   virtual InstructionSet GetInstructionSet() const = 0;
274 
275   // Saves the register in the stack. Returns the size taken on stack.
276   virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
277   // Restores the register from the stack. Returns the size taken on stack.
278   virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
279 
280   virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
281   virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
282 
283   virtual bool NeedsTwoRegisters(DataType::Type type) const = 0;
284   // Returns whether we should split long moves in parallel moves.
ShouldSplitLongMoves()285   virtual bool ShouldSplitLongMoves() const { return false; }
286 
287   // Returns true if `invoke` is an implemented intrinsic in this codegen's arch.
IsImplementedIntrinsic(HInvoke * invoke)288   bool IsImplementedIntrinsic(HInvoke* invoke) const {
289     return invoke->IsIntrinsic() &&
290            !unimplemented_intrinsics_[static_cast<size_t>(invoke->GetIntrinsic())];
291   }
292 
GetNumberOfCoreCalleeSaveRegisters()293   size_t GetNumberOfCoreCalleeSaveRegisters() const {
294     return POPCOUNT(core_callee_save_mask_);
295   }
296 
GetNumberOfCoreCallerSaveRegisters()297   size_t GetNumberOfCoreCallerSaveRegisters() const {
298     DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
299     return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
300   }
301 
IsCoreCalleeSaveRegister(int reg)302   bool IsCoreCalleeSaveRegister(int reg) const {
303     return (core_callee_save_mask_ & (1 << reg)) != 0;
304   }
305 
IsFloatingPointCalleeSaveRegister(int reg)306   bool IsFloatingPointCalleeSaveRegister(int reg) const {
307     return (fpu_callee_save_mask_ & (1 << reg)) != 0;
308   }
309 
GetSlowPathSpills(LocationSummary * locations,bool core_registers)310   uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
311     DCHECK(locations->OnlyCallsOnSlowPath() ||
312            (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
313                !locations->HasCustomSlowPathCallingConvention()));
314     uint32_t live_registers = core_registers
315         ? locations->GetLiveRegisters()->GetCoreRegisters()
316         : locations->GetLiveRegisters()->GetFloatingPointRegisters();
317     if (locations->HasCustomSlowPathCallingConvention()) {
318       // Save only the live registers that the custom calling convention wants us to save.
319       uint32_t caller_saves = core_registers
320           ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
321           : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
322       return live_registers & caller_saves;
323     } else {
324       // Default ABI, we need to spill non-callee-save live registers.
325       uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
326       return live_registers & ~callee_saves;
327     }
328   }
329 
GetNumberOfSlowPathSpills(LocationSummary * locations,bool core_registers)330   size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
331     return POPCOUNT(GetSlowPathSpills(locations, core_registers));
332   }
333 
GetStackOffsetOfShouldDeoptimizeFlag()334   size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
335     DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
336     DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
337     return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
338   }
339 
340   // Record native to dex mapping for a suspend point. Required by runtime.
341   void RecordPcInfo(HInstruction* instruction,
342                     uint32_t dex_pc,
343                     uint32_t native_pc,
344                     SlowPathCode* slow_path = nullptr,
345                     bool native_debug_info = false);
346 
347   // Record native to dex mapping for a suspend point.
348   // The native_pc is used from Assembler::CodePosition.
349   //
350   // Note: As Assembler::CodePosition is target dependent, it does not guarantee the exact native_pc
351   // for the instruction. If the exact native_pc is required it must be provided explicitly.
352   void RecordPcInfo(HInstruction* instruction,
353                     uint32_t dex_pc,
354                     SlowPathCode* slow_path = nullptr,
355                     bool native_debug_info = false);
356 
357   // Check whether we have already recorded mapping at this PC.
358   bool HasStackMapAtCurrentPc();
359 
360   // Record extra stack maps if we support native debugging.
361   //
362   // ARM specific behaviour: The recorded native PC might be a branch over pools to instructions
363   // corresponding to the dex PC.
364   void MaybeRecordNativeDebugInfo(HInstruction* instruction,
365                                   uint32_t dex_pc,
366                                   SlowPathCode* slow_path = nullptr);
367 
368   bool CanMoveNullCheckToUser(HNullCheck* null_check);
369   virtual void MaybeRecordImplicitNullCheck(HInstruction* instruction);
370   LocationSummary* CreateThrowingSlowPathLocations(
371       HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
372   void GenerateNullCheck(HNullCheck* null_check);
373   virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
374   virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;
375 
376   // Records a stack map which the runtime might use to set catch phi values
377   // during exception delivery.
378   // TODO: Replace with a catch-entering instruction that records the environment.
379   void RecordCatchBlockInfo();
380 
GetCompilerOptions()381   const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
382   bool EmitReadBarrier() const;
383   bool EmitBakerReadBarrier() const;
384   bool EmitNonBakerReadBarrier() const;
385   ReadBarrierOption GetCompilerReadBarrierOption() const;
386 
387   // Returns true if we should check the GC card for consistency purposes.
388   bool ShouldCheckGCCard(DataType::Type type,
389                          HInstruction* value,
390                          WriteBarrierKind write_barrier_kind) const;
391 
392   // Get the ScopedArenaAllocator used for codegen memory allocation.
393   ScopedArenaAllocator* GetScopedAllocator();
394 
395   void AddSlowPath(SlowPathCode* slow_path);
396 
397   ScopedArenaVector<uint8_t> BuildStackMaps(const dex::CodeItem* code_item_for_osr_check);
398   size_t GetNumberOfJitRoots() const;
399 
400   // Fills the `literals` array with literals collected during code generation.
401   // Also emits literal patches.
402   void EmitJitRoots(uint8_t* code,
403                     const uint8_t* roots_data,
404                     /*out*/std::vector<Handle<mirror::Object>>* roots)
405       REQUIRES_SHARED(Locks::mutator_lock_);
406 
IsLeafMethod()407   bool IsLeafMethod() const {
408     return is_leaf_;
409   }
410 
MarkNotLeaf()411   void MarkNotLeaf() {
412     is_leaf_ = false;
413     requires_current_method_ = true;
414   }
415 
NeedsSuspendCheckEntry()416   bool NeedsSuspendCheckEntry() const {
417     return needs_suspend_check_entry_;
418   }
419 
MarkNeedsSuspendCheckEntry()420   void MarkNeedsSuspendCheckEntry() {
421     needs_suspend_check_entry_ = true;
422   }
423 
SetRequiresCurrentMethod()424   void SetRequiresCurrentMethod() {
425     requires_current_method_ = true;
426   }
427 
RequiresCurrentMethod()428   bool RequiresCurrentMethod() const {
429     return requires_current_method_;
430   }
431 
432   // Clears the spill slots taken by loop phis in the `LocationSummary` of the
433   // suspend check. This is called when the code generator generates code
434   // for the suspend check at the back edge (instead of where the suspend check
435   // is, which is the loop entry). At this point, the spill slots for the phis
436   // have not been written to.
437   void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check,
438                                              HParallelMove* spills) const;
439 
GetBlockedCoreRegisters()440   bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
GetBlockedFloatingPointRegisters()441   bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
442 
IsBlockedCoreRegister(size_t i)443   bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
IsBlockedFloatingPointRegister(size_t i)444   bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }
445 
446   // Helper that returns the offset of the array's length field.
447   // Note: Besides the normal arrays, we also use the HArrayLength for
448   // accessing the String's `count` field in String intrinsics.
449   static uint32_t GetArrayLengthOffset(HArrayLength* array_length);
450 
451   // Helper that returns the offset of the array's data.
452   // Note: Besides the normal arrays, we also use the HArrayGet for
453   // accessing the String's `value` field in String intrinsics.
454   static uint32_t GetArrayDataOffset(HArrayGet* array_get);
455 
456   void EmitParallelMoves(Location from1,
457                          Location to1,
458                          DataType::Type type1,
459                          Location from2,
460                          Location to2,
461                          DataType::Type type2);
462 
InstanceOfNeedsReadBarrier(HInstanceOf * instance_of)463   bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) {
464     // Used only for `kExactCheck`, `kAbstractClassCheck`, `kClassHierarchyCheck`,
465     // `kArrayObjectCheck` and `kInterfaceCheck`.
466     DCHECK(instance_of->GetTypeCheckKind() == TypeCheckKind::kExactCheck ||
467            instance_of->GetTypeCheckKind() == TypeCheckKind::kAbstractClassCheck ||
468            instance_of->GetTypeCheckKind() == TypeCheckKind::kClassHierarchyCheck ||
469            instance_of->GetTypeCheckKind() == TypeCheckKind::kArrayObjectCheck ||
470            instance_of->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck)
471         << instance_of->GetTypeCheckKind();
472     // If the target class is in the boot or app image, it's non-moveable and it doesn't matter
473     // if we compare it with a from-space or to-space reference, the result is the same.
474     // It's OK to traverse a class hierarchy jumping between from-space and to-space.
475     return EmitReadBarrier() && !instance_of->GetTargetClass()->IsInImage();
476   }
477 
ReadBarrierOptionForInstanceOf(HInstanceOf * instance_of)478   ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) {
479     return InstanceOfNeedsReadBarrier(instance_of) ? kWithReadBarrier : kWithoutReadBarrier;
480   }
481 
IsTypeCheckSlowPathFatal(HCheckCast * check_cast)482   bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) {
483     switch (check_cast->GetTypeCheckKind()) {
484       case TypeCheckKind::kExactCheck:
485       case TypeCheckKind::kAbstractClassCheck:
486       case TypeCheckKind::kClassHierarchyCheck:
487       case TypeCheckKind::kArrayObjectCheck:
488       case TypeCheckKind::kInterfaceCheck: {
489         bool needs_read_barrier =
490             EmitReadBarrier() && !check_cast->GetTargetClass()->IsInImage();
491         // We do not emit read barriers for HCheckCast, so we can get false negatives
492         // and the slow path shall re-check and simply return if the cast is actually OK.
493         return !needs_read_barrier;
494       }
495       case TypeCheckKind::kArrayCheck:
496       case TypeCheckKind::kUnresolvedCheck:
497         return false;
498       case TypeCheckKind::kBitstringCheck:
499         return true;
500     }
501     LOG(FATAL) << "Unreachable";
502     UNREACHABLE();
503   }
504 
GetCheckCastCallKind(HCheckCast * check_cast)505   LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) {
506     return (IsTypeCheckSlowPathFatal(check_cast) && !check_cast->CanThrowIntoCatchBlock())
507         ? LocationSummary::kNoCall  // In fact, call on a fatal (non-returning) slow path.
508         : LocationSummary::kCallOnSlowPath;
509   }
510 
StoreNeedsWriteBarrier(DataType::Type type,HInstruction * value)511   static bool StoreNeedsWriteBarrier(DataType::Type type, HInstruction* value) {
512     // Check that null value is not represented as an integer constant.
513     DCHECK_IMPLIES(type == DataType::Type::kReference, !value->IsIntConstant());
514     return type == DataType::Type::kReference && !value->IsNullConstant();
515   }
516 
517   // If we are compiling a graph with the WBE pass enabled, we want to honor the WriteBarrierKind
518   // set during the WBE pass.
519   bool StoreNeedsWriteBarrier(DataType::Type type,
520                               HInstruction* value,
521                               WriteBarrierKind write_barrier_kind) const;
522 
523   // Performs checks pertaining to an InvokeRuntime call.
524   void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
525                              HInstruction* instruction,
526                              SlowPathCode* slow_path);
527 
528   // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
529   static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
530                                                           SlowPathCode* slow_path);
531 
AddAllocatedRegister(Location location)532   void AddAllocatedRegister(Location location) {
533     allocated_registers_.Add(location);
534   }
535 
HasAllocatedRegister(bool is_core,int reg)536   bool HasAllocatedRegister(bool is_core, int reg) const {
537     return is_core
538         ? allocated_registers_.ContainsCoreRegister(reg)
539         : allocated_registers_.ContainsFloatingPointRegister(reg);
540   }
541 
542   void AllocateLocations(HInstruction* instruction);
543 
544   // Tells whether the stack frame of the compiled method is
545   // considered "empty", that is either actually having a size of zero,
546   // or just containing the saved return address register.
HasEmptyFrame()547   bool HasEmptyFrame() const {
548     return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
549   }
550 
GetInt8ValueOf(HConstant * constant)551   static int8_t GetInt8ValueOf(HConstant* constant) {
552     DCHECK(constant->IsIntConstant());
553     return constant->AsIntConstant()->GetValue();
554   }
555 
GetInt16ValueOf(HConstant * constant)556   static int16_t GetInt16ValueOf(HConstant* constant) {
557     DCHECK(constant->IsIntConstant());
558     return constant->AsIntConstant()->GetValue();
559   }
560 
GetInt32ValueOf(HConstant * constant)561   static int32_t GetInt32ValueOf(HConstant* constant) {
562     if (constant->IsIntConstant()) {
563       return constant->AsIntConstant()->GetValue();
564     } else if (constant->IsNullConstant()) {
565       return 0;
566     } else {
567       DCHECK(constant->IsFloatConstant());
568       return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
569     }
570   }
571 
GetInt64ValueOf(HConstant * constant)572   static int64_t GetInt64ValueOf(HConstant* constant) {
573     if (constant->IsIntConstant()) {
574       return constant->AsIntConstant()->GetValue();
575     } else if (constant->IsNullConstant()) {
576       return 0;
577     } else if (constant->IsFloatConstant()) {
578       return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
579     } else if (constant->IsLongConstant()) {
580       return constant->AsLongConstant()->GetValue();
581     } else {
582       DCHECK(constant->IsDoubleConstant());
583       return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
584     }
585   }
586 
GetFirstRegisterSlotInSlowPath()587   size_t GetFirstRegisterSlotInSlowPath() const {
588     return first_register_slot_in_slow_path_;
589   }
590 
FrameEntrySpillSize()591   uint32_t FrameEntrySpillSize() const {
592     return GetFpuSpillSize() + GetCoreSpillSize();
593   }
594 
595   virtual ParallelMoveResolver* GetMoveResolver() = 0;
596 
597   static void CreateCommonInvokeLocationSummary(
598       HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);
599 
600   template <typename CriticalNativeCallingConventionVisitor,
601             size_t kNativeStackAlignment,
602             size_t GetCriticalNativeDirectCallFrameSize(std::string_view shorty)>
PrepareCriticalNativeCall(HInvokeStaticOrDirect * invoke)603   size_t PrepareCriticalNativeCall(HInvokeStaticOrDirect* invoke) {
604       DCHECK(!invoke->GetLocations()->Intrinsified());
605       CriticalNativeCallingConventionVisitor calling_convention_visitor(
606           /*for_register_allocation=*/ false);
607       HParallelMove parallel_move(GetGraph()->GetAllocator());
608       PrepareCriticalNativeArgumentMoves(invoke, &calling_convention_visitor, &parallel_move);
609       size_t out_frame_size =
610           RoundUp(calling_convention_visitor.GetStackOffset(), kNativeStackAlignment);
611       if (kIsDebugBuild) {
612         std::string_view shorty = GetCriticalNativeShorty(invoke);
613         CHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty), out_frame_size);
614       }
615       if (out_frame_size != 0u) {
616         FinishCriticalNativeFrameSetup(out_frame_size, &parallel_move);
617       }
618       return out_frame_size;
619   }
620 
621   void GenerateInvokeStaticOrDirectRuntimeCall(
622       HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path);
623 
624   void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);
625 
626   void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke, SlowPathCode* slow_path = nullptr);
627 
628   void GenerateInvokeCustomCall(HInvokeCustom* invoke);
629 
630   void CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction, Location out);
631 
632   void CreateUnresolvedFieldLocationSummary(
633       HInstruction* field_access,
634       DataType::Type field_type,
635       const FieldAccessCallingConvention& calling_convention);
636 
637   void GenerateUnresolvedFieldAccess(
638       HInstruction* field_access,
639       DataType::Type field_type,
640       uint32_t field_index,
641       uint32_t dex_pc,
642       const FieldAccessCallingConvention& calling_convention);
643 
644   static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
645                                                         Location runtime_type_index_location,
646                                                         Location runtime_return_location);
647   void GenerateLoadClassRuntimeCall(HLoadClass* cls);
648 
649   static void CreateLoadMethodHandleRuntimeCallLocationSummary(HLoadMethodHandle* method_handle,
650                                                              Location runtime_handle_index_location,
651                                                              Location runtime_return_location);
652   void GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle);
653 
654   static void CreateLoadMethodTypeRuntimeCallLocationSummary(HLoadMethodType* method_type,
655                                                              Location runtime_type_index_location,
656                                                              Location runtime_return_location);
657   void GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type);
658 
  // Overloads returning a 32-bit boot image offset for the given entity.
  // The ObjPtr<> overload requires the mutator lock to be held (at least shared).
  // NOTE(review): the exact base of the offset is defined in the implementation -- confirm.
  static uint32_t GetBootImageOffset(ObjPtr<mirror::Object> object)
      REQUIRES_SHARED(Locks::mutator_lock_);
  static uint32_t GetBootImageOffset(HLoadClass* load_class);
  static uint32_t GetBootImageOffset(HLoadString* load_string);
  static uint32_t GetBootImageOffset(HInvoke* invoke);
  static uint32_t GetBootImageOffset(ClassRoot class_root);
  static uint32_t GetBootImageOffsetOfIntrinsicDeclaringClass(HInvoke* invoke);

  // Creates the location summary for a System.arraycopy invoke.
  // `length_threshold` defaults to -1 and `num_temps` defaults to 3.
  // NOTE(review): -1 presumably means "no threshold" -- confirm in the definition.
  static LocationSummary* CreateSystemArrayCopyLocationSummary(
      HInvoke* invoke, int32_t length_threshold = -1, size_t num_temps = 3);
669 
SetDisassemblyInformation(DisassemblyInformation * info)670   void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
GetDisassemblyInformation()671   DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }
672 
  // Invokes the runtime entrypoint `entrypoint` on behalf of `instruction` at `dex_pc`.
  // `slow_path` is optional and defaults to null. Implemented by each backend.
  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             uint32_t dex_pc,
                             SlowPathCode* slow_path = nullptr) = 0;

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) = 0;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) = 0;
687 
GetLoadStringCallKind(HLoadString * load)688   LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
689     switch (load->GetLoadKind()) {
690       case HLoadString::LoadKind::kBssEntry:
691         DCHECK(load->NeedsEnvironment());
692         return LocationSummary::kCallOnSlowPath;
693       case HLoadString::LoadKind::kRuntimeCall:
694         DCHECK(load->NeedsEnvironment());
695         return LocationSummary::kCallOnMainOnly;
696       case HLoadString::LoadKind::kJitTableAddress:
697         DCHECK(!load->NeedsEnvironment());
698         return EmitReadBarrier()
699             ? LocationSummary::kCallOnSlowPath
700             : LocationSummary::kNoCall;
701         break;
702       default:
703         DCHECK(!load->NeedsEnvironment());
704         return LocationSummary::kNoCall;
705     }
706   }
707 
  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  // `method` is the method the dispatch info is requested for.
  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) = 0;

  // Generate a call to a static or direct method.
  // `slow_path` is optional and defaults to null.
  virtual void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
  // Generate a call to a virtual method.
  // `slow_path` is optional and defaults to null.
  virtual void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
720 
  // Copy the result of a call into the given target.
  // `type` is the data type of the value being copied.
  virtual void MoveFromReturnRegister(Location trg, DataType::Type type) = 0;

  // Grow / shrink the current frame by `adjustment`.
  // NOTE(review): `adjustment` is presumably in bytes -- confirm against the backends.
  virtual void IncreaseFrame(size_t adjustment) = 0;
  virtual void DecreaseFrame(size_t adjustment) = 0;

  // Emit a no-op instruction.
  virtual void GenerateNop() = 0;
728 
  // Returns the quick entrypoint to use for allocating the array described by `new_array`.
  static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array);
  // Returns the scale factor associated with the data type `type`.
  // NOTE(review): presumably the addressing-mode scale for elements of that type -- confirm.
  static ScaleFactor ScaleFactorForType(DataType::Type type);
731 
GetCode()732   ArrayRef<const uint8_t> GetCode() const {
733     return ArrayRef<const uint8_t>(GetAssembler().CodeBufferBaseAddress(),
734                                    GetAssembler().CodeSize());
735   }
736 
737  protected:
738   // Patch info used for recording locations of required linker patches and their targets,
739   // i.e. target method, string, type or code identified by their dex file and index,
740   // or boot image .data.img.rel.ro entries identified by the boot image offset.
741   template <typename LabelType>
742   struct PatchInfo {
PatchInfoPatchInfo743     PatchInfo(const DexFile* dex_file, uint32_t off_or_idx)
744         : target_dex_file(dex_file), offset_or_index(off_or_idx), label() { }
745 
746     // Target dex file or null for boot image .data.img.rel.ro patches.
747     const DexFile* target_dex_file;
748     // Either the boot image offset (to write to .data.img.rel.ro) or string/type/method index.
749     uint32_t offset_or_index;
750     // Label for the instruction to patch.
751     LabelType label;
752   };
753 
  // Protected constructor, to be called by the architecture-specific subclasses, which pass
  // the register counts and callee-save masks of their target.
  // `unimplemented_intrinsics` indicates the intrinsics without handcrafted code.
  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats,
                const art::ArrayRef<const bool>& unimplemented_intrinsics);

  // Graph visitors provided by the backend.
  // NOTE(review): presumably one builds location summaries and the other emits code -- confirm.
  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;
766 
767   template <typename RegType>
ComputeRegisterMask(const RegType * registers,size_t length)768   static uint32_t ComputeRegisterMask(const RegType* registers, size_t length) {
769     uint32_t mask = 0;
770     for (size_t i = 0, e = length; i < e; ++i) {
771       mask |= (1 << registers[i]);
772     }
773     return mask;
774   }
775 
776   // Returns the location of the first spilled entry for floating point registers,
777   // relative to the stack pointer.
GetFpuSpillStart()778   uint32_t GetFpuSpillStart() const {
779     return GetFrameSize() - FrameEntrySpillSize();
780   }
781 
GetFpuSpillSize()782   uint32_t GetFpuSpillSize() const {
783     return POPCOUNT(fpu_spill_mask_) * GetCalleePreservedFPWidth();
784   }
785 
GetCoreSpillSize()786   uint32_t GetCoreSpillSize() const {
787     return POPCOUNT(core_spill_mask_) * GetWordSize();
788   }
789 
HasAllocatedCalleeSaveRegisters()790   virtual bool HasAllocatedCalleeSaveRegisters() const {
791     // We check the core registers against 1 because it always comprises the return PC.
792     return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
793       || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
794   }
795 
CallPushesPC()796   bool CallPushesPC() const {
797     InstructionSet instruction_set = GetInstructionSet();
798     return instruction_set == InstructionSet::kX86 || instruction_set == InstructionSet::kX86_64;
799   }
800 
801   // Arm64 has its own type for a label, so we need to templatize these methods
802   // to share the logic.
803 
804   template <typename LabelType>
CommonInitializeLabels()805   LabelType* CommonInitializeLabels() {
806     // We use raw array allocations instead of ArenaVector<> because Labels are
807     // non-constructible and non-movable and as such cannot be held in a vector.
808     size_t size = GetGraph()->GetBlocks().size();
809     LabelType* labels =
810         GetGraph()->GetAllocator()->AllocArray<LabelType>(size, kArenaAllocCodeGenerator);
811     for (size_t i = 0; i != size; ++i) {
812       new(labels + i) LabelType();
813     }
814     return labels;
815   }
816 
817   template <typename LabelType>
CommonGetLabelOf(LabelType * raw_pointer_to_labels_array,HBasicBlock * block)818   LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
819     block = FirstNonEmptyBlock(block);
820     return raw_pointer_to_labels_array + block->GetBlockId();
821   }
822 
GetCurrentSlowPath()823   SlowPathCode* GetCurrentSlowPath() {
824     return current_slow_path_;
825   }
826 
  // Returns the stack map stream used by this code generator.
  // NOTE(review): presumably owned by the code generation data -- confirm in the definition.
  StackMapStream* GetStackMapStream();
828 
GetCodeGenerationData()829   CodeGenerationData* GetCodeGenerationData() {
830     return code_generation_data_.get();
831   }
832 
  // Reserve a JIT root for the given string / class reference, and query the index of a
  // root for a given reference.
  // NOTE(review): presumably a root must be reserved before its index is queried -- confirm.
  void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string);
  uint64_t GetJitStringRootIndex(StringReference string_reference);
  void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass);
  uint64_t GetJitClassRootIndex(TypeReference type_reference);

  // Emit the patches associated with JIT roots. Only applies to JIT compiled code.
  virtual void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data);
840 
  // Frame size required for this method.
  uint32_t frame_size_;
  // Bit masks of the spilled core and floating point registers; the number of bits set
  // determines the size of the respective spill area.
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  // NOTE(review): presumably the first stack slot used for saving registers in slow paths --
  // confirm against the users of this field.
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  // Bit masks of the callee-save core and floating point registers.
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  // The order to use for code generation.
  const ArenaVector<HBasicBlock*>* block_order_;

  // Disassembly information object, accessed through Set/GetDisassemblyInformation().
  DisassemblyInformation* disasm_info_;
865 
866  private:
  // Internal helpers; their definitions live in the implementation file.
  void InitializeCodeGenerationData();
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void BlockIfInRegister(Location location, bool is_out = false) const;
  // Emits the given environment. `slow_path` is the slow path being handled, if any.
  // The boolean flags default to: vreg info needed, not for a catch handler, innermost
  // environment.
  void EmitEnvironment(HEnvironment* environment,
                       SlowPathCode* slow_path,
                       bool needs_vreg_info = true,
                       bool is_for_catch_handler = false,
                       bool innermost_environment = true);
  void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path, bool is_for_catch_handler);
  void EmitVRegInfoOnlyCatchPhis(HEnvironment* environment);

  // Helpers for @CriticalNative calls. The `/*inout*/` and `/*out*/` markers denote
  // parameters that are modified by the callee.
  static void PrepareCriticalNativeArgumentMoves(
      HInvokeStaticOrDirect* invoke,
      /*inout*/InvokeDexCallingConventionVisitor* visitor,
      /*out*/HParallelMove* parallel_move);

  void FinishCriticalNativeFrameSetup(size_t out_frame_size, /*inout*/HParallelMove* parallel_move);

  // Returns the shorty of the method called by the given critical native `invoke`.
  static std::string_view GetCriticalNativeShorty(HInvokeStaticOrDirect* invoke);
887 
  // Compilation statistics collector passed to the constructor.
  OptimizingCompilerStats* stats_;

  // The graph being compiled and the compiler options in effect.
  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  // The current slow-path that we're generating code for.
  SlowPathCode* current_slow_path_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether the method has to emit a SuspendCheck at entry.
  bool needs_suspend_check_entry_;

  // Whether an instruction in the graph accesses the current method.
  // TODO: Rename: this actually indicates that some instruction in the method
  // needs the environment including a valid stack frame.
  bool requires_current_method_;

  // The CodeGenerationData contains a ScopedArenaAllocator intended for reusing the
  // ArenaStack memory allocated in previous passes instead of adding to the memory
  // held by the ArenaAllocator. This ScopedArenaAllocator is created in
  // CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed.
  std::unique_ptr<CodeGenerationData> code_generation_data_;

  // Which intrinsics we don't have handcrafted code for.
  art::ArrayRef<const bool> unimplemented_intrinsics_;
919 
920   friend class OptimizingCFITest;
921   ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD);
922   ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD);
923 
924   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
925 };
926 
927 template <typename C, typename F>
928 class CallingConvention {
929  public:
CallingConvention(const C * registers,size_t number_of_registers,const F * fpu_registers,size_t number_of_fpu_registers,PointerSize pointer_size)930   CallingConvention(const C* registers,
931                     size_t number_of_registers,
932                     const F* fpu_registers,
933                     size_t number_of_fpu_registers,
934                     PointerSize pointer_size)
935       : registers_(registers),
936         number_of_registers_(number_of_registers),
937         fpu_registers_(fpu_registers),
938         number_of_fpu_registers_(number_of_fpu_registers),
939         pointer_size_(pointer_size) {}
940 
GetNumberOfRegisters()941   size_t GetNumberOfRegisters() const { return number_of_registers_; }
GetNumberOfFpuRegisters()942   size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }
943 
GetRegisterAt(size_t index)944   C GetRegisterAt(size_t index) const {
945     DCHECK_LT(index, number_of_registers_);
946     return registers_[index];
947   }
948 
GetFpuRegisterAt(size_t index)949   F GetFpuRegisterAt(size_t index) const {
950     DCHECK_LT(index, number_of_fpu_registers_);
951     return fpu_registers_[index];
952   }
953 
GetStackOffsetOf(size_t index)954   size_t GetStackOffsetOf(size_t index) const {
955     // We still reserve the space for parameters passed by registers.
956     // Add space for the method pointer.
957     return static_cast<size_t>(pointer_size_) + index * kVRegSize;
958   }
959 
960  private:
961   const C* registers_;
962   const size_t number_of_registers_;
963   const F* fpu_registers_;
964   const size_t number_of_fpu_registers_;
965   const PointerSize pointer_size_;
966 
967   DISALLOW_COPY_AND_ASSIGN(CallingConvention);
968 };
969 
970 /**
971  * A templated class SlowPathGenerator with a templated method NewSlowPath()
972  * that can be used by any code generator to share equivalent slow-paths with
973  * the objective of reducing generated code size.
974  *
975  * InstructionType:  instruction that requires SlowPathCodeType
976  * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
977  */
978 template <typename InstructionType>
979 class SlowPathGenerator {
980   static_assert(std::is_base_of<HInstruction, InstructionType>::value,
981                 "InstructionType is not a subclass of art::HInstruction");
982 
983  public:
SlowPathGenerator(HGraph * graph,CodeGenerator * codegen)984   SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
985       : graph_(graph),
986         codegen_(codegen),
987         slow_path_map_(std::less<uint32_t>(),
988                        graph->GetAllocator()->Adapter(kArenaAllocSlowPaths)) {}
989 
990   // Creates and adds a new slow-path, if needed, or returns existing one otherwise.
991   // Templating the method (rather than the whole class) on the slow-path type enables
992   // keeping this code at a generic, non architecture-specific place.
993   //
994   // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
995   //       To relax this requirement, we would need some RTTI on the stored slow-paths,
996   //       or template the class as a whole on SlowPathType.
997   template <typename SlowPathCodeType>
NewSlowPath(InstructionType * instruction)998   SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
999     static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
1000                   "SlowPathCodeType is not a subclass of art::SlowPathCode");
1001     static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
1002                   "SlowPathCodeType is not constructible from InstructionType*");
1003     // Iterate over potential candidates for sharing. Currently, only same-typed
1004     // slow-paths with exactly the same dex-pc are viable candidates.
1005     // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
1006     const uint32_t dex_pc = instruction->GetDexPc();
1007     auto iter = slow_path_map_.find(dex_pc);
1008     if (iter != slow_path_map_.end()) {
1009       const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second;
1010       for (const auto& it : candidates) {
1011         InstructionType* other_instruction = it.first;
1012         SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
1013         // Determine if the instructions allow for slow-path sharing.
1014         if (HaveSameLiveRegisters(instruction, other_instruction) &&
1015             HaveSameStackMap(instruction, other_instruction)) {
1016           // Can share: reuse existing one.
1017           return other_slow_path;
1018         }
1019       }
1020     } else {
1021       // First time this dex-pc is seen.
1022       iter = slow_path_map_.Put(dex_pc,
1023                                 {{}, {graph_->GetAllocator()->Adapter(kArenaAllocSlowPaths)}});
1024     }
1025     // Cannot share: create and add new slow-path for this particular dex-pc.
1026     SlowPathCodeType* slow_path =
1027         new (codegen_->GetScopedAllocator()) SlowPathCodeType(instruction);
1028     iter->second.emplace_back(std::make_pair(instruction, slow_path));
1029     codegen_->AddSlowPath(slow_path);
1030     return slow_path;
1031   }
1032 
1033  private:
1034   // Tests if both instructions have same set of live physical registers. This ensures
1035   // the slow-path has exactly the same preamble on saving these registers to stack.
HaveSameLiveRegisters(const InstructionType * i1,const InstructionType * i2)1036   bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
1037     const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
1038     const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
1039     RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
1040     RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
1041     return (((live1->GetCoreRegisters() & core_spill) ==
1042              (live2->GetCoreRegisters() & core_spill)) &&
1043             ((live1->GetFloatingPointRegisters() & fpu_spill) ==
1044              (live2->GetFloatingPointRegisters() & fpu_spill)));
1045   }
1046 
1047   // Tests if both instructions have the same stack map. This ensures the interpreter
1048   // will find exactly the same dex-registers at the same entries.
HaveSameStackMap(const InstructionType * i1,const InstructionType * i2)1049   bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
1050     DCHECK(i1->HasEnvironment());
1051     DCHECK(i2->HasEnvironment());
1052     // We conservatively test if the two instructions find exactly the same instructions
1053     // and location in each dex-register. This guarantees they will have the same stack map.
1054     HEnvironment* e1 = i1->GetEnvironment();
1055     HEnvironment* e2 = i2->GetEnvironment();
1056     if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
1057       return false;
1058     }
1059     for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
1060       if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
1061           !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
1062         return false;
1063       }
1064     }
1065     return true;
1066   }
1067 
1068   HGraph* const graph_;
1069   CodeGenerator* const codegen_;
1070 
1071   // Map from dex-pc to vector of already existing instruction/slow-path pairs.
1072   ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;
1073 
1074   DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
1075 };
1076 
1077 class InstructionCodeGenerator : public HGraphVisitor {
1078  public:
InstructionCodeGenerator(HGraph * graph,CodeGenerator * codegen)1079   InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
1080       : HGraphVisitor(graph),
1081         deopt_slow_paths_(graph, codegen) {}
1082 
1083  protected:
1084   // Add slow-path generator for each instruction/slow-path combination that desires sharing.
1085   // TODO: under current regime, only deopt sharing make sense; extend later.
1086   SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
1087 };
1088 
1089 }  // namespace art
1090 
1091 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
1092