1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ 18 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ 19 20 #include "arch/instruction_set.h" 21 #include "arch/instruction_set_features.h" 22 #include "base/arena_containers.h" 23 #include "base/arena_object.h" 24 #include "base/array_ref.h" 25 #include "base/bit_field.h" 26 #include "base/bit_utils.h" 27 #include "base/globals.h" 28 #include "base/macros.h" 29 #include "base/memory_region.h" 30 #include "base/pointer_size.h" 31 #include "class_root.h" 32 #include "dex/string_reference.h" 33 #include "dex/type_reference.h" 34 #include "graph_visualizer.h" 35 #include "locations.h" 36 #include "nodes.h" 37 #include "oat/oat_quick_method_header.h" 38 #include "optimizing_compiler_stats.h" 39 #include "read_barrier_option.h" 40 #include "stack.h" 41 #include "subtype_check.h" 42 #include "utils/assembler.h" 43 #include "utils/label.h" 44 45 namespace art HIDDEN { 46 47 // Binary encoding of 2^32 for type double. 48 static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); 49 // Binary encoding of 2^31 for type double. 50 static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000); 51 52 // Minimum value for a primitive integer. 53 static int32_t constexpr kPrimIntMin = 0x80000000; 54 // Minimum value for a primitive long. 55 static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000); 56 57 // Maximum value for a primitive integer. 58 static int32_t constexpr kPrimIntMax = 0x7fffffff; 59 // Maximum value for a primitive long. 60 static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff); 61 62 constexpr size_t kClassStatusLsbPosition = SubtypeCheckBits::BitStructSizeOf(); 63 constexpr size_t kClassStatusByteOffset = 64 mirror::Class::StatusOffset().SizeValue() + (kClassStatusLsbPosition / kBitsPerByte); 65 constexpr uint32_t kShiftedVisiblyInitializedValue = enum_cast<uint32_t>( 66 ClassStatus::kVisiblyInitialized) << (kClassStatusLsbPosition % kBitsPerByte); 67 constexpr uint32_t kShiftedInitializingValue = 68 enum_cast<uint32_t>(ClassStatus::kInitializing) << (kClassStatusLsbPosition % kBitsPerByte); 69 constexpr uint32_t kShiftedInitializedValue = 70 enum_cast<uint32_t>(ClassStatus::kInitialized) << (kClassStatusLsbPosition % kBitsPerByte); 71 72 class Assembler; 73 class CodeGenerationData; 74 class CodeGenerator; 75 class CompilerOptions; 76 class StackMapStream; 77 class ParallelMoveResolver; 78 79 namespace linker { 80 class LinkerPatch; 81 } // namespace linker 82 83 class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> { 84 public: SlowPathCode(HInstruction * instruction)85 explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) { 86 for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) { 87 saved_core_stack_offsets_[i] = kRegisterNotSaved; 88 saved_fpu_stack_offsets_[i] = kRegisterNotSaved; 89 } 90 } 91 ~SlowPathCode()92 virtual ~SlowPathCode() {} 93 94 virtual void EmitNativeCode(CodeGenerator* codegen) = 0; 95 96 // Save live core and floating-point caller-save registers and 97 // update the stack mask in `locations` for registers holding object 98 // references. 99 virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations); 100 // Restore live core and floating-point caller-save registers. 101 virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations); 102 IsCoreRegisterSaved(int reg)103 bool IsCoreRegisterSaved(int reg) const { 104 return saved_core_stack_offsets_[reg] != kRegisterNotSaved; 105 } 106 IsFpuRegisterSaved(int reg)107 bool IsFpuRegisterSaved(int reg) const { 108 return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved; 109 } 110 GetStackOffsetOfCoreRegister(int reg)111 uint32_t GetStackOffsetOfCoreRegister(int reg) const { 112 return saved_core_stack_offsets_[reg]; 113 } 114 GetStackOffsetOfFpuRegister(int reg)115 uint32_t GetStackOffsetOfFpuRegister(int reg) const { 116 return saved_fpu_stack_offsets_[reg]; 117 } 118 IsFatal()119 virtual bool IsFatal() const { return false; } 120 121 virtual const char* GetDescription() const = 0; 122 GetEntryLabel()123 Label* GetEntryLabel() { return &entry_label_; } GetExitLabel()124 Label* GetExitLabel() { return &exit_label_; } 125 GetInstruction()126 HInstruction* GetInstruction() const { 127 return instruction_; 128 } 129 GetDexPc()130 uint32_t GetDexPc() const { 131 return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc; 132 } 133 134 protected: 135 static constexpr size_t kMaximumNumberOfExpectedRegisters = 32; 136 static constexpr uint32_t kRegisterNotSaved = -1; 137 // The instruction where this slow path is happening. 138 HInstruction* instruction_; 139 uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters]; 140 uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters]; 141 142 private: 143 Label entry_label_; 144 Label exit_label_; 145 146 DISALLOW_COPY_AND_ASSIGN(SlowPathCode); 147 }; 148 149 class InvokeDexCallingConventionVisitor { 150 public: 151 virtual Location GetNextLocation(DataType::Type type) = 0; 152 virtual Location GetReturnLocation(DataType::Type type) const = 0; 153 virtual Location GetMethodLocation() const = 0; 154 155 protected: InvokeDexCallingConventionVisitor()156 InvokeDexCallingConventionVisitor() {} ~InvokeDexCallingConventionVisitor()157 virtual ~InvokeDexCallingConventionVisitor() {} 158 159 // The current index for core registers. 160 uint32_t gp_index_ = 0u; 161 // The current index for floating-point registers. 162 uint32_t float_index_ = 0u; 163 // The current stack index. 164 uint32_t stack_index_ = 0u; 165 166 private: 167 DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); 168 }; 169 170 class FieldAccessCallingConvention { 171 public: 172 virtual Location GetObjectLocation() const = 0; 173 virtual Location GetFieldIndexLocation() const = 0; 174 virtual Location GetReturnLocation(DataType::Type type) const = 0; 175 virtual Location GetSetValueLocation(DataType::Type type, bool is_instance) const = 0; 176 virtual Location GetFpuLocation(DataType::Type type) const = 0; ~FieldAccessCallingConvention()177 virtual ~FieldAccessCallingConvention() {} 178 179 protected: FieldAccessCallingConvention()180 FieldAccessCallingConvention() {} 181 182 private: 183 DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention); 184 }; 185 186 class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { 187 public: 188 // Compiles the graph to executable instructions. 189 void Compile(); 190 static std::unique_ptr<CodeGenerator> Create(HGraph* graph, 191 const CompilerOptions& compiler_options, 192 OptimizingCompilerStats* stats = nullptr); 193 virtual ~CodeGenerator(); 194 195 // Get the graph. This is the outermost graph, never the graph of a method being inlined. GetGraph()196 HGraph* GetGraph() const { return graph_; } 197 198 HBasicBlock* GetNextBlockToEmit() const; 199 HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const; 200 bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const; 201 GetStackSlotOfParameter(HParameterValue * parameter)202 size_t GetStackSlotOfParameter(HParameterValue* parameter) const { 203 // Note that this follows the current calling convention. 204 return GetFrameSize() 205 + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet())) // Art method 206 + parameter->GetIndex() * kVRegSize; 207 } 208 209 virtual void Initialize() = 0; 210 virtual void Finalize(); 211 virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches); 212 virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const; 213 virtual void EmitThunkCode(const linker::LinkerPatch& patch, 214 /*out*/ ArenaVector<uint8_t>* code, 215 /*out*/ std::string* debug_name); 216 virtual void GenerateFrameEntry() = 0; 217 virtual void GenerateFrameExit() = 0; 218 virtual void Bind(HBasicBlock* block) = 0; 219 virtual void MoveConstant(Location destination, int32_t value) = 0; 220 virtual void MoveLocation(Location dst, Location src, DataType::Type dst_type) = 0; 221 virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0; 222 223 virtual Assembler* GetAssembler() = 0; 224 virtual const Assembler& GetAssembler() const = 0; 225 virtual size_t GetWordSize() const = 0; 226 227 // Returns whether the target supports predicated SIMD instructions. SupportsPredicatedSIMD()228 virtual bool SupportsPredicatedSIMD() const { return false; } 229 230 // Get FP register width in bytes for spilling/restoring in the slow paths. 231 // 232 // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers 233 // alias and live SIMD registers are forced to be spilled in full size in the slow paths. GetSlowPathFPWidth()234 virtual size_t GetSlowPathFPWidth() const { 235 // Default implementation. 236 return GetCalleePreservedFPWidth(); 237 } 238 239 // Get FP register width required to be preserved by the target ABI. 240 virtual size_t GetCalleePreservedFPWidth() const = 0; 241 242 // Get the size of the target SIMD register in bytes. 243 virtual size_t GetSIMDRegisterWidth() const = 0; 244 virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0; 245 void InitializeCodeGeneration(size_t number_of_spill_slots, 246 size_t maximum_safepoint_spill_size, 247 size_t number_of_out_slots, 248 const ArenaVector<HBasicBlock*>& block_order); 249 // Backends can override this as necessary. For most, no special alignment is required. GetPreferredSlotsAlignment()250 virtual uint32_t GetPreferredSlotsAlignment() const { return 1; } 251 GetFrameSize()252 uint32_t GetFrameSize() const { return frame_size_; } SetFrameSize(uint32_t size)253 void SetFrameSize(uint32_t size) { frame_size_ = size; } GetMaximumFrameSize()254 uint32_t GetMaximumFrameSize() const { 255 return GetStackOverflowReservedBytes(GetInstructionSet()); 256 } 257 GetCoreSpillMask()258 uint32_t GetCoreSpillMask() const { return core_spill_mask_; } GetFpuSpillMask()259 uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; } 260 GetNumberOfCoreRegisters()261 size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; } GetNumberOfFloatingPointRegisters()262 size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; } 263 virtual void SetupBlockedRegisters() const = 0; 264 ComputeSpillMask()265 virtual void ComputeSpillMask() { 266 core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; 267 DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; 268 fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; 269 } 270 271 virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; 272 virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; 273 virtual InstructionSet GetInstructionSet() const = 0; 274 275 // Saves the register in the stack. Returns the size taken on stack. 276 virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; 277 // Restores the register from the stack. Returns the size taken on stack. 278 virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0; 279 280 virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; 281 virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; 282 283 virtual bool NeedsTwoRegisters(DataType::Type type) const = 0; 284 // Returns whether we should split long moves in parallel moves. ShouldSplitLongMoves()285 virtual bool ShouldSplitLongMoves() const { return false; } 286 287 // Returns true if `invoke` is an implemented intrinsic in this codegen's arch. IsImplementedIntrinsic(HInvoke * invoke)288 bool IsImplementedIntrinsic(HInvoke* invoke) const { 289 return invoke->IsIntrinsic() && 290 !unimplemented_intrinsics_[static_cast<size_t>(invoke->GetIntrinsic())]; 291 } 292 GetNumberOfCoreCalleeSaveRegisters()293 size_t GetNumberOfCoreCalleeSaveRegisters() const { 294 return POPCOUNT(core_callee_save_mask_); 295 } 296 GetNumberOfCoreCallerSaveRegisters()297 size_t GetNumberOfCoreCallerSaveRegisters() const { 298 DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters()); 299 return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters(); 300 } 301 IsCoreCalleeSaveRegister(int reg)302 bool IsCoreCalleeSaveRegister(int reg) const { 303 return (core_callee_save_mask_ & (1 << reg)) != 0; 304 } 305 IsFloatingPointCalleeSaveRegister(int reg)306 bool IsFloatingPointCalleeSaveRegister(int reg) const { 307 return (fpu_callee_save_mask_ & (1 << reg)) != 0; 308 } 309 GetSlowPathSpills(LocationSummary * locations,bool core_registers)310 uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const { 311 DCHECK(locations->OnlyCallsOnSlowPath() || 312 (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() && 313 !locations->HasCustomSlowPathCallingConvention())); 314 uint32_t live_registers = core_registers 315 ? locations->GetLiveRegisters()->GetCoreRegisters() 316 : locations->GetLiveRegisters()->GetFloatingPointRegisters(); 317 if (locations->HasCustomSlowPathCallingConvention()) { 318 // Save only the live registers that the custom calling convention wants us to save. 319 uint32_t caller_saves = core_registers 320 ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters() 321 : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters(); 322 return live_registers & caller_saves; 323 } else { 324 // Default ABI, we need to spill non-callee-save live registers. 325 uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_; 326 return live_registers & ~callee_saves; 327 } 328 } 329 GetNumberOfSlowPathSpills(LocationSummary * locations,bool core_registers)330 size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const { 331 return POPCOUNT(GetSlowPathSpills(locations, core_registers)); 332 } 333 GetStackOffsetOfShouldDeoptimizeFlag()334 size_t GetStackOffsetOfShouldDeoptimizeFlag() const { 335 DCHECK(GetGraph()->HasShouldDeoptimizeFlag()); 336 DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize); 337 return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize; 338 } 339 340 // Record native to dex mapping for a suspend point. Required by runtime. 341 void RecordPcInfo(HInstruction* instruction, 342 uint32_t dex_pc, 343 uint32_t native_pc, 344 SlowPathCode* slow_path = nullptr, 345 bool native_debug_info = false); 346 347 // Record native to dex mapping for a suspend point. 348 // The native_pc is used from Assembler::CodePosition. 349 // 350 // Note: As Assembler::CodePosition is target dependent, it does not guarantee the exact native_pc 351 // for the instruction. If the exact native_pc is required it must be provided explicitly. 352 void RecordPcInfo(HInstruction* instruction, 353 uint32_t dex_pc, 354 SlowPathCode* slow_path = nullptr, 355 bool native_debug_info = false); 356 357 // Check whether we have already recorded mapping at this PC. 358 bool HasStackMapAtCurrentPc(); 359 360 // Record extra stack maps if we support native debugging. 361 // 362 // ARM specific behaviour: The recorded native PC might be a branch over pools to instructions 363 // corresponding the dex PC. 364 void MaybeRecordNativeDebugInfo(HInstruction* instruction, 365 uint32_t dex_pc, 366 SlowPathCode* slow_path = nullptr); 367 368 bool CanMoveNullCheckToUser(HNullCheck* null_check); 369 virtual void MaybeRecordImplicitNullCheck(HInstruction* instruction); 370 LocationSummary* CreateThrowingSlowPathLocations( 371 HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty()); 372 void GenerateNullCheck(HNullCheck* null_check); 373 virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0; 374 virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0; 375 376 // Records a stack map which the runtime might use to set catch phi values 377 // during exception delivery. 378 // TODO: Replace with a catch-entering instruction that records the environment. 379 void RecordCatchBlockInfo(); 380 GetCompilerOptions()381 const CompilerOptions& GetCompilerOptions() const { return compiler_options_; } 382 bool EmitReadBarrier() const; 383 bool EmitBakerReadBarrier() const; 384 bool EmitNonBakerReadBarrier() const; 385 ReadBarrierOption GetCompilerReadBarrierOption() const; 386 387 // Returns true if we should check the GC card for consistency purposes. 388 bool ShouldCheckGCCard(DataType::Type type, 389 HInstruction* value, 390 WriteBarrierKind write_barrier_kind) const; 391 392 // Get the ScopedArenaAllocator used for codegen memory allocation. 393 ScopedArenaAllocator* GetScopedAllocator(); 394 395 void AddSlowPath(SlowPathCode* slow_path); 396 397 ScopedArenaVector<uint8_t> BuildStackMaps(const dex::CodeItem* code_item_for_osr_check); 398 size_t GetNumberOfJitRoots() const; 399 400 // Fills the `literals` array with literals collected during code generation. 401 // Also emits literal patches. 402 void EmitJitRoots(uint8_t* code, 403 const uint8_t* roots_data, 404 /*out*/std::vector<Handle<mirror::Object>>* roots) 405 REQUIRES_SHARED(Locks::mutator_lock_); 406 IsLeafMethod()407 bool IsLeafMethod() const { 408 return is_leaf_; 409 } 410 MarkNotLeaf()411 void MarkNotLeaf() { 412 is_leaf_ = false; 413 requires_current_method_ = true; 414 } 415 NeedsSuspendCheckEntry()416 bool NeedsSuspendCheckEntry() const { 417 return needs_suspend_check_entry_; 418 } 419 MarkNeedsSuspendCheckEntry()420 void MarkNeedsSuspendCheckEntry() { 421 needs_suspend_check_entry_ = true; 422 } 423 SetRequiresCurrentMethod()424 void SetRequiresCurrentMethod() { 425 requires_current_method_ = true; 426 } 427 RequiresCurrentMethod()428 bool RequiresCurrentMethod() const { 429 return requires_current_method_; 430 } 431 432 // Clears the spill slots taken by loop phis in the `LocationSummary` of the 433 // suspend check. This is called when the code generator generates code 434 // for the suspend check at the back edge (instead of where the suspend check 435 // is, which is the loop entry). At this point, the spill slots for the phis 436 // have not been written to. 437 void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check, 438 HParallelMove* spills) const; 439 GetBlockedCoreRegisters()440 bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; } GetBlockedFloatingPointRegisters()441 bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; } 442 IsBlockedCoreRegister(size_t i)443 bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; } IsBlockedFloatingPointRegister(size_t i)444 bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; } 445 446 // Helper that returns the offset of the array's length field. 447 // Note: Besides the normal arrays, we also use the HArrayLength for 448 // accessing the String's `count` field in String intrinsics. 449 static uint32_t GetArrayLengthOffset(HArrayLength* array_length); 450 451 // Helper that returns the offset of the array's data. 452 // Note: Besides the normal arrays, we also use the HArrayGet for 453 // accessing the String's `value` field in String intrinsics. 454 static uint32_t GetArrayDataOffset(HArrayGet* array_get); 455 456 void EmitParallelMoves(Location from1, 457 Location to1, 458 DataType::Type type1, 459 Location from2, 460 Location to2, 461 DataType::Type type2); 462 InstanceOfNeedsReadBarrier(HInstanceOf * instance_of)463 bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) { 464 // Used only for `kExactCheck`, `kAbstractClassCheck`, `kClassHierarchyCheck`, 465 // `kArrayObjectCheck` and `kInterfaceCheck`. 466 DCHECK(instance_of->GetTypeCheckKind() == TypeCheckKind::kExactCheck || 467 instance_of->GetTypeCheckKind() == TypeCheckKind::kAbstractClassCheck || 468 instance_of->GetTypeCheckKind() == TypeCheckKind::kClassHierarchyCheck || 469 instance_of->GetTypeCheckKind() == TypeCheckKind::kArrayObjectCheck || 470 instance_of->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) 471 << instance_of->GetTypeCheckKind(); 472 // If the target class is in the boot or app image, it's non-moveable and it doesn't matter 473 // if we compare it with a from-space or to-space reference, the result is the same. 474 // It's OK to traverse a class hierarchy jumping between from-space and to-space. 475 return EmitReadBarrier() && !instance_of->GetTargetClass()->IsInImage(); 476 } 477 ReadBarrierOptionForInstanceOf(HInstanceOf * instance_of)478 ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) { 479 return InstanceOfNeedsReadBarrier(instance_of) ? kWithReadBarrier : kWithoutReadBarrier; 480 } 481 IsTypeCheckSlowPathFatal(HCheckCast * check_cast)482 bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) { 483 switch (check_cast->GetTypeCheckKind()) { 484 case TypeCheckKind::kExactCheck: 485 case TypeCheckKind::kAbstractClassCheck: 486 case TypeCheckKind::kClassHierarchyCheck: 487 case TypeCheckKind::kArrayObjectCheck: 488 case TypeCheckKind::kInterfaceCheck: { 489 bool needs_read_barrier = 490 EmitReadBarrier() && !check_cast->GetTargetClass()->IsInImage(); 491 // We do not emit read barriers for HCheckCast, so we can get false negatives 492 // and the slow path shall re-check and simply return if the cast is actually OK. 493 return !needs_read_barrier; 494 } 495 case TypeCheckKind::kArrayCheck: 496 case TypeCheckKind::kUnresolvedCheck: 497 return false; 498 case TypeCheckKind::kBitstringCheck: 499 return true; 500 } 501 LOG(FATAL) << "Unreachable"; 502 UNREACHABLE(); 503 } 504 GetCheckCastCallKind(HCheckCast * check_cast)505 LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) { 506 return (IsTypeCheckSlowPathFatal(check_cast) && !check_cast->CanThrowIntoCatchBlock()) 507 ? LocationSummary::kNoCall // In fact, call on a fatal (non-returning) slow path. 508 : LocationSummary::kCallOnSlowPath; 509 } 510 StoreNeedsWriteBarrier(DataType::Type type,HInstruction * value)511 static bool StoreNeedsWriteBarrier(DataType::Type type, HInstruction* value) { 512 // Check that null value is not represented as an integer constant. 513 DCHECK_IMPLIES(type == DataType::Type::kReference, !value->IsIntConstant()); 514 return type == DataType::Type::kReference && !value->IsNullConstant(); 515 } 516 517 // If we are compiling a graph with the WBE pass enabled, we want to honor the WriteBarrierKind 518 // set during the WBE pass. 519 bool StoreNeedsWriteBarrier(DataType::Type type, 520 HInstruction* value, 521 WriteBarrierKind write_barrier_kind) const; 522 523 // Performs checks pertaining to an InvokeRuntime call. 524 void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint, 525 HInstruction* instruction, 526 SlowPathCode* slow_path); 527 528 // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call. 529 static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction, 530 SlowPathCode* slow_path); 531 AddAllocatedRegister(Location location)532 void AddAllocatedRegister(Location location) { 533 allocated_registers_.Add(location); 534 } 535 HasAllocatedRegister(bool is_core,int reg)536 bool HasAllocatedRegister(bool is_core, int reg) const { 537 return is_core 538 ? allocated_registers_.ContainsCoreRegister(reg) 539 : allocated_registers_.ContainsFloatingPointRegister(reg); 540 } 541 542 void AllocateLocations(HInstruction* instruction); 543 544 // Tells whether the stack frame of the compiled method is 545 // considered "empty", that is either actually having a size of zero, 546 // or just containing the saved return address register. HasEmptyFrame()547 bool HasEmptyFrame() const { 548 return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0); 549 } 550 GetInt8ValueOf(HConstant * constant)551 static int8_t GetInt8ValueOf(HConstant* constant) { 552 DCHECK(constant->IsIntConstant()); 553 return constant->AsIntConstant()->GetValue(); 554 } 555 GetInt16ValueOf(HConstant * constant)556 static int16_t GetInt16ValueOf(HConstant* constant) { 557 DCHECK(constant->IsIntConstant()); 558 return constant->AsIntConstant()->GetValue(); 559 } 560 GetInt32ValueOf(HConstant * constant)561 static int32_t GetInt32ValueOf(HConstant* constant) { 562 if (constant->IsIntConstant()) { 563 return constant->AsIntConstant()->GetValue(); 564 } else if (constant->IsNullConstant()) { 565 return 0; 566 } else { 567 DCHECK(constant->IsFloatConstant()); 568 return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue()); 569 } 570 } 571 GetInt64ValueOf(HConstant * constant)572 static int64_t GetInt64ValueOf(HConstant* constant) { 573 if (constant->IsIntConstant()) { 574 return constant->AsIntConstant()->GetValue(); 575 } else if (constant->IsNullConstant()) { 576 return 0; 577 } else if (constant->IsFloatConstant()) { 578 return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue()); 579 } else if (constant->IsLongConstant()) { 580 return constant->AsLongConstant()->GetValue(); 581 } else { 582 DCHECK(constant->IsDoubleConstant()); 583 return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue()); 584 } 585 } 586 GetFirstRegisterSlotInSlowPath()587 size_t GetFirstRegisterSlotInSlowPath() const { 588 return first_register_slot_in_slow_path_; 589 } 590 FrameEntrySpillSize()591 uint32_t FrameEntrySpillSize() const { 592 return GetFpuSpillSize() + GetCoreSpillSize(); 593 } 594 595 virtual ParallelMoveResolver* GetMoveResolver() = 0; 596 597 static void CreateCommonInvokeLocationSummary( 598 HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor); 599 600 template <typename CriticalNativeCallingConventionVisitor, 601 size_t kNativeStackAlignment, 602 size_t GetCriticalNativeDirectCallFrameSize(std::string_view shorty)> PrepareCriticalNativeCall(HInvokeStaticOrDirect * invoke)603 size_t PrepareCriticalNativeCall(HInvokeStaticOrDirect* invoke) { 604 DCHECK(!invoke->GetLocations()->Intrinsified()); 605 CriticalNativeCallingConventionVisitor calling_convention_visitor( 606 /*for_register_allocation=*/ false); 607 HParallelMove parallel_move(GetGraph()->GetAllocator()); 608 PrepareCriticalNativeArgumentMoves(invoke, &calling_convention_visitor, ¶llel_move); 609 size_t out_frame_size = 610 RoundUp(calling_convention_visitor.GetStackOffset(), kNativeStackAlignment); 611 if (kIsDebugBuild) { 612 std::string_view shorty = GetCriticalNativeShorty(invoke); 613 CHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty), out_frame_size); 614 } 615 if (out_frame_size != 0u) { 616 FinishCriticalNativeFrameSetup(out_frame_size, ¶llel_move); 617 } 618 return out_frame_size; 619 } 620 621 void GenerateInvokeStaticOrDirectRuntimeCall( 622 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path); 623 624 void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke); 625 626 void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke, SlowPathCode* slow_path = nullptr); 627 628 void GenerateInvokeCustomCall(HInvokeCustom* invoke); 629 630 void CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction, Location out); 631 632 void CreateUnresolvedFieldLocationSummary( 633 HInstruction* field_access, 634 DataType::Type field_type, 635 const FieldAccessCallingConvention& calling_convention); 636 637 void GenerateUnresolvedFieldAccess( 638 HInstruction* field_access, 639 DataType::Type field_type, 640 uint32_t field_index, 641 uint32_t dex_pc, 642 const FieldAccessCallingConvention& calling_convention); 643 644 static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls, 645 Location runtime_type_index_location, 646 Location runtime_return_location); 647 void GenerateLoadClassRuntimeCall(HLoadClass* cls); 648 649 static void CreateLoadMethodHandleRuntimeCallLocationSummary(HLoadMethodHandle* method_handle, 650 Location runtime_handle_index_location, 651 Location runtime_return_location); 652 void GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle); 653 654 static void CreateLoadMethodTypeRuntimeCallLocationSummary(HLoadMethodType* method_type, 655 Location runtime_type_index_location, 656 Location runtime_return_location); 657 void GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type); 658 659 static uint32_t GetBootImageOffset(ObjPtr<mirror::Object> object) 660 REQUIRES_SHARED(Locks::mutator_lock_); 661 static uint32_t GetBootImageOffset(HLoadClass* load_class); 662 static uint32_t GetBootImageOffset(HLoadString* load_string); 663 static uint32_t GetBootImageOffset(HInvoke* invoke); 664 static uint32_t GetBootImageOffset(ClassRoot class_root); 665 static uint32_t GetBootImageOffsetOfIntrinsicDeclaringClass(HInvoke* invoke); 666 667 static LocationSummary* CreateSystemArrayCopyLocationSummary( 668 HInvoke* invoke, int32_t length_threshold = -1, size_t num_temps = 3); 669 SetDisassemblyInformation(DisassemblyInformation * info)670 void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; } GetDisassemblyInformation()671 DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; } 672 673 virtual void InvokeRuntime(QuickEntrypointEnum entrypoint, 674 HInstruction* instruction, 675 uint32_t dex_pc, 676 SlowPathCode* slow_path = nullptr) = 0; 677 678 // Check if the desired_string_load_kind is supported. If it is, return it, 679 // otherwise return a fall-back kind that should be used instead. 680 virtual HLoadString::LoadKind GetSupportedLoadStringKind( 681 HLoadString::LoadKind desired_string_load_kind) = 0; 682 683 // Check if the desired_class_load_kind is supported. If it is, return it, 684 // otherwise return a fall-back kind that should be used instead. 685 virtual HLoadClass::LoadKind GetSupportedLoadClassKind( 686 HLoadClass::LoadKind desired_class_load_kind) = 0; 687 GetLoadStringCallKind(HLoadString * load)688 LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) { 689 switch (load->GetLoadKind()) { 690 case HLoadString::LoadKind::kBssEntry: 691 DCHECK(load->NeedsEnvironment()); 692 return LocationSummary::kCallOnSlowPath; 693 case HLoadString::LoadKind::kRuntimeCall: 694 DCHECK(load->NeedsEnvironment()); 695 return LocationSummary::kCallOnMainOnly; 696 case HLoadString::LoadKind::kJitTableAddress: 697 DCHECK(!load->NeedsEnvironment()); 698 return EmitReadBarrier() 699 ? LocationSummary::kCallOnSlowPath 700 : LocationSummary::kNoCall; 701 break; 702 default: 703 DCHECK(!load->NeedsEnvironment()); 704 return LocationSummary::kNoCall; 705 } 706 } 707 708 // Check if the desired_dispatch_info is supported. If it is, return it, 709 // otherwise return a fall-back info that should be used instead. 710 virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( 711 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, 712 ArtMethod* method) = 0; 713 714 // Generate a call to a static or direct method. 715 virtual void GenerateStaticOrDirectCall( 716 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0; 717 // Generate a call to a virtual method. 718 virtual void GenerateVirtualCall( 719 HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0; 720 721 // Copy the result of a call into the given target. 722 virtual void MoveFromReturnRegister(Location trg, DataType::Type type) = 0; 723 724 virtual void IncreaseFrame(size_t adjustment) = 0; 725 virtual void DecreaseFrame(size_t adjustment) = 0; 726 727 virtual void GenerateNop() = 0; 728 729 static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array); 730 static ScaleFactor ScaleFactorForType(DataType::Type type); 731 GetCode()732 ArrayRef<const uint8_t> GetCode() const { 733 return ArrayRef<const uint8_t>(GetAssembler().CodeBufferBaseAddress(), 734 GetAssembler().CodeSize()); 735 } 736 737 protected: 738 // Patch info used for recording locations of required linker patches and their targets, 739 // i.e. target method, string, type or code identified by their dex file and index, 740 // or boot image .data.img.rel.ro entries identified by the boot image offset. 741 template <typename LabelType> 742 struct PatchInfo { PatchInfoPatchInfo743 PatchInfo(const DexFile* dex_file, uint32_t off_or_idx) 744 : target_dex_file(dex_file), offset_or_index(off_or_idx), label() { } 745 746 // Target dex file or null for boot image .data.img.rel.ro patches. 747 const DexFile* target_dex_file; 748 // Either the boot image offset (to write to .data.img.rel.ro) or string/type/method index. 749 uint32_t offset_or_index; 750 // Label for the instruction to patch. 751 LabelType label; 752 }; 753 754 CodeGenerator(HGraph* graph, 755 size_t number_of_core_registers, 756 size_t number_of_fpu_registers, 757 size_t number_of_register_pairs, 758 uint32_t core_callee_save_mask, 759 uint32_t fpu_callee_save_mask, 760 const CompilerOptions& compiler_options, 761 OptimizingCompilerStats* stats, 762 const art::ArrayRef<const bool>& unimplemented_intrinsics); 763 764 virtual HGraphVisitor* GetLocationBuilder() = 0; 765 virtual HGraphVisitor* GetInstructionVisitor() = 0; 766 767 template <typename RegType> ComputeRegisterMask(const RegType * registers,size_t length)768 static uint32_t ComputeRegisterMask(const RegType* registers, size_t length) { 769 uint32_t mask = 0; 770 for (size_t i = 0, e = length; i < e; ++i) { 771 mask |= (1 << registers[i]); 772 } 773 return mask; 774 } 775 776 // Returns the location of the first spilled entry for floating point registers, 777 // relative to the stack pointer. GetFpuSpillStart()778 uint32_t GetFpuSpillStart() const { 779 return GetFrameSize() - FrameEntrySpillSize(); 780 } 781 GetFpuSpillSize()782 uint32_t GetFpuSpillSize() const { 783 return POPCOUNT(fpu_spill_mask_) * GetCalleePreservedFPWidth(); 784 } 785 GetCoreSpillSize()786 uint32_t GetCoreSpillSize() const { 787 return POPCOUNT(core_spill_mask_) * GetWordSize(); 788 } 789 HasAllocatedCalleeSaveRegisters()790 virtual bool HasAllocatedCalleeSaveRegisters() const { 791 // We check the core registers against 1 because it always comprises the return PC. 792 return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1) 793 || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0); 794 } 795 CallPushesPC()796 bool CallPushesPC() const { 797 InstructionSet instruction_set = GetInstructionSet(); 798 return instruction_set == InstructionSet::kX86 || instruction_set == InstructionSet::kX86_64; 799 } 800 801 // Arm64 has its own type for a label, so we need to templatize these methods 802 // to share the logic. 803 804 template <typename LabelType> CommonInitializeLabels()805 LabelType* CommonInitializeLabels() { 806 // We use raw array allocations instead of ArenaVector<> because Labels are 807 // non-constructible and non-movable and as such cannot be held in a vector. 808 size_t size = GetGraph()->GetBlocks().size(); 809 LabelType* labels = 810 GetGraph()->GetAllocator()->AllocArray<LabelType>(size, kArenaAllocCodeGenerator); 811 for (size_t i = 0; i != size; ++i) { 812 new(labels + i) LabelType(); 813 } 814 return labels; 815 } 816 817 template <typename LabelType> CommonGetLabelOf(LabelType * raw_pointer_to_labels_array,HBasicBlock * block)818 LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const { 819 block = FirstNonEmptyBlock(block); 820 return raw_pointer_to_labels_array + block->GetBlockId(); 821 } 822 GetCurrentSlowPath()823 SlowPathCode* GetCurrentSlowPath() { 824 return current_slow_path_; 825 } 826 827 StackMapStream* GetStackMapStream(); 828 GetCodeGenerationData()829 CodeGenerationData* GetCodeGenerationData() { 830 return code_generation_data_.get(); 831 } 832 833 void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string); 834 uint64_t GetJitStringRootIndex(StringReference string_reference); 835 void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass); 836 uint64_t GetJitClassRootIndex(TypeReference type_reference); 837 838 // Emit the patches assocatied with JIT roots. Only applies to JIT compiled code. 839 virtual void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data); 840 841 // Frame size required for this method. 842 uint32_t frame_size_; 843 uint32_t core_spill_mask_; 844 uint32_t fpu_spill_mask_; 845 uint32_t first_register_slot_in_slow_path_; 846 847 // Registers that were allocated during linear scan. 848 RegisterSet allocated_registers_; 849 850 // Arrays used when doing register allocation to know which 851 // registers we can allocate. `SetupBlockedRegisters` updates the 852 // arrays. 853 bool* const blocked_core_registers_; 854 bool* const blocked_fpu_registers_; 855 size_t number_of_core_registers_; 856 size_t number_of_fpu_registers_; 857 size_t number_of_register_pairs_; 858 const uint32_t core_callee_save_mask_; 859 const uint32_t fpu_callee_save_mask_; 860 861 // The order to use for code generation. 862 const ArenaVector<HBasicBlock*>* block_order_; 863 864 DisassemblyInformation* disasm_info_; 865 866 private: 867 void InitializeCodeGenerationData(); 868 size_t GetStackOffsetOfSavedRegister(size_t index); 869 void GenerateSlowPaths(); 870 void BlockIfInRegister(Location location, bool is_out = false) const; 871 void EmitEnvironment(HEnvironment* environment, 872 SlowPathCode* slow_path, 873 bool needs_vreg_info = true, 874 bool is_for_catch_handler = false, 875 bool innermost_environment = true); 876 void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path, bool is_for_catch_handler); 877 void EmitVRegInfoOnlyCatchPhis(HEnvironment* environment); 878 879 static void PrepareCriticalNativeArgumentMoves( 880 HInvokeStaticOrDirect* invoke, 881 /*inout*/InvokeDexCallingConventionVisitor* visitor, 882 /*out*/HParallelMove* parallel_move); 883 884 void FinishCriticalNativeFrameSetup(size_t out_frame_size, /*inout*/HParallelMove* parallel_move); 885 886 static std::string_view GetCriticalNativeShorty(HInvokeStaticOrDirect* invoke); 887 888 OptimizingCompilerStats* stats_; 889 890 HGraph* const graph_; 891 const CompilerOptions& compiler_options_; 892 893 // The current slow-path that we're generating code for. 894 SlowPathCode* current_slow_path_; 895 896 // The current block index in `block_order_` of the block 897 // we are generating code for. 898 size_t current_block_index_; 899 900 // Whether the method is a leaf method. 901 bool is_leaf_; 902 903 // Whether the method has to emit a SuspendCheck at entry. 904 bool needs_suspend_check_entry_; 905 906 // Whether an instruction in the graph accesses the current method. 907 // TODO: Rename: this actually indicates that some instruction in the method 908 // needs the environment including a valid stack frame. 909 bool requires_current_method_; 910 911 // The CodeGenerationData contains a ScopedArenaAllocator intended for reusing the 912 // ArenaStack memory allocated in previous passes instead of adding to the memory 913 // held by the ArenaAllocator. This ScopedArenaAllocator is created in 914 // CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed. 915 std::unique_ptr<CodeGenerationData> code_generation_data_; 916 917 // Which intrinsics we don't have handcrafted code for. 918 art::ArrayRef<const bool> unimplemented_intrinsics_; 919 920 friend class OptimizingCFITest; 921 ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD); 922 ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD); 923 924 DISALLOW_COPY_AND_ASSIGN(CodeGenerator); 925 }; 926 927 template <typename C, typename F> 928 class CallingConvention { 929 public: CallingConvention(const C * registers,size_t number_of_registers,const F * fpu_registers,size_t number_of_fpu_registers,PointerSize pointer_size)930 CallingConvention(const C* registers, 931 size_t number_of_registers, 932 const F* fpu_registers, 933 size_t number_of_fpu_registers, 934 PointerSize pointer_size) 935 : registers_(registers), 936 number_of_registers_(number_of_registers), 937 fpu_registers_(fpu_registers), 938 number_of_fpu_registers_(number_of_fpu_registers), 939 pointer_size_(pointer_size) {} 940 GetNumberOfRegisters()941 size_t GetNumberOfRegisters() const { return number_of_registers_; } GetNumberOfFpuRegisters()942 size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; } 943 GetRegisterAt(size_t index)944 C GetRegisterAt(size_t index) const { 945 DCHECK_LT(index, number_of_registers_); 946 return registers_[index]; 947 } 948 GetFpuRegisterAt(size_t index)949 F GetFpuRegisterAt(size_t index) const { 950 DCHECK_LT(index, number_of_fpu_registers_); 951 return fpu_registers_[index]; 952 } 953 GetStackOffsetOf(size_t index)954 size_t GetStackOffsetOf(size_t index) const { 955 // We still reserve the space for parameters passed by registers. 956 // Add space for the method pointer. 957 return static_cast<size_t>(pointer_size_) + index * kVRegSize; 958 } 959 960 private: 961 const C* registers_; 962 const size_t number_of_registers_; 963 const F* fpu_registers_; 964 const size_t number_of_fpu_registers_; 965 const PointerSize pointer_size_; 966 967 DISALLOW_COPY_AND_ASSIGN(CallingConvention); 968 }; 969 970 /** 971 * A templated class SlowPathGenerator with a templated method NewSlowPath() 972 * that can be used by any code generator to share equivalent slow-paths with 973 * the objective of reducing generated code size. 974 * 975 * InstructionType: instruction that requires SlowPathCodeType 976 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *) 977 */ 978 template <typename InstructionType> 979 class SlowPathGenerator { 980 static_assert(std::is_base_of<HInstruction, InstructionType>::value, 981 "InstructionType is not a subclass of art::HInstruction"); 982 983 public: SlowPathGenerator(HGraph * graph,CodeGenerator * codegen)984 SlowPathGenerator(HGraph* graph, CodeGenerator* codegen) 985 : graph_(graph), 986 codegen_(codegen), 987 slow_path_map_(std::less<uint32_t>(), 988 graph->GetAllocator()->Adapter(kArenaAllocSlowPaths)) {} 989 990 // Creates and adds a new slow-path, if needed, or returns existing one otherwise. 991 // Templating the method (rather than the whole class) on the slow-path type enables 992 // keeping this code at a generic, non architecture-specific place. 993 // 994 // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType. 995 // To relax this requirement, we would need some RTTI on the stored slow-paths, 996 // or template the class as a whole on SlowPathType. 997 template <typename SlowPathCodeType> NewSlowPath(InstructionType * instruction)998 SlowPathCodeType* NewSlowPath(InstructionType* instruction) { 999 static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value, 1000 "SlowPathCodeType is not a subclass of art::SlowPathCode"); 1001 static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value, 1002 "SlowPathCodeType is not constructible from InstructionType*"); 1003 // Iterate over potential candidates for sharing. Currently, only same-typed 1004 // slow-paths with exactly the same dex-pc are viable candidates. 1005 // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing? 1006 const uint32_t dex_pc = instruction->GetDexPc(); 1007 auto iter = slow_path_map_.find(dex_pc); 1008 if (iter != slow_path_map_.end()) { 1009 const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second; 1010 for (const auto& it : candidates) { 1011 InstructionType* other_instruction = it.first; 1012 SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second); 1013 // Determine if the instructions allow for slow-path sharing. 1014 if (HaveSameLiveRegisters(instruction, other_instruction) && 1015 HaveSameStackMap(instruction, other_instruction)) { 1016 // Can share: reuse existing one. 1017 return other_slow_path; 1018 } 1019 } 1020 } else { 1021 // First time this dex-pc is seen. 1022 iter = slow_path_map_.Put(dex_pc, 1023 {{}, {graph_->GetAllocator()->Adapter(kArenaAllocSlowPaths)}}); 1024 } 1025 // Cannot share: create and add new slow-path for this particular dex-pc. 1026 SlowPathCodeType* slow_path = 1027 new (codegen_->GetScopedAllocator()) SlowPathCodeType(instruction); 1028 iter->second.emplace_back(std::make_pair(instruction, slow_path)); 1029 codegen_->AddSlowPath(slow_path); 1030 return slow_path; 1031 } 1032 1033 private: 1034 // Tests if both instructions have same set of live physical registers. This ensures 1035 // the slow-path has exactly the same preamble on saving these registers to stack. HaveSameLiveRegisters(const InstructionType * i1,const InstructionType * i2)1036 bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const { 1037 const uint32_t core_spill = ~codegen_->GetCoreSpillMask(); 1038 const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask(); 1039 RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters(); 1040 RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters(); 1041 return (((live1->GetCoreRegisters() & core_spill) == 1042 (live2->GetCoreRegisters() & core_spill)) && 1043 ((live1->GetFloatingPointRegisters() & fpu_spill) == 1044 (live2->GetFloatingPointRegisters() & fpu_spill))); 1045 } 1046 1047 // Tests if both instructions have the same stack map. This ensures the interpreter 1048 // will find exactly the same dex-registers at the same entries. HaveSameStackMap(const InstructionType * i1,const InstructionType * i2)1049 bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const { 1050 DCHECK(i1->HasEnvironment()); 1051 DCHECK(i2->HasEnvironment()); 1052 // We conservatively test if the two instructions find exactly the same instructions 1053 // and location in each dex-register. This guarantees they will have the same stack map. 1054 HEnvironment* e1 = i1->GetEnvironment(); 1055 HEnvironment* e2 = i2->GetEnvironment(); 1056 if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) { 1057 return false; 1058 } 1059 for (size_t i = 0, sz = e1->Size(); i < sz; ++i) { 1060 if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) || 1061 !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) { 1062 return false; 1063 } 1064 } 1065 return true; 1066 } 1067 1068 HGraph* const graph_; 1069 CodeGenerator* const codegen_; 1070 1071 // Map from dex-pc to vector of already existing instruction/slow-path pairs. 1072 ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_; 1073 1074 DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator); 1075 }; 1076 1077 class InstructionCodeGenerator : public HGraphVisitor { 1078 public: InstructionCodeGenerator(HGraph * graph,CodeGenerator * codegen)1079 InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen) 1080 : HGraphVisitor(graph), 1081 deopt_slow_paths_(graph, codegen) {} 1082 1083 protected: 1084 // Add slow-path generator for each instruction/slow-path combination that desires sharing. 1085 // TODO: under current regime, only deopt sharing make sense; extend later. 1086 SlowPathGenerator<HDeoptimize> deopt_slow_paths_; 1087 }; 1088 1089 } // namespace art 1090 1091 #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ 1092