/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/array_ref.h"
#include "base/bit_field.h"
#include "base/bit_utils.h"
#include "base/enums.h"
#include "base/globals.h"
#include "base/memory_region.h"
#include "dex/string_reference.h"
#include "dex/type_reference.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "read_barrier_option.h"
#include "stack.h"
#include "utils/label.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

static constexpr ReadBarrierOption kCompilerReadBarrierOption =
    kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;

class Assembler;
class CodeGenerator;
class CompilerOptions;
class StackMapStream;
class ParallelMoveResolver;

namespace linker {
class LinkerPatch;
}  // namespace linker

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;
  virtual ArrayRef<const uint8_t> GetMemory() const = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};
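
// Illustrative sketch (hypothetical, not part of this header): a CodeAllocator only needs to
// hand out a writable buffer of the requested size and expose it afterwards, e.g. backed by a
// std::vector:
//
//   class VectorCodeAllocatorSketch final : public CodeAllocator {
//    public:
//     uint8_t* Allocate(size_t size) override {
//       memory_.resize(size);
//       return memory_.data();
//     }
//     ArrayRef<const uint8_t> GetMemory() const override {
//       return ArrayRef<const uint8_t>(memory_);
//     }
//
//    private:
//     std::vector<uint8_t> memory_;
//   };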

class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
 public:
  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  // Save live core and floating-point caller-save registers and
  // update the stack mask in `locations` for registers holding object
  // references.
  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  // Restore live core and floating-point caller-save registers.
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

  Label* GetEntryLabel() { return &entry_label_; }
  Label* GetExitLabel() { return &exit_label_; }

  HInstruction* GetInstruction() const {
    return instruction_;
  }

  uint32_t GetDexPc() const {
    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
  }

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  // The instruction where this slow path is happening.
  HInstruction* instruction_;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  Label entry_label_;
  Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
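
// Illustrative sketch (hypothetical names, not part of this header): backends subclass
// SlowPathCode, emit the out-of-line code in EmitNativeCode(), and branch to GetEntryLabel()
// from the fast path. A fatal path (one that always throws) never jumps back to GetExitLabel().
//
//   class NullCheckSlowPathSketch : public SlowPathCode {
//    public:
//     explicit NullCheckSlowPathSketch(HNullCheck* instruction) : SlowPathCode(instruction) {}
//     void EmitNativeCode(CodeGenerator* codegen) override {
//       // Bind GetEntryLabel(), then call the runtime entrypoint that throws
//       // NullPointerException; the call does not return.
//     }
//     bool IsFatal() const override { return true; }
//     const char* GetDescription() const override { return "NullCheckSlowPathSketch"; }
//   };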

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(DataType::Type type) = 0;
  virtual Location GetReturnLocation(DataType::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

class FieldAccessCallingConvention {
 public:
  virtual Location GetObjectLocation() const = 0;
  virtual Location GetFieldIndexLocation() const = 0;
  virtual Location GetReturnLocation(DataType::Type type) const = 0;
  virtual Location GetSetValueLocation(DataType::Type type, bool is_instance) const = 0;
  virtual Location GetFpuLocation(DataType::Type type) const = 0;
  virtual ~FieldAccessCallingConvention() {}

 protected:
  FieldAccessCallingConvention() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
};

class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
 public:
  // Compiles the graph to executable instructions.
  void Compile(CodeAllocator* allocator);
  static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
                                               const CompilerOptions& compiler_options,
                                               OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGenerator();

  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()))  // Art method
        + parameter->GetIndex() * kVRegSize;
  }

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches);
  virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const;
  virtual void EmitThunkCode(const linker::LinkerPatch& patch,
                             /*out*/ ArenaVector<uint8_t>* code,
                             /*out*/ std::string* debug_name);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void MoveConstant(Location destination, int32_t value) = 0;
  virtual void MoveLocation(Location dst, Location src, DataType::Type dst_type) = 0;
  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;

  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;

  // Get FP register width in bytes for spilling/restoring in the slow paths.
  //
  // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers
  // alias and live SIMD registers are forced to be spilled in full size in the slow paths.
  virtual size_t GetSlowPathFPWidth() const {
    // Default implementation.
    return GetCalleePreservedFPWidth();
  }

  // Get FP register width required to be preserved by the target ABI.
  virtual size_t GetCalleePreservedFPWidth() const = 0;

  virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_safepoint_spill_size,
                                size_t number_of_out_slots,
                                const ArenaVector<HBasicBlock*>& block_order);
  // Backends can override this as necessary. For most, no special alignment is required.
  virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters() const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
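
  // Illustrative sketch (hypothetical register ids, not part of this header): backends typically
  // build their callee-save masks from a fixed register list with ComputeRegisterMask, one bit
  // set per register id, e.g.
  //
  //   static const int kSketchCalleeSaves[] = { 5, 6, 7 };
  //   const uint32_t mask =
  //       CodeGenerator::ComputeRegisterMask(kSketchCalleeSaves, arraysize(kSketchCalleeSaves));
  //   // mask == 0xE0, i.e. bits 5, 6 and 7.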

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register in the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(DataType::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  size_t GetNumberOfCoreCalleeSaveRegisters() const {
    return POPCOUNT(core_callee_save_mask_);
  }

  size_t GetNumberOfCoreCallerSaveRegisters() const {
    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
  }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    DCHECK(locations->OnlyCallsOnSlowPath() ||
           (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
            !locations->HasCustomSlowPathCallingConvention()));
    uint32_t live_registers = core_registers
        ? locations->GetLiveRegisters()->GetCoreRegisters()
        : locations->GetLiveRegisters()->GetFloatingPointRegisters();
    if (locations->HasCustomSlowPathCallingConvention()) {
      // Save only the live registers that the custom calling convention wants us to save.
      uint32_t caller_saves = core_registers
          ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
          : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
      return live_registers & caller_saves;
    } else {
      // Default ABI, we need to spill non-callee-save live registers.
      uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
      return live_registers & ~callee_saves;
    }
  }

  size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    return POPCOUNT(GetSlowPathSpills(locations, core_registers));
  }

  size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
    DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
    DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
    return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
  }

  // Record native to dex mapping for a suspend point. Required by runtime.
  void RecordPcInfo(HInstruction* instruction,
                    uint32_t dex_pc,
                    uint32_t native_pc,
                    SlowPathCode* slow_path = nullptr,
                    bool native_debug_info = false);

  // Record native to dex mapping for a suspend point.
  // The native_pc is used from Assembler::CodePosition.
  //
  // Note: As Assembler::CodePosition is target dependent, it does not guarantee the exact
  // native_pc for the instruction. If the exact native_pc is required it must be provided
  // explicitly.
  void RecordPcInfo(HInstruction* instruction,
                    uint32_t dex_pc,
                    SlowPathCode* slow_path = nullptr,
                    bool native_debug_info = false);

  // Check whether we have already recorded mapping at this PC.
  bool HasStackMapAtCurrentPc();

  // Record extra stack maps if we support native debugging.
  //
  // ARM specific behaviour: The recorded native PC might be a branch over pools to instructions
  // corresponding to the dex PC.
  void MaybeRecordNativeDebugInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path = nullptr);

  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  virtual void MaybeRecordImplicitNullCheck(HInstruction* instruction);
  LocationSummary* CreateThrowingSlowPathLocations(
      HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
  void GenerateNullCheck(HNullCheck* null_check);
  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;

  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();

  // Get the ScopedArenaAllocator used for codegen memory allocation.
  ScopedArenaAllocator* GetScopedAllocator();

  void AddSlowPath(SlowPathCode* slow_path);

  ScopedArenaVector<uint8_t> BuildStackMaps(const dex::CodeItem* code_item_for_osr_check);
  size_t GetNumberOfJitRoots() const;

  // Fills the `literals` array with literals collected during code generation.
  // Also emits literal patches.
  void EmitJitRoots(uint8_t* code,
                    const uint8_t* roots_data,
                    /*out*/std::vector<Handle<mirror::Object>>* roots)
      REQUIRES_SHARED(Locks::mutator_lock_);

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check,
                                             HParallelMove* spills) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
  bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }

  // Helper that returns the offset of the array's length field.
  // Note: Besides the normal arrays, we also use the HArrayLength for
  // accessing the String's `count` field in String intrinsics.
  static uint32_t GetArrayLengthOffset(HArrayLength* array_length);

  // Helper that returns the offset of the array's data.
  // Note: Besides the normal arrays, we also use the HArrayGet for
  // accessing the String's `value` field in String intrinsics.
  static uint32_t GetArrayDataOffset(HArrayGet* array_get);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         DataType::Type type1,
                         Location from2,
                         Location to2,
                         DataType::Type type2);

  static bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) {
    // Used only for kExactCheck, kAbstractClassCheck, kClassHierarchyCheck and kArrayObjectCheck.
    DCHECK(instance_of->GetTypeCheckKind() == TypeCheckKind::kExactCheck ||
           instance_of->GetTypeCheckKind() == TypeCheckKind::kAbstractClassCheck ||
           instance_of->GetTypeCheckKind() == TypeCheckKind::kClassHierarchyCheck ||
           instance_of->GetTypeCheckKind() == TypeCheckKind::kArrayObjectCheck)
        << instance_of->GetTypeCheckKind();
    // If the target class is in the boot image, it's non-moveable and it doesn't matter
    // if we compare it with a from-space or to-space reference, the result is the same.
    // It's OK to traverse a class hierarchy jumping between from-space and to-space.
    return kEmitCompilerReadBarrier && !instance_of->GetTargetClass()->IsInBootImage();
  }

  static ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) {
    return InstanceOfNeedsReadBarrier(instance_of) ? kWithReadBarrier : kWithoutReadBarrier;
  }

  static bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) {
    switch (check_cast->GetTypeCheckKind()) {
      case TypeCheckKind::kExactCheck:
      case TypeCheckKind::kAbstractClassCheck:
      case TypeCheckKind::kClassHierarchyCheck:
      case TypeCheckKind::kArrayObjectCheck:
      case TypeCheckKind::kInterfaceCheck: {
        bool needs_read_barrier =
            kEmitCompilerReadBarrier && !check_cast->GetTargetClass()->IsInBootImage();
        // We do not emit read barriers for HCheckCast, so we can get false negatives
        // and the slow path shall re-check and simply return if the cast is actually OK.
        return !needs_read_barrier;
      }
      case TypeCheckKind::kArrayCheck:
      case TypeCheckKind::kUnresolvedCheck:
        return false;
      case TypeCheckKind::kBitstringCheck:
        return true;
    }
    LOG(FATAL) << "Unreachable";
    UNREACHABLE();
  }

  static LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) {
    return (IsTypeCheckSlowPathFatal(check_cast) && !check_cast->CanThrowIntoCatchBlock())
        ? LocationSummary::kNoCall  // In fact, call on a fatal (non-returning) slow path.
        : LocationSummary::kCallOnSlowPath;
  }

  static bool StoreNeedsWriteBarrier(DataType::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != DataType::Type::kReference || !value->IsIntConstant());
    return type == DataType::Type::kReference && !value->IsNullConstant();
  }

  // Performs checks pertaining to an InvokeRuntime call.
  void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             SlowPathCode* slow_path);

  // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
  static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
                                                          SlowPathCode* slow_path);

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is either actually having a size of zero,
  // or just containing the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

  static int8_t GetInt8ValueOf(HConstant* constant) {
    DCHECK(constant->IsIntConstant());
    return constant->AsIntConstant()->GetValue();
  }

  static int16_t GetInt16ValueOf(HConstant* constant) {
    DCHECK(constant->IsIntConstant());
    return constant->AsIntConstant()->GetValue();
  }

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  void GenerateInvokeStaticOrDirectRuntimeCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path);

  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

  void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke);

  void GenerateInvokeCustomCall(HInvokeCustom* invoke);

  void CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction, Location out);

  void CreateUnresolvedFieldLocationSummary(
      HInstruction* field_access,
      DataType::Type field_type,
      const FieldAccessCallingConvention& calling_convention);

  void GenerateUnresolvedFieldAccess(
      HInstruction* field_access,
      DataType::Type field_type,
      uint32_t field_index,
      uint32_t dex_pc,
      const FieldAccessCallingConvention& calling_convention);

  static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
                                                        Location runtime_type_index_location,
                                                        Location runtime_return_location);
  void GenerateLoadClassRuntimeCall(HLoadClass* cls);

  static void CreateLoadMethodHandleRuntimeCallLocationSummary(
      HLoadMethodHandle* method_handle,
      Location runtime_handle_index_location,
      Location runtime_return_location);
  void GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle);

  static void CreateLoadMethodTypeRuntimeCallLocationSummary(
      HLoadMethodType* method_type,
      Location runtime_type_index_location,
      Location runtime_return_location);
  void GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type);

  uint32_t GetBootImageOffset(HLoadClass* load_class);
  uint32_t GetBootImageOffset(HLoadString* load_string);
  uint32_t GetBootImageOffset(HInvokeStaticOrDirect* invoke);

  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             uint32_t dex_pc,
                             SlowPathCode* slow_path = nullptr) = 0;

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) = 0;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) = 0;

  static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
    switch (load->GetLoadKind()) {
      case HLoadString::LoadKind::kBssEntry:
        DCHECK(load->NeedsEnvironment());
        return LocationSummary::kCallOnSlowPath;
      case HLoadString::LoadKind::kRuntimeCall:
        DCHECK(load->NeedsEnvironment());
        return LocationSummary::kCallOnMainOnly;
      case HLoadString::LoadKind::kJitTableAddress:
        DCHECK(!load->NeedsEnvironment());
        return kEmitCompilerReadBarrier
            ? LocationSummary::kCallOnSlowPath
            : LocationSummary::kNoCall;
      default:
        DCHECK(!load->NeedsEnvironment());
        return LocationSummary::kNoCall;
    }
  }

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) = 0;

  // Generate a call to a static or direct method.
  virtual void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
  // Generate a call to a virtual method.
  virtual void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;

  // Copy the result of a call into the given target.
  virtual void MoveFromReturnRegister(Location trg, DataType::Type type) = 0;

  virtual void GenerateNop() = 0;

  static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array);

 protected:
  // Patch info used for recording locations of required linker patches and their targets,
  // i.e. target method, string, type or code identified by their dex file and index,
  // or .data.bimg.rel.ro entries identified by the boot image offset.
  template <typename LabelType>
  struct PatchInfo {
    PatchInfo(const DexFile* dex_file, uint32_t off_or_idx)
        : target_dex_file(dex_file), offset_or_index(off_or_idx), label() { }

    // Target dex file or null for .data.bimg.rel.ro patches.
    const DexFile* target_dex_file;
    // Either the boot image offset (to write to .data.bimg.rel.ro) or string/type/method index.
    uint32_t offset_or_index;
    // Label for the instruction to patch.
    LabelType label;
  };

  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats);

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetCalleePreservedFPWidth();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  virtual bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
        || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == InstructionSet::kX86 || instruction_set == InstructionSet::kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize these methods
  // to share the logic.

  template <typename LabelType>
  LabelType* CommonInitializeLabels() {
    // We use raw array allocations instead of ArenaVector<> because Labels are
    // non-constructible and non-movable and as such cannot be held in a vector.
    size_t size = GetGraph()->GetBlocks().size();
    LabelType* labels =
        GetGraph()->GetAllocator()->AllocArray<LabelType>(size, kArenaAllocCodeGenerator);
    for (size_t i = 0; i != size; ++i) {
      new(labels + i) LabelType();
    }
    return labels;
  }

  template <typename LabelType>
  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  SlowPathCode* GetCurrentSlowPath() {
    return current_slow_path_;
  }

  StackMapStream* GetStackMapStream();

  void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string);
  uint64_t GetJitStringRootIndex(StringReference string_reference);
  void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass);
  uint64_t GetJitClassRootIndex(TypeReference type_reference);

  // Emit the patches associated with JIT roots. Only applies to JIT compiled code.
  virtual void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data);

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  // The order to use for code generation.
  const ArenaVector<HBasicBlock*>* block_order_;

  DisassemblyInformation* disasm_info_;

 private:
  class CodeGenerationData;

  void InitializeCodeGenerationData();
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment,
                       SlowPathCode* slow_path,
                       bool needs_vreg_info = true);
  void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path);

  OptimizingCompilerStats* stats_;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  // The current slow-path that we're generating code for.
  SlowPathCode* current_slow_path_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  // TODO: Rename: this actually indicates that some instruction in the method
  // needs the environment including a valid stack frame.
  bool requires_current_method_;

  // The CodeGenerationData contains a ScopedArenaAllocator intended for reusing the
  // ArenaStack memory allocated in previous passes instead of adding to the memory
  // held by the ArenaAllocator. This ScopedArenaAllocator is created in
  // CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed.
  std::unique_ptr<CodeGenerationData> code_generation_data_;

  friend class OptimizingCFITest;
  ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD);
  ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD);

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    PointerSize pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return static_cast<size_t>(pointer_size_) + index * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const PointerSize pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};
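
// Worked example for GetStackOffsetOf (illustrative only): on a 64-bit target pointer_size_ is
// 8 bytes and kVRegSize is 4 bytes, so index 0 maps to offset 8, index 1 to offset 12, and so
// on; the first 8 bytes remain reserved for the ArtMethod* passed to the callee.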

/**
 * A templated class SlowPathGenerator with a templated method NewSlowPath()
 * that can be used by any code generator to share equivalent slow-paths with
 * the objective of reducing generated code size.
 *
 * InstructionType:  instruction that requires SlowPathCodeType
 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType*)
 */
template <typename InstructionType>
class SlowPathGenerator {
  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
                "InstructionType is not a subclass of art::HInstruction");

 public:
  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
      : graph_(graph),
        codegen_(codegen),
        slow_path_map_(std::less<uint32_t>(),
                       graph->GetAllocator()->Adapter(kArenaAllocSlowPaths)) {}

  // Creates and adds a new slow-path if needed, or returns an existing one otherwise.
  // Templating the method (rather than the whole class) on the slow-path type enables
  // keeping this code at a generic, non architecture-specific place.
  //
  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
  //       or template the class as a whole on SlowPathType.
  template <typename SlowPathCodeType>
  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
                  "SlowPathCodeType is not constructible from InstructionType*");
    // Iterate over potential candidates for sharing. Currently, only same-typed
    // slow-paths with exactly the same dex-pc are viable candidates.
    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
    const uint32_t dex_pc = instruction->GetDexPc();
    auto iter = slow_path_map_.find(dex_pc);
    if (iter != slow_path_map_.end()) {
      const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second;
      for (const auto& it : candidates) {
        InstructionType* other_instruction = it.first;
        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
        // Determine if the instructions allow for slow-path sharing.
        if (HaveSameLiveRegisters(instruction, other_instruction) &&
            HaveSameStackMap(instruction, other_instruction)) {
          // Can share: reuse existing one.
          return other_slow_path;
        }
      }
    } else {
      // First time this dex-pc is seen.
      iter = slow_path_map_.Put(dex_pc,
                                {{}, {graph_->GetAllocator()->Adapter(kArenaAllocSlowPaths)}});
    }
    // Cannot share: create and add new slow-path for this particular dex-pc.
    SlowPathCodeType* slow_path =
        new (codegen_->GetScopedAllocator()) SlowPathCodeType(instruction);
    iter->second.emplace_back(std::make_pair(instruction, slow_path));
    codegen_->AddSlowPath(slow_path);
    return slow_path;
  }

 private:
  // Tests if both instructions have same set of live physical registers. This ensures
  // the slow-path has exactly the same preamble on saving these registers to stack.
  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
    return (((live1->GetCoreRegisters() & core_spill) ==
             (live2->GetCoreRegisters() & core_spill)) &&
            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
             (live2->GetFloatingPointRegisters() & fpu_spill)));
  }

  // Tests if both instructions have the same stack map. This ensures the interpreter
  // will find exactly the same dex-registers at the same entries.
  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
    DCHECK(i1->HasEnvironment());
    DCHECK(i2->HasEnvironment());
    // We conservatively test if the two instructions find exactly the same instructions
    // and location in each dex-register. This guarantees they will have the same stack map.
    HEnvironment* e1 = i1->GetEnvironment();
    HEnvironment* e2 = i2->GetEnvironment();
    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
      return false;
    }
    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
        return false;
      }
    }
    return true;
  }

  HGraph* const graph_;
  CodeGenerator* const codegen_;

  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
};
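
// Illustrative sketch (hypothetical names, not part of this header): a backend that wants to
// share deoptimization slow paths holds a SlowPathGenerator<HDeoptimize> and requests paths
// through NewSlowPath(), which either reuses a compatible slow path at the same dex_pc or
// allocates and registers a new one:
//
//   SlowPathGenerator<HDeoptimize> deopt_paths(graph, codegen);
//   DeoptSlowPathSketch* path = deopt_paths.NewSlowPath<DeoptSlowPathSketch>(deoptimize);
//   // ... emit a backend-specific branch to path->GetEntryLabel() from the fast path.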

class InstructionCodeGenerator : public HGraphVisitor {
 public:
  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
      : HGraphVisitor(graph),
        deopt_slow_paths_(graph, codegen) {}

 protected:
  // Add slow-path generator for each instruction/slow-path combination that desires sharing.
  // TODO: under current regime, only deopt sharing makes sense; extend later.
  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_