/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
#include "base/bit_utils.h"
#include "base/enums.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "read_barrier_option.h"
#include "stack_map_stream.h"
#include "string_reference.h"
#include "utils/label.h"
#include "utils/type_reference.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

static constexpr ReadBarrierOption kCompilerReadBarrierOption =
    kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;

class Assembler;
class CodeGenerator;
class CompilerDriver;
class CompilerOptions;
class LinkerPatch;
class ParallelMoveResolver;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};
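
// Illustration only (not part of the original header): a minimal CodeAllocator
// might simply hand out memory from a growable byte buffer. The class name
// below is hypothetical.
//
//   class VectorCodeAllocator FINAL : public CodeAllocator {
//    public:
//     uint8_t* Allocate(size_t size) OVERRIDE {
//       memory_.resize(size);
//       return memory_.data();
//     }
//
//    private:
//     std::vector<uint8_t> memory_;
//   };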

class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
 public:
  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  // Save live core and floating-point caller-save registers and
  // update the stack mask in `locations` for registers holding object
  // references.
  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  // Restore live core and floating-point caller-save registers.
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

  Label* GetEntryLabel() { return &entry_label_; }
  Label* GetExitLabel() { return &exit_label_; }

  HInstruction* GetInstruction() const {
    return instruction_;
  }

  uint32_t GetDexPc() const {
    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
  }

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  // The instruction where this slow path is happening.
  HInstruction* instruction_;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  Label entry_label_;
  Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
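
// Typical backend usage of SlowPathCode, for illustration only (the slow-path
// class and assembler mnemonics below are placeholders): allocate a concrete
// slow path in the arena, register it with the code generator, branch to its
// entry label on the uncommon condition, and bind its exit label where the
// fast path resumes.
//
//   SlowPathCode* slow_path = new (GetGraph()->GetArena()) MySlowPathX86(instruction);
//   codegen->AddSlowPath(slow_path);
//   __ j(kEqual, slow_path->GetEntryLabel());  // Jump to out-of-line code.
//   __ Bind(slow_path->GetExitLabel());        // Fast path continues here.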

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

class FieldAccessCallingConvention {
 public:
  virtual Location GetObjectLocation() const = 0;
  virtual Location GetFieldIndexLocation() const = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
  virtual Location GetFpuLocation(Primitive::Type type) const = 0;
  virtual ~FieldAccessCallingConvention() {}

 protected:
  FieldAccessCallingConvention() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
};

class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
 public:
  // Compiles the graph to executable instructions.
  void Compile(CodeAllocator* allocator);
  static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
                                               InstructionSet instruction_set,
                                               const InstructionSetFeatures& isa_features,
                                               const CompilerOptions& compiler_options,
                                               OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGenerator() {}

  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()))  // Art method
        + parameter->GetIndex() * kVRegSize;
  }

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void MoveConstant(Location destination, int32_t value) = 0;
  virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;

  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_safepoint_spill_size,
                                size_t number_of_out_slots,
                                const ArenaVector<HBasicBlock*>& block_order);
  // Backends can override this as necessary. For most, no special alignment is required.
  virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters() const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
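
  // Example, for illustration: for registers {0, 1, 5}, ComputeRegisterMask
  // returns 0b100011 (0x23), i.e. one bit set per register number.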

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;

  // Saves the register in the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  size_t GetNumberOfCoreCalleeSaveRegisters() const {
    return POPCOUNT(core_callee_save_mask_);
  }

  size_t GetNumberOfCoreCallerSaveRegisters() const {
    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
  }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    DCHECK(locations->OnlyCallsOnSlowPath() ||
           (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
            !locations->HasCustomSlowPathCallingConvention()));
    uint32_t live_registers = core_registers
        ? locations->GetLiveRegisters()->GetCoreRegisters()
        : locations->GetLiveRegisters()->GetFloatingPointRegisters();
    if (locations->HasCustomSlowPathCallingConvention()) {
      // Save only the live registers that the custom calling convention wants us to save.
      uint32_t caller_saves = core_registers
          ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
          : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
      return live_registers & caller_saves;
    } else {
      // Default ABI, we need to spill non-callee-save live registers.
      uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
      return live_registers & ~callee_saves;
    }
  }

  size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    return POPCOUNT(GetSlowPathSpills(locations, core_registers));
  }

  size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
    DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
    DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
    return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
  }

  // Record native to dex mapping for a suspend point. Required by runtime.
  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  // Check whether we have already recorded mapping at this PC.
  bool HasStackMapAtCurrentPc();
  // Record extra stack maps if we support native debugging.
  void MaybeRecordNativeDebugInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path = nullptr);

  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);
  LocationSummary* CreateThrowingSlowPathLocations(
      HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
  void GenerateNullCheck(HNullCheck* null_check);
  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;

  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();

  // TODO: Avoid creating the `std::unique_ptr` here.
  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
  }

  void BuildStackMaps(MemoryRegion stack_map_region,
                      MemoryRegion method_info_region,
                      const DexFile::CodeItem& code_item);
  void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size);
  size_t GetNumberOfJitRoots() const {
    return jit_string_roots_.size() + jit_class_roots_.size();
  }

  // Fills the `literals` array with literals collected during code generation.
  // Also emits literal patches.
  void EmitJitRoots(uint8_t* code,
                    Handle<mirror::ObjectArray<mirror::Object>> roots,
                    const uint8_t* roots_data)
      REQUIRES_SHARED(Locks::mutator_lock_);

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
  bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  // Helper that returns the offset of the array's length field.
  // Note: Besides the normal arrays, we also use the HArrayLength for
  // accessing the String's `count` field in String intrinsics.
  static uint32_t GetArrayLengthOffset(HArrayLength* array_length);

  // Helper that returns the offset of the array's data.
  // Note: Besides the normal arrays, we also use the HArrayGet for
  // accessing the String's `value` field in String intrinsics.
  static uint32_t GetArrayDataOffset(HArrayGet* array_get);

  // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`.
  template <PointerSize pointer_size>
  static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
    // The entry point list defines 30 ReadBarrierMarkRegX entry points.
    DCHECK_LT(reg, 30u);
    // The ReadBarrierMarkRegX entry points are ordered by increasing
    // register number in Thread::tlsPtr_.quick_entrypoints.
    return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value()
        + static_cast<size_t>(pointer_size) * reg;
  }
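
  // Worked example, for illustration: with pointer_size == PointerSize::k64
  // (8-byte entry points) and reg == 3, the returned offset is the offset of
  // pReadBarrierMarkReg00 plus 8 * 3, i.e. three pointer-sized slots further
  // into the quick entry point table.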

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }

  // Performs checks pertaining to an InvokeRuntime call.
  void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             SlowPathCode* slow_path);

  // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
  static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
                                                          SlowPathCode* slow_path);

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is either actually having a size of zero,
  // or just containing the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

  void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke);

  void CreateUnresolvedFieldLocationSummary(
      HInstruction* field_access,
      Primitive::Type field_type,
      const FieldAccessCallingConvention& calling_convention);

  void GenerateUnresolvedFieldAccess(
      HInstruction* field_access,
      Primitive::Type field_type,
      uint32_t field_index,
      uint32_t dex_pc,
      const FieldAccessCallingConvention& calling_convention);

  static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
                                                        Location runtime_type_index_location,
                                                        Location runtime_return_location);
  void GenerateLoadClassRuntimeCall(HLoadClass* cls);

  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             uint32_t dex_pc,
                             SlowPathCode* slow_path = nullptr) = 0;

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) = 0;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) = 0;

  static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
    switch (load->GetLoadKind()) {
      case HLoadString::LoadKind::kBssEntry:
        DCHECK(load->NeedsEnvironment());
        return LocationSummary::kCallOnSlowPath;
      case HLoadString::LoadKind::kDexCacheViaMethod:
        DCHECK(load->NeedsEnvironment());
        return LocationSummary::kCallOnMainOnly;
      case HLoadString::LoadKind::kJitTableAddress:
        DCHECK(!load->NeedsEnvironment());
        return kEmitCompilerReadBarrier
            ? LocationSummary::kCallOnSlowPath
            : LocationSummary::kNoCall;
      default:
        DCHECK(!load->NeedsEnvironment());
        return LocationSummary::kNoCall;
    }
  }

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      HInvokeStaticOrDirect* invoke) = 0;

  // Generate a call to a static or direct method.
  virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
  // Generate a call to a virtual method.
  virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;

  // Copy the result of a call into the given target.
  virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;

  virtual void GenerateNop() = 0;

  uint32_t GetReferenceSlowFlagOffset() const;
  uint32_t GetReferenceDisableFlagOffset() const;

  static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass);

 protected:
  // Patch info used for recording locations of required linker patches and their targets,
  // i.e. target method, string, type or code identified by their dex file and index.
  template <typename LabelType>
  struct PatchInfo {
    PatchInfo(const DexFile& target_dex_file, uint32_t target_index)
        : dex_file(target_dex_file), index(target_index) { }

    const DexFile& dex_file;
    uint32_t index;
    LabelType label;
  };

  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        allocated_registers_(RegisterSet::Empty()),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
                                                                    kArenaAllocCodeGenerator)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
                                                                   kArenaAllocCodeGenerator)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena(), graph->GetInstructionSet()),
        block_order_(nullptr),
        jit_string_roots_(StringReferenceValueComparator(),
                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        jit_class_roots_(TypeReferenceValueComparator(),
                         graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        disasm_info_(nullptr),
        stats_(stats),
        graph_(graph),
        compiler_options_(compiler_options),
        slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        current_slow_path_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {
    slow_paths_.reserve(8);
  }

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  virtual bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
        || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize these methods
  // to share the logic.

  template <typename LabelType>
  LabelType* CommonInitializeLabels() {
    // We use raw array allocations instead of ArenaVector<> because Labels are
    // non-constructible and non-movable and as such cannot be held in a vector.
    size_t size = GetGraph()->GetBlocks().size();
    LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
                                                                      kArenaAllocCodeGenerator);
    for (size_t i = 0; i != size; ++i) {
      new(labels + i) LabelType();
    }
    return labels;
  }

  template <typename LabelType>
  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  SlowPathCode* GetCurrentSlowPath() {
    return current_slow_path_;
  }

  // Emit the patches associated with JIT roots. Only applies to JIT compiled code.
  virtual void EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED,
                                  const uint8_t* roots_data ATTRIBUTE_UNUSED) {
    DCHECK_EQ(jit_string_roots_.size(), 0u);
    DCHECK_EQ(jit_class_roots_.size(), 0u);
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  StackMapStream stack_map_stream_;

  // The order to use for code generation.
  const ArenaVector<HBasicBlock*>* block_order_;

  // Maps a StringReference (dex_file, string_index) to the index in the literal table.
  // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
  // will compute all the indices.
  ArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_;

  // Maps a ClassReference (dex_file, type_index) to the index in the literal table.
  // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
  // will compute all the indices.
  ArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_;

  DisassemblyInformation* disasm_info_;

 private:
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);

  OptimizingCompilerStats* stats_;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;

  // The current slow-path that we're generating code for.
  SlowPathCode* current_slow_path_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  // TODO: Rename: this actually indicates that some instruction in the method
  // needs the environment including a valid stack frame.
  bool requires_current_method_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    PointerSize pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return static_cast<size_t>(pointer_size_) + index * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const PointerSize pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};
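
// Example for CallingConvention::GetStackOffsetOf, for illustration (assumes a
// 4-byte kVRegSize): with a 64-bit method pointer (pointer_size_ ==
// PointerSize::k64), the parameter at index 2 lives at stack offset
// 8 + 2 * 4 = 16.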

/**
 * A templated class SlowPathGenerator with a templated method NewSlowPath()
 * that can be used by any code generator to share equivalent slow-paths with
 * the objective of reducing generated code size.
 *
 * InstructionType:  instruction that requires SlowPathCodeType
 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType*)
 */
template <typename InstructionType>
class SlowPathGenerator {
  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
                "InstructionType is not a subclass of art::HInstruction");

 public:
  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
      : graph_(graph),
        codegen_(codegen),
        slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}

  // Creates and adds a new slow-path, if needed, or returns an existing one otherwise.
  // Templating the method (rather than the whole class) on the slow-path type enables
  // keeping this code at a generic, non architecture-specific place.
  //
  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
  //       or template the class as a whole on SlowPathType.
  template <typename SlowPathCodeType>
  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
                  "SlowPathCodeType is not constructible from InstructionType*");
    // Iterate over potential candidates for sharing. Currently, only same-typed
    // slow-paths with exactly the same dex-pc are viable candidates.
    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
    const uint32_t dex_pc = instruction->GetDexPc();
    auto iter = slow_path_map_.find(dex_pc);
    if (iter != slow_path_map_.end()) {
      auto candidates = iter->second;
      for (const auto& it : candidates) {
        InstructionType* other_instruction = it.first;
        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
        // Determine if the instructions allow for slow-path sharing.
        if (HaveSameLiveRegisters(instruction, other_instruction) &&
            HaveSameStackMap(instruction, other_instruction)) {
          // Can share: reuse existing one.
          return other_slow_path;
        }
      }
    } else {
      // First time this dex-pc is seen.
      iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
    }
    // Cannot share: create and add new slow-path for this particular dex-pc.
    SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
    iter->second.emplace_back(std::make_pair(instruction, slow_path));
    codegen_->AddSlowPath(slow_path);
    return slow_path;
  }

 private:
  // Tests if both instructions have the same set of live physical registers. This ensures
  // the slow-path has exactly the same preamble for saving these registers to the stack.
  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
    return (((live1->GetCoreRegisters() & core_spill) ==
             (live2->GetCoreRegisters() & core_spill)) &&
            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
             (live2->GetFloatingPointRegisters() & fpu_spill)));
  }

  // Tests if both instructions have the same stack map. This ensures the interpreter
  // will find exactly the same dex-registers at the same entries.
  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
    DCHECK(i1->HasEnvironment());
    DCHECK(i2->HasEnvironment());
    // We conservatively test if the two instructions find exactly the same instructions
    // and location in each dex-register. This guarantees they will have the same stack map.
    HEnvironment* e1 = i1->GetEnvironment();
    HEnvironment* e2 = i2->GetEnvironment();
    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
      return false;
    }
    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
        return false;
      }
    }
    return true;
  }

  HGraph* const graph_;
  CodeGenerator* const codegen_;

  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
};
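
// Illustrative use of NewSlowPath(); the backend and slow-path class names
// below are hypothetical. An instruction visitor asks the generator for a
// slow path via the `deopt_slow_paths_` member declared in
// InstructionCodeGenerator below, and an existing slow path is reused when
// the live registers and stack map match:
//
//   void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deopt) {
//     SlowPathCode* slow_path =
//         deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deopt);
//     // ... emit a conditional branch to slow_path->GetEntryLabel() ...
//   }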

class InstructionCodeGenerator : public HGraphVisitor {
 public:
  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
      : HGraphVisitor(graph),
        deopt_slow_paths_(graph, codegen) {}

 protected:
  // Add slow-path generator for each instruction/slow-path combination that desires sharing.
  // TODO: under the current regime, only deopt sharing makes sense; extend later.
  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_