1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ 18 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ 19 20 #include "arch/x86_64/instruction_set_features_x86_64.h" 21 #include "code_generator.h" 22 #include "driver/compiler_options.h" 23 #include "nodes.h" 24 #include "parallel_move_resolver.h" 25 #include "utils/x86_64/assembler_x86_64.h" 26 27 namespace art { 28 namespace x86_64 { 29 30 // Use a local definition to prevent copying mistakes. 31 static constexpr size_t kX86_64WordSize = static_cast<size_t>(kX86_64PointerSize); 32 33 // Some x86_64 instructions require a register to be available as temp. 34 static constexpr Register TMP = R11; 35 36 static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 }; 37 static constexpr FloatRegister kParameterFloatRegisters[] = 38 { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 }; 39 40 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); 41 static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters); 42 43 static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX }; 44 static constexpr size_t kRuntimeParameterCoreRegistersLength = 45 arraysize(kRuntimeParameterCoreRegisters); 46 static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 }; 47 static constexpr size_t kRuntimeParameterFpuRegistersLength = 48 arraysize(kRuntimeParameterFpuRegisters); 49 50 // These XMM registers are non-volatile in ART ABI, but volatile in native ABI. 51 // If the ART ABI changes, this list must be updated. It is used to ensure that 52 // these are not clobbered by any direct call to native code (such as math intrinsics). 53 static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 }; 54 55 56 class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> { 57 public: InvokeRuntimeCallingConvention()58 InvokeRuntimeCallingConvention() 59 : CallingConvention(kRuntimeParameterCoreRegisters, 60 kRuntimeParameterCoreRegistersLength, 61 kRuntimeParameterFpuRegisters, 62 kRuntimeParameterFpuRegistersLength, 63 kX86_64PointerSize) {} 64 65 private: 66 DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); 67 }; 68 69 class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> { 70 public: InvokeDexCallingConvention()71 InvokeDexCallingConvention() : CallingConvention( 72 kParameterCoreRegisters, 73 kParameterCoreRegistersLength, 74 kParameterFloatRegisters, 75 kParameterFloatRegistersLength, 76 kX86_64PointerSize) {} 77 78 private: 79 DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); 80 }; 81 82 class CriticalNativeCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor { 83 public: CriticalNativeCallingConventionVisitorX86_64(bool for_register_allocation)84 explicit CriticalNativeCallingConventionVisitorX86_64(bool for_register_allocation) 85 : for_register_allocation_(for_register_allocation) {} 86 ~CriticalNativeCallingConventionVisitorX86_64()87 virtual ~CriticalNativeCallingConventionVisitorX86_64() {} 88 89 Location GetNextLocation(DataType::Type type) override; 90 Location GetReturnLocation(DataType::Type type) const override; 91 Location GetMethodLocation() const override; 92 GetStackOffset()93 size_t GetStackOffset() const { return stack_offset_; } 94 95 private: 96 // Register allocator does not support adjusting frame size, so we cannot provide final locations 97 // of stack arguments for register allocation. We ask the register allocator for any location and 98 // move these arguments to the right place after adjusting the SP when generating the call. 99 const bool for_register_allocation_; 100 size_t gpr_index_ = 0u; 101 size_t fpr_index_ = 0u; 102 size_t stack_offset_ = 0u; 103 104 DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86_64); 105 }; 106 107 class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention { 108 public: FieldAccessCallingConventionX86_64()109 FieldAccessCallingConventionX86_64() {} 110 GetObjectLocation()111 Location GetObjectLocation() const override { 112 return Location::RegisterLocation(RSI); 113 } GetFieldIndexLocation()114 Location GetFieldIndexLocation() const override { 115 return Location::RegisterLocation(RDI); 116 } GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED)117 Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { 118 return Location::RegisterLocation(RAX); 119 } GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED,bool is_instance)120 Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, bool is_instance) 121 const override { 122 return is_instance 123 ? Location::RegisterLocation(RDX) 124 : Location::RegisterLocation(RSI); 125 } GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED)126 Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { 127 return Location::FpuRegisterLocation(XMM0); 128 } 129 130 private: 131 DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86_64); 132 }; 133 134 135 class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor { 136 public: InvokeDexCallingConventionVisitorX86_64()137 InvokeDexCallingConventionVisitorX86_64() {} ~InvokeDexCallingConventionVisitorX86_64()138 virtual ~InvokeDexCallingConventionVisitorX86_64() {} 139 140 Location GetNextLocation(DataType::Type type) override; 141 Location GetReturnLocation(DataType::Type type) const override; 142 Location GetMethodLocation() const override; 143 144 private: 145 InvokeDexCallingConvention calling_convention; 146 147 DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64); 148 }; 149 150 class CodeGeneratorX86_64; 151 152 class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap { 153 public: ParallelMoveResolverX86_64(ArenaAllocator * allocator,CodeGeneratorX86_64 * codegen)154 ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen) 155 : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} 156 157 void EmitMove(size_t index) override; 158 void EmitSwap(size_t index) override; 159 void SpillScratch(int reg) override; 160 void RestoreScratch(int reg) override; 161 162 X86_64Assembler* GetAssembler() const; 163 164 private: 165 void Exchange32(CpuRegister reg, int mem); 166 void Exchange32(XmmRegister reg, int mem); 167 void Exchange64(CpuRegister reg1, CpuRegister reg2); 168 void Exchange64(CpuRegister reg, int mem); 169 void Exchange64(XmmRegister reg, int mem); 170 void Exchange128(XmmRegister reg, int mem); 171 void ExchangeMemory32(int mem1, int mem2); 172 void ExchangeMemory64(int mem1, int mem2, int num_of_qwords); 173 174 CodeGeneratorX86_64* const codegen_; 175 176 DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64); 177 }; 178 179 class LocationsBuilderX86_64 : public HGraphVisitor { 180 public: LocationsBuilderX86_64(HGraph * graph,CodeGeneratorX86_64 * codegen)181 LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen) 182 : HGraphVisitor(graph), codegen_(codegen) {} 183 184 #define DECLARE_VISIT_INSTRUCTION(name, super) \ 185 void Visit##name(H##name* instr) override; 186 187 FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)188 FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) 189 FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION) 190 191 #undef DECLARE_VISIT_INSTRUCTION 192 193 void VisitInstruction(HInstruction* instruction) override { 194 LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() 195 << " (id " << instruction->GetId() << ")"; 196 } 197 198 private: 199 void HandleInvoke(HInvoke* invoke); 200 void HandleBitwiseOperation(HBinaryOperation* operation); 201 void HandleCondition(HCondition* condition); 202 void HandleShift(HBinaryOperation* operation); 203 void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); 204 void HandleFieldGet(HInstruction* instruction); 205 bool CpuHasAvxFeatureFlag(); 206 bool CpuHasAvx2FeatureFlag(); 207 208 CodeGeneratorX86_64* const codegen_; 209 InvokeDexCallingConventionVisitorX86_64 parameter_visitor_; 210 211 DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64); 212 }; 213 214 class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { 215 public: 216 InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen); 217 218 #define DECLARE_VISIT_INSTRUCTION(name, super) \ 219 void Visit##name(H##name* instr) override; 220 221 FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)222 FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) 223 FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION) 224 225 #undef DECLARE_VISIT_INSTRUCTION 226 227 void VisitInstruction(HInstruction* instruction) override { 228 LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() 229 << " (id " << instruction->GetId() << ")"; 230 } 231 GetAssembler()232 X86_64Assembler* GetAssembler() const { return assembler_; } 233 234 private: 235 // Generate code for the given suspend check. If not null, `successor` 236 // is the block to branch to if the suspend check is not needed, and after 237 // the suspend call. 238 void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); 239 void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg); 240 void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp); 241 void HandleBitwiseOperation(HBinaryOperation* operation); 242 void GenerateRemFP(HRem* rem); 243 void DivRemOneOrMinusOne(HBinaryOperation* instruction); 244 void DivByPowerOfTwo(HDiv* instruction); 245 void RemByPowerOfTwo(HRem* instruction); 246 void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); 247 void GenerateDivRemIntegral(HBinaryOperation* instruction); 248 void HandleCondition(HCondition* condition); 249 void HandleShift(HBinaryOperation* operation); 250 251 void HandleFieldSet(HInstruction* instruction, 252 const FieldInfo& field_info, 253 bool value_can_be_null); 254 void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); 255 256 void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); 257 void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); 258 void GenerateMinMax(HBinaryOperation* minmax, bool is_min); 259 260 // Generate a heap reference load using one register `out`: 261 // 262 // out <- *(out + offset) 263 // 264 // while honoring heap poisoning and/or read barriers (if any). 265 // 266 // Location `maybe_temp` is used when generating a read barrier and 267 // shall be a register in that case; it may be an invalid location 268 // otherwise. 269 void GenerateReferenceLoadOneRegister(HInstruction* instruction, 270 Location out, 271 uint32_t offset, 272 Location maybe_temp, 273 ReadBarrierOption read_barrier_option); 274 // Generate a heap reference load using two different registers 275 // `out` and `obj`: 276 // 277 // out <- *(obj + offset) 278 // 279 // while honoring heap poisoning and/or read barriers (if any). 280 // 281 // Location `maybe_temp` is used when generating a Baker's (fast 282 // path) read barrier and shall be a register in that case; it may 283 // be an invalid location otherwise. 284 void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, 285 Location out, 286 Location obj, 287 uint32_t offset, 288 ReadBarrierOption read_barrier_option); 289 // Generate a GC root reference load: 290 // 291 // root <- *address 292 // 293 // while honoring read barriers based on read_barrier_option. 294 void GenerateGcRootFieldLoad(HInstruction* instruction, 295 Location root, 296 const Address& address, 297 Label* fixup_label, 298 ReadBarrierOption read_barrier_option); 299 300 void PushOntoFPStack(Location source, uint32_t temp_offset, 301 uint32_t stack_adjustment, bool is_float); 302 void GenerateCompareTest(HCondition* condition); 303 template<class LabelType> 304 void GenerateTestAndBranch(HInstruction* instruction, 305 size_t condition_input_index, 306 LabelType* true_target, 307 LabelType* false_target); 308 template<class LabelType> 309 void GenerateCompareTestAndBranch(HCondition* condition, 310 LabelType* true_target, 311 LabelType* false_target); 312 template<class LabelType> 313 void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label); 314 315 void HandleGoto(HInstruction* got, HBasicBlock* successor); 316 317 bool CpuHasAvxFeatureFlag(); 318 bool CpuHasAvx2FeatureFlag(); 319 320 X86_64Assembler* const assembler_; 321 CodeGeneratorX86_64* const codegen_; 322 323 DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64); 324 }; 325 326 // Class for fixups to jump tables. 327 class JumpTableRIPFixup; 328 329 class CodeGeneratorX86_64 : public CodeGenerator { 330 public: 331 CodeGeneratorX86_64(HGraph* graph, 332 const CompilerOptions& compiler_options, 333 OptimizingCompilerStats* stats = nullptr); ~CodeGeneratorX86_64()334 virtual ~CodeGeneratorX86_64() {} 335 336 void GenerateFrameEntry() override; 337 void GenerateFrameExit() override; 338 void Bind(HBasicBlock* block) override; 339 void MoveConstant(Location destination, int32_t value) override; 340 void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; 341 void AddLocationAsTemp(Location location, LocationSummary* locations) override; 342 343 size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; 344 size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; 345 size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; 346 size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; 347 348 // Generate code to invoke a runtime entry point. 349 void InvokeRuntime(QuickEntrypointEnum entrypoint, 350 HInstruction* instruction, 351 uint32_t dex_pc, 352 SlowPathCode* slow_path = nullptr) override; 353 354 // Generate code to invoke a runtime entry point, but do not record 355 // PC-related information in a stack map. 356 void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, 357 HInstruction* instruction, 358 SlowPathCode* slow_path); 359 360 void GenerateInvokeRuntime(int32_t entry_point_offset); 361 GetWordSize()362 size_t GetWordSize() const override { 363 return kX86_64WordSize; 364 } 365 GetSlowPathFPWidth()366 size_t GetSlowPathFPWidth() const override { 367 return GetGraph()->HasSIMD() 368 ? GetSIMDRegisterWidth() 369 : 1 * kX86_64WordSize; // 8 bytes == 1 x86_64 words for each spill 370 } 371 GetCalleePreservedFPWidth()372 size_t GetCalleePreservedFPWidth() const override { 373 return 1 * kX86_64WordSize; 374 } 375 GetSIMDRegisterWidth()376 size_t GetSIMDRegisterWidth() const override { 377 return 2 * kX86_64WordSize; 378 } 379 GetLocationBuilder()380 HGraphVisitor* GetLocationBuilder() override { 381 return &location_builder_; 382 } 383 GetInstructionVisitor()384 HGraphVisitor* GetInstructionVisitor() override { 385 return &instruction_visitor_; 386 } 387 GetAssembler()388 X86_64Assembler* GetAssembler() override { 389 return &assembler_; 390 } 391 GetAssembler()392 const X86_64Assembler& GetAssembler() const override { 393 return assembler_; 394 } 395 GetMoveResolver()396 ParallelMoveResolverX86_64* GetMoveResolver() override { 397 return &move_resolver_; 398 } 399 GetAddressOf(HBasicBlock * block)400 uintptr_t GetAddressOf(HBasicBlock* block) override { 401 return GetLabelOf(block)->Position(); 402 } 403 404 void SetupBlockedRegisters() const override; 405 void DumpCoreRegister(std::ostream& stream, int reg) const override; 406 void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; 407 void Finalize(CodeAllocator* allocator) override; 408 GetInstructionSet()409 InstructionSet GetInstructionSet() const override { 410 return InstructionSet::kX86_64; 411 } 412 413 const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const; 414 415 // Emit a write barrier. 416 void MarkGCCard(CpuRegister temp, 417 CpuRegister card, 418 CpuRegister object, 419 CpuRegister value, 420 bool value_can_be_null); 421 422 void GenerateMemoryBarrier(MemBarrierKind kind); 423 424 // Helper method to move a value between two locations. 425 void Move(Location destination, Location source); 426 GetLabelOf(HBasicBlock * block)427 Label* GetLabelOf(HBasicBlock* block) const { 428 return CommonGetLabelOf<Label>(block_labels_, block); 429 } 430 Initialize()431 void Initialize() override { 432 block_labels_ = CommonInitializeLabels<Label>(); 433 } 434 NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED)435 bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { 436 return false; 437 } 438 439 // Check if the desired_string_load_kind is supported. If it is, return it, 440 // otherwise return a fall-back kind that should be used instead. 441 HLoadString::LoadKind GetSupportedLoadStringKind( 442 HLoadString::LoadKind desired_string_load_kind) override; 443 444 // Check if the desired_class_load_kind is supported. If it is, return it, 445 // otherwise return a fall-back kind that should be used instead. 446 HLoadClass::LoadKind GetSupportedLoadClassKind( 447 HLoadClass::LoadKind desired_class_load_kind) override; 448 449 // Check if the desired_dispatch_info is supported. If it is, return it, 450 // otherwise return a fall-back info that should be used instead. 451 HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( 452 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, 453 ArtMethod* method) override; 454 455 void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke); 456 void GenerateStaticOrDirectCall( 457 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; 458 void GenerateVirtualCall( 459 HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; 460 461 void RecordBootImageIntrinsicPatch(uint32_t intrinsic_data); 462 void RecordBootImageRelRoPatch(uint32_t boot_image_offset); 463 void RecordBootImageMethodPatch(HInvoke* invoke); 464 void RecordMethodBssEntryPatch(HInvoke* invoke); 465 void RecordBootImageTypePatch(HLoadClass* load_class); 466 Label* NewTypeBssEntryPatch(HLoadClass* load_class); 467 void RecordBootImageStringPatch(HLoadString* load_string); 468 Label* NewStringBssEntryPatch(HLoadString* load_string); 469 void RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke); 470 Label* NewJitRootStringPatch(const DexFile& dex_file, 471 dex::StringIndex string_index, 472 Handle<mirror::String> handle); 473 Label* NewJitRootClassPatch(const DexFile& dex_file, 474 dex::TypeIndex type_index, 475 Handle<mirror::Class> handle); 476 477 void LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference); 478 void LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke); 479 480 void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; 481 482 void PatchJitRootUse(uint8_t* code, 483 const uint8_t* roots_data, 484 const PatchInfo<Label>& info, 485 uint64_t index_in_table) const; 486 487 void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; 488 489 // Fast path implementation of ReadBarrier::Barrier for a heap 490 // reference field load when Baker's read barriers are used. 491 void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, 492 Location ref, 493 CpuRegister obj, 494 uint32_t offset, 495 bool needs_null_check); 496 // Fast path implementation of ReadBarrier::Barrier for a heap 497 // reference array load when Baker's read barriers are used. 498 void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, 499 Location ref, 500 CpuRegister obj, 501 uint32_t data_offset, 502 Location index, 503 bool needs_null_check); 504 // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, 505 // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. 506 // 507 // Load the object reference located at address `src`, held by 508 // object `obj`, into `ref`, and mark it if needed. The base of 509 // address `src` must be `obj`. 510 // 511 // If `always_update_field` is true, the value of the reference is 512 // atomically updated in the holder (`obj`). This operation 513 // requires two temporary registers, which must be provided as 514 // non-null pointers (`temp1` and `temp2`). 515 void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, 516 Location ref, 517 CpuRegister obj, 518 const Address& src, 519 bool needs_null_check, 520 bool always_update_field = false, 521 CpuRegister* temp1 = nullptr, 522 CpuRegister* temp2 = nullptr); 523 524 // Generate a read barrier for a heap reference within `instruction` 525 // using a slow path. 526 // 527 // A read barrier for an object reference read from the heap is 528 // implemented as a call to the artReadBarrierSlow runtime entry 529 // point, which is passed the values in locations `ref`, `obj`, and 530 // `offset`: 531 // 532 // mirror::Object* artReadBarrierSlow(mirror::Object* ref, 533 // mirror::Object* obj, 534 // uint32_t offset); 535 // 536 // The `out` location contains the value returned by 537 // artReadBarrierSlow. 538 // 539 // When `index` provided (i.e., when it is different from 540 // Location::NoLocation()), the offset value passed to 541 // artReadBarrierSlow is adjusted to take `index` into account. 542 void GenerateReadBarrierSlow(HInstruction* instruction, 543 Location out, 544 Location ref, 545 Location obj, 546 uint32_t offset, 547 Location index = Location::NoLocation()); 548 549 // If read barriers are enabled, generate a read barrier for a heap 550 // reference using a slow path. If heap poisoning is enabled, also 551 // unpoison the reference in `out`. 552 void MaybeGenerateReadBarrierSlow(HInstruction* instruction, 553 Location out, 554 Location ref, 555 Location obj, 556 uint32_t offset, 557 Location index = Location::NoLocation()); 558 559 // Generate a read barrier for a GC root within `instruction` using 560 // a slow path. 561 // 562 // A read barrier for an object reference GC root is implemented as 563 // a call to the artReadBarrierForRootSlow runtime entry point, 564 // which is passed the value in location `root`: 565 // 566 // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); 567 // 568 // The `out` location contains the value returned by 569 // artReadBarrierForRootSlow. 570 void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); 571 ConstantAreaStart()572 int ConstantAreaStart() const { 573 return constant_area_start_; 574 } 575 576 Address LiteralDoubleAddress(double v); 577 Address LiteralFloatAddress(float v); 578 Address LiteralInt32Address(int32_t v); 579 Address LiteralInt64Address(int64_t v); 580 581 // Load a 32/64-bit value into a register in the most efficient manner. 582 void Load32BitValue(CpuRegister dest, int32_t value); 583 void Load64BitValue(CpuRegister dest, int64_t value); 584 void Load32BitValue(XmmRegister dest, int32_t value); 585 void Load64BitValue(XmmRegister dest, int64_t value); 586 void Load32BitValue(XmmRegister dest, float value); 587 void Load64BitValue(XmmRegister dest, double value); 588 589 // Compare a register with a 32/64-bit value in the most efficient manner. 590 void Compare32BitValue(CpuRegister dest, int32_t value); 591 void Compare64BitValue(CpuRegister dest, int64_t value); 592 593 // Compare int values. Supports register locations for `lhs`. 594 void GenerateIntCompare(Location lhs, Location rhs); 595 void GenerateIntCompare(CpuRegister lhs, Location rhs); 596 597 // Compare long values. Supports only register locations for `lhs`. 598 void GenerateLongCompare(Location lhs, Location rhs); 599 600 // Construct address for array access. 601 static Address ArrayAddress(CpuRegister obj, 602 Location index, 603 ScaleFactor scale, 604 uint32_t data_offset); 605 606 Address LiteralCaseTable(HPackedSwitch* switch_instr); 607 608 // Store a 64 bit value into a DoubleStackSlot in the most efficient manner. 609 void Store64BitValueToStack(Location dest, int64_t value); 610 611 void MoveFromReturnRegister(Location trg, DataType::Type type) override; 612 613 // Assign a 64 bit constant to an address. 614 void MoveInt64ToAddress(const Address& addr_low, 615 const Address& addr_high, 616 int64_t v, 617 HInstruction* instruction); 618 619 // Ensure that prior stores complete to memory before subsequent loads. 620 // The locked add implementation will avoid serializing device memory, but will 621 // touch (but not change) the top of the stack. 622 // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores. 623 void MemoryFence(bool force_mfence = false) { 624 if (!force_mfence) { 625 assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0)); 626 } else { 627 assembler_.mfence(); 628 } 629 } 630 631 void IncreaseFrame(size_t adjustment) override; 632 void DecreaseFrame(size_t adjustment) override; 633 634 void GenerateNop() override; 635 void GenerateImplicitNullCheck(HNullCheck* instruction) override; 636 void GenerateExplicitNullCheck(HNullCheck* instruction) override; 637 void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls); 638 639 640 void MaybeIncrementHotness(bool is_frame_entry); 641 642 static void BlockNonVolatileXmmRegisters(LocationSummary* locations); 643 644 // When we don't know the proper offset for the value, we use kPlaceholder32BitOffset. 645 // We will fix this up in the linker later to have the right value. 646 static constexpr int32_t kPlaceholder32BitOffset = 256; 647 648 private: 649 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> 650 static void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos, 651 ArenaVector<linker::LinkerPatch>* linker_patches); 652 653 // Labels for each block that will be compiled. 654 Label* block_labels_; // Indexed by block id. 655 Label frame_entry_label_; 656 LocationsBuilderX86_64 location_builder_; 657 InstructionCodeGeneratorX86_64 instruction_visitor_; 658 ParallelMoveResolverX86_64 move_resolver_; 659 X86_64Assembler assembler_; 660 661 // Offset to the start of the constant area in the assembled code. 662 // Used for fixups to the constant area. 663 int constant_area_start_; 664 665 // PC-relative method patch info for kBootImageLinkTimePcRelative. 666 ArenaDeque<PatchInfo<Label>> boot_image_method_patches_; 667 // PC-relative method patch info for kBssEntry. 668 ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_; 669 // PC-relative type patch info for kBootImageLinkTimePcRelative. 670 ArenaDeque<PatchInfo<Label>> boot_image_type_patches_; 671 // PC-relative type patch info for kBssEntry. 672 ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_; 673 // PC-relative public type patch info for kBssEntryPublic. 674 ArenaDeque<PatchInfo<Label>> public_type_bss_entry_patches_; 675 // PC-relative package type patch info for kBssEntryPackage. 676 ArenaDeque<PatchInfo<Label>> package_type_bss_entry_patches_; 677 // PC-relative String patch info for kBootImageLinkTimePcRelative. 678 ArenaDeque<PatchInfo<Label>> boot_image_string_patches_; 679 // PC-relative String patch info for kBssEntry. 680 ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_; 681 // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative. 682 ArenaDeque<PatchInfo<Label>> boot_image_jni_entrypoint_patches_; 683 // PC-relative patch info for IntrinsicObjects for the boot image, 684 // and for method/type/string patches for kBootImageRelRo otherwise. 685 ArenaDeque<PatchInfo<Label>> boot_image_other_patches_; 686 687 // Patches for string literals in JIT compiled code. 688 ArenaDeque<PatchInfo<Label>> jit_string_patches_; 689 // Patches for class literals in JIT compiled code. 690 ArenaDeque<PatchInfo<Label>> jit_class_patches_; 691 692 // Fixups for jump tables need to be handled specially. 693 ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; 694 695 DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); 696 }; 697 698 } // namespace x86_64 699 } // namespace art 700 701 #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ 702