/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_

#include "arch/x86/instruction_set_features_x86.h"
#include "base/macros.h"
#include "base/pointer_size.h"
#include "code_generator.h"
#include "dex/dex_file_types.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86/assembler_x86.h"

namespace art HIDDEN {
namespace x86 {

// Use a local definition to prevent copying mistakes.
static constexpr size_t kX86WordSize = static_cast<size_t>(kX86PointerSize);

class CodeGeneratorX86;

static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX };
static constexpr RegisterPair kParameterCorePairRegisters[] = { ECX_EDX, EDX_EBX };
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr XmmRegister kParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters);

static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
    arraysize(kRuntimeParameterCoreRegisters);
static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
    arraysize(kRuntimeParameterFpuRegisters);

#define UNIMPLEMENTED_INTRINSIC_LIST_X86(V) \
  V(MathRoundDouble) \
  V(FloatIsInfinite) \
  V(DoubleIsInfinite) \
  V(IntegerHighestOneBit) \
  V(LongHighestOneBit) \
  V(LongDivideUnsigned) \
  V(IntegerRemainderUnsigned) \
  V(LongRemainderUnsigned) \
  V(CRC32Update) \
  V(CRC32UpdateBytes) \
  V(CRC32UpdateByteBuffer) \
  V(FP16ToFloat) \
  V(FP16ToHalf) \
  V(FP16Floor) \
  V(FP16Ceil) \
  V(FP16Rint) \
  V(FP16Greater) \
  V(FP16GreaterEquals) \
  V(FP16Less) \
  V(FP16LessEquals) \
  V(FP16Compare) \
  V(FP16Min) \
  V(FP16Max) \
  V(MathMultiplyHigh) \
  V(StringStringIndexOf) \
  V(StringStringIndexOfAfter) \
  V(StringBufferAppend) \
  V(StringBufferLength) \
  V(StringBufferToString) \
  V(StringBuilderAppendObject) \
  V(StringBuilderAppendString) \
  V(StringBuilderAppendCharSequence) \
  V(StringBuilderAppendCharArray) \
  V(StringBuilderAppendBoolean) \
  V(StringBuilderAppendChar) \
  V(StringBuilderAppendInt) \
  V(StringBuilderAppendLong) \
  V(StringBuilderAppendFloat) \
  V(StringBuilderAppendDouble) \
  V(StringBuilderLength) \
  V(StringBuilderToString) \
  /* 1.8 */ \
  V(MethodHandleInvokeExact) \
  V(MethodHandleInvoke)
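
// Illustrative note (not part of the upstream header): the list above is an
// X-macro; a client defines V() and the list stamps out one expansion per
// unimplemented intrinsic. A minimal sketch, using a hypothetical
// MarkUnimplemented() helper:
//
//   #define MARK_UNIMPLEMENTED(Name) MarkUnimplemented(#Name);
//   UNIMPLEMENTED_INTRINSIC_LIST_X86(MARK_UNIMPLEMENTED)
//   #undef MARK_UNIMPLEMENTED
//
// which expands to MarkUnimplemented("MathRoundDouble"); and so on, one call
// per listed intrinsic.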

class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
 public:
  InvokeRuntimeCallingConvention()
      : CallingConvention(kRuntimeParameterCoreRegisters,
                          kRuntimeParameterCoreRegistersLength,
                          kRuntimeParameterFpuRegisters,
                          kRuntimeParameterFpuRegistersLength,
                          kX86PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};

class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegister> {
 public:
  InvokeDexCallingConvention()
      : CallingConvention(kParameterCoreRegisters,
                          kParameterCoreRegistersLength,
                          kParameterFpuRegisters,
                          kParameterFpuRegistersLength,
                          kX86PointerSize) {}

  RegisterPair GetRegisterPairAt(size_t argument_index) {
    DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
    return kParameterCorePairRegisters[argument_index];
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};
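
// Illustrative note (not part of the upstream header): with the core parameter
// registers { ECX, EDX, EBX } and pairs { ECX_EDX, EDX_EBX }, a 64-bit
// argument occupies two consecutive core registers. For example, assuming a
// long argument whose low word lands at register index 0, GetRegisterPairAt(0)
// yields ECX_EDX; a long starting at index 1 would get EDX_EBX instead.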

class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
 public:
  InvokeDexCallingConventionVisitorX86() {}
  virtual ~InvokeDexCallingConventionVisitorX86() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

 private:
  InvokeDexCallingConvention calling_convention;

  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86);
};

class CriticalNativeCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
 public:
  explicit CriticalNativeCallingConventionVisitorX86(bool for_register_allocation)
      : for_register_allocation_(for_register_allocation) {}

  virtual ~CriticalNativeCallingConventionVisitorX86() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

  size_t GetStackOffset() const { return stack_offset_; }

 private:
  // Register allocator does not support adjusting frame size, so we cannot provide final locations
  // of stack arguments for register allocation. We ask the register allocator for any location and
  // move these arguments to the right place after adjusting the SP when generating the call.
  const bool for_register_allocation_;
  size_t stack_offset_ = 0u;

  DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86);
};

class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention {
 public:
  FieldAccessCallingConventionX86() {}

  Location GetObjectLocation() const override {
    return Location::RegisterLocation(ECX);
  }
  Location GetFieldIndexLocation() const override {
    return Location::RegisterLocation(EAX);
  }
  Location GetReturnLocation(DataType::Type type) const override {
    return DataType::Is64BitType(type)
        ? Location::RegisterPairLocation(EAX, EDX)
        : Location::RegisterLocation(EAX);
  }
  Location GetSetValueLocation(DataType::Type type, bool is_instance) const override {
    return DataType::Is64BitType(type)
        ? (is_instance
            ? Location::RegisterPairLocation(EDX, EBX)
            : Location::RegisterPairLocation(ECX, EDX))
        : (is_instance
            ? Location::RegisterLocation(EDX)
            : Location::RegisterLocation(ECX));
  }
  Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
    return Location::FpuRegisterLocation(XMM0);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86);
};
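
// Illustrative note (not part of the upstream header): in the field access
// convention above, instance accesses keep the object reference in ECX (see
// GetObjectLocation()), so a 64-bit value stored to an instance field shifts
// to the EDX:EBX pair, while a static store can use ECX:EDX. 64-bit results
// come back in EAX:EDX, the conventional x86-32 register pair for longs.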

class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
 public:
  ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen)
      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}

  void EmitMove(size_t index) override;
  void EmitSwap(size_t index) override;
  void SpillScratch(int reg) override;
  void RestoreScratch(int reg) override;

  X86Assembler* GetAssembler() const;

 private:
  void Exchange(Register reg, int mem);
  void Exchange32(XmmRegister reg, int mem);
  void Exchange128(XmmRegister reg, int mem);
  void ExchangeMemory(int mem1, int mem2, int number_of_words);
  void MoveMemoryToMemory(int dst, int src, int number_of_words);

  CodeGeneratorX86* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86);
};

class LocationsBuilderX86 : public HGraphVisitor {
 public:
  LocationsBuilderX86(HGraph* graph, CodeGeneratorX86* codegen)
      : HGraphVisitor(graph), codegen_(codegen) {}

#define DECLARE_VISIT_INSTRUCTION(name, super) \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

 private:
  void HandleBitwiseOperation(HBinaryOperation* instruction);
  void HandleInvoke(HInvoke* invoke);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* instruction);
  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  CodeGeneratorX86* const codegen_;
  InvokeDexCallingConventionVisitorX86 parameter_visitor_;

  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
};

class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
 public:
  InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen);

#define DECLARE_VISIT_INSTRUCTION(name, super) \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

  X86Assembler* GetAssembler() const { return assembler_; }

  // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
  // table version generates 7 instructions and num_entries literals. The compare/jump sequence
  // generates less code/data for a small num_entries.
  static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
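
  // Illustrative note (not part of the upstream header): plugging numbers into
  // the cost model above, a 5-entry switch costs about 1.5 * 5 = 7.5
  // compare/jump instructions versus 7 instructions plus 5 in-memory literals
  // for the jump table, so the threshold of 5 sits roughly at the break-even
  // point; below it the compare/jump sequence wins.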

  // Generate a GC root reference load:
  //
  //   root <- *address
  //
  // while honoring read barriers based on read_barrier_option.
  void GenerateGcRootFieldLoad(HInstruction* instruction,
                               Location root,
                               const Address& address,
                               Label* fixup_label,
                               ReadBarrierOption read_barrier_option);

  void HandleFieldSet(HInstruction* instruction,
                      uint32_t value_index,
                      DataType::Type type,
                      Address field_addr,
                      Register base,
                      bool is_volatile,
                      bool value_can_be_null,
                      WriteBarrierKind write_barrier_kind);

 private:
  // Generate code for the given suspend check. If not null, `successor`
  // is the block to branch to if the suspend check is not needed, and after
  // the suspend call.
  void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
  void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);
  void HandleBitwiseOperation(HBinaryOperation* instruction);
  void GenerateDivRemIntegral(HBinaryOperation* instruction);
  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
  void DivByPowerOfTwo(HDiv* instruction);
  void RemByPowerOfTwo(HRem* instruction);
  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
  void GenerateRemFP(HRem* rem);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* instruction);
  void GenerateShlLong(const Location& loc, Register shifter);
  void GenerateShrLong(const Location& loc, Register shifter);
  void GenerateUShrLong(const Location& loc, Register shifter);
  void GenerateShlLong(const Location& loc, int shift);
  void GenerateShrLong(const Location& loc, int shift);
  void GenerateUShrLong(const Location& loc, int shift);
  void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMax(HBinaryOperation* minmax, bool is_min);

  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      bool value_can_be_null,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);

  // Generate a heap reference load using one register `out`:
  //
  //   out <- *(out + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a read barrier and
  // shall be a register in that case; it may be an invalid location
  // otherwise.
  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                        Location out,
                                        uint32_t offset,
                                        Location maybe_temp,
                                        ReadBarrierOption read_barrier_option);
  // Generate a heap reference load using two different registers
  // `out` and `obj`:
  //
  //   out <- *(obj + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a Baker's (fast
  // path) read barrier and shall be a register in that case; it may
  // be an invalid location otherwise.
  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                         Location out,
                                         Location obj,
                                         uint32_t offset,
                                         ReadBarrierOption read_barrier_option);

  // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
  // `is_wide` specifies whether it is long/double or not.
  void PushOntoFPStack(Location source, uint32_t temp_offset,
                       uint32_t stack_adjustment, bool is_fp, bool is_wide);

  template<class LabelType>
  void GenerateTestAndBranch(HInstruction* instruction,
                             size_t condition_input_index,
                             LabelType* true_target,
                             LabelType* false_target);
  template<class LabelType>
  void GenerateCompareTestAndBranch(HCondition* condition,
                                    LabelType* true_target,
                                    LabelType* false_target);
  template<class LabelType>
  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);
  template<class LabelType>
  void GenerateLongComparesAndJumps(HCondition* cond,
                                    LabelType* true_label,
                                    LabelType* false_label);

  void HandleGoto(HInstruction* got, HBasicBlock* successor);
  void GenPackedSwitchWithCompares(Register value_reg,
                                   int32_t lower_bound,
                                   uint32_t num_entries,
                                   HBasicBlock* switch_block,
                                   HBasicBlock* default_block);

  void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  void GenerateMethodEntryExitHook(HInstruction* instruction);

  X86Assembler* const assembler_;
  CodeGeneratorX86* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86);
};

class JumpTableRIPFixup;

class CodeGeneratorX86 : public CodeGenerator {
 public:
  CodeGeneratorX86(HGraph* graph,
                   const CompilerOptions& compiler_options,
                   OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGeneratorX86() {}

  void GenerateFrameEntry() override;
  void GenerateFrameExit() override;
  void Bind(HBasicBlock* block) override;
  void MoveConstant(Location destination, int32_t value) override;
  void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
  void AddLocationAsTemp(Location location, LocationSummary* locations) override;

  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;

  // Generate code to invoke a runtime entry point.
  void InvokeRuntime(QuickEntrypointEnum entrypoint,
                     HInstruction* instruction,
                     uint32_t dex_pc,
                     SlowPathCode* slow_path = nullptr) override;

  // Generate code to invoke a runtime entry point, but do not record
  // PC-related information in a stack map.
  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                           HInstruction* instruction,
                                           SlowPathCode* slow_path);

  void GenerateInvokeRuntime(int32_t entry_point_offset);

  size_t GetWordSize() const override {
    return kX86WordSize;
  }

  size_t GetSlowPathFPWidth() const override {
    return GetGraph()->HasSIMD()
        ? GetSIMDRegisterWidth()
        : 2 * kX86WordSize;  // 8 bytes == 2 words for each spill
  }

  size_t GetCalleePreservedFPWidth() const override {
    return 2 * kX86WordSize;
  }

  size_t GetSIMDRegisterWidth() const override {
    return 4 * kX86WordSize;
  }
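
  // Illustrative note (not part of the upstream header): with kX86WordSize ==
  // 4 bytes, GetSIMDRegisterWidth() is 4 * 4 = 16 bytes, the width of an XMM
  // register, and the non-SIMD slow-path FP spill width is 2 * 4 = 8 bytes,
  // enough for a double or a long.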

  HGraphVisitor* GetLocationBuilder() override {
    return &location_builder_;
  }

  HGraphVisitor* GetInstructionVisitor() override {
    return &instruction_visitor_;
  }

  X86Assembler* GetAssembler() override {
    return &assembler_;
  }

  const X86Assembler& GetAssembler() const override {
    return assembler_;
  }

  uintptr_t GetAddressOf(HBasicBlock* block) override {
    return GetLabelOf(block)->Position();
  }

  void SetupBlockedRegisters() const override;

  void DumpCoreRegister(std::ostream& stream, int reg) const override;
  void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;

  ParallelMoveResolverX86* GetMoveResolver() override {
    return &move_resolver_;
  }

  InstructionSet GetInstructionSet() const override {
    return InstructionSet::kX86;
  }

  const X86InstructionSetFeatures& GetInstructionSetFeatures() const;

  // Helper method to move a 32-bit value between two locations.
  void Move32(Location destination, Location source);
  // Helper method to move a 64-bit value between two locations.
  void Move64(Location destination, Location source);
  // Helper method to load a value from an address to a register.
  void LoadFromMemoryNoBarrier(DataType::Type dst_type,
                               Location dst,
                               Address src,
                               HInstruction* instr = nullptr,
                               XmmRegister temp = kNoXmmRegister,
                               bool is_atomic_load = false);
  // Helper method to move a primitive value from a location to an address.
  void MoveToMemory(DataType::Type src_type,
                    Location src,
                    Register dst_base,
                    Register dst_index = Register::kNoRegister,
                    ScaleFactor dst_scale = TIMES_1,
                    int32_t dst_disp = 0);

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) override;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) override;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) override;

  void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke);
  // Generate a call to a static or direct method.
  void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
  // Generate a call to a virtual method.
  void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;

  void RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
                                     uint32_t intrinsic_data);
  void RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
                                 uint32_t boot_image_offset);
  void RecordBootImageMethodPatch(HInvoke* invoke);
  void RecordMethodBssEntryPatch(HInvoke* invoke);
  void RecordBootImageTypePatch(HLoadClass* load_class);
  void RecordAppImageTypePatch(HLoadClass* load_class);
  Label* NewTypeBssEntryPatch(HLoadClass* load_class);
  void RecordBootImageStringPatch(HLoadString* load_string);
  Label* NewStringBssEntryPatch(HLoadString* load_string);
  void RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke);

  void LoadBootImageAddress(Register reg,
                            uint32_t boot_image_reference,
                            HInvokeStaticOrDirect* invoke);
  void LoadIntrinsicDeclaringClass(Register reg, HInvokeStaticOrDirect* invoke);

  Label* NewJitRootStringPatch(const DexFile& dex_file,
                               dex::StringIndex string_index,
                               Handle<mirror::String> handle);
  Label* NewJitRootClassPatch(const DexFile& dex_file,
                              dex::TypeIndex type_index,
                              Handle<mirror::Class> handle);

  void MoveFromReturnRegister(Location trg, DataType::Type type) override;

  // Emit linker patches.
  void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;

  void PatchJitRootUse(uint8_t* code,
                       const uint8_t* roots_data,
                       const PatchInfo<Label>& info,
                       uint64_t index_in_table) const;
  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;

  // Emit a write barrier if:
  // A) emit_null_check is false, or
  // B) emit_null_check is true and value is not null.
  void MaybeMarkGCCard(
      Register temp, Register card, Register object, Register value, bool emit_null_check);

  // Emit a write barrier unconditionally.
  void MarkGCCard(Register temp, Register card, Register object);
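
  // Illustrative note (not part of the upstream header): a typical caller
  // stores a reference field and then marks the card for the holder, e.g.
  //
  //   __ movl(Address(base, offset), value);
  //   codegen_->MaybeMarkGCCard(
  //       temp, card, base, value, /* emit_null_check= */ value_can_be_null);
  //
  // so the GC rescans the holder's card if a non-null reference was written.
  // The register names here stand in for whatever temps the caller allocated.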

  // Crash if the card table is not valid. This check is only emitted for the CC GC. We assert
  // `(!clean || !self->is_gc_marking)`, since the card table should not be set to clean when the
  // CC GC is marking for eliminated write barriers.
  void CheckGCCardIsValid(Register temp, Register card, Register object);

  void GenerateMemoryBarrier(MemBarrierKind kind);

  Label* GetLabelOf(HBasicBlock* block) const {
    return CommonGetLabelOf<Label>(block_labels_, block);
  }

  void Initialize() override {
    block_labels_ = CommonInitializeLabels<Label>();
  }

  bool NeedsTwoRegisters(DataType::Type type) const override {
    return type == DataType::Type::kInt64;
  }

  bool ShouldSplitLongMoves() const override { return true; }

  Label* GetFrameEntryLabel() { return &frame_entry_label_; }

  void AddMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base, int32_t offset) {
    method_address_offset_.Put(method_base->GetId(), offset);
  }

  int32_t GetMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base) const {
    return method_address_offset_.Get(method_base->GetId());
  }
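
  // Illustrative note (not part of the upstream header): 32-bit x86 has no
  // PC-relative addressing, so HX86ComputeBaseMethodAddress materializes a
  // base address in a register, conceptually via a call/pop pair:
  //
  //   call next        // pushes the return address
  //   next: popl reg   // reg now holds a known PC
  //
  // The offsets recorded via AddMethodAddressOffset() let later fixups turn
  // "literal address relative to that base" into a concrete displacement.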

  int32_t ConstantAreaStart() const {
    return constant_area_start_;
  }

  Address LiteralDoubleAddress(double v, HX86ComputeBaseMethodAddress* method_base, Register reg);
  Address LiteralFloatAddress(float v, HX86ComputeBaseMethodAddress* method_base, Register reg);
  Address LiteralInt32Address(int32_t v, HX86ComputeBaseMethodAddress* method_base, Register reg);
  Address LiteralInt64Address(int64_t v, HX86ComputeBaseMethodAddress* method_base, Register reg);

  // Load a 32-bit value into a register in the most efficient manner.
  void Load32BitValue(Register dest, int32_t value);

  // Compare a register with a 32-bit value in the most efficient manner.
  void Compare32BitValue(Register dest, int32_t value);

  // Compare int values. Supports only register locations for `lhs`.
  void GenerateIntCompare(Location lhs, Location rhs);
  void GenerateIntCompare(Register lhs, Location rhs);

  // Construct address for array access.
  static Address ArrayAddress(Register obj,
                              Location index,
                              ScaleFactor scale,
                              uint32_t data_offset);

  Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);

  void Finalize() override;

  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             Register obj,
                                             uint32_t offset,
                                             bool needs_null_check);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference array load when Baker's read barriers are used.
  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             Register obj,
                                             uint32_t data_offset,
                                             Location index,
                                             bool needs_null_check);
  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
  //
  // Load the object reference located at address `src`, held by
  // object `obj`, into `ref`, and mark it if needed. The base of
  // address `src` must be `obj`.
  //
  // If `always_update_field` is true, the value of the reference is
  // atomically updated in the holder (`obj`). This operation
  // requires a temporary register, which must be provided as a
  // non-null pointer (`temp`).
  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                 Location ref,
                                                 Register obj,
                                                 const Address& src,
                                                 bool needs_null_check,
                                                 bool always_update_field = false,
                                                 Register* temp = nullptr);

  // Generate a read barrier for a heap reference within `instruction`
  // using a slow path.
  //
  // A read barrier for an object reference read from the heap is
  // implemented as a call to the artReadBarrierSlow runtime entry
  // point, which is passed the values in locations `ref`, `obj`, and
  // `offset`:
  //
  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
  //                                      mirror::Object* obj,
  //                                      uint32_t offset);
  //
  // The `out` location contains the value returned by
  // artReadBarrierSlow.
  //
  // When `index` is provided (i.e. for array accesses), the offset
  // value passed to artReadBarrierSlow is adjusted to take `index`
  // into account.
  void GenerateReadBarrierSlow(HInstruction* instruction,
                               Location out,
                               Location ref,
                               Location obj,
                               uint32_t offset,
                               Location index = Location::NoLocation());

  // If read barriers are enabled, generate a read barrier for a heap
  // reference using a slow path. If heap poisoning is enabled, also
  // unpoison the reference in `out`.
  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                    Location out,
                                    Location ref,
                                    Location obj,
                                    uint32_t offset,
                                    Location index = Location::NoLocation());

  // Generate a read barrier for a GC root within `instruction` using
  // a slow path.
  //
  // A read barrier for an object reference GC root is implemented as
  // a call to the artReadBarrierForRootSlow runtime entry point,
  // which is passed the value in location `root`:
  //
  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
  //
  // The `out` location contains the value returned by
  // artReadBarrierForRootSlow.
  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);

  // Ensure that prior stores complete to memory before subsequent loads.
  // The locked add implementation will avoid serializing device memory, but will
  // touch (but not change) the top of the stack.
  // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
  void MemoryFence(bool non_temporal = false) {
    if (!non_temporal) {
      assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
    } else {
      assembler_.mfence();
    }
  }
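
  // Illustrative note (not part of the upstream header): for a default fence,
  // MemoryFence() emits
  //
  //   lock addl $0, (%esp)
  //
  // which acts as a full barrier on x86 and is typically cheaper than mfence,
  // while MemoryFence(/* non_temporal= */ true) emits mfence, which is needed
  // to order non-temporal (write-combining) stores.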

  void IncreaseFrame(size_t adjustment) override;
  void DecreaseFrame(size_t adjustment) override;

  void GenerateNop() override;
  void GenerateImplicitNullCheck(HNullCheck* instruction) override;
  void GenerateExplicitNullCheck(HNullCheck* instruction) override;

  void MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass);
  void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);

  // When we don't know the proper offset for the value, we use kPlaceholder32BitOffset.
  // The correct value will be inserted when processing Assembler fixups.
  static constexpr int32_t kPlaceholder32BitOffset = 256;

 private:
  struct X86PcRelativePatchInfo : PatchInfo<Label> {
    X86PcRelativePatchInfo(HX86ComputeBaseMethodAddress* address,
                           const DexFile* target_dex_file,
                           uint32_t target_index)
        : PatchInfo(target_dex_file, target_index),
          method_address(address) {}
    HX86ComputeBaseMethodAddress* method_address;
  };

  template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
  void EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo>& infos,
                                   ArenaVector<linker::LinkerPatch>* linker_patches);

  Register GetInvokeExtraParameter(HInvoke* invoke, Register temp);
  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);

  // Labels for each block that will be compiled.
  Label* block_labels_;  // Indexed by block id.
  Label frame_entry_label_;
  LocationsBuilderX86 location_builder_;
  InstructionCodeGeneratorX86 instruction_visitor_;
  ParallelMoveResolverX86 move_resolver_;
  X86Assembler assembler_;

  // PC-relative method patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_;
  // PC-relative method patch info for kBssEntry.
  ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_;
  // PC-relative type patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_;
  // PC-relative type patch info for kAppImageRelRo.
  ArenaDeque<X86PcRelativePatchInfo> app_image_type_patches_;
  // PC-relative type patch info for kBssEntry.
  ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_;
  // PC-relative public type patch info for kBssEntryPublic.
  ArenaDeque<X86PcRelativePatchInfo> public_type_bss_entry_patches_;
  // PC-relative package type patch info for kBssEntryPackage.
  ArenaDeque<X86PcRelativePatchInfo> package_type_bss_entry_patches_;
  // PC-relative String patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_;
  // PC-relative String patch info for kBssEntry.
  ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_;
  // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_jni_entrypoint_patches_;
  // PC-relative patch info for IntrinsicObjects for the boot image,
  // and for method/type/string patches for kBootImageRelRo otherwise.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_other_patches_;

  // Patches for string root accesses in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_string_patches_;
  // Patches for class root accesses in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_class_patches_;

  // Offset to the start of the constant area in the assembled code.
  // Used for fixups to the constant area.
  int32_t constant_area_start_;

  // Fixups for jump tables that need to be patched after the constant table is generated.
  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;

  // Maps a HX86ComputeBaseMethodAddress instruction id to its offset in the
  // compiled code.
  ArenaSafeMap<uint32_t, int32_t> method_address_offset_;

  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
};

}  // namespace x86
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_