/* * Copyright (C) 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_ #include "arch/x86/instruction_set_features_x86.h" #include "base/macros.h" #include "base/pointer_size.h" #include "code_generator.h" #include "dex/dex_file_types.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/x86/assembler_x86.h" namespace art HIDDEN { namespace x86 { // Use a local definition to prevent copying mistakes. static constexpr size_t kX86WordSize = static_cast(kX86PointerSize); class CodeGeneratorX86; static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX }; static constexpr RegisterPair kParameterCorePairRegisters[] = { ECX_EDX, EDX_EBX }; static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); static constexpr XmmRegister kParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 }; static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters); static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX }; static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters); static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 }; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); #define UNIMPLEMENTED_INTRINSIC_LIST_X86(V) \ V(MathRoundDouble) \ V(FloatIsInfinite) \ V(DoubleIsInfinite) \ V(IntegerHighestOneBit) \ V(LongHighestOneBit) \ V(LongDivideUnsigned) \ V(IntegerRemainderUnsigned) \ V(LongRemainderUnsigned) \ V(CRC32Update) \ V(CRC32UpdateBytes) \ V(CRC32UpdateByteBuffer) \ V(FP16ToFloat) \ V(FP16ToHalf) \ V(FP16Floor) \ V(FP16Ceil) \ V(FP16Rint) \ V(FP16Greater) \ V(FP16GreaterEquals) \ V(FP16Less) \ V(FP16LessEquals) \ V(FP16Compare) \ V(FP16Min) \ V(FP16Max) \ V(MathMultiplyHigh) \ V(StringStringIndexOf) \ V(StringStringIndexOfAfter) \ V(StringBufferAppend) \ V(StringBufferLength) \ V(StringBufferToString) \ V(StringBuilderAppendObject) \ V(StringBuilderAppendString) \ V(StringBuilderAppendCharSequence) \ V(StringBuilderAppendCharArray) \ V(StringBuilderAppendBoolean) \ V(StringBuilderAppendChar) \ V(StringBuilderAppendInt) \ V(StringBuilderAppendLong) \ V(StringBuilderAppendFloat) \ V(StringBuilderAppendDouble) \ V(StringBuilderLength) \ V(StringBuilderToString) \ /* 1.8 */ \ V(MethodHandleInvokeExact) \ V(MethodHandleInvoke) class InvokeRuntimeCallingConvention : public CallingConvention { public: InvokeRuntimeCallingConvention() : CallingConvention(kRuntimeParameterCoreRegisters, kRuntimeParameterCoreRegistersLength, kRuntimeParameterFpuRegisters, kRuntimeParameterFpuRegistersLength, kX86PointerSize) {} private: DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); }; class InvokeDexCallingConvention : public CallingConvention { public: InvokeDexCallingConvention() : CallingConvention( kParameterCoreRegisters, kParameterCoreRegistersLength, kParameterFpuRegisters, kParameterFpuRegistersLength, kX86PointerSize) {} RegisterPair GetRegisterPairAt(size_t argument_index) { DCHECK_LT(argument_index + 1, GetNumberOfRegisters()); return kParameterCorePairRegisters[argument_index]; } private: DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); }; class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor { public: InvokeDexCallingConventionVisitorX86() {} virtual ~InvokeDexCallingConventionVisitorX86() {} Location GetNextLocation(DataType::Type type) override; Location GetReturnLocation(DataType::Type type) const override; Location GetMethodLocation() const override; private: InvokeDexCallingConvention calling_convention; DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86); }; class CriticalNativeCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor { public: explicit CriticalNativeCallingConventionVisitorX86(bool for_register_allocation) : for_register_allocation_(for_register_allocation) {} virtual ~CriticalNativeCallingConventionVisitorX86() {} Location GetNextLocation(DataType::Type type) override; Location GetReturnLocation(DataType::Type type) const override; Location GetMethodLocation() const override; size_t GetStackOffset() const { return stack_offset_; } private: // Register allocator does not support adjusting frame size, so we cannot provide final locations // of stack arguments for register allocation. We ask the register allocator for any location and // move these arguments to the right place after adjusting the SP when generating the call. const bool for_register_allocation_; size_t stack_offset_ = 0u; DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86); }; class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention { public: FieldAccessCallingConventionX86() {} Location GetObjectLocation() const override { return Location::RegisterLocation(ECX); } Location GetFieldIndexLocation() const override { return Location::RegisterLocation(EAX); } Location GetReturnLocation(DataType::Type type) const override { return DataType::Is64BitType(type) ? Location::RegisterPairLocation(EAX, EDX) : Location::RegisterLocation(EAX); } Location GetSetValueLocation(DataType::Type type, bool is_instance) const override { return DataType::Is64BitType(type) ? (is_instance ? Location::RegisterPairLocation(EDX, EBX) : Location::RegisterPairLocation(ECX, EDX)) : (is_instance ? Location::RegisterLocation(EDX) : Location::RegisterLocation(ECX)); } Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override { return Location::FpuRegisterLocation(XMM0); } private: DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86); }; class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap { public: ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen) : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} void EmitMove(size_t index) override; void EmitSwap(size_t index) override; void SpillScratch(int reg) override; void RestoreScratch(int reg) override; X86Assembler* GetAssembler() const; private: void Exchange(Register reg, int mem); void Exchange32(XmmRegister reg, int mem); void Exchange128(XmmRegister reg, int mem); void ExchangeMemory(int mem1, int mem2, int number_of_words); void MoveMemoryToMemory(int dst, int src, int number_of_words); CodeGeneratorX86* const codegen_; DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86); }; class LocationsBuilderX86 : public HGraphVisitor { public: LocationsBuilderX86(HGraph* graph, CodeGeneratorX86* codegen) : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } private: void HandleBitwiseOperation(HBinaryOperation* instruction); void HandleInvoke(HInvoke* invoke); void HandleCondition(HCondition* condition); void HandleShift(HBinaryOperation* instruction); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); bool CpuHasAvxFeatureFlag(); bool CpuHasAvx2FeatureFlag(); CodeGeneratorX86* const codegen_; InvokeDexCallingConventionVisitorX86 parameter_visitor_; DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86); }; class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { public: InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen); #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } X86Assembler* GetAssembler() const { return assembler_; } // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump // table version generates 7 instructions and num_entries literals. Compare/jump sequence will // generates less code/data with a small num_entries. static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5; // Generate a GC root reference load: // // root <- *address // // while honoring read barriers based on read_barrier_option. void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, const Address& address, Label* fixup_label, ReadBarrierOption read_barrier_option); void HandleFieldSet(HInstruction* instruction, uint32_t value_index, DataType::Type type, Address field_addr, Register base, bool is_volatile, bool value_can_be_null, WriteBarrierKind write_barrier_kind); private: // Generate code for the given suspend check. If not null, `successor` // is the block to branch to if the suspend check is not needed, and after // the suspend call. void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg); void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivByPowerOfTwo(HDiv* instruction); void RemByPowerOfTwo(HRem* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateRemFP(HRem* rem); void HandleCondition(HCondition* condition); void HandleShift(HBinaryOperation* instruction); void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); void GenerateUShrLong(const Location& loc, Register shifter); void GenerateShlLong(const Location& loc, int shift); void GenerateShrLong(const Location& loc, int shift); void GenerateUShrLong(const Location& loc, int shift); void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); void GenerateMinMax(HBinaryOperation* minmax, bool is_min); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, bool value_can_be_null, WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); // Generate a heap reference load using one register `out`: // // out <- *(out + offset) // // while honoring heap poisoning and/or read barriers (if any). // // Location `maybe_temp` is used when generating a read barrier and // shall be a register in that case; it may be an invalid location // otherwise. void GenerateReferenceLoadOneRegister(HInstruction* instruction, Location out, uint32_t offset, Location maybe_temp, ReadBarrierOption read_barrier_option); // Generate a heap reference load using two different registers // `out` and `obj`: // // out <- *(obj + offset) // // while honoring heap poisoning and/or read barriers (if any). // // Location `maybe_temp` is used when generating a Baker's (fast // path) read barrier and shall be a register in that case; it may // be an invalid location otherwise. void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, uint32_t offset, ReadBarrierOption read_barrier_option); // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not. // `is_wide` specifies whether it is long/double or not. void PushOntoFPStack(Location source, uint32_t temp_offset, uint32_t stack_adjustment, bool is_fp, bool is_wide); template void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, LabelType* true_target, LabelType* false_target); template void GenerateCompareTestAndBranch(HCondition* condition, LabelType* true_target, LabelType* false_target); template void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label); template void GenerateLongComparesAndJumps(HCondition* cond, LabelType* true_label, LabelType* false_label); void HandleGoto(HInstruction* got, HBasicBlock* successor); void GenPackedSwitchWithCompares(Register value_reg, int32_t lower_bound, uint32_t num_entries, HBasicBlock* switch_block, HBasicBlock* default_block); void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double); bool CpuHasAvxFeatureFlag(); bool CpuHasAvx2FeatureFlag(); void GenerateMethodEntryExitHook(HInstruction* instruction); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86); }; class JumpTableRIPFixup; class CodeGeneratorX86 : public CodeGenerator { public: CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorX86() {} void GenerateFrameEntry() override; void GenerateFrameExit() override; void Bind(HBasicBlock* block) override; void MoveConstant(Location destination, int32_t value) override; void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; void AddLocationAsTemp(Location location, LocationSummary* locations) override; size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; // Generate code to invoke a runtime entry point. void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr) override; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, HInstruction* instruction, SlowPathCode* slow_path); void GenerateInvokeRuntime(int32_t entry_point_offset); size_t GetWordSize() const override { return kX86WordSize; } size_t GetSlowPathFPWidth() const override { return GetGraph()->HasSIMD() ? GetSIMDRegisterWidth() : 2 * kX86WordSize; // 8 bytes == 2 words for each spill } size_t GetCalleePreservedFPWidth() const override { return 2 * kX86WordSize; } size_t GetSIMDRegisterWidth() const override { return 4 * kX86WordSize; } HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } X86Assembler* GetAssembler() override { return &assembler_; } const X86Assembler& GetAssembler() const override { return assembler_; } uintptr_t GetAddressOf(HBasicBlock* block) override { return GetLabelOf(block)->Position(); } void SetupBlockedRegisters() const override; void DumpCoreRegister(std::ostream& stream, int reg) const override; void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; ParallelMoveResolverX86* GetMoveResolver() override { return &move_resolver_; } InstructionSet GetInstructionSet() const override { return InstructionSet::kX86; } const X86InstructionSetFeatures& GetInstructionSetFeatures() const; // Helper method to move a 32bits value between two locations. void Move32(Location destination, Location source); // Helper method to move a 64bits value between two locations. void Move64(Location destination, Location source); // Helper method to load a value from an address to a register. void LoadFromMemoryNoBarrier(DataType::Type dst_type, Location dst, Address src, HInstruction* instr = nullptr, XmmRegister temp = kNoXmmRegister, bool is_atomic_load = false); // Helper method to move a primitive value from a location to an address. void MoveToMemory(DataType::Type src_type, Location src, Register dst_base, Register dst_index = Register::kNoRegister, ScaleFactor dst_scale = TIMES_1, int32_t dst_disp = 0); // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadString::LoadKind GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) override; // Check if the desired_class_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadClass::LoadKind GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) override; // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, ArtMethod* method) override; void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke); // Generate a call to a static or direct method. void GenerateStaticOrDirectCall( HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; // Generate a call to a virtual method. void GenerateVirtualCall( HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address, uint32_t intrinsic_data); void RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address, uint32_t boot_image_offset); void RecordBootImageMethodPatch(HInvoke* invoke); void RecordMethodBssEntryPatch(HInvoke* invoke); void RecordBootImageTypePatch(HLoadClass* load_class); void RecordAppImageTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); void RecordBootImageStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); void RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke); void LoadBootImageAddress(Register reg, uint32_t boot_image_reference, HInvokeStaticOrDirect* invoke); void LoadIntrinsicDeclaringClass(Register reg, HInvokeStaticOrDirect* invoke); Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex string_index, Handle handle); Label* NewJitRootClassPatch(const DexFile& dex_file, dex::TypeIndex type_index, Handle handle); void MoveFromReturnRegister(Location trg, DataType::Type type) override; // Emit linker patches. void EmitLinkerPatches(ArenaVector* linker_patches) override; void PatchJitRootUse(uint8_t* code, const uint8_t* roots_data, const PatchInfo