/* * Copyright (C) 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_ #define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_ #include #include #include #include "base/arena_containers.h" #include "base/array_ref.h" #include "base/logging.h" #include "constants_arm.h" #include "utils/arm/managed_register_arm.h" #include "utils/arm/assembler_arm.h" #include "offsets.h" namespace art { namespace arm { class Thumb2Assembler FINAL : public ArmAssembler { public: explicit Thumb2Assembler(ArenaAllocator* arena, bool can_relocate_branches = true) : ArmAssembler(arena), can_relocate_branches_(can_relocate_branches), force_32bit_(false), it_cond_index_(kNoItCondition), next_condition_(AL), fixups_(arena->Adapter(kArenaAllocAssembler)), fixup_dependents_(arena->Adapter(kArenaAllocAssembler)), literals_(arena->Adapter(kArenaAllocAssembler)), literal64_dedupe_map_(std::less(), arena->Adapter(kArenaAllocAssembler)), jump_tables_(arena->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0u), last_old_position_(0u), last_fixup_id_(0u) { cfi().DelayEmittingAdvancePCs(); } virtual ~Thumb2Assembler() { } bool IsThumb() const OVERRIDE { return true; } bool IsForced32Bit() const { return force_32bit_; } bool CanRelocateBranches() const { return can_relocate_branches_; } void FinalizeCode() OVERRIDE; // Data-processing instructions. virtual void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void eor(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void sub(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void rsb(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void add(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void adc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void sbc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void rsc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE; void teq(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE; void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE; void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE; virtual void orr(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void orn(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void mov(Register rd, const ShifterOperand& so, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void bic(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void mvn(Register rd, const ShifterOperand& so, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; // Miscellaneous data-processing instructions. void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE; void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE; void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE; void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE; void rev(Register rd, Register rm, Condition cond = AL) OVERRIDE; void rev16(Register rd, Register rm, Condition cond = AL) OVERRIDE; void revsh(Register rd, Register rm, Condition cond = AL) OVERRIDE; // Multiply instructions. void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE; void mla(Register rd, Register rn, Register rm, Register ra, Condition cond = AL) OVERRIDE; void mls(Register rd, Register rn, Register rm, Register ra, Condition cond = AL) OVERRIDE; void smull(Register rd_lo, Register rd_hi, Register rn, Register rm, Condition cond = AL) OVERRIDE; void umull(Register rd_lo, Register rd_hi, Register rn, Register rm, Condition cond = AL) OVERRIDE; void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE; void udiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE; // Bit field extract instructions. void sbfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE; void ubfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE; // Load/store instructions. void ldr(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; void str(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; void ldrb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; void strb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; void ldrh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; void strh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; // Load/store register dual instructions using registers `rd` and `rd` + 1. void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; // Load/store register dual instructions using registers `rd` and `rd2`. // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding // does not require `rd` to be even, nor `rd2' to be equal to `rd` + 1. void ldrd(Register rd, Register rd2, const Address& ad, Condition cond); void strd(Register rd, Register rd2, const Address& ad, Condition cond); void ldm(BlockAddressMode am, Register base, RegList regs, Condition cond = AL) OVERRIDE; void stm(BlockAddressMode am, Register base, RegList regs, Condition cond = AL) OVERRIDE; void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE; void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE; void ldrex(Register rd, Register rn, uint16_t imm, Condition cond = AL); void strex(Register rd, Register rt, Register rn, uint16_t imm, Condition cond = AL); void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; // Miscellaneous instructions. void clrex(Condition cond = AL) OVERRIDE; void nop(Condition cond = AL) OVERRIDE; void bkpt(uint16_t imm16) OVERRIDE; void svc(uint32_t imm24) OVERRIDE; // If-then void it(Condition firstcond, ItState i1 = kItOmitted, ItState i2 = kItOmitted, ItState i3 = kItOmitted) OVERRIDE; void cbz(Register rn, Label* target) OVERRIDE; void cbnz(Register rn, Label* target) OVERRIDE; // Floating point instructions (VFPv3-D16 and VFPv3-D32 profiles). void vmovsr(SRegister sn, Register rt, Condition cond = AL) OVERRIDE; void vmovrs(Register rt, SRegister sn, Condition cond = AL) OVERRIDE; void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL) OVERRIDE; void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL) OVERRIDE; void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL) OVERRIDE; void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL) OVERRIDE; void vmovs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; void vmovd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE; // Returns false if the immediate cannot be encoded. bool vmovs(SRegister sd, float s_imm, Condition cond = AL) OVERRIDE; bool vmovd(DRegister dd, double d_imm, Condition cond = AL) OVERRIDE; void vldrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE; void vstrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE; void vldrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE; void vstrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE; void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE; void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE; void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE; void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE; void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE; void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE; void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE; void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE; void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE; void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE; void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE; void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE; void vabss(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; void vabsd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE; void vnegs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; void vnegd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE; void vsqrts(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE; void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE; void vcvtds(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE; void vcvtis(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; void vcvtid(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE; void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE; void vcvtus(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; void vcvtud(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE; void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE; void vcmps(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; void vcmpd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE; void vcmpsz(SRegister sd, Condition cond = AL) OVERRIDE; void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE; void vmstat(Condition cond = AL) OVERRIDE; // VMRS APSR_nzcv, FPSCR void vcntd(DRegister dd, DRegister dm) OVERRIDE; void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE; void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE; // Branch instructions. void b(Label* label, Condition cond = AL); void bl(Label* label, Condition cond = AL); void blx(Label* label); void blx(Register rm, Condition cond = AL) OVERRIDE; void bx(Register rm, Condition cond = AL) OVERRIDE; virtual void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void Rrx(Register rd, Register rm, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void Lsl(Register rd, Register rm, Register rn, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void Lsr(Register rd, Register rm, Register rn, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void Asr(Register rd, Register rm, Register rn, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void Ror(Register rd, Register rm, Register rn, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; void Push(Register rd, Condition cond = AL) OVERRIDE; void Pop(Register rd, Condition cond = AL) OVERRIDE; void PushList(RegList regs, Condition cond = AL) OVERRIDE; void PopList(RegList regs, Condition cond = AL) OVERRIDE; void StoreList(RegList regs, size_t stack_offset) OVERRIDE; void LoadList(RegList regs, size_t stack_offset) OVERRIDE; void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE; void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE; void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE; // Memory barriers. void dmb(DmbOptions flavor) OVERRIDE; // Get the final position of a label after local fixup based on the old position // recorded before FinalizeCode(). uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE; using ArmAssembler::NewLiteral; // Make the helper template visible. Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE; void LoadLiteral(Register rt, Literal* literal) OVERRIDE; void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE; void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE; void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE; // Add signed constant value to rd. May clobber IP. void AddConstant(Register rd, Register rn, int32_t value, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE; // Load and Store. May clobber IP. void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE; void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE; void MarkExceptionHandler(Label* label) OVERRIDE; void LoadFromOffset(LoadOperandType type, Register reg, Register base, int32_t offset, Condition cond = AL) OVERRIDE; void StoreToOffset(StoreOperandType type, Register reg, Register base, int32_t offset, Condition cond = AL) OVERRIDE; void LoadSFromOffset(SRegister reg, Register base, int32_t offset, Condition cond = AL) OVERRIDE; void StoreSToOffset(SRegister reg, Register base, int32_t offset, Condition cond = AL) OVERRIDE; void LoadDFromOffset(DRegister reg, Register base, int32_t offset, Condition cond = AL) OVERRIDE; void StoreDToOffset(DRegister reg, Register base, int32_t offset, Condition cond = AL) OVERRIDE; bool ShifterOperandCanHold(Register rd, Register rn, Opcode opcode, uint32_t immediate, SetCc set_cc, ShifterOperand* shifter_op) OVERRIDE; using ArmAssembler::ShifterOperandCanHold; // Don't hide the non-virtual override. bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE; static bool IsInstructionForExceptionHandling(uintptr_t pc); // Emit data (e.g. encoded instruction or immediate) to the. // instruction stream. void Emit32(int32_t value); // Emit a 32 bit instruction in thumb format. void Emit16(int16_t value); // Emit a 16 bit instruction in little endian format. void Bind(Label* label) OVERRIDE; // Force the assembler to generate 32 bit instructions. void Force32Bit() { force_32bit_ = true; } // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This // will generate a fixup. JumpTable* CreateJumpTable(std::vector&& labels, Register base_reg) OVERRIDE; // Emit an ADD PC, X to dispatch a jump-table jump. This will generate a fixup. void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE; private: typedef uint16_t FixupId; // Fixup: branches and literal pool references. // // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This // depends on both the type of branch and the offset to which it is branching. The 16-bit // cbz and cbnz instructions may also need to be replaced with a separate 16-bit compare // instruction and a 16- or 32-bit branch instruction. Load from a literal pool can also be // 16-bit or 32-bit instruction and, if the method is large, we may need to use a sequence // of instructions to make up for the limited range of load literal instructions (up to // 4KiB for the 32-bit variant). When generating code for these insns we don't know the // size before hand, so we assume it is the smallest available size and determine the final // code offsets and sizes and emit code in FinalizeCode(). // // To handle this, we keep a record of every branch and literal pool load in the program. // The actual instruction encoding for these is delayed until we know the final size of // every instruction. When we bind a label to a branch we don't know the final location yet // as some preceding instructions may need to be expanded, so we record a non-final offset. // In FinalizeCode(), we expand the sizes of branches and literal loads that are out of // range. With each expansion, we need to update dependent Fixups, i.e. insntructios with // target on the other side of the expanded insn, as their offsets change and this may // trigger further expansion. // // All Fixups have a 'fixup id' which is a 16 bit unsigned number used to identify the // Fixup. For each unresolved label we keep a singly-linked list of all Fixups pointing // to it, using the fixup ids as links. The first link is stored in the label's position // (the label is linked but not bound), the following links are stored in the code buffer, // in the placeholder where we will eventually emit the actual code. class Fixup { public: // Branch type. enum Type : uint8_t { kConditional, // B. kUnconditional, // B. kUnconditionalLink, // BL. kUnconditionalLinkX, // BLX. kCompareAndBranchXZero, // cbz/cbnz. kLoadLiteralNarrow, // Load narrrow integer literal. kLoadLiteralWide, // Load wide integer literal. kLoadLiteralAddr, // Load address of literal (used for jump table). kLoadFPLiteralSingle, // Load FP literal single. kLoadFPLiteralDouble, // Load FP literal double. }; // Calculated size of branch instruction based on type and offset. enum Size : uint8_t { // Branch variants. kBranch16Bit, kBranch32Bit, // NOTE: We don't support branches which would require multiple instructions, i.e. // conditinoal branches beyond +-1MiB and unconditional branches beyond +-16MiB. // CBZ/CBNZ variants. kCbxz16Bit, // CBZ/CBNZ rX, label; X < 8; 7-bit positive offset. kCbxz32Bit, // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset. kCbxz48Bit, // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset. // Load integer literal variants. // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes. kLiteral1KiB, // LDR rX, label; 32-bit variant up to 4KiB offset; 4 bytes. kLiteral4KiB, // MOV rX, imm16 + ADD rX, pc + LDR rX, [rX]; X < 8; up to 64KiB offset; 8 bytes. kLiteral64KiB, // MOV rX, modimm + ADD rX, pc + LDR rX, [rX, #imm12]; up to 1MiB offset; 10 bytes. kLiteral1MiB, // NOTE: We don't provide the 12-byte version of kLiteralFar below where the LDR is 16-bit. // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes. kLiteralFar, // Load literal base addr. // ADR rX, label; X < 8; 8 bit immediate, shifted to 10 bit. 2 bytes. kLiteralAddr1KiB, // ADR rX, label; 4KiB offset. 4 bytes. kLiteralAddr4KiB, // MOV rX, imm16 + ADD rX, pc; 64KiB offset. 6 bytes. kLiteralAddr64KiB, // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc; any offset; 10 bytes. kLiteralAddrFar, // Load long or FP literal variants. // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes. kLongOrFPLiteral1KiB, // MOV ip, imm16 + ADD ip, pc + VLDR s/dX, [IP, #0]; up to 64KiB offset; 10 bytes. kLongOrFPLiteral64KiB, // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes. kLongOrFPLiteralFar, }; // Unresolved branch possibly with a condition. static Fixup Branch(uint32_t location, Type type, Size size = kBranch16Bit, Condition cond = AL) { DCHECK(type == kConditional || type == kUnconditional || type == kUnconditionalLink || type == kUnconditionalLinkX); DCHECK(size == kBranch16Bit || size == kBranch32Bit); DCHECK(size == kBranch32Bit || (type == kConditional || type == kUnconditional)); return Fixup(kNoRegister, kNoRegister, kNoSRegister, kNoDRegister, cond, type, size, location); } // Unresolved compare-and-branch instruction with a register and condition (EQ or NE). static Fixup CompareAndBranch(uint32_t location, Register rn, Condition cond) { DCHECK(cond == EQ || cond == NE); return Fixup(rn, kNoRegister, kNoSRegister, kNoDRegister, cond, kCompareAndBranchXZero, kCbxz16Bit, location); } // Load narrow literal. static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) { DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB || size == kLiteral1MiB || size == kLiteralFar); DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB)); return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister, AL, kLoadLiteralNarrow, size, location); } // Load wide literal. static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2, Size size = kLongOrFPLiteral1KiB) { DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB || size == kLongOrFPLiteralFar); DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB)); return Fixup(rt, rt2, kNoSRegister, kNoDRegister, AL, kLoadLiteralWide, size, location); } // Load FP single literal. static Fixup LoadSingleLiteral(uint32_t location, SRegister sd, Size size = kLongOrFPLiteral1KiB) { DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB || size == kLongOrFPLiteralFar); return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister, AL, kLoadFPLiteralSingle, size, location); } // Load FP double literal. static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd, Size size = kLongOrFPLiteral1KiB) { DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB || size == kLongOrFPLiteralFar); return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd, AL, kLoadFPLiteralDouble, size, location); } static Fixup LoadLiteralAddress(uint32_t location, Register rt, Size size) { DCHECK(size == kLiteralAddr1KiB || size == kLiteralAddr4KiB || size == kLiteralAddr64KiB || size == kLiteralAddrFar); DCHECK(!IsHighRegister(rt) || size != kLiteralAddr1KiB); return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister, AL, kLoadLiteralAddr, size, location); } Type GetType() const { return type_; } bool IsLoadLiteral() const { return GetType() >= kLoadLiteralNarrow; } // Returns whether the Fixup can expand from the original size. bool CanExpand() const { switch (GetOriginalSize()) { case kBranch32Bit: case kCbxz48Bit: case kLiteralFar: case kLiteralAddrFar: case kLongOrFPLiteralFar: return false; default: return true; } } Size GetOriginalSize() const { return original_size_; } Size GetSize() const { return size_; } uint32_t GetOriginalSizeInBytes() const; uint32_t GetSizeInBytes() const; uint32_t GetLocation() const { return location_; } uint32_t GetTarget() const { return target_; } uint32_t GetAdjustment() const { return adjustment_; } // Prepare the assembler->fixup_dependents_ and each Fixup's dependents_start_/count_. static void PrepareDependents(Thumb2Assembler* assembler); ArrayRef Dependents(const Thumb2Assembler& assembler) const { return ArrayRef(assembler.fixup_dependents_).SubArray(dependents_start_, dependents_count_); } // Resolve a branch when the target is known. void Resolve(uint32_t target) { DCHECK_EQ(target_, kUnresolved); DCHECK_NE(target, kUnresolved); target_ = target; } // Branches with bound targets that are in range can be emitted early. // However, the caller still needs to check if the branch doesn't go over // another Fixup that's not ready to be emitted. bool IsCandidateForEmitEarly() const; // Check if the current size is OK for current location_, target_ and adjustment_. // If not, increase the size. Return the size increase, 0 if unchanged. // If the target if after this Fixup, also add the difference to adjustment_, // so that we don't need to consider forward Fixups as their own dependencies. uint32_t AdjustSizeIfNeeded(uint32_t current_code_size); // Increase adjustments. This is called for dependents of a Fixup when its size changes. void IncreaseAdjustment(uint32_t increase) { adjustment_ += increase; } // Finalize the branch with an adjustment to the location. Both location and target are updated. void Finalize(uint32_t location_adjustment) { DCHECK_NE(target_, kUnresolved); location_ += location_adjustment; target_ += location_adjustment; } // Emit the branch instruction into the assembler buffer. This does the // encoding into the thumb instruction. void Emit(AssemblerBuffer* buffer, uint32_t code_size) const; private: Fixup(Register rn, Register rt2, SRegister sd, DRegister dd, Condition cond, Type type, Size size, uint32_t location) : rn_(rn), rt2_(rt2), sd_(sd), dd_(dd), cond_(cond), type_(type), original_size_(size), size_(size), location_(location), target_(kUnresolved), adjustment_(0u), dependents_count_(0u), dependents_start_(0u) { } static size_t SizeInBytes(Size size); // The size of padding added before the literal pool. static size_t LiteralPoolPaddingSize(uint32_t current_code_size); // Returns the offset from the PC-using insn to the target. int32_t GetOffset(uint32_t current_code_size) const; size_t IncreaseSize(Size new_size); int32_t LoadWideOrFpEncoding(Register rbase, int32_t offset) const; template static void ForExpandableDependencies(Thumb2Assembler* assembler, Function fn); static constexpr uint32_t kUnresolved = 0xffffffff; // Value for target_ for unresolved. const Register rn_; // Rn for cbnz/cbz, Rt for literal loads. Register rt2_; // For kLoadLiteralWide. SRegister sd_; // For kLoadFPLiteralSingle. DRegister dd_; // For kLoadFPLiteralDouble. const Condition cond_; const Type type_; Size original_size_; Size size_; uint32_t location_; // Offset into assembler buffer in bytes. uint32_t target_; // Offset into assembler buffer in bytes. uint32_t adjustment_; // The number of extra bytes inserted between location_ and target_. // Fixups that require adjustment when current size changes are stored in a single // array in the assembler and we store only the start index and count here. uint32_t dependents_count_; uint32_t dependents_start_; }; // Emit a single 32 or 16 bit data processing instruction. void EmitDataProcessing(Condition cond, Opcode opcode, SetCc set_cc, Register rn, Register rd, const ShifterOperand& so); // Emit a single 32 bit miscellaneous instruction. void Emit32Miscellaneous(uint8_t op1, uint8_t op2, uint32_t rest_encoding); // Emit reverse byte instructions: rev, rev16, revsh. void EmitReverseBytes(Register rd, Register rm, uint32_t op); // Emit a single 16 bit miscellaneous instruction. void Emit16Miscellaneous(uint32_t rest_encoding); // Must the instruction be 32 bits or can it possibly be encoded // in 16 bits? bool Is32BitDataProcessing(Condition cond, Opcode opcode, SetCc set_cc, Register rn, Register rd, const ShifterOperand& so); // Emit a 32 bit data processing instruction. void Emit32BitDataProcessing(Condition cond, Opcode opcode, SetCc set_cc, Register rn, Register rd, const ShifterOperand& so); // Emit a 16 bit data processing instruction. void Emit16BitDataProcessing(Condition cond, Opcode opcode, SetCc set_cc, Register rn, Register rd, const ShifterOperand& so); void Emit16BitAddSub(Condition cond, Opcode opcode, SetCc set_cc, Register rn, Register rd, const ShifterOperand& so); uint16_t EmitCompareAndBranch(Register rn, uint16_t prev, bool n); void EmitLoadStore(Condition cond, bool load, bool byte, bool half, bool is_signed, Register rd, const Address& ad); void EmitMemOpAddressMode3(Condition cond, int32_t mode, Register rd, const Address& ad); void EmitMultiMemOp(Condition cond, BlockAddressMode am, bool load, Register base, RegList regs); void EmitMulOp(Condition cond, int32_t opcode, Register rd, Register rn, Register rm, Register rs); void EmitVFPsss(Condition cond, int32_t opcode, SRegister sd, SRegister sn, SRegister sm); void EmitVLdmOrStm(int32_t rest, uint32_t reg, int nregs, Register rn, bool is_load, bool dbl, Condition cond); void EmitVFPddd(Condition cond, int32_t opcode, DRegister dd, DRegister dn, DRegister dm); void EmitVFPsd(Condition cond, int32_t opcode, SRegister sd, DRegister dm); void EmitVFPds(Condition cond, int32_t opcode, DRegister dd, SRegister sm); void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond); void EmitBranch(Condition cond, Label* label, bool link, bool x); static int32_t EncodeBranchOffset(int32_t offset, int32_t inst); static int DecodeBranchOffset(int32_t inst); void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount, Condition cond = AL, SetCc set_cc = kCcDontCare); void EmitShift(Register rd, Register rn, Shift shift, Register rm, Condition cond = AL, SetCc set_cc = kCcDontCare); static int32_t GetAllowedLoadOffsetBits(LoadOperandType type); static int32_t GetAllowedStoreOffsetBits(StoreOperandType type); bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits, int32_t offset, /*out*/ int32_t* add_to_base, /*out*/ int32_t* offset_for_load_store); int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits, Register temp, Register base, int32_t offset, Condition cond); // Whether the assembler can relocate branches. If false, unresolved branches will be // emitted on 32bits. bool can_relocate_branches_; // Force the assembler to use 32 bit thumb2 instructions. bool force_32bit_; // IfThen conditions. Used to check that conditional instructions match the preceding IT. Condition it_conditions_[4]; uint8_t it_cond_index_; Condition next_condition_; void SetItCondition(ItState s, Condition cond, uint8_t index); void CheckCondition(Condition cond) { CHECK_EQ(cond, next_condition_); // Move to the next condition if there is one. if (it_cond_index_ < 3) { ++it_cond_index_; next_condition_ = it_conditions_[it_cond_index_]; } else { next_condition_ = AL; } } void CheckConditionLastIt(Condition cond) { if (it_cond_index_ < 3) { // Check that the next condition is AL. This means that the // current condition is the last in the IT block. CHECK_EQ(it_conditions_[it_cond_index_ + 1], AL); } CheckCondition(cond); } FixupId AddFixup(Fixup fixup) { FixupId fixup_id = static_cast(fixups_.size()); fixups_.push_back(fixup); // For iterating using FixupId, we need the next id to be representable. DCHECK_EQ(static_cast(static_cast(fixups_.size())), fixups_.size()); return fixup_id; } Fixup* GetFixup(FixupId fixup_id) { DCHECK_LT(fixup_id, fixups_.size()); return &fixups_[fixup_id]; } void BindLabel(Label* label, uint32_t bound_pc); uint32_t BindLiterals(); void BindJumpTables(uint32_t code_size); void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size, std::deque* fixups_to_recalculate); uint32_t AdjustFixups(); void EmitFixups(uint32_t adjusted_code_size); void EmitLiterals(); void EmitJumpTables(); void PatchCFI(); static int16_t BEncoding16(int32_t offset, Condition cond); static int32_t BEncoding32(int32_t offset, Condition cond); static int16_t CbxzEncoding16(Register rn, int32_t offset, Condition cond); static int16_t CmpRnImm8Encoding16(Register rn, int32_t value); static int16_t AddRdnRmEncoding16(Register rdn, Register rm); static int32_t MovwEncoding32(Register rd, int32_t value); static int32_t MovtEncoding32(Register rd, int32_t value); static int32_t MovModImmEncoding32(Register rd, int32_t value); static int16_t LdrLitEncoding16(Register rt, int32_t offset); static int32_t LdrLitEncoding32(Register rt, int32_t offset); static int32_t LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset); static int32_t VldrsEncoding32(SRegister sd, Register rn, int32_t offset); static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset); static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset); static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset); static int16_t AdrEncoding16(Register rd, int32_t offset); static int32_t AdrEncoding32(Register rd, int32_t offset); ArenaVector fixups_; ArenaVector fixup_dependents_; // Use std::deque<> for literal labels to allow insertions at the end // without invalidating pointers and references to existing elements. ArenaDeque literals_; // Deduplication map for 64-bit literals, used for LoadDImmediate(). ArenaSafeMap literal64_dedupe_map_; // Jump table list. ArenaDeque jump_tables_; // Data for AdjustedPosition(), see the description there. uint32_t last_position_adjustment_; uint32_t last_old_position_; FixupId last_fixup_id_; }; } // namespace arm } // namespace art #endif // ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_