/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
#define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_

#include <deque>
#include <utility>
#include <vector>

#include "base/arena_containers.h"
#include "base/array_ref.h"
#include "base/logging.h"
#include "constants_arm.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/arm/assembler_arm.h"
#include "offsets.h"

namespace art {
namespace arm {

class Thumb2Assembler FINAL : public ArmAssembler {
 public:
  explicit Thumb2Assembler(ArenaAllocator* arena, bool can_relocate_branches = true)
      : ArmAssembler(arena),
        can_relocate_branches_(can_relocate_branches),
        force_32bit_(false),
        it_cond_index_(kNoItCondition),
        next_condition_(AL),
        fixups_(arena->Adapter(kArenaAllocAssembler)),
        fixup_dependents_(arena->Adapter(kArenaAllocAssembler)),
        literals_(arena->Adapter(kArenaAllocAssembler)),
        literal64_dedupe_map_(std::less<uint64_t>(), arena->Adapter(kArenaAllocAssembler)),
        jump_tables_(arena->Adapter(kArenaAllocAssembler)),
        last_position_adjustment_(0u),
        last_old_position_(0u),
        last_fixup_id_(0u) {
    cfi().DelayEmittingAdvancePCs();
  }

  virtual ~Thumb2Assembler() {
  }

  bool IsThumb() const OVERRIDE {
    return true;
  }

  bool IsForced32Bit() const {
    return force_32bit_;
  }

  bool CanRelocateBranches() const {
    return can_relocate_branches_;
  }

  void FinalizeCode() OVERRIDE;
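
  // Typical lifecycle (an illustrative sketch; CodeSize(),
  // FinalizeInstructions() and MemoryRegion are assumed here from the shared
  // Assembler interface, not declared in this header):
  //
  //   ArenaPool pool;
  //   ArenaAllocator arena(&pool);
  //   Thumb2Assembler assembler(&arena);
  //   ... emit instructions, bind labels ...
  //   assembler.FinalizeCode();                // Resolve fixups, emit literals.
  //   std::vector<uint8_t> code(assembler.CodeSize());
  //   MemoryRegion region(code.data(), code.size());
  //   assembler.FinalizeInstructions(region);  // Copy the finalized code out.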

  // Data-processing instructions.
  virtual void and_(Register rd, Register rn, const ShifterOperand& so,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void eor(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void sub(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void rsb(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void add(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void adc(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void sbc(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void rsc(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void teq(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  virtual void orr(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void orn(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void mov(Register rd, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void bic(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void mvn(Register rd, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  // Miscellaneous data-processing instructions.
  void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
  void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
  void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
  void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE;
  void rev(Register rd, Register rm, Condition cond = AL) OVERRIDE;
  void rev16(Register rd, Register rm, Condition cond = AL) OVERRIDE;
  void revsh(Register rd, Register rm, Condition cond = AL) OVERRIDE;

  // Multiply instructions.
  void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
  void mla(Register rd, Register rn, Register rm, Register ra,
           Condition cond = AL) OVERRIDE;
  void mls(Register rd, Register rn, Register rm, Register ra,
           Condition cond = AL) OVERRIDE;
  void smull(Register rd_lo, Register rd_hi, Register rn, Register rm,
             Condition cond = AL) OVERRIDE;
  void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
             Condition cond = AL) OVERRIDE;

  void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
  void udiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;

  // Bit field extract instructions.
  void sbfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;
  void ubfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;
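  // For example, ubfx(R0, R1, 8, 4) extracts bits [11:8] of R1 into the low
  // bits of R0, zero-extended; the sbfx counterpart sign-extends the field.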

  // Load/store instructions.
  void ldr(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void str(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  void ldrb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void strb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  void ldrh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void strh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  // Load/store register dual instructions using registers `rd` and `rd` + 1.
  void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  // Load/store register dual instructions using registers `rd` and `rd2`.
  // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding
  // does not require `rd` to be even, nor `rd2` to be equal to `rd` + 1.
  void ldrd(Register rd, Register rd2, const Address& ad, Condition cond);
  void strd(Register rd, Register rd2, const Address& ad, Condition cond);

  void ldm(BlockAddressMode am, Register base,
           RegList regs, Condition cond = AL) OVERRIDE;
  void stm(BlockAddressMode am, Register base,
           RegList regs, Condition cond = AL) OVERRIDE;

  void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE;
  void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE;

  void ldrex(Register rd, Register rn, uint16_t imm, Condition cond = AL);
  void strex(Register rd, Register rt, Register rn, uint16_t imm, Condition cond = AL);

  void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE;
  void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE;

  // Miscellaneous instructions.
  void clrex(Condition cond = AL) OVERRIDE;
  void nop(Condition cond = AL) OVERRIDE;

  void bkpt(uint16_t imm16) OVERRIDE;
  void svc(uint32_t imm24) OVERRIDE;

  // If-then
  void it(Condition firstcond, ItState i1 = kItOmitted,
          ItState i2 = kItOmitted, ItState i3 = kItOmitted) OVERRIDE;

  void cbz(Register rn, Label* target) OVERRIDE;
  void cbnz(Register rn, Label* target) OVERRIDE;

  // Floating point instructions (VFPv3-D16 and VFPv3-D32 profiles).
  void vmovsr(SRegister sn, Register rt, Condition cond = AL) OVERRIDE;
  void vmovrs(Register rt, SRegister sn, Condition cond = AL) OVERRIDE;
  void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
  void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
  void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmovs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmovd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;

  // Returns false if the immediate cannot be encoded.
  bool vmovs(SRegister sd, float s_imm, Condition cond = AL) OVERRIDE;
  bool vmovd(DRegister dd, double d_imm, Condition cond = AL) OVERRIDE;
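  // For example, 1.0 fits the 8-bit VFP modified-immediate encoding, so
  // vmovd(D0, 1.0) emits a single VMOV.F64 and returns true, while 0.1 does
  // not fit, so vmovd(D0, 0.1) returns false and the caller must materialize
  // the value another way (e.g. LoadDImmediate() below, which can fall back
  // to a literal pool load).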

  void vldrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
  void vstrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
  void vldrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
  void vstrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;

  void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;

  void vabss(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vabsd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vnegs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vnegd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vsqrts(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;

  void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcvtds(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtis(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtid(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtus(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtud(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;

  void vcmps(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcmpd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcmpsz(SRegister sd, Condition cond = AL) OVERRIDE;
  void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
  void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR

  void vcntd(DRegister dd, DRegister dm) OVERRIDE;
  void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE;

  void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
  void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
  void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
  void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
  void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
  void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
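
  // These helpers operate on the consecutive register range reg .. reg +
  // nregs - 1; e.g. vpushd(D8, 8) saves D8-D15, the AAPCS callee-saved
  // double-precision registers.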

  // Branch instructions.
  void b(Label* label, Condition cond = AL);
  void bl(Label* label, Condition cond = AL);
  void blx(Label* label);
  void blx(Register rm, Condition cond = AL) OVERRIDE;
  void bx(Register rm, Condition cond = AL) OVERRIDE;

  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Asr(Register rd, Register rm, uint32_t shift_imm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Ror(Register rd, Register rm, uint32_t shift_imm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Rrx(Register rd, Register rm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void Lsl(Register rd, Register rm, Register rn,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Lsr(Register rd, Register rm, Register rn,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Asr(Register rd, Register rm, Register rn,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Ror(Register rd, Register rm, Register rn,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  void Push(Register rd, Condition cond = AL) OVERRIDE;
  void Pop(Register rd, Condition cond = AL) OVERRIDE;

  void PushList(RegList regs, Condition cond = AL) OVERRIDE;
  void PopList(RegList regs, Condition cond = AL) OVERRIDE;
  void StoreList(RegList regs, size_t stack_offset) OVERRIDE;
  void LoadList(RegList regs, size_t stack_offset) OVERRIDE;

  void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE;

  void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE;
  void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE;

  // Memory barriers.
  void dmb(DmbOptions flavor) OVERRIDE;

  // Get the final position of a label after local fixup based on the old position
  // recorded before FinalizeCode().
  uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE;

  using ArmAssembler::NewLiteral;  // Make the helper template visible.

  Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE;
  void LoadLiteral(Register rt, Literal* literal) OVERRIDE;
  void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE;
  void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE;
  void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE;

  // Add signed constant value to rd. May clobber IP.
  void AddConstant(Register rd, Register rn, int32_t value,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE;
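
  // Illustrative: AddConstant(R0, R1, 0x12345) cannot encode 0x12345 as a
  // Thumb-2 modified immediate, so the assembler may first materialize the
  // constant in IP (hence the "May clobber IP" note above) and then add IP
  // to R1.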

  // Load and Store. May clobber IP.
  void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
  void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE;
  void MarkExceptionHandler(Label* label) OVERRIDE;
  void LoadFromOffset(LoadOperandType type,
                      Register reg,
                      Register base,
                      int32_t offset,
                      Condition cond = AL) OVERRIDE;
  void StoreToOffset(StoreOperandType type,
                     Register reg,
                     Register base,
                     int32_t offset,
                     Condition cond = AL) OVERRIDE;
  void LoadSFromOffset(SRegister reg,
                       Register base,
                       int32_t offset,
                       Condition cond = AL) OVERRIDE;
  void StoreSToOffset(SRegister reg,
                      Register base,
                      int32_t offset,
                      Condition cond = AL) OVERRIDE;
  void LoadDFromOffset(DRegister reg,
                       Register base,
                       int32_t offset,
                       Condition cond = AL) OVERRIDE;
  void StoreDToOffset(DRegister reg,
                      Register base,
                      int32_t offset,
                      Condition cond = AL) OVERRIDE;

  bool ShifterOperandCanHold(Register rd,
                             Register rn,
                             Opcode opcode,
                             uint32_t immediate,
                             SetCc set_cc,
                             ShifterOperand* shifter_op) OVERRIDE;
  using ArmAssembler::ShifterOperandCanHold;  // Don't hide the non-virtual override.

  bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;

  static bool IsInstructionForExceptionHandling(uintptr_t pc);

  // Emit data (e.g. encoded instruction or immediate) to the instruction stream.
  void Emit32(int32_t value);  // Emit a 32 bit instruction in thumb format.
  void Emit16(int16_t value);  // Emit a 16 bit instruction in little endian format.
  void Bind(Label* label) OVERRIDE;

  // Force the assembler to generate 32 bit instructions.
  void Force32Bit() {
    force_32bit_ = true;
  }

  // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This
  // will generate a fixup.
  JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
  // Emit an ADD PC, X to dispatch a jump-table jump. This will generate a fixup.
  void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;

 private:
  typedef uint16_t FixupId;

  // Fixup: branches and literal pool references.
  //
  // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This
  // depends on both the type of branch and the offset to which it is branching. The 16-bit
  // cbz and cbnz instructions may also need to be replaced with a separate 16-bit compare
  // instruction and a 16- or 32-bit branch instruction. A load from the literal pool can
  // also be a 16-bit or 32-bit instruction and, if the method is large, we may need to use
  // a sequence of instructions to make up for the limited range of load literal
  // instructions (up to 4KiB for the 32-bit variant). When generating code for these insns
  // we don't know the size beforehand, so we assume the smallest available size, then
  // determine the final code offsets and sizes and emit the code in FinalizeCode().
  //
  // To handle this, we keep a record of every branch and literal pool load in the program.
  // The actual instruction encoding for these is delayed until we know the final size of
  // every instruction. When we bind a label to a branch we don't know the final location yet
  // as some preceding instructions may need to be expanded, so we record a non-final offset.
  // In FinalizeCode(), we expand the sizes of branches and literal loads that are out of
  // range. With each expansion, we need to update dependent Fixups, i.e. instructions with
  // a target on the other side of the expanded insn, as their offsets change and this may
  // trigger further expansion.
  //
  // All Fixups have a 'fixup id' which is a 16 bit unsigned number used to identify the
  // Fixup. For each unresolved label we keep a singly-linked list of all Fixups pointing
  // to it, using the fixup ids as links. The first link is stored in the label's position
  // (the label is linked but not bound), the following links are stored in the code buffer,
  // in the placeholder where we will eventually emit the actual code.
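  //
  // Worked example (an illustrative sketch): cbz(R0, &label) is first emitted
  // as a 16-bit CBZ (kCbxz16Bit below, a 7-bit positive offset). If expansions
  // of earlier fixups push the target out of that range, AdjustSizeIfNeeded()
  // grows this Fixup to kCbxz32Bit (CMP R0, #0 plus a 16-bit Bcc). The 2-byte
  // growth shifts all following code, so each dependent Fixup whose branch
  // spans this location gets IncreaseAdjustment() and may expand in turn.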

  class Fixup {
   public:
    // Branch type.
    enum Type : uint8_t {
      kConditional,             // B<cond>.
      kUnconditional,           // B.
      kUnconditionalLink,       // BL.
      kUnconditionalLinkX,      // BLX.
      kCompareAndBranchXZero,   // cbz/cbnz.
      kLoadLiteralNarrow,       // Load narrow integer literal.
      kLoadLiteralWide,         // Load wide integer literal.
      kLoadLiteralAddr,         // Load address of literal (used for jump table).
      kLoadFPLiteralSingle,     // Load FP literal single.
      kLoadFPLiteralDouble,     // Load FP literal double.
    };

    // Calculated size of branch instruction based on type and offset.
    enum Size : uint8_t {
      // Branch variants.
      kBranch16Bit,
      kBranch32Bit,
      // NOTE: We don't support branches which would require multiple instructions, i.e.
      // conditional branches beyond +-1MiB and unconditional branches beyond +-16MiB.

      // CBZ/CBNZ variants.
      kCbxz16Bit,   // CBZ/CBNZ rX, label; X < 8; 7-bit positive offset.
      kCbxz32Bit,   // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset.
      kCbxz48Bit,   // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset.

      // Load integer literal variants.
      // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes.
      kLiteral1KiB,
      // LDR rX, label; 32-bit variant up to 4KiB offset; 4 bytes.
      kLiteral4KiB,
      // MOV rX, imm16 + ADD rX, pc + LDR rX, [rX]; X < 8; up to 64KiB offset; 8 bytes.
      kLiteral64KiB,
      // MOV rX, modimm + ADD rX, pc + LDR rX, [rX, #imm12]; up to 1MiB offset; 10 bytes.
      kLiteral1MiB,
      // NOTE: We don't provide the 12-byte version of kLiteralFar below where the LDR is 16-bit.
      // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes.
      kLiteralFar,

      // Load literal base addr.
      // ADR rX, label; X < 8; 8 bit immediate, shifted to 10 bit. 2 bytes.
      kLiteralAddr1KiB,
      // ADR rX, label; 4KiB offset. 4 bytes.
      kLiteralAddr4KiB,
      // MOV rX, imm16 + ADD rX, pc; 64KiB offset. 6 bytes.
      kLiteralAddr64KiB,
      // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc; any offset; 10 bytes.
      kLiteralAddrFar,

      // Load long or FP literal variants.
      // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
      kLongOrFPLiteral1KiB,
      // MOV ip, imm16 + ADD ip, pc + VLDR s/dX, [IP, #0]; up to 64KiB offset; 10 bytes.
      kLongOrFPLiteral64KiB,
      // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes.
      kLongOrFPLiteralFar,
    };

    // Unresolved branch possibly with a condition.
    static Fixup Branch(uint32_t location, Type type, Size size = kBranch16Bit,
                        Condition cond = AL) {
      DCHECK(type == kConditional || type == kUnconditional ||
             type == kUnconditionalLink || type == kUnconditionalLinkX);
      DCHECK(size == kBranch16Bit || size == kBranch32Bit);
      DCHECK(size == kBranch32Bit || (type == kConditional || type == kUnconditional));
      return Fixup(kNoRegister, kNoRegister, kNoSRegister, kNoDRegister,
                   cond, type, size, location);
    }

    // Unresolved compare-and-branch instruction with a register and condition (EQ or NE).
    static Fixup CompareAndBranch(uint32_t location, Register rn, Condition cond) {
      DCHECK(cond == EQ || cond == NE);
      return Fixup(rn, kNoRegister, kNoSRegister, kNoDRegister,
                   cond, kCompareAndBranchXZero, kCbxz16Bit, location);
    }

    // Load narrow literal.
    static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) {
      DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
             size == kLiteral1MiB || size == kLiteralFar);
      DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
                   AL, kLoadLiteralNarrow, size, location);
    }

    // Load wide literal.
    static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2,
                                 Size size = kLongOrFPLiteral1KiB) {
      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
             size == kLongOrFPLiteralFar);
      DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
      return Fixup(rt, rt2, kNoSRegister, kNoDRegister,
                   AL, kLoadLiteralWide, size, location);
    }

    // Load FP single literal.
    static Fixup LoadSingleLiteral(uint32_t location, SRegister sd,
                                   Size size = kLongOrFPLiteral1KiB) {
      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
             size == kLongOrFPLiteralFar);
      return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister,
                   AL, kLoadFPLiteralSingle, size, location);
    }

    // Load FP double literal.
    static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd,
                                   Size size = kLongOrFPLiteral1KiB) {
      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
             size == kLongOrFPLiteralFar);
      return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd,
                   AL, kLoadFPLiteralDouble, size, location);
    }

    static Fixup LoadLiteralAddress(uint32_t location, Register rt, Size size) {
      DCHECK(size == kLiteralAddr1KiB || size == kLiteralAddr4KiB || size == kLiteralAddr64KiB ||
             size == kLiteralAddrFar);
      DCHECK(!IsHighRegister(rt) || size != kLiteralAddr1KiB);
      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
                   AL, kLoadLiteralAddr, size, location);
    }

    Type GetType() const {
      return type_;
    }

    bool IsLoadLiteral() const {
      return GetType() >= kLoadLiteralNarrow;
    }

    // Returns whether the Fixup can expand from the original size.
    bool CanExpand() const {
      switch (GetOriginalSize()) {
        case kBranch32Bit:
        case kCbxz48Bit:
        case kLiteralFar:
        case kLiteralAddrFar:
        case kLongOrFPLiteralFar:
          return false;
        default:
          return true;
      }
    }

    Size GetOriginalSize() const {
      return original_size_;
    }

    Size GetSize() const {
      return size_;
    }

    uint32_t GetOriginalSizeInBytes() const;

    uint32_t GetSizeInBytes() const;

    uint32_t GetLocation() const {
      return location_;
    }

    uint32_t GetTarget() const {
      return target_;
    }

    uint32_t GetAdjustment() const {
      return adjustment_;
    }

    // Prepare the assembler->fixup_dependents_ and each Fixup's dependents_start_/count_.
    static void PrepareDependents(Thumb2Assembler* assembler);

    ArrayRef<const FixupId> Dependents(const Thumb2Assembler& assembler) const {
      return ArrayRef<const FixupId>(assembler.fixup_dependents_).SubArray(dependents_start_,
                                                                           dependents_count_);
    }

    // Resolve a branch when the target is known.
    void Resolve(uint32_t target) {
      DCHECK_EQ(target_, kUnresolved);
      DCHECK_NE(target, kUnresolved);
      target_ = target;
    }

    // Branches with bound targets that are in range can be emitted early.
    // However, the caller still needs to check if the branch doesn't go over
    // another Fixup that's not ready to be emitted.
    bool IsCandidateForEmitEarly() const;

    // Check if the current size is OK for current location_, target_ and adjustment_.
    // If not, increase the size. Return the size increase, 0 if unchanged.
    // If the target is after this Fixup, also add the difference to adjustment_,
    // so that we don't need to consider forward Fixups as their own dependencies.
    uint32_t AdjustSizeIfNeeded(uint32_t current_code_size);

    // Increase adjustments. This is called for dependents of a Fixup when its size changes.
    void IncreaseAdjustment(uint32_t increase) {
      adjustment_ += increase;
    }

    // Finalize the branch with an adjustment to the location. Both location and target are updated.
    void Finalize(uint32_t location_adjustment) {
      DCHECK_NE(target_, kUnresolved);
      location_ += location_adjustment;
      target_ += location_adjustment;
    }

    // Emit the branch instruction into the assembler buffer. This does the
    // encoding into the thumb instruction.
    void Emit(AssemblerBuffer* buffer, uint32_t code_size) const;

   private:
    Fixup(Register rn, Register rt2, SRegister sd, DRegister dd,
          Condition cond, Type type, Size size, uint32_t location)
        : rn_(rn),
          rt2_(rt2),
          sd_(sd),
          dd_(dd),
          cond_(cond),
          type_(type),
          original_size_(size), size_(size),
          location_(location),
          target_(kUnresolved),
          adjustment_(0u),
          dependents_count_(0u),
          dependents_start_(0u) {
    }

    static size_t SizeInBytes(Size size);

    // The size of padding added before the literal pool.
    static size_t LiteralPoolPaddingSize(uint32_t current_code_size);

    // Returns the offset from the PC-using insn to the target.
    int32_t GetOffset(uint32_t current_code_size) const;
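
    // Note: the "PC" an instruction observes is its own address + 4 in Thumb
    // state, and PC-relative loads (LDR literal, ADR, VLDR) align that base
    // down to a multiple of 4. Illustrative: an LDR literal at location 0x1A
    // uses AlignDown(0x1A + 4, 4) = 0x1C as the base for its offset.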

    size_t IncreaseSize(Size new_size);

    int32_t LoadWideOrFpEncoding(Register rbase, int32_t offset) const;

    template <typename Function>
    static void ForExpandableDependencies(Thumb2Assembler* assembler, Function fn);

    static constexpr uint32_t kUnresolved = 0xffffffff;  // Value for target_ for unresolved.

    const Register rn_;   // Rn for cbnz/cbz, Rt for literal loads.
    Register rt2_;        // For kLoadLiteralWide.
    SRegister sd_;        // For kLoadFPLiteralSingle.
    DRegister dd_;        // For kLoadFPLiteralDouble.
    const Condition cond_;
    const Type type_;
    Size original_size_;
    Size size_;
    uint32_t location_;     // Offset into assembler buffer in bytes.
    uint32_t target_;       // Offset into assembler buffer in bytes.
    uint32_t adjustment_;   // The number of extra bytes inserted between location_ and target_.
    // Fixups that require adjustment when current size changes are stored in a single
    // array in the assembler and we store only the start index and count here.
    uint32_t dependents_count_;
    uint32_t dependents_start_;
  };

  // Emit a single 32 or 16 bit data processing instruction.
  void EmitDataProcessing(Condition cond,
                          Opcode opcode,
                          SetCc set_cc,
                          Register rn,
                          Register rd,
                          const ShifterOperand& so);

  // Emit a single 32 bit miscellaneous instruction.
  void Emit32Miscellaneous(uint8_t op1,
                           uint8_t op2,
                           uint32_t rest_encoding);

  // Emit reverse byte instructions: rev, rev16, revsh.
  void EmitReverseBytes(Register rd, Register rm, uint32_t op);

  // Emit a single 16 bit miscellaneous instruction.
  void Emit16Miscellaneous(uint32_t rest_encoding);

  // Must the instruction be 32 bits or can it possibly be encoded
  // in 16 bits?
  bool Is32BitDataProcessing(Condition cond,
                             Opcode opcode,
                             SetCc set_cc,
                             Register rn,
                             Register rd,
                             const ShifterOperand& so);

  // Emit a 32 bit data processing instruction.
  void Emit32BitDataProcessing(Condition cond,
                               Opcode opcode,
                               SetCc set_cc,
                               Register rn,
                               Register rd,
                               const ShifterOperand& so);
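
  // Illustrative: add(R0, R0, ShifterOperand(R1)) with low registers and
  // set_cc = kCcDontCare can usually take a 16-bit encoding, while high
  // registers, most shifted-register operands, or flag behavior that the
  // 16-bit forms cannot provide force the 32-bit encoding (as does
  // Force32Bit()); Is32BitDataProcessing() makes that choice.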

  // Emit a 16 bit data processing instruction.
  void Emit16BitDataProcessing(Condition cond,
                               Opcode opcode,
                               SetCc set_cc,
                               Register rn,
                               Register rd,
                               const ShifterOperand& so);

  void Emit16BitAddSub(Condition cond,
                       Opcode opcode,
                       SetCc set_cc,
                       Register rn,
                       Register rd,
                       const ShifterOperand& so);

  uint16_t EmitCompareAndBranch(Register rn, uint16_t prev, bool n);

  void EmitLoadStore(Condition cond,
                     bool load,
                     bool byte,
                     bool half,
                     bool is_signed,
                     Register rd,
                     const Address& ad);

  void EmitMemOpAddressMode3(Condition cond,
                             int32_t mode,
                             Register rd,
                             const Address& ad);

  void EmitMultiMemOp(Condition cond,
                      BlockAddressMode am,
                      bool load,
                      Register base,
                      RegList regs);

  void EmitMulOp(Condition cond,
                 int32_t opcode,
                 Register rd,
                 Register rn,
                 Register rm,
                 Register rs);

  void EmitVFPsss(Condition cond,
                  int32_t opcode,
                  SRegister sd,
                  SRegister sn,
                  SRegister sm);

  void EmitVLdmOrStm(int32_t rest,
                     uint32_t reg,
                     int nregs,
                     Register rn,
                     bool is_load,
                     bool dbl,
                     Condition cond);

  void EmitVFPddd(Condition cond,
                  int32_t opcode,
                  DRegister dd,
                  DRegister dn,
                  DRegister dm);

  void EmitVFPsd(Condition cond,
                 int32_t opcode,
                 SRegister sd,
                 DRegister dm);

  void EmitVFPds(Condition cond,
                 int32_t opcode,
                 DRegister dd,
                 SRegister sm);

  void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond);

  void EmitBranch(Condition cond, Label* label, bool link, bool x);
  static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
  static int DecodeBranchOffset(int32_t inst);
  void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount,
                 Condition cond = AL, SetCc set_cc = kCcDontCare);
  void EmitShift(Register rd, Register rn, Shift shift, Register rm,
                 Condition cond = AL, SetCc set_cc = kCcDontCare);

  static int32_t GetAllowedLoadOffsetBits(LoadOperandType type);
  static int32_t GetAllowedStoreOffsetBits(StoreOperandType type);
  bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
                               int32_t offset,
                               /*out*/ int32_t* add_to_base,
                               /*out*/ int32_t* offset_for_load_store);
  int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits,
                                Register temp,
                                Register base,
                                int32_t offset,
                                Condition cond);

  // Whether the assembler can relocate branches. If false, unresolved branches will be
  // emitted as 32-bit instructions.
  bool can_relocate_branches_;

  // Force the assembler to use 32 bit thumb2 instructions.
  bool force_32bit_;

  // IfThen conditions. Used to check that conditional instructions match the preceding IT.
  Condition it_conditions_[4];
  uint8_t it_cond_index_;
  Condition next_condition_;

  void SetItCondition(ItState s, Condition cond, uint8_t index);

  void CheckCondition(Condition cond) {
    CHECK_EQ(cond, next_condition_);

    // Move to the next condition if there is one.
    if (it_cond_index_ < 3) {
      ++it_cond_index_;
      next_condition_ = it_conditions_[it_cond_index_];
    } else {
      next_condition_ = AL;
    }
  }

  void CheckConditionLastIt(Condition cond) {
    if (it_cond_index_ < 3) {
      // Check that the next condition is AL. This means that the
      // current condition is the last in the IT block.
      CHECK_EQ(it_conditions_[it_cond_index_ + 1], AL);
    }
    CheckCondition(cond);
  }
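
  // Illustrative: it(EQ, kItElse) starts an ITE block, so the next emitted
  // instruction must use condition EQ and the one after it NE.
  // CheckCondition() verifies each conditional instruction against this
  // recorded sequence; CheckConditionLastIt() also checks that the
  // instruction is the last one in the IT block.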

  FixupId AddFixup(Fixup fixup) {
    FixupId fixup_id = static_cast<FixupId>(fixups_.size());
    fixups_.push_back(fixup);
    // For iterating using FixupId, we need the next id to be representable.
    DCHECK_EQ(static_cast<size_t>(static_cast<FixupId>(fixups_.size())), fixups_.size());
    return fixup_id;
  }

  Fixup* GetFixup(FixupId fixup_id) {
    DCHECK_LT(fixup_id, fixups_.size());
    return &fixups_[fixup_id];
  }

  void BindLabel(Label* label, uint32_t bound_pc);
  uint32_t BindLiterals();
  void BindJumpTables(uint32_t code_size);
  void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
                           std::deque<FixupId>* fixups_to_recalculate);
  uint32_t AdjustFixups();
  void EmitFixups(uint32_t adjusted_code_size);
  void EmitLiterals();
  void EmitJumpTables();
  void PatchCFI();

  static int16_t BEncoding16(int32_t offset, Condition cond);
  static int32_t BEncoding32(int32_t offset, Condition cond);
  static int16_t CbxzEncoding16(Register rn, int32_t offset, Condition cond);
  static int16_t CmpRnImm8Encoding16(Register rn, int32_t value);
  static int16_t AddRdnRmEncoding16(Register rdn, Register rm);
  static int32_t MovwEncoding32(Register rd, int32_t value);
  static int32_t MovtEncoding32(Register rd, int32_t value);
  static int32_t MovModImmEncoding32(Register rd, int32_t value);
  static int16_t LdrLitEncoding16(Register rt, int32_t offset);
  static int32_t LdrLitEncoding32(Register rt, int32_t offset);
  static int32_t LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset);
  static int32_t VldrsEncoding32(SRegister sd, Register rn, int32_t offset);
  static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset);
  static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset);
  static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset);
  static int16_t AdrEncoding16(Register rd, int32_t offset);
  static int32_t AdrEncoding32(Register rd, int32_t offset);

  ArenaVector<Fixup> fixups_;
  ArenaVector<FixupId> fixup_dependents_;

  // Use std::deque<> for literal labels to allow insertions at the end
  // without invalidating pointers and references to existing elements.
  ArenaDeque<Literal> literals_;

  // Deduplication map for 64-bit literals, used for LoadDImmediate().
  ArenaSafeMap<uint64_t, Literal*> literal64_dedupe_map_;

  // Jump table list.
  ArenaDeque<JumpTable> jump_tables_;

  // Data for GetAdjustedPosition(), see the description there.
  uint32_t last_position_adjustment_;
  uint32_t last_old_position_;
  FixupId last_fixup_id_;
};

}  // namespace arm
}  // namespace art

#endif  // ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_