1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
18 #define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
19 
20 #include <deque>
21 #include <utility>
22 #include <vector>
23 
24 #include "base/arena_containers.h"
25 #include "base/array_ref.h"
26 #include "base/logging.h"
27 #include "constants_arm.h"
28 #include "utils/arm/managed_register_arm.h"
29 #include "utils/arm/assembler_arm.h"
30 #include "offsets.h"
31 
32 namespace art {
33 namespace arm {
34 
35 class Thumb2Assembler FINAL : public ArmAssembler {
36  public:
  // Constructs a Thumb2 assembler whose bookkeeping containers (fixups, literals,
  // jump tables) are allocated from `arena`.
  // `can_relocate_branches` is stored and exposed via CanRelocateBranches();
  // presumably it gates branch relocation/expansion during FinalizeCode() —
  // TODO confirm against the FinalizeCode() implementation.
  explicit Thumb2Assembler(ArenaAllocator* arena, bool can_relocate_branches = true)
      : ArmAssembler(arena),
        can_relocate_branches_(can_relocate_branches),
        force_32bit_(false),
        it_cond_index_(kNoItCondition),
        next_condition_(AL),
        fixups_(arena->Adapter(kArenaAllocAssembler)),
        fixup_dependents_(arena->Adapter(kArenaAllocAssembler)),
        literals_(arena->Adapter(kArenaAllocAssembler)),
        literal64_dedupe_map_(std::less<uint64_t>(), arena->Adapter(kArenaAllocAssembler)),
        jump_tables_(arena->Adapter(kArenaAllocAssembler)),
        last_position_adjustment_(0u),
        last_old_position_(0u),
        last_fixup_id_(0u) {
    // Code offsets can still move during fixup expansion (see GetAdjustedPosition()),
    // so postpone emitting CFI advance-PC entries until positions are final.
    cfi().DelayEmittingAdvancePCs();
  }
53 
  // Empty: all owned containers use arena-backed allocators (see constructor),
  // so there is nothing to release beyond what the base class handles.
  virtual ~Thumb2Assembler() {
  }
56 
IsThumb()57   bool IsThumb() const OVERRIDE {
58     return true;
59   }
60 
IsForced32Bit()61   bool IsForced32Bit() const {
62     return force_32bit_;
63   }
64 
CanRelocateBranches()65   bool CanRelocateBranches() const {
66     return can_relocate_branches_;
67   }
68 
69   void FinalizeCode() OVERRIDE;
70 
71   // Data-processing instructions.
72   virtual void and_(Register rd, Register rn, const ShifterOperand& so,
73                     Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
74 
75   virtual void eor(Register rd, Register rn, const ShifterOperand& so,
76                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
77 
78   virtual void sub(Register rd, Register rn, const ShifterOperand& so,
79                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
80 
81   virtual void rsb(Register rd, Register rn, const ShifterOperand& so,
82                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
83 
84   virtual void add(Register rd, Register rn, const ShifterOperand& so,
85                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
86 
87   virtual void adc(Register rd, Register rn, const ShifterOperand& so,
88                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
89 
90   virtual void sbc(Register rd, Register rn, const ShifterOperand& so,
91                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
92 
93   virtual void rsc(Register rd, Register rn, const ShifterOperand& so,
94                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
95 
96   void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
97 
98   void teq(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
99 
100   void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
101 
102   void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
103 
104   virtual void orr(Register rd, Register rn, const ShifterOperand& so,
105                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
106 
107   virtual void orn(Register rd, Register rn, const ShifterOperand& so,
108                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
109 
110   virtual void mov(Register rd, const ShifterOperand& so,
111                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
112 
113   virtual void bic(Register rd, Register rn, const ShifterOperand& so,
114                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
115 
116   virtual void mvn(Register rd, const ShifterOperand& so,
117                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
118 
119   // Miscellaneous data-processing instructions.
120   void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
121   void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
122   void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
123   void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE;
124   void rev(Register rd, Register rm, Condition cond = AL) OVERRIDE;
125   void rev16(Register rd, Register rm, Condition cond = AL) OVERRIDE;
126   void revsh(Register rd, Register rm, Condition cond = AL) OVERRIDE;
127 
128   // Multiply instructions.
129   void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
130   void mla(Register rd, Register rn, Register rm, Register ra,
131            Condition cond = AL) OVERRIDE;
132   void mls(Register rd, Register rn, Register rm, Register ra,
133            Condition cond = AL) OVERRIDE;
134   void smull(Register rd_lo, Register rd_hi, Register rn, Register rm,
135              Condition cond = AL) OVERRIDE;
136   void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
137              Condition cond = AL) OVERRIDE;
138 
139   void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
140   void udiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
141 
142   // Bit field extract instructions.
143   void sbfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;
144   void ubfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;
145 
146   // Load/store instructions.
147   void ldr(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
148   void str(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
149 
150   void ldrb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
151   void strb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
152 
153   void ldrh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
154   void strh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
155 
156   void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
157   void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
158 
159   // Load/store register dual instructions using registers `rd` and `rd` + 1.
160   void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
161   void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
162 
163   // Load/store register dual instructions using registers `rd` and `rd2`.
164   // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding
  // does not require `rd` to be even, nor `rd2` to be equal to `rd` + 1.
166   void ldrd(Register rd, Register rd2, const Address& ad, Condition cond);
167   void strd(Register rd, Register rd2, const Address& ad, Condition cond);
168 
169 
170   void ldm(BlockAddressMode am, Register base,
171            RegList regs, Condition cond = AL) OVERRIDE;
172   void stm(BlockAddressMode am, Register base,
173            RegList regs, Condition cond = AL) OVERRIDE;
174 
175   void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE;
176   void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE;
177 
178   void ldrex(Register rd, Register rn, uint16_t imm, Condition cond = AL);
179   void strex(Register rd, Register rt, Register rn, uint16_t imm, Condition cond = AL);
180 
181   void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE;
182   void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE;
183 
184   // Miscellaneous instructions.
185   void clrex(Condition cond = AL) OVERRIDE;
186   void nop(Condition cond = AL) OVERRIDE;
187 
188   void bkpt(uint16_t imm16) OVERRIDE;
189   void svc(uint32_t imm24) OVERRIDE;
190 
191   // If-then
192   void it(Condition firstcond, ItState i1 = kItOmitted,
193         ItState i2 = kItOmitted, ItState i3 = kItOmitted) OVERRIDE;
194 
195   void cbz(Register rn, Label* target) OVERRIDE;
196   void cbnz(Register rn, Label* target) OVERRIDE;
197 
198   // Floating point instructions (VFPv3-D16 and VFPv3-D32 profiles).
199   void vmovsr(SRegister sn, Register rt, Condition cond = AL) OVERRIDE;
200   void vmovrs(Register rt, SRegister sn, Condition cond = AL) OVERRIDE;
201   void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
202   void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL) OVERRIDE;
203   void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
204   void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL) OVERRIDE;
205   void vmovs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
206   void vmovd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
207 
208   // Returns false if the immediate cannot be encoded.
209   bool vmovs(SRegister sd, float s_imm, Condition cond = AL) OVERRIDE;
210   bool vmovd(DRegister dd, double d_imm, Condition cond = AL) OVERRIDE;
211 
212   void vldrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
213   void vstrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
214   void vldrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
215   void vstrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
216 
217   void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
218   void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
219   void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
220   void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
221   void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
222   void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
223   void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
224   void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
225   void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
226   void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
227   void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
228   void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
229 
230   void vabss(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
231   void vabsd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
232   void vnegs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
233   void vnegd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
234   void vsqrts(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
235   void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
236 
237   void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
238   void vcvtds(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
239   void vcvtis(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
240   void vcvtid(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
241   void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
242   void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
243   void vcvtus(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
244   void vcvtud(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
245   void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
246   void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
247 
248   void vcmps(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
249   void vcmpd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
250   void vcmpsz(SRegister sd, Condition cond = AL) OVERRIDE;
251   void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
252   void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
253 
254   void vcntd(DRegister dd, DRegister dm) OVERRIDE;
255   void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE;
256 
257   void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
258   void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
259   void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
260   void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
261   void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
262   void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
263 
264   // Branch instructions.
265   void b(Label* label, Condition cond = AL);
266   void bl(Label* label, Condition cond = AL);
267   void blx(Label* label);
268   void blx(Register rm, Condition cond = AL) OVERRIDE;
269   void bx(Register rm, Condition cond = AL) OVERRIDE;
270 
271   virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
272                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
273   virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
274                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
275   virtual void Asr(Register rd, Register rm, uint32_t shift_imm,
276                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
277   virtual void Ror(Register rd, Register rm, uint32_t shift_imm,
278                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
279   virtual void Rrx(Register rd, Register rm,
280                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
281 
282   virtual void Lsl(Register rd, Register rm, Register rn,
283                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
284   virtual void Lsr(Register rd, Register rm, Register rn,
285                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
286   virtual void Asr(Register rd, Register rm, Register rn,
287                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
288   virtual void Ror(Register rd, Register rm, Register rn,
289                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
290 
291   void Push(Register rd, Condition cond = AL) OVERRIDE;
292   void Pop(Register rd, Condition cond = AL) OVERRIDE;
293 
294   void PushList(RegList regs, Condition cond = AL) OVERRIDE;
295   void PopList(RegList regs, Condition cond = AL) OVERRIDE;
296   void StoreList(RegList regs, size_t stack_offset) OVERRIDE;
297   void LoadList(RegList regs, size_t stack_offset) OVERRIDE;
298 
299   void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE;
300 
301   void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE;
302   void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE;
303 
304   // Memory barriers.
305   void dmb(DmbOptions flavor) OVERRIDE;
306 
307   // Get the final position of a label after local fixup based on the old position
308   // recorded before FinalizeCode().
309   uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE;
310 
311   using ArmAssembler::NewLiteral;  // Make the helper template visible.
312 
313   Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE;
314   void LoadLiteral(Register rt, Literal* literal) OVERRIDE;
315   void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE;
316   void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE;
317   void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE;
318 
319   // Add signed constant value to rd. May clobber IP.
320   void AddConstant(Register rd, Register rn, int32_t value,
321                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
322 
323   void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE;
324 
325   // Load and Store. May clobber IP.
326   void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
327   void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE;
328   void MarkExceptionHandler(Label* label) OVERRIDE;
329   void LoadFromOffset(LoadOperandType type,
330                       Register reg,
331                       Register base,
332                       int32_t offset,
333                       Condition cond = AL) OVERRIDE;
334   void StoreToOffset(StoreOperandType type,
335                      Register reg,
336                      Register base,
337                      int32_t offset,
338                      Condition cond = AL) OVERRIDE;
339   void LoadSFromOffset(SRegister reg,
340                        Register base,
341                        int32_t offset,
342                        Condition cond = AL) OVERRIDE;
343   void StoreSToOffset(SRegister reg,
344                       Register base,
345                       int32_t offset,
346                       Condition cond = AL) OVERRIDE;
347   void LoadDFromOffset(DRegister reg,
348                        Register base,
349                        int32_t offset,
350                        Condition cond = AL) OVERRIDE;
351   void StoreDToOffset(DRegister reg,
352                       Register base,
353                       int32_t offset,
354                       Condition cond = AL) OVERRIDE;
355 
356   bool ShifterOperandCanHold(Register rd,
357                              Register rn,
358                              Opcode opcode,
359                              uint32_t immediate,
360                              SetCc set_cc,
361                              ShifterOperand* shifter_op) OVERRIDE;
362   using ArmAssembler::ShifterOperandCanHold;  // Don't hide the non-virtual override.
363 
364   bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
365 
366 
367   static bool IsInstructionForExceptionHandling(uintptr_t pc);
368 
  // Emit data (e.g. encoded instruction or immediate) to the
  // instruction stream.
371   void Emit32(int32_t value);     // Emit a 32 bit instruction in thumb format.
372   void Emit16(int16_t value);     // Emit a 16 bit instruction in little endian format.
373   void Bind(Label* label) OVERRIDE;
374 
375   // Force the assembler to generate 32 bit instructions.
Force32Bit()376   void Force32Bit() {
377     force_32bit_ = true;
378   }
379 
380   // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This
381   // will generate a fixup.
382   JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
383   // Emit an ADD PC, X to dispatch a jump-table jump. This will generate a fixup.
384   void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;
385 
386  private:
387   typedef uint16_t FixupId;
388 
389   // Fixup: branches and literal pool references.
390   //
391   // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This
392   // depends on both the type of branch and the offset to which it is branching. The 16-bit
393   // cbz and cbnz instructions may also need to be replaced with a separate 16-bit compare
394   // instruction and a 16- or 32-bit branch instruction. Load from a literal pool can also be
395   // 16-bit or 32-bit instruction and, if the method is large, we may need to use a sequence
396   // of instructions to make up for the limited range of load literal instructions (up to
397   // 4KiB for the 32-bit variant). When generating code for these insns we don't know the
398   // size before hand, so we assume it is the smallest available size and determine the final
399   // code offsets and sizes and emit code in FinalizeCode().
400   //
401   // To handle this, we keep a record of every branch and literal pool load in the program.
402   // The actual instruction encoding for these is delayed until we know the final size of
403   // every instruction. When we bind a label to a branch we don't know the final location yet
404   // as some preceding instructions may need to be expanded, so we record a non-final offset.
405   // In FinalizeCode(), we expand the sizes of branches and literal loads that are out of
  // range. With each expansion, we need to update dependent Fixups, i.e. instructions with
407   // target on the other side of the expanded insn, as their offsets change and this may
408   // trigger further expansion.
409   //
410   // All Fixups have a 'fixup id' which is a 16 bit unsigned number used to identify the
411   // Fixup. For each unresolved label we keep a singly-linked list of all Fixups pointing
412   // to it, using the fixup ids as links. The first link is stored in the label's position
413   // (the label is linked but not bound), the following links are stored in the code buffer,
414   // in the placeholder where we will eventually emit the actual code.
415 
  // A single branch or literal-load placeholder whose final encoding/size is
  // decided in FinalizeCode(). See the comment block above for the overall scheme.
  class Fixup {
   public:
    // Branch type.
    enum Type : uint8_t {
      kConditional,               // B<cond>.
      kUnconditional,             // B.
      kUnconditionalLink,         // BL.
      kUnconditionalLinkX,        // BLX.
      kCompareAndBranchXZero,     // cbz/cbnz.
      kLoadLiteralNarrow,         // Load narrow integer literal.
      kLoadLiteralWide,           // Load wide integer literal.
      kLoadLiteralAddr,           // Load address of literal (used for jump table).
      kLoadFPLiteralSingle,       // Load FP literal single.
      kLoadFPLiteralDouble,       // Load FP literal double.
    };

    // Calculated size of branch instruction based on type and offset.
    enum Size : uint8_t {
      // Branch variants.
      kBranch16Bit,
      kBranch32Bit,
      // NOTE: We don't support branches which would require multiple instructions, i.e.
      // conditional branches beyond +-1MiB and unconditional branches beyond +-16MiB.

      // CBZ/CBNZ variants.
      kCbxz16Bit,   // CBZ/CBNZ rX, label; X < 8; 7-bit positive offset.
      kCbxz32Bit,   // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset.
      kCbxz48Bit,   // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset.

      // Load integer literal variants.
      // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes.
      kLiteral1KiB,
      // LDR rX, label; 32-bit variant up to 4KiB offset; 4 bytes.
      kLiteral4KiB,
      // MOV rX, imm16 + ADD rX, pc + LDR rX, [rX]; X < 8; up to 64KiB offset; 8 bytes.
      kLiteral64KiB,
      // MOV rX, modimm + ADD rX, pc + LDR rX, [rX, #imm12]; up to 1MiB offset; 10 bytes.
      kLiteral1MiB,
      // NOTE: We don't provide the 12-byte version of kLiteralFar below where the LDR is 16-bit.
      // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes.
      kLiteralFar,

      // Load literal base addr.
      // ADR rX, label; X < 8; 8 bit immediate, shifted to 10 bit. 2 bytes.
      kLiteralAddr1KiB,
      // ADR rX, label; 4KiB offset. 4 bytes.
      kLiteralAddr4KiB,
      // MOV rX, imm16 + ADD rX, pc; 64KiB offset. 6 bytes.
      kLiteralAddr64KiB,
      // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc; any offset; 10 bytes.
      kLiteralAddrFar,

      // Load long or FP literal variants.
      // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
      kLongOrFPLiteral1KiB,
      // MOV ip, imm16 + ADD ip, pc + VLDR s/dX, [IP, #0]; up to 64KiB offset; 10 bytes.
      kLongOrFPLiteral64KiB,
      // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes.
      kLongOrFPLiteralFar,
    };

    // Unresolved branch possibly with a condition.
    static Fixup Branch(uint32_t location, Type type, Size size = kBranch16Bit,
                        Condition cond = AL) {
      DCHECK(type == kConditional || type == kUnconditional ||
             type == kUnconditionalLink || type == kUnconditionalLinkX);
      DCHECK(size == kBranch16Bit || size == kBranch32Bit);
      // Link branches (BL/BLX) must start out as 32-bit.
      DCHECK(size == kBranch32Bit || (type == kConditional || type == kUnconditional));
      return Fixup(kNoRegister, kNoRegister, kNoSRegister, kNoDRegister,
                   cond, type, size, location);
    }

    // Unresolved compare-and-branch instruction with a register and condition (EQ or NE).
    static Fixup CompareAndBranch(uint32_t location, Register rn, Condition cond) {
      DCHECK(cond == EQ || cond == NE);
      return Fixup(rn, kNoRegister, kNoSRegister, kNoDRegister,
                   cond, kCompareAndBranchXZero, kCbxz16Bit, location);
    }

    // Load narrow literal.
    static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) {
      DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
             size == kLiteral1MiB || size == kLiteralFar);
      // kLiteral1KiB and kLiteral64KiB encodings require a low register (see Size comments).
      DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
                   AL, kLoadLiteralNarrow, size, location);
    }

    // Load wide literal.
    static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2,
                                 Size size = kLongOrFPLiteral1KiB) {
      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
             size == kLongOrFPLiteralFar);
      // NOTE(review): this DCHECK is vacuous — the DCHECK above restricts `size` to
      // kLongOrFPLiteral* values, which can never equal kLiteral1KiB/kLiteral64KiB.
      // Presumably copy-pasted from LoadNarrowLiteral; confirm whether a real
      // high-register constraint was intended for the wide-literal encodings.
      DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
      return Fixup(rt, rt2, kNoSRegister, kNoDRegister,
                   AL, kLoadLiteralWide, size, location);
    }

    // Load FP single literal.
    static Fixup LoadSingleLiteral(uint32_t location, SRegister sd,
                                   Size size = kLongOrFPLiteral1KiB) {
      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
             size == kLongOrFPLiteralFar);
      return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister,
                   AL, kLoadFPLiteralSingle, size, location);
    }

    // Load FP double literal.
    static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd,
                                   Size size = kLongOrFPLiteral1KiB) {
      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
             size == kLongOrFPLiteralFar);
      return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd,
                   AL, kLoadFPLiteralDouble, size, location);
    }

    // Load the address of a literal (used for jump tables).
    static Fixup LoadLiteralAddress(uint32_t location, Register rt, Size size) {
      DCHECK(size == kLiteralAddr1KiB || size == kLiteralAddr4KiB || size == kLiteralAddr64KiB ||
             size == kLiteralAddrFar);
      // The 16-bit ADR (kLiteralAddr1KiB) requires a low register.
      DCHECK(!IsHighRegister(rt) || size != kLiteralAddr1KiB);
      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
                   AL, kLoadLiteralAddr, size, location);
    }

    Type GetType() const {
      return type_;
    }

    bool IsLoadLiteral() const {
      // Relies on enum order: all literal-load types are declared after the branch types.
      return GetType() >= kLoadLiteralNarrow;
    }

    // Returns whether the Fixup can expand from the original size.
    bool CanExpand() const {
      switch (GetOriginalSize()) {
        case kBranch32Bit:
        case kCbxz48Bit:
        case kLiteralFar:
        case kLiteralAddrFar:
        case kLongOrFPLiteralFar:
          // Already at the largest variant of its kind; cannot grow further.
          return false;
        default:
          return true;
      }
    }

    Size GetOriginalSize() const {
      return original_size_;
    }

    Size GetSize() const {
      return size_;
    }

    uint32_t GetOriginalSizeInBytes() const;

    uint32_t GetSizeInBytes() const;

    // Offset of this Fixup's instruction in the assembler buffer, in bytes.
    uint32_t GetLocation() const {
      return location_;
    }

    // Target offset in the assembler buffer, or kUnresolved if not yet resolved.
    uint32_t GetTarget() const {
      return target_;
    }

    // Extra bytes inserted between location_ and target_ by expansions so far.
    uint32_t GetAdjustment() const {
      return adjustment_;
    }

    // Prepare the assembler->fixup_dependents_ and each Fixup's dependents_start_/count_.
    static void PrepareDependents(Thumb2Assembler* assembler);

    // The Fixups whose offsets must be adjusted when this one changes size,
    // stored as a slice of the assembler's shared fixup_dependents_ array.
    ArrayRef<const FixupId> Dependents(const Thumb2Assembler& assembler) const {
      return ArrayRef<const FixupId>(assembler.fixup_dependents_).SubArray(dependents_start_,
                                                                           dependents_count_);
    }

    // Resolve a branch when the target is known.
    void Resolve(uint32_t target) {
      DCHECK_EQ(target_, kUnresolved);
      DCHECK_NE(target, kUnresolved);
      target_ = target;
    }

    // Branches with bound targets that are in range can be emitted early.
    // However, the caller still needs to check if the branch doesn't go over
    // another Fixup that's not ready to be emitted.
    bool IsCandidateForEmitEarly() const;

    // Check if the current size is OK for current location_, target_ and adjustment_.
    // If not, increase the size. Return the size increase, 0 if unchanged.
    // If the target if after this Fixup, also add the difference to adjustment_,
    // so that we don't need to consider forward Fixups as their own dependencies.
    uint32_t AdjustSizeIfNeeded(uint32_t current_code_size);

    // Increase adjustments. This is called for dependents of a Fixup when its size changes.
    void IncreaseAdjustment(uint32_t increase) {
      adjustment_ += increase;
    }

    // Finalize the branch with an adjustment to the location. Both location and target are updated.
    void Finalize(uint32_t location_adjustment) {
      DCHECK_NE(target_, kUnresolved);
      location_ += location_adjustment;
      target_ += location_adjustment;
    }

    // Emit the branch instruction into the assembler buffer.  This does the
    // encoding into the thumb instruction.
    void Emit(AssemblerBuffer* buffer, uint32_t code_size) const;

   private:
    Fixup(Register rn, Register rt2, SRegister sd, DRegister dd,
          Condition cond, Type type, Size size, uint32_t location)
        : rn_(rn),
          rt2_(rt2),
          sd_(sd),
          dd_(dd),
          cond_(cond),
          type_(type),
          original_size_(size), size_(size),
          location_(location),
          target_(kUnresolved),
          adjustment_(0u),
          dependents_count_(0u),
          dependents_start_(0u) {
    }

    static size_t SizeInBytes(Size size);

    // The size of padding added before the literal pool.
    static size_t LiteralPoolPaddingSize(uint32_t current_code_size);

    // Returns the offset from the PC-using insn to the target.
    int32_t GetOffset(uint32_t current_code_size) const;

    size_t IncreaseSize(Size new_size);

    int32_t LoadWideOrFpEncoding(Register rbase, int32_t offset) const;

    template <typename Function>
    static void ForExpandableDependencies(Thumb2Assembler* assembler, Function fn);

    static constexpr uint32_t kUnresolved = 0xffffffff;     // Value for target_ for unresolved.

    const Register rn_;   // Rn for cbnz/cbz, Rt for literal loads.
    Register rt2_;        // For kLoadLiteralWide.
    SRegister sd_;        // For kLoadFPLiteralSingle.
    DRegister dd_;        // For kLoadFPLiteralDouble.
    const Condition cond_;
    const Type type_;
    Size original_size_;
    Size size_;
    uint32_t location_;     // Offset into assembler buffer in bytes.
    uint32_t target_;       // Offset into assembler buffer in bytes.
    uint32_t adjustment_;   // The number of extra bytes inserted between location_ and target_.
    // Fixups that require adjustment when current size changes are stored in a single
    // array in the assembler and we store only the start index and count here.
    uint32_t dependents_count_;
    uint32_t dependents_start_;
  };
678 
679   // Emit a single 32 or 16 bit data processing instruction.
680   void EmitDataProcessing(Condition cond,
681                           Opcode opcode,
682                           SetCc set_cc,
683                           Register rn,
684                           Register rd,
685                           const ShifterOperand& so);
686 
687   // Emit a single 32 bit miscellaneous instruction.
688   void Emit32Miscellaneous(uint8_t op1,
689                            uint8_t op2,
690                            uint32_t rest_encoding);
691 
692   // Emit reverse byte instructions: rev, rev16, revsh.
693   void EmitReverseBytes(Register rd, Register rm, uint32_t op);
694 
695   // Emit a single 16 bit miscellaneous instruction.
696   void Emit16Miscellaneous(uint32_t rest_encoding);
697 
698   // Must the instruction be 32 bits or can it possibly be encoded
699   // in 16 bits?
700   bool Is32BitDataProcessing(Condition cond,
701                              Opcode opcode,
702                              SetCc set_cc,
703                              Register rn,
704                              Register rd,
705                              const ShifterOperand& so);
706 
707   // Emit a 32 bit data processing instruction.
708   void Emit32BitDataProcessing(Condition cond,
709                                Opcode opcode,
710                                SetCc set_cc,
711                                Register rn,
712                                Register rd,
713                                const ShifterOperand& so);
714 
715   // Emit a 16 bit data processing instruction.
716   void Emit16BitDataProcessing(Condition cond,
717                                Opcode opcode,
718                                SetCc set_cc,
719                                Register rn,
720                                Register rd,
721                                const ShifterOperand& so);
722 
723   void Emit16BitAddSub(Condition cond,
724                        Opcode opcode,
725                        SetCc set_cc,
726                        Register rn,
727                        Register rd,
728                        const ShifterOperand& so);
729 
730   uint16_t EmitCompareAndBranch(Register rn, uint16_t prev, bool n);
731 
732   void EmitLoadStore(Condition cond,
733                      bool load,
734                      bool byte,
735                      bool half,
736                      bool is_signed,
737                      Register rd,
738                      const Address& ad);
739 
740   void EmitMemOpAddressMode3(Condition cond,
741                              int32_t mode,
742                              Register rd,
743                              const Address& ad);
744 
745   void EmitMultiMemOp(Condition cond,
746                       BlockAddressMode am,
747                       bool load,
748                       Register base,
749                       RegList regs);
750 
751   void EmitMulOp(Condition cond,
752                  int32_t opcode,
753                  Register rd,
754                  Register rn,
755                  Register rm,
756                  Register rs);
757 
758   void EmitVFPsss(Condition cond,
759                   int32_t opcode,
760                   SRegister sd,
761                   SRegister sn,
762                   SRegister sm);
763 
764   void EmitVLdmOrStm(int32_t rest,
765                      uint32_t reg,
766                      int nregs,
767                      Register rn,
768                      bool is_load,
769                      bool dbl,
770                      Condition cond);
771 
772   void EmitVFPddd(Condition cond,
773                   int32_t opcode,
774                   DRegister dd,
775                   DRegister dn,
776                   DRegister dm);
777 
778   void EmitVFPsd(Condition cond,
779                  int32_t opcode,
780                  SRegister sd,
781                  DRegister dm);
782 
783   void EmitVFPds(Condition cond,
784                  int32_t opcode,
785                  DRegister dd,
786                  SRegister sm);
787 
788   void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond);
789 
790   void EmitBranch(Condition cond, Label* label, bool link, bool x);
791   static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
792   static int DecodeBranchOffset(int32_t inst);
793   void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount,
794                  Condition cond = AL, SetCc set_cc = kCcDontCare);
795   void EmitShift(Register rd, Register rn, Shift shift, Register rm,
796                  Condition cond = AL, SetCc set_cc = kCcDontCare);
797 
798   static int32_t GetAllowedLoadOffsetBits(LoadOperandType type);
799   static int32_t GetAllowedStoreOffsetBits(StoreOperandType type);
800   bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
801                                int32_t offset,
802                                /*out*/ int32_t* add_to_base,
803                                /*out*/ int32_t* offset_for_load_store);
804   int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits,
805                                 Register temp,
806                                 Register base,
807                                 int32_t offset,
808                                 Condition cond);
809 
810   // Whether the assembler can relocate branches. If false, unresolved branches will be
811   // emitted on 32bits.
812   bool can_relocate_branches_;
813 
814   // Force the assembler to use 32 bit thumb2 instructions.
815   bool force_32bit_;
816 
817   // IfThen conditions.  Used to check that conditional instructions match the preceding IT.
818   Condition it_conditions_[4];
819   uint8_t it_cond_index_;
820   Condition next_condition_;
821 
822   void SetItCondition(ItState s, Condition cond, uint8_t index);
823 
CheckCondition(Condition cond)824   void CheckCondition(Condition cond) {
825     CHECK_EQ(cond, next_condition_);
826 
827     // Move to the next condition if there is one.
828     if (it_cond_index_ < 3) {
829       ++it_cond_index_;
830       next_condition_ = it_conditions_[it_cond_index_];
831     } else {
832       next_condition_ = AL;
833     }
834   }
835 
CheckConditionLastIt(Condition cond)836   void CheckConditionLastIt(Condition cond) {
837     if (it_cond_index_ < 3) {
838       // Check that the next condition is AL.  This means that the
839       // current condition is the last in the IT block.
840       CHECK_EQ(it_conditions_[it_cond_index_ + 1], AL);
841     }
842     CheckCondition(cond);
843   }
844 
AddFixup(Fixup fixup)845   FixupId AddFixup(Fixup fixup) {
846     FixupId fixup_id = static_cast<FixupId>(fixups_.size());
847     fixups_.push_back(fixup);
848     // For iterating using FixupId, we need the next id to be representable.
849     DCHECK_EQ(static_cast<size_t>(static_cast<FixupId>(fixups_.size())), fixups_.size());
850     return fixup_id;
851   }
852 
GetFixup(FixupId fixup_id)853   Fixup* GetFixup(FixupId fixup_id) {
854     DCHECK_LT(fixup_id, fixups_.size());
855     return &fixups_[fixup_id];
856   }
857 
858   void BindLabel(Label* label, uint32_t bound_pc);
859   uint32_t BindLiterals();
860   void BindJumpTables(uint32_t code_size);
861   void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
862                            std::deque<FixupId>* fixups_to_recalculate);
863   uint32_t AdjustFixups();
864   void EmitFixups(uint32_t adjusted_code_size);
865   void EmitLiterals();
866   void EmitJumpTables();
867   void PatchCFI();
868 
869   static int16_t BEncoding16(int32_t offset, Condition cond);
870   static int32_t BEncoding32(int32_t offset, Condition cond);
871   static int16_t CbxzEncoding16(Register rn, int32_t offset, Condition cond);
872   static int16_t CmpRnImm8Encoding16(Register rn, int32_t value);
873   static int16_t AddRdnRmEncoding16(Register rdn, Register rm);
874   static int32_t MovwEncoding32(Register rd, int32_t value);
875   static int32_t MovtEncoding32(Register rd, int32_t value);
876   static int32_t MovModImmEncoding32(Register rd, int32_t value);
877   static int16_t LdrLitEncoding16(Register rt, int32_t offset);
878   static int32_t LdrLitEncoding32(Register rt, int32_t offset);
879   static int32_t LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset);
880   static int32_t VldrsEncoding32(SRegister sd, Register rn, int32_t offset);
881   static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset);
882   static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset);
883   static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset);
884   static int16_t AdrEncoding16(Register rd, int32_t offset);
885   static int32_t AdrEncoding32(Register rd, int32_t offset);
886 
887   ArenaVector<Fixup> fixups_;
888   ArenaVector<FixupId> fixup_dependents_;
889 
890   // Use std::deque<> for literal labels to allow insertions at the end
891   // without invalidating pointers and references to existing elements.
892   ArenaDeque<Literal> literals_;
893 
894   // Deduplication map for 64-bit literals, used for LoadDImmediate().
895   ArenaSafeMap<uint64_t, Literal*> literal64_dedupe_map_;
896 
897   // Jump table list.
898   ArenaDeque<JumpTable> jump_tables_;
899 
900   // Data for AdjustedPosition(), see the description there.
901   uint32_t last_position_adjustment_;
902   uint32_t last_old_position_;
903   FixupId last_fixup_id_;
904 };
905 
906 }  // namespace arm
907 }  // namespace art
908 
909 #endif  // ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
910