1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
18 #define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
19 
#include <cstdint>
#include <limits>
#include <ostream>
#include <utility>
#include <vector>
22 
23 #include "base/macros.h"
24 #include "constants_mips64.h"
25 #include "globals.h"
26 #include "managed_register_mips64.h"
27 #include "offsets.h"
28 #include "utils/assembler.h"
29 #include "utils/label.h"
30 
31 namespace art {
32 namespace mips64 {
33 
34 static constexpr size_t kMips64WordSize = 4;
35 static constexpr size_t kMips64DoublewordSize = 8;
36 
37 enum LoadOperandType {
38   kLoadSignedByte,
39   kLoadUnsignedByte,
40   kLoadSignedHalfword,
41   kLoadUnsignedHalfword,
42   kLoadWord,
43   kLoadUnsignedWord,
44   kLoadDoubleword
45 };
46 
47 enum StoreOperandType {
48   kStoreByte,
49   kStoreHalfword,
50   kStoreWord,
51   kStoreDoubleword
52 };
53 
54 // Used to test the values returned by ClassS/ClassD.
55 enum FPClassMaskType {
56   kSignalingNaN      = 0x001,
57   kQuietNaN          = 0x002,
58   kNegativeInfinity  = 0x004,
59   kNegativeNormal    = 0x008,
60   kNegativeSubnormal = 0x010,
61   kNegativeZero      = 0x020,
62   kPositiveInfinity  = 0x040,
63   kPositiveNormal    = 0x080,
64   kPositiveSubnormal = 0x100,
65   kPositiveZero      = 0x200,
66 };
67 
68 class Mips64Label : public Label {
69  public:
Mips64Label()70   Mips64Label() : prev_branch_id_plus_one_(0) {}
71 
Mips64Label(Mips64Label && src)72   Mips64Label(Mips64Label&& src)
73       : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {}
74 
75  private:
76   uint32_t prev_branch_id_plus_one_;  // To get distance from preceding branch, if any.
77 
78   friend class Mips64Assembler;
79   DISALLOW_COPY_AND_ASSIGN(Mips64Label);
80 };
81 
82 // Slowpath entered when Thread::Current()->_exception is non-null.
83 class Mips64ExceptionSlowPath {
84  public:
Mips64ExceptionSlowPath(Mips64ManagedRegister scratch,size_t stack_adjust)85   explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust)
86       : scratch_(scratch), stack_adjust_(stack_adjust) {}
87 
Mips64ExceptionSlowPath(Mips64ExceptionSlowPath && src)88   Mips64ExceptionSlowPath(Mips64ExceptionSlowPath&& src)
89       : scratch_(src.scratch_),
90         stack_adjust_(src.stack_adjust_),
91         exception_entry_(std::move(src.exception_entry_)) {}
92 
93  private:
Entry()94   Mips64Label* Entry() { return &exception_entry_; }
95   const Mips64ManagedRegister scratch_;
96   const size_t stack_adjust_;
97   Mips64Label exception_entry_;
98 
99   friend class Mips64Assembler;
100   DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath);
101 };
102 
103 class Mips64Assembler FINAL : public Assembler {
104  public:
Mips64Assembler(ArenaAllocator * arena)105   explicit Mips64Assembler(ArenaAllocator* arena)
106       : Assembler(arena),
107         overwriting_(false),
108         overwrite_location_(0),
109         last_position_adjustment_(0),
110         last_old_position_(0),
111         last_branch_id_(0) {
112     cfi().DelayEmittingAdvancePCs();
113   }
114 
~Mips64Assembler()115   virtual ~Mips64Assembler() {
116     for (auto& branch : branches_) {
117       CHECK(branch.IsResolved());
118     }
119   }
120 
121   // Emit Machine Instructions.
122   void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
123   void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
124   void Daddu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
125   void Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
126   void Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
127   void Dsubu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
128 
129   void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
130   void MuhR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
131   void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
132   void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
133   void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
134   void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
135   void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
136   void Dmuh(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
137   void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
138   void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
139   void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
140   void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
141 
142   void And(GpuRegister rd, GpuRegister rs, GpuRegister rt);
143   void Andi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
144   void Or(GpuRegister rd, GpuRegister rs, GpuRegister rt);
145   void Ori(GpuRegister rt, GpuRegister rs, uint16_t imm16);
146   void Xor(GpuRegister rd, GpuRegister rs, GpuRegister rt);
147   void Xori(GpuRegister rt, GpuRegister rs, uint16_t imm16);
148   void Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt);
149 
150   void Bitswap(GpuRegister rd, GpuRegister rt);
151   void Dbitswap(GpuRegister rd, GpuRegister rt);
152   void Seb(GpuRegister rd, GpuRegister rt);
153   void Seh(GpuRegister rd, GpuRegister rt);
154   void Dsbh(GpuRegister rd, GpuRegister rt);
155   void Dshd(GpuRegister rd, GpuRegister rt);
156   void Dext(GpuRegister rs, GpuRegister rt, int pos, int size);  // MIPS64
157   void Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size);  // MIPS64
158   void Wsbh(GpuRegister rd, GpuRegister rt);
159   void Sc(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
160   void Scd(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
161   void Ll(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
162   void Lld(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
163 
164   void Sll(GpuRegister rd, GpuRegister rt, int shamt);
165   void Srl(GpuRegister rd, GpuRegister rt, int shamt);
166   void Rotr(GpuRegister rd, GpuRegister rt, int shamt);
167   void Sra(GpuRegister rd, GpuRegister rt, int shamt);
168   void Sllv(GpuRegister rd, GpuRegister rt, GpuRegister rs);
169   void Srlv(GpuRegister rd, GpuRegister rt, GpuRegister rs);
170   void Rotrv(GpuRegister rd, GpuRegister rt, GpuRegister rs);
171   void Srav(GpuRegister rd, GpuRegister rt, GpuRegister rs);
172   void Dsll(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
173   void Dsrl(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
174   void Drotr(GpuRegister rd, GpuRegister rt, int shamt);
175   void Dsra(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
176   void Dsll32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
177   void Dsrl32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
178   void Drotr32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
179   void Dsra32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
180   void Dsllv(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64
181   void Dsrlv(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64
182   void Drotrv(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64
183   void Dsrav(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64
184 
185   void Lb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
186   void Lh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
187   void Lw(GpuRegister rt, GpuRegister rs, uint16_t imm16);
188   void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
189   void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
190   void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
191   void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
192   void Lui(GpuRegister rt, uint16_t imm16);
193   void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64
194   void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64
195   void Sync(uint32_t stype);
196 
197   void Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
198   void Sh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
199   void Sw(GpuRegister rt, GpuRegister rs, uint16_t imm16);
200   void Sd(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
201 
202   void Slt(GpuRegister rd, GpuRegister rs, GpuRegister rt);
203   void Sltu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
204   void Slti(GpuRegister rt, GpuRegister rs, uint16_t imm16);
205   void Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
206   void Seleqz(GpuRegister rd, GpuRegister rs, GpuRegister rt);
207   void Selnez(GpuRegister rd, GpuRegister rs, GpuRegister rt);
208   void Clz(GpuRegister rd, GpuRegister rs);
209   void Clo(GpuRegister rd, GpuRegister rs);
210   void Dclz(GpuRegister rd, GpuRegister rs);
211   void Dclo(GpuRegister rd, GpuRegister rs);
212 
213   void Jalr(GpuRegister rd, GpuRegister rs);
214   void Jalr(GpuRegister rs);
215   void Jr(GpuRegister rs);
216   void Auipc(GpuRegister rs, uint16_t imm16);
217   void Addiupc(GpuRegister rs, uint32_t imm19);
218   void Bc(uint32_t imm26);
219   void Jic(GpuRegister rt, uint16_t imm16);
220   void Jialc(GpuRegister rt, uint16_t imm16);
221   void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
222   void Bltzc(GpuRegister rt, uint16_t imm16);
223   void Bgtzc(GpuRegister rt, uint16_t imm16);
224   void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
225   void Bgezc(GpuRegister rt, uint16_t imm16);
226   void Blezc(GpuRegister rt, uint16_t imm16);
227   void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
228   void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
229   void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
230   void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
231   void Beqzc(GpuRegister rs, uint32_t imm21);
232   void Bnezc(GpuRegister rs, uint32_t imm21);
233   void Bc1eqz(FpuRegister ft, uint16_t imm16);
234   void Bc1nez(FpuRegister ft, uint16_t imm16);
235 
236   void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
237   void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
238   void MulS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
239   void DivS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
240   void AddD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
241   void SubD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
242   void MulD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
243   void DivD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
244   void SqrtS(FpuRegister fd, FpuRegister fs);
245   void SqrtD(FpuRegister fd, FpuRegister fs);
246   void AbsS(FpuRegister fd, FpuRegister fs);
247   void AbsD(FpuRegister fd, FpuRegister fs);
248   void MovS(FpuRegister fd, FpuRegister fs);
249   void MovD(FpuRegister fd, FpuRegister fs);
250   void NegS(FpuRegister fd, FpuRegister fs);
251   void NegD(FpuRegister fd, FpuRegister fs);
252   void RoundLS(FpuRegister fd, FpuRegister fs);
253   void RoundLD(FpuRegister fd, FpuRegister fs);
254   void RoundWS(FpuRegister fd, FpuRegister fs);
255   void RoundWD(FpuRegister fd, FpuRegister fs);
256   void TruncLS(FpuRegister fd, FpuRegister fs);
257   void TruncLD(FpuRegister fd, FpuRegister fs);
258   void TruncWS(FpuRegister fd, FpuRegister fs);
259   void TruncWD(FpuRegister fd, FpuRegister fs);
260   void CeilLS(FpuRegister fd, FpuRegister fs);
261   void CeilLD(FpuRegister fd, FpuRegister fs);
262   void CeilWS(FpuRegister fd, FpuRegister fs);
263   void CeilWD(FpuRegister fd, FpuRegister fs);
264   void FloorLS(FpuRegister fd, FpuRegister fs);
265   void FloorLD(FpuRegister fd, FpuRegister fs);
266   void FloorWS(FpuRegister fd, FpuRegister fs);
267   void FloorWD(FpuRegister fd, FpuRegister fs);
268   void SelS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
269   void SelD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
270   void RintS(FpuRegister fd, FpuRegister fs);
271   void RintD(FpuRegister fd, FpuRegister fs);
272   void ClassS(FpuRegister fd, FpuRegister fs);
273   void ClassD(FpuRegister fd, FpuRegister fs);
274   void MinS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
275   void MinD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
276   void MaxS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
277   void MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
278   void CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
279   void CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
280   void CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
281   void CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
282   void CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
283   void CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
284   void CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
285   void CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
286   void CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
287   void CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
288   void CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
289   void CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
290   void CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
291   void CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
292   void CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
293   void CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
294   void CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
295   void CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
296   void CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
297   void CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
298 
299   void Cvtsw(FpuRegister fd, FpuRegister fs);
300   void Cvtdw(FpuRegister fd, FpuRegister fs);
301   void Cvtsd(FpuRegister fd, FpuRegister fs);
302   void Cvtds(FpuRegister fd, FpuRegister fs);
303   void Cvtsl(FpuRegister fd, FpuRegister fs);
304   void Cvtdl(FpuRegister fd, FpuRegister fs);
305 
306   void Mfc1(GpuRegister rt, FpuRegister fs);
307   void Mfhc1(GpuRegister rt, FpuRegister fs);
308   void Mtc1(GpuRegister rt, FpuRegister fs);
309   void Mthc1(GpuRegister rt, FpuRegister fs);
310   void Dmfc1(GpuRegister rt, FpuRegister fs);  // MIPS64
311   void Dmtc1(GpuRegister rt, FpuRegister fs);  // MIPS64
312   void Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
313   void Ldc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
314   void Swc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
315   void Sdc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
316 
317   void Break();
318   void Nop();
319   void Move(GpuRegister rd, GpuRegister rs);
320   void Clear(GpuRegister rd);
321   void Not(GpuRegister rd, GpuRegister rs);
322 
323   // Higher level composite instructions.
324   void LoadConst32(GpuRegister rd, int32_t value);
325   void LoadConst64(GpuRegister rd, int64_t value);  // MIPS64
326 
327   void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT);  // MIPS64
328 
Bind(Label * label)329   void Bind(Label* label) OVERRIDE {
330     Bind(down_cast<Mips64Label*>(label));
331   }
Jump(Label * label ATTRIBUTE_UNUSED)332   void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE {
333     UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS64";
334   }
335 
336   void Bind(Mips64Label* label);
337   void Bc(Mips64Label* label);
338   void Jialc(Mips64Label* label, GpuRegister indirect_reg);
339   void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
340   void Bltzc(GpuRegister rt, Mips64Label* label);
341   void Bgtzc(GpuRegister rt, Mips64Label* label);
342   void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
343   void Bgezc(GpuRegister rt, Mips64Label* label);
344   void Blezc(GpuRegister rt, Mips64Label* label);
345   void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
346   void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
347   void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
348   void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
349   void Beqzc(GpuRegister rs, Mips64Label* label);
350   void Bnezc(GpuRegister rs, Mips64Label* label);
351   void Bc1eqz(FpuRegister ft, Mips64Label* label);
352   void Bc1nez(FpuRegister ft, Mips64Label* label);
353 
354   void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
355   void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
356   void LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, int32_t offset);
357   void StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
358   void StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base, int32_t offset);
359 
360   // Emit data (e.g. encoded instruction or immediate) to the instruction stream.
361   void Emit(uint32_t value);
362 
363   //
364   // Overridden common assembler high-level functionality.
365   //
366 
367   // Emit code that will create an activation on the stack.
368   void BuildFrame(size_t frame_size, ManagedRegister method_reg,
369                   const std::vector<ManagedRegister>& callee_save_regs,
370                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
371 
372   // Emit code that will remove an activation from the stack.
373   void RemoveFrame(size_t frame_size,
374                    const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE;
375 
376   void IncreaseFrameSize(size_t adjust) OVERRIDE;
377   void DecreaseFrameSize(size_t adjust) OVERRIDE;
378 
379   // Store routines.
380   void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE;
381   void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
382   void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
383 
384   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
385 
386   void StoreStackOffsetToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs, FrameOffset fr_offs,
387                                   ManagedRegister mscratch) OVERRIDE;
388 
389   void StoreStackPointerToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs) OVERRIDE;
390 
391   void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
392                      ManagedRegister mscratch) OVERRIDE;
393 
394   // Load routines.
395   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
396 
397   void LoadFromThread64(ManagedRegister mdest,
398                         ThreadOffset<kMips64DoublewordSize> src,
399                         size_t size) OVERRIDE;
400 
401   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
402 
403   void LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
404                bool unpoison_reference) OVERRIDE;
405 
406   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
407 
408   void LoadRawPtrFromThread64(ManagedRegister mdest,
409                               ThreadOffset<kMips64DoublewordSize> offs) OVERRIDE;
410 
411   // Copying routines.
412   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
413 
414   void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMips64DoublewordSize> thr_offs,
415                               ManagedRegister mscratch) OVERRIDE;
416 
417   void CopyRawPtrToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs, FrameOffset fr_offs,
418                             ManagedRegister mscratch) OVERRIDE;
419 
420   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
421 
422   void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE;
423 
424   void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister mscratch,
425             size_t size) OVERRIDE;
426 
427   void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
428             ManagedRegister mscratch, size_t size) OVERRIDE;
429 
430   void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister mscratch,
431             size_t size) OVERRIDE;
432 
433   void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
434             ManagedRegister mscratch, size_t size) OVERRIDE;
435 
436   void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
437             ManagedRegister mscratch, size_t size) OVERRIDE;
438 
439   void MemoryBarrier(ManagedRegister) OVERRIDE;
440 
441   // Sign extension.
442   void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
443 
444   // Zero extension.
445   void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
446 
447   // Exploit fast access in managed code to Thread::Current().
448   void GetCurrentThread(ManagedRegister tr) OVERRIDE;
449   void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE;
450 
451   // Set up out_reg to hold a Object** into the handle scope, or to be null if the
452   // value is null and null_allowed. in_reg holds a possibly stale reference
453   // that can be used to avoid loading the handle scope entry to see if the value is
454   // null.
455   void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
456                               ManagedRegister in_reg, bool null_allowed) OVERRIDE;
457 
458   // Set up out_off to hold a Object** into the handle scope, or to be null if the
459   // value is null and null_allowed.
460   void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister
461                               mscratch, bool null_allowed) OVERRIDE;
462 
463   // src holds a handle scope entry (Object**) load this into dst.
464   void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
465 
466   // Heap::VerifyObject on src. In some cases (such as a reference to this) we
467   // know that src may not be null.
468   void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
469   void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
470 
471   // Call to address held at [base+offset].
472   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
473   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
474   void CallFromThread64(ThreadOffset<kMips64DoublewordSize> offset,
475                         ManagedRegister mscratch) OVERRIDE;
476 
477   // Generate code to check if Thread::Current()->exception_ is non-null
478   // and branch to a ExceptionSlowPath if it is.
479   void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE;
480 
481   // Emit slow paths queued during assembly and promote short branches to long if needed.
482   void FinalizeCode() OVERRIDE;
483 
484   // Emit branches and finalize all instructions.
485   void FinalizeInstructions(const MemoryRegion& region);
486 
487   // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS64,
488   // must be used instead of Mips64Label::GetPosition()).
489   uint32_t GetLabelLocation(Mips64Label* label) const;
490 
491   // Get the final position of a label after local fixup based on the old position
492   // recorded before FinalizeCode().
493   uint32_t GetAdjustedPosition(uint32_t old_position);
494 
495   enum BranchCondition {
496     kCondLT,
497     kCondGE,
498     kCondLE,
499     kCondGT,
500     kCondLTZ,
501     kCondGEZ,
502     kCondLEZ,
503     kCondGTZ,
504     kCondEQ,
505     kCondNE,
506     kCondEQZ,
507     kCondNEZ,
508     kCondLTU,
509     kCondGEU,
510     kCondF,    // Floating-point predicate false.
511     kCondT,    // Floating-point predicate true.
512     kUncond,
513   };
514   friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
515 
516  private:
517   class Branch {
518    public:
519     enum Type {
520       // Short branches.
521       kUncondBranch,
522       kCondBranch,
523       kCall,
524       // Long branches.
525       kLongUncondBranch,
526       kLongCondBranch,
527       kLongCall,
528     };
529 
530     // Bit sizes of offsets defined as enums to minimize chance of typos.
531     enum OffsetBits {
532       kOffset16 = 16,
533       kOffset18 = 18,
534       kOffset21 = 21,
535       kOffset23 = 23,
536       kOffset28 = 28,
537       kOffset32 = 32,
538     };
539 
540     static constexpr uint32_t kUnresolved = 0xffffffff;  // Unresolved target_
541     static constexpr int32_t kMaxBranchLength = 32;
542     static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t);
543 
544     struct BranchInfo {
545       // Branch length as a number of 4-byte-long instructions.
546       uint32_t length;
547       // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's
548       // PC-relative offset (or its most significant 16-bit half, which goes first).
549       uint32_t instr_offset;
550       // Different MIPS instructions with PC-relative offsets apply said offsets to slightly
551       // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte
552       // instructions) from the instruction containing the offset.
553       uint32_t pc_org;
554       // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch is
555       // an exception: use kOffset23 for beqzc/bnezc).
556       OffsetBits offset_size;
557       // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift
558       // count.
559       int offset_shift;
560     };
561     static const BranchInfo branch_info_[/* Type */];
562 
563     // Unconditional branch.
564     Branch(uint32_t location, uint32_t target);
565     // Conditional branch.
566     Branch(uint32_t location,
567            uint32_t target,
568            BranchCondition condition,
569            GpuRegister lhs_reg,
570            GpuRegister rhs_reg = ZERO);
571     // Call (branch and link) that stores the target address in a given register (i.e. T9).
572     Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg);
573 
574     // Some conditional branches with lhs = rhs are effectively NOPs, while some
575     // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs.
576     // So, we need a way to identify such branches in order to emit no instructions for them
577     // or change them to unconditional.
578     static bool IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs);
579     static bool IsUncond(BranchCondition condition, GpuRegister lhs, GpuRegister rhs);
580 
581     static BranchCondition OppositeCondition(BranchCondition cond);
582 
583     Type GetType() const;
584     BranchCondition GetCondition() const;
585     GpuRegister GetLeftRegister() const;
586     GpuRegister GetRightRegister() const;
587     uint32_t GetTarget() const;
588     uint32_t GetLocation() const;
589     uint32_t GetOldLocation() const;
590     uint32_t GetLength() const;
591     uint32_t GetOldLength() const;
592     uint32_t GetSize() const;
593     uint32_t GetOldSize() const;
594     uint32_t GetEndLocation() const;
595     uint32_t GetOldEndLocation() const;
596     bool IsLong() const;
597     bool IsResolved() const;
598 
599     // Returns the bit size of the signed offset that the branch instruction can handle.
600     OffsetBits GetOffsetSize() const;
601 
602     // Calculates the distance between two byte locations in the assembler buffer and
603     // returns the number of bits needed to represent the distance as a signed integer.
604     //
605     // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc),
606     // and 26 (bc) bits, which are additionally shifted left 2 positions at run time.
607     //
608     // Composite branches (made of several instructions) with longer reach have 32-bit
609     // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first).
610     // The composite branches cover the range of PC + ~+/-2GB. The range is not end-to-end,
611     // however. Consider the following implementation of a long unconditional branch, for
612     // example:
613     //
614     //   auipc at, offset_31_16  // at = pc + sign_extend(offset_31_16) << 16
615     //   jic   at, offset_15_0   // pc = at + sign_extend(offset_15_0)
616     //
617     // Both of the above instructions take 16-bit signed offsets as immediate operands.
618     // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000
619     // due to sign extension. This must be compensated for by incrementing offset_31_16
620     // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is
621     // 0x7FFF, adding 1 will overflow the positive offset into the negative range.
622     // Therefore, the long branch range is something like from PC - 0x80000000 to
623     // PC + 0x7FFF7FFF, IOW, shorter by 32KB on one side.
624     //
625     // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special
626     // case with the addiu instruction and a 16 bit offset.
627     static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target);
628 
629     // Resolve a branch when the target is known.
630     void Resolve(uint32_t target);
631 
632     // Relocate a branch by a given delta if needed due to expansion of this or another
633     // branch at a given location by this delta (just changes location_ and target_).
634     void Relocate(uint32_t expand_location, uint32_t delta);
635 
636     // If the branch is short, changes its type to long.
637     void PromoteToLong();
638 
639     // If necessary, updates the type by promoting a short branch to a long branch
640     // based on the branch location and target. Returns the amount (in bytes) by
641     // which the branch size has increased.
642     // max_short_distance caps the maximum distance between location_ and target_
643     // that is allowed for short branches. This is for debugging/testing purposes.
644     // max_short_distance = 0 forces all short branches to become long.
645     // Use the implicit default argument when not debugging/testing.
646     uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
647 
648     // Returns the location of the instruction(s) containing the offset.
649     uint32_t GetOffsetLocation() const;
650 
651     // Calculates and returns the offset ready for encoding in the branch instruction(s).
652     uint32_t GetOffset() const;
653 
654    private:
655     // Completes branch construction by determining and recording its type.
656     void InitializeType(bool is_call);
657     // Helper for the above.
658     void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
659 
660     uint32_t old_location_;      // Offset into assembler buffer in bytes.
661     uint32_t location_;          // Offset into assembler buffer in bytes.
662     uint32_t target_;            // Offset into assembler buffer in bytes.
663 
664     GpuRegister lhs_reg_;        // Left-hand side register in conditional branches or
665                                  // indirect call register.
666     GpuRegister rhs_reg_;        // Right-hand side register in conditional branches.
667     BranchCondition condition_;  // Condition for conditional branches.
668 
669     Type type_;                  // Current type of the branch.
670     Type old_type_;              // Initial type of the branch.
671   };
672   friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
673   friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
674 
675   void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct);
676   void EmitRsd(int opcode, GpuRegister rs, GpuRegister rd, int shamt, int funct);
677   void EmitRtd(int opcode, GpuRegister rt, GpuRegister rd, int shamt, int funct);
678   void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm);
679   void EmitI21(int opcode, GpuRegister rs, uint32_t imm21);
680   void EmitI26(int opcode, uint32_t imm26);
681   void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct);
682   void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm);
683   void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21);
684 
685   void Buncond(Mips64Label* label);
686   void Bcond(Mips64Label* label,
687              BranchCondition condition,
688              GpuRegister lhs,
689              GpuRegister rhs = ZERO);
690   void Call(Mips64Label* label, GpuRegister indirect_reg);
691   void FinalizeLabeledBranch(Mips64Label* label);
692 
693   Branch* GetBranch(uint32_t branch_id);
694   const Branch* GetBranch(uint32_t branch_id) const;
695 
696   void PromoteBranches();
697   void EmitBranch(Branch* branch);
698   void EmitBranches();
699   void PatchCFI();
700 
701   // Emits exception block.
702   void EmitExceptionPoll(Mips64ExceptionSlowPath* exception);
703 
704   // List of exception blocks to generate at the end of the code cache.
705   std::vector<Mips64ExceptionSlowPath> exception_blocks_;
706 
707   std::vector<Branch> branches_;
708 
709   // Whether appending instructions at the end of the buffer or overwriting the existing ones.
710   bool overwriting_;
711   // The current overwrite location.
712   uint32_t overwrite_location_;
713 
714   // Data for AdjustedPosition(), see the description there.
715   uint32_t last_position_adjustment_;
716   uint32_t last_old_position_;
717   uint32_t last_branch_id_;
718 
719   DISALLOW_COPY_AND_ASSIGN(Mips64Assembler);
720 };
721 
722 }  // namespace mips64
723 }  // namespace art
724 
725 #endif  // ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
726