1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "arm64_lir.h"
18 #include "codegen_arm64.h"
19 #include "dex/quick/mir_to_lir-inl.h"
20 #include "dex/reg_storage_eq.h"
21 
22 namespace art {
23 
24 /* This file contains codegen for the A64 ISA. */
25 
26 int32_t Arm64Mir2Lir::EncodeImmSingle(uint32_t bits) {
27   /*
28    * Valid values will have the form:
29    *
30    *   aBbb.bbbc.defg.h000.0000.0000.0000.0000
31    *
32    * where B = not(b). In other words, if b == 1, then B == 0 and vice versa.
33    */
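  // Illustrative example: 1.0f has the bit pattern 0x3f800000, which matches
  // the form above with a = 0, b = 1, c = 1, d = 1 and e = f = g = h = 0, so
  // this function returns the 8-bit immediate 0b01110000 (0x70).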
34 
35   // bits[18..0] are cleared.
36   if ((bits & 0x0007ffff) != 0)
37     return -1;
38 
39   // bits[29..25] are all set or all cleared.
40   uint32_t b_pattern = (bits >> 16) & 0x3e00;
41   if (b_pattern != 0 && b_pattern != 0x3e00)
42     return -1;
43 
44   // bit[30] and bit[29] are opposite.
45   if (((bits ^ (bits << 1)) & 0x40000000) == 0)
46     return -1;
47 
48   // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
49   // bit7: a000.0000
50   uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
51   // bit6: 0b00.0000
52   uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
53   // bit5_to_0: 00cd.efgh
54   uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
55   return (bit7 | bit6 | bit5_to_0);
56 }
57 
58 int32_t Arm64Mir2Lir::EncodeImmDouble(uint64_t bits) {
59   /*
60    * Valid values will have the form:
61    *
62    *   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
63    *   0000.0000.0000.0000.0000.0000.0000.0000
64    *
65    * where B = not(b).
66    */
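  // Illustrative example: the double 1.0 has the bit pattern
  // 0x3ff0000000000000, which matches the form above with a = 0, b = 1,
  // c = 1, d = 1 and e = f = g = h = 0, so this function also returns 0x70.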
67 
68   // bits[47..0] are cleared.
69   if ((bits & UINT64_C(0xffffffffffff)) != 0)
70     return -1;
71 
72   // bits[61..54] are all set or all cleared.
73   uint32_t b_pattern = (bits >> 48) & 0x3fc0;
74   if (b_pattern != 0 && b_pattern != 0x3fc0)
75     return -1;
76 
77   // bit[62] and bit[61] are opposite.
78   if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0)
79     return -1;
80 
81   // bit7: a000.0000
82   uint32_t bit7 = ((bits >> 63) & 0x1) << 7;
83   // bit6: 0b00.0000
84   uint32_t bit6 = ((bits >> 61) & 0x1) << 6;
85   // bit5_to_0: 00cd.efgh
86   uint32_t bit5_to_0 = (bits >> 48) & 0x3f;
87   return (bit7 | bit6 | bit5_to_0);
88 }
89 
90 size_t Arm64Mir2Lir::GetLoadStoreSize(LIR* lir) {
91   bool opcode_is_wide = IS_WIDE(lir->opcode);
92   ArmOpcode opcode = UNWIDE(lir->opcode);
93   DCHECK(!IsPseudoLirOp(opcode));
94   const ArmEncodingMap *encoder = &EncodingMap[opcode];
95   uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
96   return (bits >> 30);
97 }
98 
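// Note (illustrative): in the load/store templates selected above, bits
// [31:30] of the encoding skeleton hold log2 of the access size in bytes,
// which is why GetInstructionOffset() below rescales the raw offset operand by
// (1 << GetLoadStoreSize(lir)) for scaled-offset instructions.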
99 size_t Arm64Mir2Lir::GetInstructionOffset(LIR* lir) {
100   size_t offset = lir->operands[2];
101   uint64_t check_flags = GetTargetInstFlags(lir->opcode);
102   DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
103   if (check_flags & SCALED_OFFSET_X0) {
104     DCHECK(check_flags & IS_TERTIARY_OP);
105     offset = offset * (1 << GetLoadStoreSize(lir));
106   }
107   return offset;
108 }
109 
110 LIR* Arm64Mir2Lir::LoadFPConstantValue(RegStorage r_dest, int32_t value) {
111   DCHECK(r_dest.IsSingle());
112   if (value == 0) {
113     return NewLIR2(kA64Fmov2sw, r_dest.GetReg(), rwzr);
114   } else {
115     int32_t encoded_imm = EncodeImmSingle((uint32_t)value);
116     if (encoded_imm >= 0) {
117       return NewLIR2(kA64Fmov2fI, r_dest.GetReg(), encoded_imm);
118     }
119   }
120 
121   LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
122   if (data_target == NULL) {
123     // Wide, as we need 8B alignment.
124     data_target = AddWideData(&literal_list_, value, 0);
125   }
126 
127   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
128   LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
129                             r_dest.GetReg(), 0, 0, 0, 0, data_target);
130   AppendLIR(load_pc_rel);
131   return load_pc_rel;
132 }
133 
134 LIR* Arm64Mir2Lir::LoadFPConstantValueWide(RegStorage r_dest, int64_t value) {
135   DCHECK(r_dest.IsDouble());
136   if (value == 0) {
137     return NewLIR2(kA64Fmov2Sx, r_dest.GetReg(), rxzr);
138   } else {
139     int32_t encoded_imm = EncodeImmDouble(value);
140     if (encoded_imm >= 0) {
141       return NewLIR2(FWIDE(kA64Fmov2fI), r_dest.GetReg(), encoded_imm);
142     }
143   }
144 
145   // No short form - load from the literal pool.
146   int32_t val_lo = Low32Bits(value);
147   int32_t val_hi = High32Bits(value);
148   LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
149   if (data_target == NULL) {
150     data_target = AddWideData(&literal_list_, val_lo, val_hi);
151   }
152 
153   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
154   LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp),
155                             r_dest.GetReg(), 0, 0, 0, 0, data_target);
156   AppendLIR(load_pc_rel);
157   return load_pc_rel;
158 }
159 
160 static int CountLeadingZeros(bool is_wide, uint64_t value) {
161   return (is_wide) ? __builtin_clzll(value) : __builtin_clz((uint32_t)value);
162 }
163 
164 static int CountTrailingZeros(bool is_wide, uint64_t value) {
165   return (is_wide) ? __builtin_ctzll(value) : __builtin_ctz((uint32_t)value);
166 }
167 
168 static int CountSetBits(bool is_wide, uint64_t value) {
169   return ((is_wide) ?
170           __builtin_popcountll(value) : __builtin_popcount((uint32_t)value));
171 }
172 
173 /**
174  * @brief Try encoding an immediate in the form required by logical instructions.
175  *
176  * @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value.
177  * @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and as
178  *   32-bit if @p is_wide is false.
179  * @return A non-negative integer containing the encoded immediate or -1 if the encoding failed.
180  * @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate().
181  */
182 int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
183   unsigned n, imm_s, imm_r;
184 
185   // Logical immediates are encoded using parameters n, imm_s and imm_r using
186   // the following table:
187   //
188   //  N   imms    immr    size        S             R
189   //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
190   //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
191   //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
192   //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
193   //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
194   //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
195   // (s bits must not be all set)
196   //
197   // A pattern is constructed of size bits, where the least significant S+1
198   // bits are set. The pattern is rotated right by R, and repeated across a
199   // 32 or 64-bit value, depending on destination register width.
200   //
201   // To test if an arbitrary immediate can be encoded using this scheme, an
202   // iterative algorithm is used.
203   //
204 
205   // 1. If the value has all set or all clear bits, it can't be encoded.
206   if (value == 0 || value == ~UINT64_C(0) ||
207       (!is_wide && (uint32_t)value == ~UINT32_C(0))) {
208     return -1;
209   }
210 
211   unsigned lead_zero  = CountLeadingZeros(is_wide, value);
212   unsigned lead_one   = CountLeadingZeros(is_wide, ~value);
213   unsigned trail_zero = CountTrailingZeros(is_wide, value);
214   unsigned trail_one  = CountTrailingZeros(is_wide, ~value);
215   unsigned set_bits   = CountSetBits(is_wide, value);
216 
217   // The fixed bits in the immediate s field.
218   // If width == 64 (X reg), start at 0xFFFFFF80.
219   // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
220   // widths won't be executed.
221   unsigned width = (is_wide) ? 64 : 32;
222   int imm_s_fixed = (is_wide) ? -128 : -64;
223   int imm_s_mask = 0x3f;
224 
225   for (;;) {
226     // 2. If the value is two bits wide, it can be encoded.
227     if (width == 2) {
228       n = 0;
229       imm_s = 0x3C;
230       imm_r = (value & 3) - 1;
231       break;
232     }
233 
234     n = (width == 64) ? 1 : 0;
235     imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
236     if ((lead_zero + set_bits) == width) {
237       imm_r = 0;
238     } else {
239       imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
240     }
241 
242     // 3. If the sum of leading zeros, trailing zeros and set bits is
243     //    equal to the bit width of the value, it can be encoded.
244     if (lead_zero + trail_zero + set_bits == width) {
245       break;
246     }
247 
248     // 4. If the sum of leading ones, trailing ones and unset bits in the
249     //    value is equal to the bit width of the value, it can be encoded.
250     if (lead_one + trail_one + (width - set_bits) == width) {
251       break;
252     }
253 
254     // 5. If the most-significant half of the bitwise value is equal to
255     //    the least-significant half, return to step 2 using the
256     //    least-significant half of the value.
257     uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1;
258     if ((value & mask) == ((value >> (width >> 1)) & mask)) {
259       width >>= 1;
260       set_bits >>= 1;
261       imm_s_fixed >>= 1;
262       continue;
263     }
264 
265     // 6. Otherwise, the value can't be encoded.
266     return -1;
267   }
268 
269   return (n << 12 | imm_r << 6 | imm_s);
270 }
271 
272 // Maximum number of instructions to use for encoding the immediate.
273 static const int max_num_ops_per_const_load = 2;
274 
275 /**
276  * @brief Return the number of fast halfwords in the given uint64_t integer.
277  * @details The input integer is split into 4 halfwords (bits 0-15, 16-31, 32-47, 48-63). The
278  *   number of fast halfwords (halfwords that are either 0 or 0xffff) is returned. See below for
279  *   a more accurate description.
280  * @param value The input 64-bit integer.
281  * @return Return @c retval such that (retval & 0x7) is the maximum between n and m, where n is
282  *   the number of halfwords with all bits unset (0) and m is the number of halfwords with all bits
283  *   set (0xffff). Additionally (retval & 0x8) is set when m > n.
284  */
285 static int GetNumFastHalfWords(uint64_t value) {
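  // Illustrative example: for 0xffffffffffff1234 there are three 0xffff
  // halfwords and no 0x0000 halfwords, so this returns 3 | 0x8 = 0xb.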
286   unsigned int num_0000_halfwords = 0;
287   unsigned int num_ffff_halfwords = 0;
288   for (int shift = 0; shift < 64; shift += 16) {
289     uint16_t halfword = static_cast<uint16_t>(value >> shift);
290     if (halfword == 0)
291       num_0000_halfwords++;
292     else if (halfword == UINT16_C(0xffff))
293       num_ffff_halfwords++;
294   }
295   if (num_0000_halfwords >= num_ffff_halfwords) {
296     DCHECK_LE(num_0000_halfwords, 4U);
297     return num_0000_halfwords;
298   } else {
299     DCHECK_LE(num_ffff_halfwords, 4U);
300     return num_ffff_halfwords | 0x8;
301   }
302 }
303 
304 // The InexpensiveConstantXXX variants below are used in the promotion algorithm to determine how a
305 // constant is considered for promotion. If the constant is "inexpensive" then the promotion
306 // algorithm will give it a low priority for promotion, even when it is referenced many times in
307 // the code.
308 
309 bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) {
310   // A 32-bit int can always be loaded with 2 instructions (and without using the literal pool).
311   // We therefore return true and give it a low priority for promotion.
312   return true;
313 }
314 
315 bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
316   return EncodeImmSingle(value) >= 0;
317 }
318 
319 bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) {
320   int num_slow_halfwords = 4 - (GetNumFastHalfWords(value) & 0x7);
321   if (num_slow_halfwords <= max_num_ops_per_const_load) {
322     return true;
323   }
324   return (EncodeLogicalImmediate(/*is_wide=*/true, value) >= 0);
325 }
326 
327 bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) {
328   return EncodeImmDouble(value) >= 0;
329 }
330 
331 // The InexpensiveConstantXXX variants below are used to determine which A64 instructions to use
332 // when one of the operands is an immediate (e.g. register version or immediate version of add).
333 
334 bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
335   switch (opcode) {
336   case Instruction::IF_EQ:
337   case Instruction::IF_NE:
338   case Instruction::IF_LT:
339   case Instruction::IF_GE:
340   case Instruction::IF_GT:
341   case Instruction::IF_LE:
342   case Instruction::ADD_INT:
343   case Instruction::ADD_INT_2ADDR:
344   case Instruction::SUB_INT:
345   case Instruction::SUB_INT_2ADDR:
346     // The code below is consistent with the implementation of OpRegRegImm().
347     {
348       uint32_t abs_value = (value == INT_MIN) ? value : std::abs(value);
349       if (abs_value < 0x1000) {
350         return true;
351       } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
352         return true;
353       }
354       return false;
355     }
356   case Instruction::SHL_INT:
357   case Instruction::SHL_INT_2ADDR:
358   case Instruction::SHR_INT:
359   case Instruction::SHR_INT_2ADDR:
360   case Instruction::USHR_INT:
361   case Instruction::USHR_INT_2ADDR:
362     return true;
363   case Instruction::AND_INT:
364   case Instruction::AND_INT_2ADDR:
365   case Instruction::AND_INT_LIT16:
366   case Instruction::AND_INT_LIT8:
367   case Instruction::OR_INT:
368   case Instruction::OR_INT_2ADDR:
369   case Instruction::OR_INT_LIT16:
370   case Instruction::OR_INT_LIT8:
371   case Instruction::XOR_INT:
372   case Instruction::XOR_INT_2ADDR:
373   case Instruction::XOR_INT_LIT16:
374   case Instruction::XOR_INT_LIT8:
375     if (value == 0 || value == INT32_C(-1)) {
376       return true;
377     }
378     return (EncodeLogicalImmediate(/*is_wide=*/false, value) >= 0);
379   default:
380     return false;
381   }
382 }
383 
384 /*
385  * Load an immediate using a single instruction when possible; otherwise
386  * use a pair of movz and movk instructions.
387  *
388  * No additional register clobbering operation performed. Use this version when
389  * 1) r_dest is freshly returned from AllocTemp or
390  * 2) The codegen is under fixed register usage
391  */
392 LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
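  // Illustrative examples (w_dest stands for the 32-bit destination register):
  //   0x00001234 -> movz w_dest, #0x1234
  //   0x12340000 -> movz w_dest, #0x1234, lsl #16
  //   0x12345678 -> movz w_dest, #0x5678; movk w_dest, #0x1234, lsl #16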
393   LIR* res;
394 
395   if (r_dest.IsFloat()) {
396     return LoadFPConstantValue(r_dest, value);
397   }
398 
399   if (r_dest.Is64Bit()) {
400     return LoadConstantWide(r_dest, value);
401   }
402 
403   // Loading SP/ZR with an immediate is not supported.
404   DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
405   DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));
406 
407   // Compute how many movk, movz instructions are needed to load the value.
408   uint16_t high_bits = High16Bits(value);
409   uint16_t low_bits = Low16Bits(value);
410 
411   bool low_fast = ((uint16_t)(low_bits + 1) <= 1);
412   bool high_fast = ((uint16_t)(high_bits + 1) <= 1);
413 
414   if (LIKELY(low_fast || high_fast)) {
415     // 1 instruction is enough to load the immediate.
416     if (LIKELY(low_bits == high_bits)) {
417       // Value is either 0 or -1: we can just use wzr.
418       ArmOpcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr;
419       res = NewLIR2(opcode, r_dest.GetReg(), rwzr);
420     } else {
421       uint16_t uniform_bits, useful_bits;
422       int shift;
423 
424       if (LIKELY(high_fast)) {
425         shift = 0;
426         uniform_bits = high_bits;
427         useful_bits = low_bits;
428       } else {
429         shift = 1;
430         uniform_bits = low_bits;
431         useful_bits = high_bits;
432       }
433 
434       if (UNLIKELY(uniform_bits != 0)) {
435         res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift);
436       } else {
437         res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift);
438       }
439     }
440   } else {
441     // movk, movz require 2 instructions. Try detecting logical immediates.
442     int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value);
443     if (log_imm >= 0) {
444       res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm);
445     } else {
446       // Use 2 instructions.
447       res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0);
448       NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1);
449     }
450   }
451 
452   return res;
453 }
454 
455 // TODO: clean up the names. LoadConstantWide() should really be LoadConstantNoClobberWide().
456 LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
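  // Illustrative example: 0x0000123400005678 has two non-fast halfwords and is
  // not a logical immediate, so the code below emits
  //   movz x_dest, #0x5678
  //   movk x_dest, #0x1234, lsl #32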
457   if (r_dest.IsFloat()) {
458     return LoadFPConstantValueWide(r_dest, value);
459   }
460 
461   DCHECK(r_dest.Is64Bit());
462 
463   // Loading SP/ZR with an immediate is not supported.
464   DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
465   DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));
466 
467   if (LIKELY(value == INT64_C(0) || value == INT64_C(-1))) {
468     // value is either 0 or -1: we can just use xzr.
469     ArmOpcode opcode = LIKELY(value == 0) ? WIDE(kA64Mov2rr) : WIDE(kA64Mvn2rr);
470     return NewLIR2(opcode, r_dest.GetReg(), rxzr);
471   }
472 
473   // At least one of value's halfwords is neither 0x0 nor 0xffff: find out how many.
474   uint64_t uvalue = static_cast<uint64_t>(value);
475   int num_fast_halfwords = GetNumFastHalfWords(uvalue);
476   int num_slow_halfwords = 4 - (num_fast_halfwords & 0x7);
477   bool more_ffff_halfwords = (num_fast_halfwords & 0x8) != 0;
478 
479   if (num_slow_halfwords > 1) {
480     // A single movz/movn is not enough. Try the logical immediate route.
481     int log_imm = EncodeLogicalImmediate(/*is_wide=*/true, value);
482     if (log_imm >= 0) {
483       return NewLIR3(WIDE(kA64Orr3Rrl), r_dest.GetReg(), rxzr, log_imm);
484     }
485   }
486 
487   if (num_slow_halfwords <= max_num_ops_per_const_load) {
488     // We can encode the number using a movz/movn followed by one or more movk.
489     ArmOpcode op;
490     uint16_t background;
491     LIR* res = nullptr;
492 
493     // Decide whether to use a movz or a movn.
494     if (more_ffff_halfwords) {
495       op = WIDE(kA64Movn3rdM);
496       background = 0xffff;
497     } else {
498       op = WIDE(kA64Movz3rdM);
499       background = 0;
500     }
501 
502     // Emit the first instruction (movz, movn).
503     int shift;
504     for (shift = 0; shift < 4; shift++) {
505       uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
506       if (halfword != background) {
507         res = NewLIR3(op, r_dest.GetReg(), halfword ^ background, shift);
508         break;
509       }
510     }
511 
512     // Emit the movk instructions.
513     for (shift++; shift < 4; shift++) {
514       uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
515       if (halfword != background) {
516         NewLIR3(WIDE(kA64Movk3rdM), r_dest.GetReg(), halfword, shift);
517       }
518     }
519     return res;
520   }
521 
522   // Use the literal pool.
523   int32_t val_lo = Low32Bits(value);
524   int32_t val_hi = High32Bits(value);
525   LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
526   if (data_target == NULL) {
527     data_target = AddWideData(&literal_list_, val_lo, val_hi);
528   }
529 
530   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
531   LIR *res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
532                     r_dest.GetReg(), 0, 0, 0, 0, data_target);
533   AppendLIR(res);
534   return res;
535 }
536 
537 LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
538   LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched  during assembly */);
539   res->target = target;
540   return res;
541 }
542 
543 LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
544   LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc),
545                         0 /* offset to be patched */);
546   branch->target = target;
547   return branch;
548 }
549 
550 LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
551   ArmOpcode opcode = kA64Brk1d;
552   switch (op) {
553     case kOpBlx:
554       opcode = kA64Blr1x;
555       break;
556     // TODO(Arm64): port kThumbBx.
557     // case kOpBx:
558     //   opcode = kThumbBx;
559     //   break;
560     default:
561       LOG(FATAL) << "Bad opcode " << op;
562   }
563   return NewLIR1(opcode, r_dest_src.GetReg());
564 }
565 
566 LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) {
567   ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
568   CHECK_EQ(r_dest_src1.Is64Bit(), r_src2.Is64Bit());
569   ArmOpcode opcode = kA64Brk1d;
570 
571   switch (op) {
572     case kOpCmn:
573       opcode = kA64Cmn3rro;
574       break;
575     case kOpCmp:
576       opcode = kA64Cmp3rro;
577       break;
578     case kOpMov:
579       opcode = kA64Mov2rr;
580       break;
581     case kOpMvn:
582       opcode = kA64Mvn2rr;
583       break;
584     case kOpNeg:
585       opcode = kA64Neg3rro;
586       break;
587     case kOpTst:
588       opcode = kA64Tst3rro;
589       break;
590     case kOpRev:
591       DCHECK_EQ(shift, 0);
592       // Binary, but rm is encoded twice.
593       return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
594       break;
595     case kOpRevsh:
596       // Binary, but rm is encoded twice.
597       NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
598       // "sxth r1, r2" is "sbfm r1, r2, #0, #15"
599       return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), 0, 15);
600       break;
601     case kOp2Byte:
602       DCHECK_EQ(shift, ENCODE_NO_SHIFT);
603       // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
604       // For now we use sbfm directly.
605       return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 7);
606     case kOp2Short:
607       DCHECK_EQ(shift, ENCODE_NO_SHIFT);
608       // For now we use sbfm rather than its alias, sbfx.
609       return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
610     case kOp2Char:
611       // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
612       // For now we use ubfm directly.
613       DCHECK_EQ(shift, ENCODE_NO_SHIFT);
614       return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
615     default:
616       return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
617   }
618 
619   DCHECK(!IsPseudoLirOp(opcode));
620   if (EncodingMap[opcode].flags & IS_BINARY_OP) {
621     DCHECK_EQ(shift, ENCODE_NO_SHIFT);
622     return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg());
623   } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
624     ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
625     if (kind == kFmtShift) {
626       return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
627     }
628   }
629 
630   LOG(FATAL) << "Unexpected encoding operand count";
631   return NULL;
632 }
633 
634 LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
635                                   A64RegExtEncodings ext, uint8_t amount) {
636   ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
637   ArmOpcode opcode = kA64Brk1d;
638 
639   switch (op) {
640     case kOpCmn:
641       opcode = kA64Cmn3Rre;
642       break;
643     case kOpCmp:
644       opcode = kA64Cmp3Rre;
645       break;
646     case kOpAdd:
647       // Note: intentional fallthrough
648     case kOpSub:
649       return OpRegRegRegExtend(op, r_dest_src1, r_dest_src1, r_src2, ext, amount);
650       break;
651     default:
652       LOG(FATAL) << "Bad Opcode: " << opcode;
653       break;
654   }
655 
656   DCHECK(!IsPseudoLirOp(opcode));
657   if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
658     ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
659     if (kind == kFmtExtend) {
660       return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(),
661                      EncodeExtend(ext, amount));
662     }
663   }
664 
665   LOG(FATAL) << "Unexpected encoding operand count";
666   return NULL;
667 }
668 
669 LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
670   /* RegReg operations with SP in first parameter need extended register instruction form.
671    * Only CMN, CMP, ADD & SUB instructions are implemented.
672    */
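  // Illustrative example: an add of a 64-bit register into rs_sp is emitted
  // via the extended form below as "add sp, sp, xN, uxtx".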
673   if (r_dest_src1 == rs_sp) {
674     return OpRegRegExtend(op, r_dest_src1, r_src2, kA64Uxtx, 0);
675   } else {
676     return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
677   }
678 }
679 
680 LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
681   UNIMPLEMENTED(FATAL);
682   return nullptr;
683 }
684 
685 LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
686   UNIMPLEMENTED(FATAL);
687   return nullptr;
688 }
689 
690 LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
691   LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
692   return NULL;
693 }
694 
695 LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
696                                     RegStorage r_src2, int shift) {
697   ArmOpcode opcode = kA64Brk1d;
698 
699   switch (op) {
700     case kOpAdd:
701       opcode = kA64Add4rrro;
702       break;
703     case kOpSub:
704       opcode = kA64Sub4rrro;
705       break;
706     // case kOpRsub:
707     //   opcode = kA64RsubWWW;
708     //   break;
709     case kOpAdc:
710       opcode = kA64Adc3rrr;
711       break;
712     case kOpAnd:
713       opcode = kA64And4rrro;
714       break;
715     case kOpXor:
716       opcode = kA64Eor4rrro;
717       break;
718     case kOpMul:
719       opcode = kA64Mul3rrr;
720       break;
721     case kOpDiv:
722       opcode = kA64Sdiv3rrr;
723       break;
724     case kOpOr:
725       opcode = kA64Orr4rrro;
726       break;
727     case kOpSbc:
728       opcode = kA64Sbc3rrr;
729       break;
730     case kOpLsl:
731       opcode = kA64Lsl3rrr;
732       break;
733     case kOpLsr:
734       opcode = kA64Lsr3rrr;
735       break;
736     case kOpAsr:
737       opcode = kA64Asr3rrr;
738       break;
739     case kOpRor:
740       opcode = kA64Ror3rrr;
741       break;
742     default:
743       LOG(FATAL) << "Bad opcode: " << op;
744       break;
745   }
746 
747   // The instructions above belong to two kinds:
748   // - 4-operands instructions, where the last operand is a shift/extend immediate,
749   // - 3-operands instructions with no shift/extend.
750   ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
751   CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
752   CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit());
753   if (EncodingMap[opcode].flags & IS_QUAD_OP) {
754     DCHECK(!IsExtendEncoding(shift));
755     return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
756   } else {
757     DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
758     DCHECK_EQ(shift, ENCODE_NO_SHIFT);
759     return NewLIR3(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
760   }
761 }
762 
763 LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1,
764                                      RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) {
765   ArmOpcode opcode = kA64Brk1d;
766 
767   switch (op) {
768     case kOpAdd:
769       opcode = kA64Add4RRre;
770       break;
771     case kOpSub:
772       opcode = kA64Sub4RRre;
773       break;
774     default:
775       LOG(FATAL) << "Unimplemented opcode: " << op;
776       break;
777   }
778   ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
779 
780   if (r_dest.Is64Bit()) {
781     CHECK(r_src1.Is64Bit());
782 
783     // dest determines whether the op is wide or not. Up-convert src2 when necessary.
784     // Note: this is not according to aarch64 specifications, but our encoding.
785     if (!r_src2.Is64Bit()) {
786       r_src2 = As64BitReg(r_src2);
787     }
788   } else {
789     CHECK(!r_src1.Is64Bit());
790     CHECK(!r_src2.Is64Bit());
791   }
792 
793   // Sanity checks.
794   //    1) Amount is in the range 0..4
795   CHECK_LE(amount, 4);
796 
797   return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(),
798                  EncodeExtend(ext, amount));
799 }
800 
801 LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
802   return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
803 }
804 
805 LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
806   return OpRegRegImm64(op, r_dest, r_src1, static_cast<int64_t>(value));
807 }
808 
809 LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value) {
810   LIR* res;
811   bool neg = (value < 0);
812   uint64_t abs_value = (neg & !(value == LLONG_MIN)) ? -value : value;
813   ArmOpcode opcode = kA64Brk1d;
814   ArmOpcode alt_opcode = kA64Brk1d;
815   bool is_logical = false;
816   bool is_wide = r_dest.Is64Bit();
817   ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
818   int info = 0;
819 
820   switch (op) {
821     case kOpLsl: {
822       // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
823       // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 64), #(63-imm)".
824       // For now, we just use ubfm directly.
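      // Illustrative example: "lsl w1, w2, #3" is emitted here as
      // "ubfm w1, w2, #29, #28".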
825       int max_value = (is_wide) ? 63 : 31;
826       return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
827                      (-value) & max_value, max_value - value);
828     }
829     case kOpLsr:
830       return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
831     case kOpAsr:
832       return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
833     case kOpRor:
834       // "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm".
835       // For now, we just use extr directly.
836       return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(),
837                      value);
838     case kOpAdd:
839       neg = !neg;
840       // Note: intentional fallthrough
841     case kOpSub:
842       // Add and sub below read/write sp rather than xzr.
843       if (abs_value < 0x1000) {
844         opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
845         return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0);
846       } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
847         opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
848         return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
849       } else {
850         alt_opcode = (op == kOpAdd) ? kA64Add4RRre : kA64Sub4RRre;
851         info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
852       }
853       break;
854     case kOpAdc:
855       alt_opcode = kA64Adc3rrr;
856       break;
857     case kOpSbc:
858       alt_opcode = kA64Sbc3rrr;
859       break;
860     case kOpOr:
861       is_logical = true;
862       opcode = kA64Orr3Rrl;
863       alt_opcode = kA64Orr4rrro;
864       break;
865     case kOpAnd:
866       is_logical = true;
867       opcode = kA64And3Rrl;
868       alt_opcode = kA64And4rrro;
869       break;
870     case kOpXor:
871       is_logical = true;
872       opcode = kA64Eor3Rrl;
873       alt_opcode = kA64Eor4rrro;
874       break;
875     case kOpMul:
876       // TUNING: power of 2, shift & add
877       alt_opcode = kA64Mul3rrr;
878       break;
879     default:
880       LOG(FATAL) << "Bad opcode: " << op;
881   }
882 
883   if (is_logical) {
884     int log_imm = EncodeLogicalImmediate(is_wide, value);
885     if (log_imm >= 0) {
886       return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
887     } else {
888       // When the immediate is either 0 or ~0, the logical operation can be trivially reduced
889       // to a - possibly negated - assignment.
890       if (value == 0) {
891         switch (op) {
892           case kOpOr:
893           case kOpXor:
894             // Or/Xor by zero reduces to an assignment.
895             return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
896           default:
897             // And by zero reduces to a `mov rdest, xzr'.
898             DCHECK(op == kOpAnd);
899             return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
900         }
901       } else if (value == INT64_C(-1)
902                  || (!is_wide && static_cast<uint32_t>(value) == ~UINT32_C(0))) {
903         switch (op) {
904           case kOpAnd:
905             // And by -1 reduces to an assignment.
906             return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
907           case kOpXor:
908             // Xor by -1 reduces to an `mvn rdest, rsrc'.
909             return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), r_src1.GetReg());
910           default:
911             // Or by -1 reduces to a `mvn rdest, xzr'.
912             DCHECK(op == kOpOr);
913             return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
914         }
915       }
916     }
917   }
918 
919   RegStorage r_scratch;
920   if (is_wide) {
921     r_scratch = AllocTempWide();
922     LoadConstantWide(r_scratch, value);
923   } else {
924     r_scratch = AllocTemp();
925     LoadConstant(r_scratch, value);
926   }
927   if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
928     res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
929   else
930     res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
931   FreeTemp(r_scratch);
932   return res;
933 }
934 
935 LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
936   return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value));
937 }
938 
939 LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) {
940   ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
941   ArmOpcode opcode = kA64Brk1d;
942   ArmOpcode neg_opcode = kA64Brk1d;
943   bool shift;
944   bool neg = (value < 0);
945   uint64_t abs_value = (neg & !(value == LLONG_MIN)) ? -value : value;
946 
947   if (LIKELY(abs_value < 0x1000)) {
948     // abs_value is a 12-bit immediate.
949     shift = false;
950   } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
951     // abs_value is a shifted 12-bit immediate.
952     shift = true;
953     abs_value >>= 12;
954   } else if (LIKELY(abs_value < 0x1000000 && (op == kOpAdd || op == kOpSub))) {
955     // Note: It is better to use two ADD/SUB instead of loading a number to a temp register.
956     // This works for both normal registers and SP.
957     // For a frame size == 0x2468, it will be encoded as:
958     //   sub sp, #0x2000
959     //   sub sp, #0x468
960     if (neg) {
961       op = (op == kOpAdd) ? kOpSub : kOpAdd;
962     }
963     OpRegImm64(op, r_dest_src1, abs_value & (~INT64_C(0xfff)));
964     return OpRegImm64(op, r_dest_src1, abs_value & 0xfff);
965   } else {
966     RegStorage r_tmp;
967     LIR* res;
968     if (IS_WIDE(wide)) {
969       r_tmp = AllocTempWide();
970       res = LoadConstantWide(r_tmp, value);
971     } else {
972       r_tmp = AllocTemp();
973       res = LoadConstant(r_tmp, value);
974     }
975     OpRegReg(op, r_dest_src1, r_tmp);
976     FreeTemp(r_tmp);
977     return res;
978   }
979 
980   switch (op) {
981     case kOpAdd:
982       neg_opcode = kA64Sub4RRdT;
983       opcode = kA64Add4RRdT;
984       break;
985     case kOpSub:
986       neg_opcode = kA64Add4RRdT;
987       opcode = kA64Sub4RRdT;
988       break;
989     case kOpCmp:
990       neg_opcode = kA64Cmn3RdT;
991       opcode = kA64Cmp3RdT;
992       break;
993     default:
994       LOG(FATAL) << "Bad op-kind in OpRegImm: " << op;
995       break;
996   }
997 
998   if (UNLIKELY(neg))
999     opcode = neg_opcode;
1000 
1001   if (EncodingMap[opcode].flags & IS_QUAD_OP)
1002     return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value,
1003                    (shift) ? 1 : 0);
1004   else
1005     return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0);
1006 }
1007 
1008 int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
1009   DCHECK_EQ(shift_type & 0x3, shift_type);
1010   DCHECK_EQ(amount & 0x3f, amount);
1011   return ((shift_type & 0x3) << 7) | (amount & 0x3f);
1012 }
1013 
1014 int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
1015   DCHECK_EQ(extend_type & 0x7, extend_type);
1016   DCHECK_EQ(amount & 0x7, amount);
1017   return  (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7);
1018 }
1019 
1020 bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) {
1021   return ((1 << 6) & encoded_value) != 0;
1022 }
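// Note (illustrative): EncodeShift() leaves bit 6 clear while EncodeExtend()
// sets it, so the two encodings are disjoint and IsExtendEncoding() above can
// distinguish them.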
1023 
1024 LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
1025                                    int scale, OpSize size) {
1026   LIR* load;
1027   int expected_scale = 0;
1028   ArmOpcode opcode = kA64Brk1d;
1029   r_base = Check64BitReg(r_base);
1030 
1031   // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
1032   //   register offset load (rather than doing the sign extension in a separate instruction).
1033   if (r_index.Is32Bit()) {
1034     // Assemble: ``sxtw xN, wN''.
1035     r_index = As64BitReg(r_index);
1036     NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
1037   }
1038 
1039   if (r_dest.IsFloat()) {
1040     if (r_dest.IsDouble()) {
1041       DCHECK(size == k64 || size == kDouble);
1042       expected_scale = 3;
1043       opcode = FWIDE(kA64Ldr4fXxG);
1044     } else {
1045       DCHECK(r_dest.IsSingle());
1046       DCHECK(size == k32 || size == kSingle);
1047       expected_scale = 2;
1048       opcode = kA64Ldr4fXxG;
1049     }
1050 
1051     DCHECK(scale == 0 || scale == expected_scale);
1052     return NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
1053                    (scale != 0) ? 1 : 0);
1054   }
1055 
1056   switch (size) {
1057     case kDouble:
1058     case kWord:
1059     case k64:
1060       r_dest = Check64BitReg(r_dest);
1061       opcode = WIDE(kA64Ldr4rXxG);
1062       expected_scale = 3;
1063       break;
1064     case kSingle:     // Intentional fall-through.
1065     case k32:         // Intentional fall-through.
1066     case kReference:
1067       r_dest = Check32BitReg(r_dest);
1068       opcode = kA64Ldr4rXxG;
1069       expected_scale = 2;
1070       break;
1071     case kUnsignedHalf:
1072       r_dest = Check32BitReg(r_dest);
1073       opcode = kA64Ldrh4wXxd;
1074       expected_scale = 1;
1075       break;
1076     case kSignedHalf:
1077       r_dest = Check32BitReg(r_dest);
1078       opcode = kA64Ldrsh4rXxd;
1079       expected_scale = 1;
1080       break;
1081     case kUnsignedByte:
1082       r_dest = Check32BitReg(r_dest);
1083       opcode = kA64Ldrb3wXx;
1084       break;
1085     case kSignedByte:
1086       r_dest = Check32BitReg(r_dest);
1087       opcode = kA64Ldrsb3rXx;
1088       break;
1089     default:
1090       LOG(FATAL) << "Bad size: " << size;
1091   }
1092 
1093   if (UNLIKELY(expected_scale == 0)) {
1094     // This is a tertiary op (e.g. ldrb, ldrsb); it does not support a scale.
1095     DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
1096     DCHECK_EQ(scale, 0);
1097     load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
1098   } else {
1099     DCHECK(scale == 0 || scale == expected_scale);
1100     load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
1101                    (scale != 0) ? 1 : 0);
1102   }
1103 
1104   return load;
1105 }
1106 
1107 LIR* Arm64Mir2Lir::LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
1108                                   int scale) {
1109   return LoadBaseIndexed(r_base, r_index, As32BitReg(r_dest), scale, kReference);
1110 }
1111 
1112 LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
1113                                     int scale, OpSize size) {
1114   LIR* store;
1115   int expected_scale = 0;
1116   ArmOpcode opcode = kA64Brk1d;
1117   r_base = Check64BitReg(r_base);
1118 
1119   // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
1120   //   register offset store (rather than doing the sign extension in a separate instruction).
1121   if (r_index.Is32Bit()) {
1122     // Assemble: ``sxtw xN, wN''.
1123     r_index = As64BitReg(r_index);
1124     NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
1125   }
1126 
1127   if (r_src.IsFloat()) {
1128     if (r_src.IsDouble()) {
1129       DCHECK(size == k64 || size == kDouble);
1130       expected_scale = 3;
1131       opcode = FWIDE(kA64Str4fXxG);
1132     } else {
1133       DCHECK(r_src.IsSingle());
1134       DCHECK(size == k32 || size == kSingle);
1135       expected_scale = 2;
1136       opcode = kA64Str4fXxG;
1137     }
1138 
1139     DCHECK(scale == 0 || scale == expected_scale);
1140     return NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
1141                    (scale != 0) ? 1 : 0);
1142   }
1143 
1144   switch (size) {
1145     case kDouble:     // Intentional fall-through.
1146     case kWord:       // Intentional fall-through.
1147     case k64:
1148       r_src = Check64BitReg(r_src);
1149       opcode = WIDE(kA64Str4rXxG);
1150       expected_scale = 3;
1151       break;
1152     case kSingle:     // Intentional fall-through.
1153     case k32:         // Intentional fall-through.
1154     case kReference:
1155       r_src = Check32BitReg(r_src);
1156       opcode = kA64Str4rXxG;
1157       expected_scale = 2;
1158       break;
1159     case kUnsignedHalf:
1160     case kSignedHalf:
1161       r_src = Check32BitReg(r_src);
1162       opcode = kA64Strh4wXxd;
1163       expected_scale = 1;
1164       break;
1165     case kUnsignedByte:
1166     case kSignedByte:
1167       r_src = Check32BitReg(r_src);
1168       opcode = kA64Strb3wXx;
1169       break;
1170     default:
1171       LOG(FATAL) << "Bad size: " << size;
1172   }
1173 
1174   if (UNLIKELY(expected_scale == 0)) {
1175     // This is a tertiary op (e.g. strb); it does not support a scale.
1176     DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
1177     DCHECK_EQ(scale, 0);
1178     store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
1179   } else {
1180     store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
1181                     (scale != 0) ? 1 : 0);
1182   }
1183 
1184   return store;
1185 }
1186 
1187 LIR* Arm64Mir2Lir::StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
1188                                    int scale) {
1189   return StoreBaseIndexed(r_base, r_index, As32BitReg(r_src), scale, kReference);
1190 }
1191 
1192 /*
1193  * Load value from base + displacement.  Optionally perform null check
1194  * on base (which must have an associated s_reg and MIR).  If not
1195  * performing null check, incoming MIR can be null.
1196  */
1197 LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
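  // Illustrative example: for a k64 load the scale below is 3, so a
  // displacement of 16 uses the scaled "ldr x_dest, [x_base, #16]" form, while
  // -8 or 17 fall back to the unscaled ldur alternative.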
1198                                     OpSize size) {
1199   LIR* load = NULL;
1200   ArmOpcode opcode = kA64Brk1d;
1201   ArmOpcode alt_opcode = kA64Brk1d;
1202   int scale = 0;
1203 
1204   switch (size) {
1205     case kDouble:     // Intentional fall-through.
1206     case kWord:       // Intentional fall-through.
1207     case k64:
1208       r_dest = Check64BitReg(r_dest);
1209       scale = 3;
1210       if (r_dest.IsFloat()) {
1211         DCHECK(r_dest.IsDouble());
1212         opcode = FWIDE(kA64Ldr3fXD);
1213         alt_opcode = FWIDE(kA64Ldur3fXd);
1214       } else {
1215         opcode = WIDE(kA64Ldr3rXD);
1216         alt_opcode = WIDE(kA64Ldur3rXd);
1217       }
1218       break;
1219     case kSingle:     // Intentional fall-through.
1220     case k32:         // Intentional fall-through.
1221     case kReference:
1222       r_dest = Check32BitReg(r_dest);
1223       scale = 2;
1224       if (r_dest.IsFloat()) {
1225         DCHECK(r_dest.IsSingle());
1226         opcode = kA64Ldr3fXD;
1227       } else {
1228         opcode = kA64Ldr3rXD;
1229       }
1230       break;
1231     case kUnsignedHalf:
1232       scale = 1;
1233       opcode = kA64Ldrh3wXF;
1234       break;
1235     case kSignedHalf:
1236       scale = 1;
1237       opcode = kA64Ldrsh3rXF;
1238       break;
1239     case kUnsignedByte:
1240       opcode = kA64Ldrb3wXd;
1241       break;
1242     case kSignedByte:
1243       opcode = kA64Ldrsb3rXd;
1244       break;
1245     default:
1246       LOG(FATAL) << "Bad size: " << size;
1247   }
1248 
1249   bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
1250   int scaled_disp = displacement >> scale;
1251   if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
1252     // Can use scaled load.
1253     load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), scaled_disp);
1254   } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
1255     // Can use unscaled load.
1256     load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement);
1257   } else {
1258     // Use long sequence.
1259     // TODO: cleaner support for index/displacement registers?  Not a reference, but must match width.
1260     RegStorage r_scratch = AllocTempWide();
1261     LoadConstantWide(r_scratch, displacement);
1262     load = LoadBaseIndexed(r_base, r_scratch, r_dest, 0, size);
1263     FreeTemp(r_scratch);
1264   }
1265 
1266   // TODO: in future may need to differentiate Dalvik accesses w/ spills
1267   if (mem_ref_type_ == ResourceMask::kDalvikReg) {
1268     DCHECK(r_base == rs_sp);
1269     AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
1270   }
1271   return load;
1272 }
1273 
1274 LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
1275                                 OpSize size, VolatileKind is_volatile) {
1276   // LoadBaseDisp() will emit correct insn for atomic load on arm64
1277   // assuming r_dest is correctly prepared using RegClassForFieldLoadStore().
1278 
1279   LIR* load = LoadBaseDispBody(r_base, displacement, r_dest, size);
1280 
1281   if (UNLIKELY(is_volatile == kVolatile)) {
1282     // TODO: This should generate an acquire load instead of the barrier.
1283     GenMemBarrier(kLoadAny);
1284   }
1285 
1286   return load;
1287 }
1288 
1289 LIR* Arm64Mir2Lir::LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest,
1290                                VolatileKind is_volatile) {
1291   return LoadBaseDisp(r_base, displacement, As32BitReg(r_dest), kReference, is_volatile);
1292 }
1293 
1294 LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
1295                                      OpSize size) {
1296   LIR* store = NULL;
1297   ArmOpcode opcode = kA64Brk1d;
1298   ArmOpcode alt_opcode = kA64Brk1d;
1299   int scale = 0;
1300 
1301   switch (size) {
1302     case kDouble:     // Intentional fall-through.
1303     case kWord:       // Intentional fall-through.
1304     case k64:
1305       r_src = Check64BitReg(r_src);
1306       scale = 3;
1307       if (r_src.IsFloat()) {
1308         DCHECK(r_src.IsDouble());
1309         opcode = FWIDE(kA64Str3fXD);
1310         alt_opcode = FWIDE(kA64Stur3fXd);
1311       } else {
1312         opcode = FWIDE(kA64Str3rXD);
1313         alt_opcode = FWIDE(kA64Stur3rXd);
1314       }
1315       break;
1316     case kSingle:     // Intentional fall-through.
1317     case k32:         // Intentional fall-through.
1318     case kReference:
1319       r_src = Check32BitReg(r_src);
1320       scale = 2;
1321       if (r_src.IsFloat()) {
1322         DCHECK(r_src.IsSingle());
1323         opcode = kA64Str3fXD;
1324       } else {
1325         opcode = kA64Str3rXD;
1326       }
1327       break;
1328     case kUnsignedHalf:
1329     case kSignedHalf:
1330       scale = 1;
1331       opcode = kA64Strh3wXF;
1332       break;
1333     case kUnsignedByte:
1334     case kSignedByte:
1335       opcode = kA64Strb3wXd;
1336       break;
1337     default:
1338       LOG(FATAL) << "Bad size: " << size;
1339   }
1340 
1341   bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
1342   int scaled_disp = displacement >> scale;
1343   if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
1344     // Can use scaled store.
1345     store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), scaled_disp);
1346   } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
1347     // Can use unscaled store.
1348     store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement);
1349   } else {
1350     // Use long sequence.
1351     RegStorage r_scratch = AllocTempWide();
1352     LoadConstantWide(r_scratch, displacement);
1353     store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
1354     FreeTemp(r_scratch);
1355   }
1356 
1357   // TODO: In future, may need to differentiate Dalvik & spill accesses.
1358   if (mem_ref_type_ == ResourceMask::kDalvikReg) {
1359     DCHECK(r_base == rs_sp);
1360     AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
1361   }
1362   return store;
1363 }
1364 
1365 LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
1366                                  OpSize size, VolatileKind is_volatile) {
1367   // TODO: This should generate a release store and no barriers.
1368   if (UNLIKELY(is_volatile == kVolatile)) {
1369     // Ensure that prior accesses become visible to other threads first.
1370     GenMemBarrier(kAnyStore);
1371   }
1372 
1373   // StoreBaseDisp() will emit correct insn for atomic store on arm64
1374   // assuming r_src is correctly prepared using RegClassForFieldLoadStore().
1375 
1376   LIR* store = StoreBaseDispBody(r_base, displacement, r_src, size);
1377 
1378   if (UNLIKELY(is_volatile == kVolatile)) {
1379     // Preserve order with respect to any subsequent volatile loads.
1380     // We need StoreLoad, but that generally requires the most expensive barrier.
1381     GenMemBarrier(kAnyAny);
1382   }
1383 
1384   return store;
1385 }
1386 
1387 LIR* Arm64Mir2Lir::StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src,
1388                                 VolatileKind is_volatile) {
1389   return StoreBaseDisp(r_base, displacement, As32BitReg(r_src), kReference, is_volatile);
1390 }
1391 
1392 LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
1393   LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
1394   return NULL;
1395 }
1396 
1397 LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
1398   LOG(FATAL) << "Unexpected use of OpMem for Arm64";
1399   return NULL;
1400 }
1401 
1402 LIR* Arm64Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) {
1403   return OpReg(op, r_tgt);
1404 }
1405 
1406 }  // namespace art
1407