1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /* This file contains codegen for the Thumb2 ISA. */
18 
19 #include "codegen_arm.h"
20 
21 #include "arch/instruction_set_features.h"
22 #include "arm_lir.h"
23 #include "base/bit_utils.h"
24 #include "base/logging.h"
25 #include "dex/compiler_ir.h"
26 #include "dex/mir_graph.h"
27 #include "dex/quick/mir_to_lir-inl.h"
28 #include "dex/reg_storage_eq.h"
29 #include "driver/compiler_driver.h"
30 #include "entrypoints/quick/quick_entrypoints.h"
31 #include "mirror/array-inl.h"
32 
33 namespace art {
34 
// Compare two registers and emit a conditional branch to |target|.
// |target| may be null; the caller then fills in branch->target later.
LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
  // CMP sets the condition flags; the branch consumes them.
  OpRegReg(kOpCmp, src1, src2);
  return OpCondBranch(cond, target);
}
39 
40 /*
41  * Generate a Thumb2 IT instruction, which can nullify up to
42  * four subsequent instructions based on a condition and its
43  * inverse.  The condition applies to the first instruction, which
44  * is executed if the condition is met.  The string "guide" consists
45  * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
46  * A "T" means the instruction is executed if the condition is
47  * met, and an "E" means the instruction is executed if the condition
48  * is not met.
49  */
OpIT(ConditionCode ccode,const char * guide)50 LIR* ArmMir2Lir::OpIT(ConditionCode ccode, const char* guide) {
51   int mask;
52   int mask3 = 0;
53   int mask2 = 0;
54   int mask1 = 0;
55   ArmConditionCode code = ArmConditionEncoding(ccode);
56   int cond_bit = code & 1;
57   int alt_bit = cond_bit ^ 1;
58 
59   switch (strlen(guide)) {
60     case 3:
61       mask1 = (guide[2] == 'T') ? cond_bit : alt_bit;
62       FALLTHROUGH_INTENDED;
63     case 2:
64       mask2 = (guide[1] == 'T') ? cond_bit : alt_bit;
65       FALLTHROUGH_INTENDED;
66     case 1:
67       mask3 = (guide[0] == 'T') ? cond_bit : alt_bit;
68       break;
69     case 0:
70       break;
71     default:
72       LOG(FATAL) << "OAT: bad case in OpIT";
73       UNREACHABLE();
74   }
75   mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
76        (1 << (3 - strlen(guide)));
77   return NewLIR2(kThumb2It, code, mask);
78 }
79 
UpdateIT(LIR * it,const char * new_guide)80 void ArmMir2Lir::UpdateIT(LIR* it, const char* new_guide) {
81   int mask;
82   int mask3 = 0;
83   int mask2 = 0;
84   int mask1 = 0;
85   ArmConditionCode code = static_cast<ArmConditionCode>(it->operands[0]);
86   int cond_bit = code & 1;
87   int alt_bit = cond_bit ^ 1;
88 
89   switch (strlen(new_guide)) {
90     case 3:
91       mask1 = (new_guide[2] == 'T') ? cond_bit : alt_bit;
92       FALLTHROUGH_INTENDED;
93     case 2:
94       mask2 = (new_guide[1] == 'T') ? cond_bit : alt_bit;
95       FALLTHROUGH_INTENDED;
96     case 1:
97       mask3 = (new_guide[0] == 'T') ? cond_bit : alt_bit;
98       break;
99     case 0:
100       break;
101     default:
102       LOG(FATAL) << "OAT: bad case in UpdateIT";
103       UNREACHABLE();
104   }
105   mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
106       (1 << (3 - strlen(new_guide)));
107   it->operands[1] = mask;
108 }
109 
// Close an IT block opened by OpIT(); emits a scheduling barrier so later
// passes cannot move instructions into or out of the IT shadow.
void ArmMir2Lir::OpEndIT(LIR* it) {
  // TODO: use the 'it' pointer to do some checks with the LIR, for example
  //       we could check that the number of instructions matches the mask
  //       in the IT instruction.
  CHECK(it != nullptr);
  GenBarrier();
}
117 
118 /*
119  * 64-bit 3way compare function.
120  *     mov   rX, #-1
121  *     cmp   op1hi, op2hi
122  *     blt   done
123  *     bgt   flip
124  *     sub   rX, op1lo, op2lo (treat as unsigned)
125  *     beq   done
126  *     ite   hi
127  *     mov(hi)   rX, #-1
128  *     mov(!hi)  rX, #1
129  * flip:
130  *     neg   rX
131  * done:
132  */
// 3-way compare of two 64-bit values into -1/0/1; see the pseudo-code above.
void ArmMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  LIR* target1;
  LIR* target2;
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  RegStorage t_reg = AllocTemp();
  // Preload the "less than" result; later code overwrites or negates it.
  LoadConstant(t_reg, -1);
  // Signed compare of the high words decides unless they are equal.
  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
  LIR* branch1 = OpCondBranch(kCondLt, nullptr);  // hi1 < hi2: done, result -1.
  LIR* branch2 = OpCondBranch(kCondGt, nullptr);  // hi1 > hi2: flip negates -1 to 1.
  // High words equal: unsigned subtract of the low words sets the flags
  // (and leaves 0 in t_reg when the low words are equal).
  OpRegRegReg(kOpSub, t_reg, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
  LIR* branch3 = OpCondBranch(kCondEq, nullptr);  // lo1 == lo2: done, result 0.

  // t_reg = (lo1 >u lo2) ? -1 : 1; falling through to 'flip' negates this
  // into the final sign.
  LIR* it = OpIT(kCondHi, "E");
  NewLIR2(kThumb2MovI8M, t_reg.GetReg(), ModifiedImmediate(-1));
  LoadConstant(t_reg, 1);
  OpEndIT(it);

  // 'flip' label.
  target2 = NewLIR0(kPseudoTargetLabel);
  OpRegReg(kOpNeg, t_reg, t_reg);

  // 'done' label.
  target1 = NewLIR0(kPseudoTargetLabel);

  RegLocation rl_temp = LocCReturn();  // Just using as template, will change
  rl_temp.reg.SetReg(t_reg.GetReg());
  StoreValue(rl_dest, rl_temp);
  FreeTemp(t_reg);

  branch1->target = target1;
  branch2->target = target2;
  branch3->target = branch1->target;
}
165 
// Fused compare-and-branch of a 64-bit value against a constant whose 32-bit
// halves both encode as Thumb2 modified immediates (DCHECKed below; the
// caller, GenFusedLongCmpBranch, verifies this before dispatching here).
void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                          int64_t val, ConditionCode ccode) {
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  DCHECK_GE(ModifiedImmediate(val_lo), 0);
  DCHECK_GE(ModifiedImmediate(val_hi), 0);
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegStorage low_reg = rl_src1.reg.GetLow();
  RegStorage high_reg = rl_src1.reg.GetHigh();

  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
    // (lo | hi) == 0 iff the whole 64-bit value is 0; ORRS sets Z for the
    // branch, and the OR result itself is discarded.
    RegStorage t_reg = AllocTemp();
    NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), low_reg.GetReg(), high_reg.GetReg(), 0);
    FreeTemp(t_reg);
    OpCondBranch(ccode, taken);
    return;
  }

  // General case: decide on the (signed) high word first; only when the high
  // words are equal fall through to the low-word compare, which must use the
  // unsigned form of the condition.
  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken);
      break;
    case kCondLt:
      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
      ccode = kCondUlt;
      break;
    case kCondLe:
      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
      ccode = kCondLs;
      break;
    case kCondGt:
      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
      ccode = kCondHi;
      break;
    case kCondGe:
      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  // High words equal: low-word compare with the unsigned condition chosen above.
  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
}
216 
// Materialize (left_op <code> right_op) ? true_val : false_val into rs_dest
// without branches, using IT blocks.  Both constants must be loadable with a
// single instruction (DCHECKed).
void ArmMir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
                                  int32_t true_val, int32_t false_val, RegStorage rs_dest,
                                  RegisterClass dest_reg_class) {
  UNUSED(dest_reg_class);
  // TODO: Generalize the IT below to accept more than one-instruction loads.
  DCHECK(InexpensiveConstantInt(true_val));
  DCHECK(InexpensiveConstantInt(false_val));

  if ((true_val == 0 && code == kCondEq) ||
      (false_val == 0 && code == kCondNe)) {
    // SUBS already leaves 0 in rs_dest on the "equal" side; a single
    // conditional load supplies the other value.
    OpRegRegReg(kOpSub, rs_dest, left_op, right_op);
    DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
    LIR* it = OpIT(kCondNe, "");
    LoadConstant(rs_dest, code == kCondEq ? false_val : true_val);
    OpEndIT(it);
    return;
  }

  OpRegReg(kOpCmp, left_op, right_op);  // Same?
  LIR* it = OpIT(code, "E");   // if-convert the test
  LoadConstant(rs_dest, true_val);      // .eq case - load true
  LoadConstant(rs_dest, false_val);     // .ne case - load false
  OpEndIT(it);
}
241 
// Lower a select: rl_dest = (rl_src <ccode> 0) ? true-arm : false-arm, using
// IT blocks instead of branches.  Two shapes: CONST (arms are the MIR's
// vB/vC immediates) and MOVE (arms are SSA uses 1 and 2).
void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  UNUSED(bb);
  RegLocation rl_result;
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  // Avoid using float regs here.
  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
  rl_src = LoadValue(rl_src, src_reg_class);
  ConditionCode ccode = mir->meta.ccode;
  if (mir->ssa_rep->num_uses == 1) {
    // CONST case: both arms are constants.
    int true_val = mir->dalvikInsn.vB;
    int false_val = mir->dalvikInsn.vC;
    rl_result = EvalLoc(rl_dest, result_reg_class, true);
    // Change kCondNe to kCondEq for the special cases below.
    if (ccode == kCondNe) {
      ccode = kCondEq;
      std::swap(true_val, false_val);
    }
    bool cheap_false_val = InexpensiveConstantInt(false_val);
    if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) {
      // SUBS src-(-true_val) leaves the true value in place exactly when
      // src == 0, so only the false value needs a conditional load.
      OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val);
      DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
      LIR* it = OpIT(true_val == 0 ? kCondNe : kCondUge, "");
      LoadConstant(rl_result.reg, false_val);
      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
    } else if (cheap_false_val && ccode == kCondEq && true_val == 1) {
      // RSBS 1-src yields 1 when src == 0; flags select the false value.
      OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1);
      DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
      LIR* it = OpIT(kCondLs, "");
      LoadConstant(rl_result.reg, false_val);
      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
    } else if (cheap_false_val && InexpensiveConstantInt(true_val)) {
      // Generic cheap-constant arms: compare, then conditionally load either.
      OpRegImm(kOpCmp, rl_src.reg, 0);
      LIR* it = OpIT(ccode, "E");
      LoadConstant(rl_result.reg, true_val);
      LoadConstant(rl_result.reg, false_val);
      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
    } else {
      // Unlikely case - could be tuned.  Expensive constants are materialized
      // up front (outside the IT shadow) and then conditionally copied.
      RegStorage t_reg1 = AllocTypedTemp(false, result_reg_class);
      RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
      LoadConstant(t_reg1, true_val);
      LoadConstant(t_reg2, false_val);
      OpRegImm(kOpCmp, rl_src.reg, 0);
      LIR* it = OpIT(ccode, "E");
      OpRegCopy(rl_result.reg, t_reg1);
      OpRegCopy(rl_result.reg, t_reg2);
      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
    }
  } else {
    // MOVE case: select between two register operands; skip a copy when one
    // arm already lives in the result register.
    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
    rl_true = LoadValue(rl_true, result_reg_class);
    rl_false = LoadValue(rl_false, result_reg_class);
    rl_result = EvalLoc(rl_dest, result_reg_class, true);
    OpRegImm(kOpCmp, rl_src.reg, 0);
    LIR* it = nullptr;
    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
      it = OpIT(NegateComparison(ccode), "");
      OpRegCopy(rl_result.reg, rl_false.reg);
    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {  // False case in place?
      it = OpIT(ccode, "");
      OpRegCopy(rl_result.reg, rl_true.reg);
    } else {  // Normal - select between the two.
      it = OpIT(ccode, "E");
      OpRegCopy(rl_result.reg, rl_true.reg);
      OpRegCopy(rl_result.reg, rl_false.reg);
    }
    OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
  }
  StoreValue(rl_dest, rl_result);
}
317 
// Fused 64-bit compare-and-branch: compare the signed high words first, and
// only when they tie compare the low words with the unsigned condition.
void ArmMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  // Normalize such that if either operand is constant, src2 will be constant.
  ConditionCode ccode = mir->meta.ccode;
  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }
  if (rl_src2.is_const) {
    rl_src2 = UpdateLocWide(rl_src2);
    // Do special compare/branch against simple const operand if not already in registers.
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    // Both halves must be encodable as Thumb2 modified immediates.
    if ((rl_src2.location != kLocPhysReg) &&
        ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
      GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
      return;
    }
  }
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
  // For the ordered conditions, the high-word compare can decide either way;
  // ccode is rewritten to the unsigned form for the low-word compare below.
  switch (ccode) {
    case kCondEq:
      OpCondBranch(kCondNe, not_taken);
      break;
    case kCondNe:
      OpCondBranch(kCondNe, taken);
      break;
    case kCondLt:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondUlt;
      break;
    case kCondLe:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondLs;
      break;
    case kCondGt:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondHi;
      break;
    case kCondGe:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  // High words equal: decide on the low words.
  OpRegReg(kOpCmp, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
  OpCondBranch(ccode, taken);
}
375 
376 /*
377  * Generate a register comparison to an immediate and branch.  Caller
378  * is responsible for setting branch target field.
379  */
LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) {
  LIR* branch = nullptr;
  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
  /*
   * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
   * compare-and-branch if zero is ideal if it will reach.  However, because null checks
   * branch forward to a slow path, they will frequently not reach - and thus have to
   * be converted to a long form during assembly (which will trigger another assembly
   * pass).  Here we estimate the branch distance for checks, and if large directly
   * generate the long form in an attempt to avoid an extra assembly pass.
   * TODO: consider interspersing slowpaths in code following unconditional branches.
   */
  bool skip = ((target != nullptr) && (target->opcode == kPseudoThrowTarget));
  skip &= ((mir_graph_->GetNumDalvikInsns() - current_dalvik_offset_) > 64);
  // CBZ/CBNZ only encode a low register compared against zero.
  if (!skip && reg.Low8() && (check_value == 0)) {
    if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) {
      branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
                       reg.GetReg(), 0);
    } else if (arm_cond == kArmCondLs) {
      // kArmCondLs is an unsigned less or equal. A comparison r <= 0 is then the same as cbz.
      // This case happens for a bounds check of array[0].
      branch = NewLIR2(kThumb2Cbz, reg.GetReg(), 0);
    }
  }

  // Fall back to CMP + conditional branch.
  if (branch == nullptr) {
    OpRegImm(kOpCmp, reg, check_value);
    branch = NewLIR2(kThumbBCond, 0, arm_cond);
  }

  branch->target = target;
  return branch;
}
413 
OpRegCopyNoInsert(RegStorage r_dest,RegStorage r_src)414 LIR* ArmMir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
415   LIR* res;
416   int opcode;
417   // If src or dest is a pair, we'll be using low reg.
418   if (r_dest.IsPair()) {
419     r_dest = r_dest.GetLow();
420   }
421   if (r_src.IsPair()) {
422     r_src = r_src.GetLow();
423   }
424   if (r_dest.IsFloat() || r_src.IsFloat())
425     return OpFpRegCopy(r_dest, r_src);
426   if (r_dest.Low8() && r_src.Low8())
427     opcode = kThumbMovRR;
428   else if (!r_dest.Low8() && !r_src.Low8())
429      opcode = kThumbMovRR_H2H;
430   else if (r_dest.Low8())
431      opcode = kThumbMovRR_H2L;
432   else
433      opcode = kThumbMovRR_L2H;
434   res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
435   if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
436     res->flags.is_nop = true;
437   }
438   return res;
439 }
440 
OpRegCopy(RegStorage r_dest,RegStorage r_src)441 void ArmMir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
442   if (r_dest != r_src) {
443     LIR* res = OpRegCopyNoInsert(r_dest, r_src);
444     AppendLIR(res);
445   }
446 }
447 
// Copy a 64-bit value between any mix of core pairs and double FP registers.
void ArmMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    bool dest_fp = r_dest.IsFloat();
    bool src_fp = r_src.IsFloat();
    DCHECK(r_dest.Is64Bit());
    DCHECK(r_src.Is64Bit());
    // Note: If the register is get by register allocator, it should never be a pair.
    // But some functions in mir_2_lir assume 64-bit registers are 32-bit register pairs.
    // TODO: Rework Mir2Lir::LoadArg() and Mir2Lir::LoadArgDirect().
    if (dest_fp && r_dest.IsPair()) {
      r_dest = As64BitFloatReg(r_dest);
    }
    if (src_fp && r_src.IsPair()) {
      r_src = As64BitFloatReg(r_src);
    }
    if (dest_fp) {
      if (src_fp) {
        OpRegCopy(r_dest, r_src);
      } else {
        // Core pair -> double register.
        NewLIR3(kThumb2Fmdrr, r_dest.GetReg(), r_src.GetLowReg(), r_src.GetHighReg());
      }
    } else {
      if (src_fp) {
        // Double register -> core pair.
        NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_src.GetReg());
      } else {
        // Handle overlap: order the two 32-bit copies so no source half is
        // clobbered before it is read; if both orders clobber, go via a temp.
        if (r_src.GetHighReg() != r_dest.GetLowReg()) {
          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
        } else if (r_src.GetLowReg() != r_dest.GetHighReg()) {
          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
        } else {
          RegStorage r_tmp = AllocTemp();
          OpRegCopy(r_tmp, r_src.GetHigh());
          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
          OpRegCopy(r_dest.GetHigh(), r_tmp);
          FreeTemp(r_tmp);
        }
      }
    }
  }
}
491 
492 // Table of magic divisors
// Table of magic divisors (reciprocal-multiply division, Hacker's Delight 10-4).
struct MagicTable {
  uint32_t magic;         // Reciprocal multiplier.
  uint32_t shift;         // Post-multiply arithmetic-shift amount.
  DividePattern pattern;  // Instruction sequence SmallLiteralDivRem() emits.
};

// Indexed by the divisor; DivideNone entries make SmallLiteralDivRem() bail out.
static const MagicTable magic_table[] = {
  {0, 0, DivideNone},        // 0
  {0, 0, DivideNone},        // 1
  {0, 0, DivideNone},        // 2
  {0x55555556, 0, Divide3},  // 3
  {0, 0, DivideNone},        // 4
  {0x66666667, 1, Divide5},  // 5
  {0x2AAAAAAB, 0, Divide3},  // 6
  {0x92492493, 2, Divide7},  // 7
  {0, 0, DivideNone},        // 8
  {0x38E38E39, 1, Divide5},  // 9
  {0x66666667, 2, Divide5},  // 10
  {0x2E8BA2E9, 1, Divide5},  // 11
  {0x2AAAAAAB, 1, Divide5},  // 12
  {0x4EC4EC4F, 2, Divide5},  // 13
  {0x92492493, 3, Divide7},  // 14
  {0x88888889, 3, Divide7},  // 15
};
517 
518 // Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
  UNUSED(dalvik_opcode);
  // Only divisors covered by magic_table qualify.
  if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
    return false;
  }
  DividePattern pattern = magic_table[lit].pattern;
  if (pattern == DivideNone) {
    return false;
  }

  RegStorage r_magic = AllocTemp();
  LoadConstant(r_magic, magic_table[lit].magic);
  rl_src = LoadValue(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage r_hi = AllocTemp();
  RegStorage r_lo = AllocTemp();

  // rl_dest and rl_src might overlap.
  // Reuse r_hi to save the div result for reminder case.
  RegStorage r_div_result = is_div ? rl_result.reg : r_hi;

  // Signed 64-bit product of src and the magic constant; r_hi receives the
  // high 32 bits.
  NewLIR4(kThumb2Smull, r_lo.GetReg(), r_hi.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
  switch (pattern) {
    case Divide3:
      // quotient = hi - (src >> 31): sign correction for negative dividends.
      OpRegRegRegShift(kOpSub, r_div_result, r_hi, rl_src.reg, EncodeShift(kArmAsr, 31));
      break;
    case Divide5:
      // quotient = (hi >> shift) - (src >> 31).
      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
      OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi,
                       EncodeShift(kArmAsr, magic_table[lit].shift));
      break;
    case Divide7:
      // Divide7 magic constants carry the sign bit, so add the dividend back
      // before the shift (Hacker's Delight, 10-4).
      OpRegReg(kOpAdd, r_hi, rl_src.reg);
      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
      OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi,
                       EncodeShift(kArmAsr, magic_table[lit].shift));
      break;
    default:
      LOG(FATAL) << "Unexpected pattern: " << pattern;
  }

  if (!is_div) {
    // div_result = src / lit
    // tmp1 = div_result * lit
    // dest = src - tmp1
    RegStorage tmp1 = r_lo;
    EasyMultiplyOp ops[2];

    bool canEasyMultiply = GetEasyMultiplyTwoOps(lit, ops);
    DCHECK_NE(canEasyMultiply, false);

    GenEasyMultiplyTwoOps(tmp1, r_div_result, ops);
    OpRegRegReg(kOpSub, rl_result.reg, rl_src.reg, tmp1);
  }

  StoreValue(rl_dest, rl_result);
  return true;
}
578 
579 // Try to convert *lit to 1 RegRegRegShift/RegRegShift form.
GetEasyMultiplyOp(int lit,ArmMir2Lir::EasyMultiplyOp * op)580 bool ArmMir2Lir::GetEasyMultiplyOp(int lit, ArmMir2Lir::EasyMultiplyOp* op) {
581   if (lit == 0) {
582     // Special case for *divide-by-zero*. The ops won't actually be used to generate code, as
583     // GenArithOpIntLit will directly generate exception-throwing code, and multiply-by-zero will
584     // have been optimized away earlier.
585     op->op = kOpInvalid;
586     op->shift = 0;
587     return true;
588   }
589 
590   if (IsPowerOfTwo(lit)) {
591     op->op = kOpLsl;
592     op->shift = CTZ(lit);
593     return true;
594   }
595 
596   if (IsPowerOfTwo(lit - 1)) {
597     op->op = kOpAdd;
598     op->shift = CTZ(lit - 1);
599     return true;
600   }
601 
602   if (IsPowerOfTwo(lit + 1)) {
603     op->op = kOpRsub;
604     op->shift = CTZ(lit + 1);
605     return true;
606   }
607 
608   op->op = kOpInvalid;
609   op->shift = 0;
610   return false;
611 }
612 
613 // Try to convert *lit to 1~2 RegRegRegShift/RegRegShift forms.
// Try to convert *lit to 1~2 RegRegRegShift/RegRegShift forms.
// ops[1] applies on top of ops[0]'s result; kOpInvalid in ops[1] means a
// single operation sufficed.
// NOTE(review): CTZ(0) is undefined; negative literals that fail every
// GetEasyMultiplyOp() probe look like they could reach it — presumably
// callers only pass non-negative multipliers; verify against call sites.
bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) {
  if (GetEasyMultiplyOp(lit, &ops[0])) {
    // Single-operation case.
    ops[1].op = kOpInvalid;
    ops[1].shift = 0;
    return true;
  }

  // lit = m << shift: strip trailing zero bits, then shift the result back.
  int lit1 = lit;
  uint32_t shift = CTZ(lit1);
  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
    ops[1].op = kOpLsl;
    ops[1].shift = shift;
    return true;
  }

  // lit = (m << shift) + 1: decompose lit-1, then add the source once.
  lit1 = lit - 1;
  shift = CTZ(lit1);
  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
    ops[1].op = kOpAdd;
    ops[1].shift = shift;
    return true;
  }

  // lit = (m << shift) - 1: decompose lit+1, then subtract the source once.
  lit1 = lit + 1;
  shift = CTZ(lit1);
  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
    ops[1].op = kOpRsub;
    ops[1].shift = shift;
    return true;
  }

  ops[1].op = kOpInvalid;
  ops[1].shift = 0;

  return false;
}
650 
651 // Generate instructions to do multiply.
652 // Additional temporary register is required,
653 // if it need to generate 2 instructions and src/dest overlap.
// Generate instructions to do multiply.
// Additional temporary register is required,
// if it need to generate 2 instructions and src/dest overlap.
void ArmMir2Lir::GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops) {
  // tmp1 = ( src << shift1) + [ src | -src | 0 ]
  // dest = (tmp1 << shift2) + [ src | -src | 0 ]

  // The intermediate can live in r_dest unless the second step still needs to
  // read r_src and r_src aliases r_dest.
  RegStorage r_tmp1;
  if (ops[1].op == kOpInvalid) {
    r_tmp1 = r_dest;
  } else if (r_dest.GetReg() != r_src.GetReg()) {
    r_tmp1 = r_dest;
  } else {
    r_tmp1 = AllocTemp();
  }

  // First step: tmp1 = f(src).
  switch (ops[0].op) {
    case kOpLsl:
      OpRegRegImm(kOpLsl, r_tmp1, r_src, ops[0].shift);
      break;
    case kOpAdd:
      // tmp1 = src + (src << shift).
      OpRegRegRegShift(kOpAdd, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift));
      break;
    case kOpRsub:
      // tmp1 = (src << shift) - src.
      OpRegRegRegShift(kOpRsub, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift));
      break;
    default:
      DCHECK_EQ(ops[0].op, kOpInvalid);
      break;
  }

  // Second step: dest = g(tmp1, src); kOpInvalid means one step sufficed.
  switch (ops[1].op) {
    case kOpInvalid:
      return;
    case kOpLsl:
      OpRegRegImm(kOpLsl, r_dest, r_tmp1, ops[1].shift);
      break;
    case kOpAdd:
      // dest = src + (tmp1 << shift).
      OpRegRegRegShift(kOpAdd, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift));
      break;
    case kOpRsub:
      // dest = (tmp1 << shift) - src.
      OpRegRegRegShift(kOpRsub, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift));
      break;
    default:
      LOG(FATAL) << "Unexpected opcode passed to GenEasyMultiplyTwoOps";
      break;
  }
}
699 
EasyMultiply(RegLocation rl_src,RegLocation rl_dest,int lit)700 bool ArmMir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
701   EasyMultiplyOp ops[2];
702 
703   if (!GetEasyMultiplyTwoOps(lit, ops)) {
704     return false;
705   }
706 
707   rl_src = LoadValue(rl_src, kCoreReg);
708   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
709 
710   GenEasyMultiplyTwoOps(rl_result.reg, rl_src.reg, ops);
711   StoreValue(rl_dest, rl_result);
712   return true;
713 }
714 
// Not used on ARM: div/rem is lowered through the RegStorage overload below,
// so reaching this overload indicates a compiler bug.
RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                                  RegLocation rl_src2, bool is_div, int flags) {
  UNUSED(rl_dest, rl_src1, rl_src2, is_div, flags);
  LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
  UNREACHABLE();
}
721 
// Not used on ARM: the RegStorage overload below handles literal div/rem,
// so reaching this overload indicates a compiler bug.
RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit,
                                     bool is_div) {
  UNUSED(rl_dest, rl_src1, lit, is_div);
  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
  UNREACHABLE();
}
728 
GenDivRemLit(RegLocation rl_dest,RegStorage reg1,int lit,bool is_div)729 RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
730   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
731 
732   // Put the literal in a temp.
733   RegStorage lit_temp = AllocTemp();
734   LoadConstant(lit_temp, lit);
735   // Use the generic case for div/rem with arg2 in a register.
736   // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
737   rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
738   FreeTemp(lit_temp);
739 
740   return rl_result;
741 }
742 
GenDivRem(RegLocation rl_dest,RegStorage reg1,RegStorage reg2,bool is_div)743 RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2,
744                                   bool is_div) {
745   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
746   if (is_div) {
747     // Simple case, use sdiv instruction.
748     OpRegRegReg(kOpDiv, rl_result.reg, reg1, reg2);
749   } else {
750     // Remainder case, use the following code:
751     // temp = reg1 / reg2      - integer division
752     // temp = temp * reg2
753     // dest = reg1 - temp
754 
755     RegStorage temp = AllocTemp();
756     OpRegRegReg(kOpDiv, temp, reg1, reg2);
757     OpRegReg(kOpMul, temp, reg2);
758     OpRegRegReg(kOpSub, rl_result.reg, reg1, temp);
759     FreeTemp(temp);
760   }
761 
762   return rl_result;
763 }
764 
// Inline Math.min/max (32-bit only) as a compare plus an IT-predicated pair
// of moves; returns false so the caller falls back for the long variants.
bool ArmMir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
  DCHECK_EQ(cu_->instruction_set, kThumb2);
  if (is_long) {
    return false;
  }
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = info->args[1];
  rl_src1 = LoadValue(rl_src1, kCoreReg);
  rl_src2 = LoadValue(rl_src2, kCoreReg);
  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
  // min: take src2 when src1 > src2; max: take src2 when src1 < src2.
  LIR* it = OpIT((is_min) ? kCondGt : kCondLt, "E");
  OpRegReg(kOpMov, rl_result.reg, rl_src2.reg);  // condition met: src2 wins
  OpRegReg(kOpMov, rl_result.reg, rl_src1.reg);  // else: src1 wins
  OpEndIT(it);
  StoreValue(rl_dest, rl_result);
  return true;
}
784 
// Inline Memory.peek*(address): raw load from the given address.
bool ArmMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (size == k64) {
    // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
    // Order the two loads so the first one cannot clobber the address register.
    if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
      Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
      Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
    } else {
      Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
      Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
    }
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}
809 
GenInlinedPoke(CallInfo * info,OpSize size)810 bool ArmMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
811   RegLocation rl_src_address = info->args[0];  // long address
812   rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
813   RegLocation rl_src_value = info->args[2];  // [size] value
814   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
815   if (size == k64) {
816     // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
817     RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
818     StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32, kNotVolatile);
819     StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32, kNotVolatile);
820   } else {
821     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
822     // Unaligned store with STR and STRSH is allowed on ARMv7 with SCTLR.A set to 0.
823     RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
824     StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
825   }
826   return true;
827 }
828 
829 // Generate a CAS with memory_order_seq_cst semantics.
bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK_EQ(cu_->instruction_set, kThumb2);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result

  // We have only 5 temporary registers available and actually only 4 if the InlineTarget
  // above locked one of the temps. For a straightforward CAS64 we need 7 registers:
  // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor
  // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop
  // into the same temps, reducing the number of required temps down to 5. We shall work
  // around the potentially locked temp by using LR for r_ptr, unconditionally.
  // TODO: Pass information about the need for more temps to the stack frame generation
  // code so that we can rely on being able to allocate enough temps.
  DCHECK(!GetRegInfo(rs_rARM_LR)->IsTemp());
  MarkTemp(rs_rARM_LR);
  FreeTemp(rs_rARM_LR);
  LockTemp(rs_rARM_LR);
  bool load_early = true;
  if (is_long) {
    // Decide whether expected/new_value can stay in registers across the
    // ldrex/strex loop ("early" load) or must be reloaded inside the loop to
    // conserve temps (see the register-budget comment above).
    RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() :
        rl_src_expected.reg;
    RegStorage new_val_reg = rl_src_new_value.reg.IsPair() ? rl_src_new_value.reg.GetLow() :
        rl_src_new_value.reg;
    bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !expected_reg.IsFloat();
    bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !new_val_reg.IsFloat();
    bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg);
    bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg);

    if (!expected_is_good_reg && !new_value_is_good_reg) {
      // None of expected/new_value is non-temp reg, need to load both late
      load_early = false;
      // Make sure they are not in the temp regs and the load will not be skipped.
      if (expected_is_core_reg) {
        FlushRegWide(rl_src_expected.reg);
        ClobberSReg(rl_src_expected.s_reg_low);
        ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low));
        rl_src_expected.location = kLocDalvikFrame;
      }
      if (new_value_is_core_reg) {
        FlushRegWide(rl_src_new_value.reg);
        ClobberSReg(rl_src_new_value.s_reg_low);
        ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low));
        rl_src_new_value.location = kLocDalvikFrame;
      }
    }
  }

  // Prevent reordering with prior memory operations.
  GenMemBarrier(kAnyStore);

  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
  RegLocation rl_new_value;
  if (!is_long) {
    rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
  } else if (load_early) {
    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
  }

  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
    // Mark card for object assuming new value is stored.
    MarkGCCard(0, rl_new_value.reg, rl_object.reg);
  }

  RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);

  // r_ptr = obj + offset: the address of the field being CASed.
  RegStorage r_ptr = rs_rARM_LR;
  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);

  // Free now unneeded rl_object and rl_offset to give more temps.
  ClobberSReg(rl_object.s_reg_low);
  FreeTemp(rl_object.reg);
  ClobberSReg(rl_offset.s_reg_low);
  FreeTemp(rl_offset.reg);

  RegLocation rl_expected;
  if (!is_long) {
    rl_expected = LoadValue(rl_src_expected, is_object ? kRefReg : kCoreReg);
  } else if (load_early) {
    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
  } else {
    // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
    RegStorage low_reg = AllocTemp();
    RegStorage high_reg = AllocTemp();
    rl_new_value.reg = RegStorage::MakeRegPair(low_reg, high_reg);
    rl_expected = rl_new_value;
  }

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = tmp != 0;

  RegStorage r_tmp = AllocTemp();
  LIR* target = NewLIR0(kPseudoTargetLabel);

  LIR* it = nullptr;
  if (is_long) {
    RegStorage r_tmp_high = AllocTemp();
    if (!load_early) {
      LoadValueDirectWide(rl_src_expected, rl_expected.reg);
    }
    NewLIR3(kThumb2Ldrexd, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
    // Compare by subtraction: r_tmp/r_tmp_high become (loaded - expected).
    OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow());
    OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh());
    if (!load_early) {
      LoadValueDirectWide(rl_src_new_value, rl_new_value.reg);
    }
    // Make sure we use ORR that sets the ccode
    if (r_tmp.Low8() && r_tmp_high.Low8()) {
      NewLIR2(kThumbOrr, r_tmp.GetReg(), r_tmp_high.GetReg());
    } else {
      NewLIR4(kThumb2OrrRRRs, r_tmp.GetReg(), r_tmp.GetReg(), r_tmp_high.GetReg(), 0);
    }
    FreeTemp(r_tmp_high);  // Now unneeded

    DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
    // STREXD only executes if the value matched (eq); it writes the store
    // status (0 = stored, 1 = lost exclusivity) into r_tmp.
    it = OpIT(kCondEq, "T");
    NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(), rl_new_value.reg.GetHighReg(), r_ptr.GetReg());

  } else {
    NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0);
    OpRegReg(kOpSub, r_tmp, rl_expected.reg);
    DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
    // STREX only executes if the value matched (eq); it writes the store
    // status (0 = stored, 1 = lost exclusivity) into r_tmp.
    it = OpIT(kCondEq, "T");
    NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
  }

  // Still one conditional left from OpIT(kCondEq, "T") from either branch
  // After a match, r_tmp holds the STREX status; CMP against 1 makes
  // kCondEq below mean "store lost exclusivity, retry the loop".
  OpRegImm(kOpCmp /* eq */, r_tmp, 1);
  OpEndIT(it);

  OpCondBranch(kCondEq, target);

  if (!load_early) {
    FreeTemp(rl_expected.reg);  // Now unneeded.
  }

  // Prevent reordering with subsequent memory operations.
  GenMemBarrier(kLoadAny);

  // result := (tmp1 != 0) ? 0 : 1;
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
  it = OpIT(kCondUlt, "");
  LoadConstant(rl_result.reg, 0); /* cc */
  FreeTemp(r_tmp);  // Now unneeded.
  OpEndIT(it);     // Barrier to terminate OpIT.

  StoreValue(rl_dest, rl_result);

  // Now, restore lr to its non-temp status.
  Clobber(rs_rARM_LR);
  UnmarkTemp(rs_rARM_LR);
  return true;
}
993 
bool ArmMir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
  // Inline expansion of System.arraycopy for char[]: emit the fast path with
  // all checks done in explicitly managed registers; any failed check (or a
  // copy larger than the threshold) branches to the intrinsic slow path.
  constexpr int kLargeArrayThreshold = 256;

  RegLocation rl_src = info->args[0];
  RegLocation rl_src_pos = info->args[1];
  RegLocation rl_dst = info->args[2];
  RegLocation rl_dst_pos = info->args[3];
  RegLocation rl_length = info->args[4];
  // Compile time check, handle exception by non-inline method to reduce related meta-data.
  if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) ||
      (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) ||
      (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) {
    return false;
  }

  ClobberCallerSave();
  LockCallTemps();  // Prepare for explicit register usage.
  LockTemp(rs_r12);
  RegStorage rs_src = rs_r0;
  RegStorage rs_dst = rs_r1;
  LoadValueDirectFixed(rl_src, rs_src);
  LoadValueDirectFixed(rl_dst, rs_dst);

  // Handle null pointer exception in slow-path.
  LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr);
  LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr);
  // Handle potential overlapping in slow-path.
  LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr);
  // Handle exception or big length in slow-path.
  RegStorage rs_length = rs_r2;
  LoadValueDirectFixed(rl_length, rs_length);
  // kCondHi is an unsigned compare, so a negative length also takes this branch.
  LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr);
  // Src bounds check.
  RegStorage rs_pos = rs_r3;
  RegStorage rs_arr_length = rs_r12;
  LoadValueDirectFixed(rl_src_pos, rs_pos);
  LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_pos, 0, nullptr);
  Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
  // Bad if src.length - src_pos < length.
  OpRegReg(kOpSub, rs_arr_length, rs_pos);
  LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
  // Dst bounds check.
  LoadValueDirectFixed(rl_dst_pos, rs_pos);
  LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_pos, 0, nullptr);
  Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
  OpRegReg(kOpSub, rs_arr_length, rs_pos);
  LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);

  // Everything is checked now.
  // dst = &dst->data[dst_pos]; chars are 2 bytes, so rs_pos is added twice
  // instead of shifting (rs_pos currently holds dst_pos).
  OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value());
  OpRegReg(kOpAdd, rs_dst, rs_pos);
  OpRegReg(kOpAdd, rs_dst, rs_pos);
  // src = &src->data[src_pos]; src_pos must be reloaded into rs_pos first.
  OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value());
  LoadValueDirectFixed(rl_src_pos, rs_pos);
  OpRegReg(kOpAdd, rs_src, rs_pos);
  OpRegReg(kOpAdd, rs_src, rs_pos);

  RegStorage rs_tmp = rs_pos;  // Positions no longer needed; reuse r3 as scratch.
  // Convert the element count into a byte count.
  OpRegRegImm(kOpLsl, rs_length, rs_length, 1);

  // Copy one element.
  // If the byte count is not a multiple of 4, copy the trailing odd char first.
  OpRegRegImm(kOpAnd, rs_tmp, rs_length, 2);
  LIR* jmp_to_begin_loop = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
  OpRegImm(kOpSub, rs_length, 2);
  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);

  // Copy two elements.
  // Main loop: move 4 bytes (two chars) per iteration, walking back to front.
  LIR *begin_loop = NewLIR0(kPseudoTargetLabel);
  LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr);
  OpRegImm(kOpSub, rs_length, 4);
  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);
  OpUnconditionalBranch(begin_loop);

  // All failed checks land here and fall into the intrinsic slow path.
  LIR *check_failed = NewLIR0(kPseudoTargetLabel);
  LIR* launchpad_branch = OpUnconditionalBranch(nullptr);
  LIR* return_point = NewLIR0(kPseudoTargetLabel);

  // Patch the forward branches now that their labels exist.
  src_check_branch->target = check_failed;
  dst_check_branch->target = check_failed;
  src_dst_same->target = check_failed;
  len_neg_or_too_big->target = check_failed;
  src_pos_negative->target = check_failed;
  src_bad_len->target = check_failed;
  dst_pos_negative->target = check_failed;
  dst_bad_len->target = check_failed;
  jmp_to_begin_loop->target = begin_loop;
  jmp_to_ret->target = return_point;

  AddIntrinsicSlowPath(info, launchpad_branch, return_point);
  ClobberCallerSave();  // We must clobber everything because slow path will return here

  return true;
}
1088 
OpPcRelLoad(RegStorage reg,LIR * target)1089 void ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
1090   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
1091   LIR* lir = NewLIR2(kThumb2LdrPcRel12, reg.GetReg(), 0);
1092   lir->target = target;
1093 }
1094 
bool ArmMir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
  // PC-relative dex cache array loads are usable only when the dex cache
  // arrays layout has been determined (i.e. is valid).
  return dex_cache_arrays_layout_.Valid();
}
1098 
OpPcRelDexCacheArrayAddr(const DexFile * dex_file,int offset,RegStorage r_dest)1099 void ArmMir2Lir::OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest) {
1100   LIR* movw = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), 0);
1101   LIR* movt = NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), 0);
1102   ArmOpcode add_pc_opcode = (r_dest.GetRegNum() < 8) ? kThumbAddRRLH : kThumbAddRRHH;
1103   LIR* add_pc = NewLIR2(add_pc_opcode, r_dest.GetReg(), rs_rARM_PC.GetReg());
1104   add_pc->flags.fixup = kFixupLabel;
1105   movw->operands[2] = WrapPointer(dex_file);
1106   movw->operands[3] = offset;
1107   movw->operands[4] = WrapPointer(add_pc);
1108   movt->operands[2] = movw->operands[2];
1109   movt->operands[3] = movw->operands[3];
1110   movt->operands[4] = movw->operands[4];
1111   dex_cache_access_insns_.push_back(movw);
1112   dex_cache_access_insns_.push_back(movt);
1113 }
1114 
OpPcRelDexCacheArrayLoad(const DexFile * dex_file,int offset,RegStorage r_dest,bool wide)1115 void ArmMir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest,
1116                                           bool wide) {
1117   DCHECK(!wide) << "Unsupported";
1118   if (dex_cache_arrays_base_reg_.Valid()) {
1119     LoadRefDisp(dex_cache_arrays_base_reg_, offset - dex_cache_arrays_min_offset_,
1120                 r_dest, kNotVolatile);
1121   } else {
1122     OpPcRelDexCacheArrayAddr(dex_file, offset, r_dest);
1123     LoadRefDisp(r_dest, 0, r_dest, kNotVolatile);
1124   }
1125 }
1126 
OpVldm(RegStorage r_base,int count)1127 LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) {
1128   return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count);
1129 }
1130 
OpVstm(RegStorage r_base,int count)1131 LIR* ArmMir2Lir::OpVstm(RegStorage r_base, int count) {
1132   return NewLIR3(kThumb2Vstms, r_base.GetReg(), rs_fr0.GetReg(), count);
1133 }
1134 
GenMaddMsubInt(RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_src2,RegLocation rl_src3,bool is_sub)1135 void ArmMir2Lir::GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
1136                                 RegLocation rl_src3, bool is_sub) {
1137   rl_src1 = LoadValue(rl_src1, kCoreReg);
1138   rl_src2 = LoadValue(rl_src2, kCoreReg);
1139   rl_src3 = LoadValue(rl_src3, kCoreReg);
1140   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1141   NewLIR4(is_sub ? kThumb2Mls : kThumb2Mla, rl_result.reg.GetReg(), rl_src1.reg.GetReg(),
1142           rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
1143   StoreValue(rl_dest, rl_result);
1144 }
1145 
GenMultiplyByTwoBitMultiplier(RegLocation rl_src,RegLocation rl_result,int lit,int first_bit,int second_bit)1146 void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
1147                                                RegLocation rl_result, int lit,
1148                                                int first_bit, int second_bit) {
1149   UNUSED(lit);
1150   OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
1151                    EncodeShift(kArmLsl, second_bit - first_bit));
1152   if (first_bit != 0) {
1153     OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
1154   }
1155 }
1156 
GenDivZeroCheckWide(RegStorage reg)1157 void ArmMir2Lir::GenDivZeroCheckWide(RegStorage reg) {
1158   DCHECK(reg.IsPair());   // TODO: support k64BitSolo.
1159   RegStorage t_reg = AllocTemp();
1160   NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), reg.GetLowReg(), reg.GetHighReg(), 0);
1161   FreeTemp(t_reg);
1162   GenDivZeroCheck(kCondEq);
1163 }
1164 
1165 // Test suspend flag, return target of taken suspend branch
OpTestSuspend(LIR * target)1166 LIR* ArmMir2Lir::OpTestSuspend(LIR* target) {
1167 #ifdef ARM_R4_SUSPEND_FLAG
1168   NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1);
1169   return OpCondBranch((target == nullptr) ? kCondEq : kCondNe, target);
1170 #else
1171   RegStorage t_reg = AllocTemp();
1172   LoadBaseDisp(rs_rARM_SELF, Thread::ThreadFlagsOffset<4>().Int32Value(),
1173     t_reg, kUnsignedHalf, kNotVolatile);
1174   LIR* cmp_branch = OpCmpImmBranch((target == nullptr) ? kCondNe : kCondEq, t_reg,
1175     0, target);
1176   FreeTemp(t_reg);
1177   return cmp_branch;
1178 #endif
1179 }
1180 
1181 // Decrement register and branch on condition
OpDecAndBranch(ConditionCode c_code,RegStorage reg,LIR * target)1182 LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
1183   // Combine sub & test using sub setflags encoding here
1184   OpRegRegImm(kOpSub, reg, reg, 1);  // For value == 1, this should set flags.
1185   DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
1186   return OpCondBranch(c_code, target);
1187 }
1188 
GenMemBarrier(MemBarrierKind barrier_kind)1189 bool ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
1190   if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
1191     return false;
1192   }
1193   // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
1194   LIR* barrier = last_lir_insn_;
1195 
1196   int dmb_flavor;
1197   // TODO: revisit Arm barrier kinds
1198   switch (barrier_kind) {
1199     case kAnyStore: dmb_flavor = kISH; break;
1200     case kLoadAny: dmb_flavor = kISH; break;
1201     case kStoreStore: dmb_flavor = kISHST; break;
1202     case kAnyAny: dmb_flavor = kISH; break;
1203     default:
1204       LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
1205       dmb_flavor = kSY;  // quiet gcc.
1206       break;
1207   }
1208 
1209   bool ret = false;
1210 
1211   // If the same barrier already exists, don't generate another.
1212   if (barrier == nullptr
1213       || (barrier != nullptr && (barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor))) {
1214     barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
1215     ret = true;
1216   }
1217 
1218   // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
1219   DCHECK(!barrier->flags.use_def_invalid);
1220   barrier->u.m.def_mask = &kEncodeAll;
1221   return ret;
1222 }
1223 
GenNegLong(RegLocation rl_dest,RegLocation rl_src)1224 void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
1225   rl_src = LoadValueWide(rl_src, kCoreReg);
1226   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1227   RegStorage z_reg = AllocTemp();
1228   LoadConstantNoClobber(z_reg, 0);
1229   // Check for destructive overlap
1230   if (rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
1231     RegStorage t_reg = AllocTemp();
1232     OpRegCopy(t_reg, rl_result.reg.GetLow());
1233     OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
1234     OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, t_reg);
1235     FreeTemp(t_reg);
1236   } else {
1237     OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
1238     OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, rl_src.reg.GetHigh());
1239   }
1240   FreeTemp(z_reg);
1241   StoreValueWide(rl_dest, rl_result);
1242 }
1243 
void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  UNUSED(opcode);
  /*
   * tmp1     = src1.hi * src2.lo;  // src1.hi is no longer needed
   * dest     = src1.lo * src2.lo;
   * tmp1    += src1.lo * src2.hi;
   * dest.hi += tmp1;
   *
   * To pull off inline multiply, we have a worst-case requirement of 7 temporary
   * registers.  Normally for Arm, we get 5.  We can get to 6 by including
   * lr in the temp set.  The only problematic case is all operands and result are
   * distinct, and none have been promoted.  In that case, we can succeed by aggressively
   * freeing operand temp registers after they are no longer needed.  All other cases
   * can proceed normally.  We'll just punt on the case of the result having a misaligned
   * overlap with either operand and send that case to a runtime handler.
   */
  RegLocation rl_result;
  if (PartiallyIntersects(rl_src1, rl_dest) || (PartiallyIntersects(rl_src2, rl_dest))) {
    // Misaligned overlap between result and an operand: punt to the runtime.
    FlushAllRegs();
    CallRuntimeHelperRegLocationRegLocation(kQuickLmul, rl_src1, rl_src2, false);
    rl_result = GetReturnWide(kCoreReg);
    StoreValueWide(rl_dest, rl_result);
    return;
  }

  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);

  // reg_status: 0 == result lives in rl_dest's own registers; 1 == both
  // result temps allocated up front; 2 == res_hi allocation deferred until
  // src1.hi can be freed (the tightest register-pressure case).
  int reg_status = 0;
  RegStorage res_lo;
  RegStorage res_hi;
  bool dest_promoted = rl_dest.location == kLocPhysReg && rl_dest.reg.Valid() &&
      !IsTemp(rl_dest.reg.GetLow()) && !IsTemp(rl_dest.reg.GetHigh());
  bool src1_promoted = !IsTemp(rl_src1.reg.GetLow()) && !IsTemp(rl_src1.reg.GetHigh());
  bool src2_promoted = !IsTemp(rl_src2.reg.GetLow()) && !IsTemp(rl_src2.reg.GetHigh());
  // Check if rl_dest is *not* either operand and we have enough temp registers.
  if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) &&
      (dest_promoted || src1_promoted || src2_promoted)) {
    // In this case, we do not need to manually allocate temp registers for result.
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    res_lo = rl_result.reg.GetLow();
    res_hi = rl_result.reg.GetHigh();
  } else {
    res_lo = AllocTemp();
    if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || src1_promoted || src2_promoted) {
      // In this case, we have enough temp registers to be allocated for result.
      res_hi = AllocTemp();
      reg_status = 1;
    } else {
      // In this case, all temps are now allocated.
      // res_hi will be allocated after we can free src1_hi.
      reg_status = 2;
    }
  }

  // Temporarily add LR to the temp pool, and assign it to tmp1
  MarkTemp(rs_rARM_LR);
  FreeTemp(rs_rARM_LR);
  RegStorage tmp1 = rs_rARM_LR;
  LockTemp(rs_rARM_LR);

  if (rl_src1.reg == rl_src2.reg) {
    // Squaring: x.hi * x.lo appears twice in the cross terms, hence the
    // final shift-by-1 accumulate.
    DCHECK(res_hi.Valid());
    DCHECK(res_lo.Valid());
    NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg());
    NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src1.reg.GetLowReg(),
            rl_src1.reg.GetLowReg());
    OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1));
  } else {
    NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetHighReg());
    if (reg_status == 2) {
      DCHECK(!res_hi.Valid());
      DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
      DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
      // Will force free src1_hi, so must clobber.
      Clobber(rl_src1.reg);
      FreeTemp(rl_src1.reg.GetHigh());
      res_hi = AllocTemp();
    }
    DCHECK(res_hi.Valid());
    DCHECK(res_lo.Valid());
    NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src2.reg.GetLowReg(),
            rl_src1.reg.GetLowReg());
    NewLIR4(kThumb2Mla, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetHighReg(),
            tmp1.GetReg());
    NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0);
    if (reg_status == 2) {
      FreeTemp(rl_src1.reg.GetLow());
    }
  }

  if (reg_status != 0) {
    // We had manually allocated registers for rl_result.
    // Now construct a RegLocation.
    rl_result = GetReturnWide(kCoreReg);  // Just using as a template.
    rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi);
  }

  // Free tmp1 but keep LR as temp for StoreValueWide() if needed.
  FreeTemp(tmp1);

  StoreValueWide(rl_dest, rl_result);

  // Now, restore lr to its non-temp status.
  Clobber(rs_rARM_LR);
  UnmarkTemp(rs_rARM_LR);
}
1352 
GenArithOpLong(Instruction::Code opcode,RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_src2,int flags)1353 void ArmMir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
1354                                 RegLocation rl_src2, int flags) {
1355   switch (opcode) {
1356     case Instruction::MUL_LONG:
1357     case Instruction::MUL_LONG_2ADDR:
1358       GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
1359       return;
1360     case Instruction::NEG_LONG:
1361       GenNegLong(rl_dest, rl_src2);
1362       return;
1363 
1364     default:
1365       break;
1366   }
1367 
1368   // Fallback for all other ops.
1369   Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
1370 }
1371 
1372 /*
1373  * Generate array load
1374  */
void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_dest, int scale) {
  // Emit an array element load: null check, optional bounds check, then the
  // load itself, using either base+displacement or base+index addressing.
  RegisterClass reg_class = RegClassBySize(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  int data_offset;
  RegLocation rl_result;
  bool constant_index = rl_index.is_const;
  rl_array = LoadValue(rl_array, kRefReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  // Wide (64-bit) elements start at the 8-byte-aligned data offset;
  // everything else uses the 4-byte-aligned one.
  if (rl_dest.wide) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  // If index is constant, just fold it into the data offset
  if (constant_index) {
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
  }

  /* null object? */
  GenNullCheck(rl_array.reg, opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  RegStorage reg_len;
  if (needs_range_check) {
    reg_len = AllocTemp();
    /* Get len */
    Load32Disp(rl_array.reg, len_offset, reg_len);
    MarkPossibleNullPointerException(opt_flags);
  } else {
    ForceImplicitNullCheck(rl_array.reg, opt_flags);
  }
  if (rl_dest.wide || rl_dest.fp || constant_index) {
    // Wide, FP and constant-index destinations use base+displacement addressing.
    RegStorage reg_ptr;
    if (constant_index) {
      reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
    } else {
      // No special indexed operation, lea + load w/ displacement
      reg_ptr = AllocTempRef();
      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
      FreeTemp(rl_index.reg);
    }
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      if (constant_index) {
        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
      } else {
        GenArrayBoundsCheck(rl_index.reg, reg_len);
      }
      FreeTemp(reg_len);
    }
    LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, kNotVolatile);
    if (!constant_index) {
      FreeTemp(reg_ptr);
    }
    if (rl_dest.wide) {
      StoreValueWide(rl_dest, rl_result);
    } else {
      StoreValue(rl_dest, rl_result);
    }
  } else {
    // Offset base, then use indexed load
    RegStorage reg_ptr = AllocTempRef();
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
    FreeTemp(rl_array.reg);
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      GenArrayBoundsCheck(rl_index.reg, reg_len);
      FreeTemp(reg_len);
    }
    LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
    FreeTemp(reg_ptr);
    StoreValue(rl_dest, rl_result);
  }
}
1456 
1457 /*
1458  * Generate array store
1459  *
1460  */
void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
  // Emit an array element store: null check, optional bounds check, the store
  // itself, and a GC card mark when storing an object reference.
  RegisterClass reg_class = RegClassBySize(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  bool constant_index = rl_index.is_const;

  // 64-bit elements start at the 8-byte-aligned data offset; everything else
  // uses the 4-byte-aligned one.
  int data_offset;
  if (size == k64 || size == kDouble) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  // If index is constant, just fold it into the data offset.
  if (constant_index) {
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
  }

  rl_array = LoadValue(rl_array, kRefReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  RegStorage reg_ptr;
  bool allocated_reg_ptr_temp = false;
  if (constant_index) {
    reg_ptr = rl_array.reg;
  } else if (IsTemp(rl_array.reg) && !card_mark) {
    // The array reg is a temp and isn't needed for the card mark afterwards,
    // so it can be reused as the pointer register.
    Clobber(rl_array.reg);
    reg_ptr = rl_array.reg;
  } else {
    allocated_reg_ptr_temp = true;
    reg_ptr = AllocTempRef();
  }

  /* null object? */
  GenNullCheck(rl_array.reg, opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  RegStorage reg_len;
  if (needs_range_check) {
    reg_len = AllocTemp();
    // NOTE: max live temps(4) here.
    /* Get len */
    Load32Disp(rl_array.reg, len_offset, reg_len);
    MarkPossibleNullPointerException(opt_flags);
  } else {
    ForceImplicitNullCheck(rl_array.reg, opt_flags);
  }
  /* at this point, reg_ptr points to array, 2 live temps */
  if (rl_src.wide || rl_src.fp || constant_index) {
    // Wide, FP and constant-index sources use base+displacement addressing.
    if (rl_src.wide) {
      rl_src = LoadValueWide(rl_src, reg_class);
    } else {
      rl_src = LoadValue(rl_src, reg_class);
    }
    if (!constant_index) {
      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
    }
    if (needs_range_check) {
      if (constant_index) {
        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
      } else {
        GenArrayBoundsCheck(rl_index.reg, reg_len);
      }
      FreeTemp(reg_len);
    }

    StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size, kNotVolatile);
  } else {
    /* reg_ptr -> array data */
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
    rl_src = LoadValue(rl_src, reg_class);
    if (needs_range_check) {
      GenArrayBoundsCheck(rl_index.reg, reg_len);
      FreeTemp(reg_len);
    }
    StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
  }
  if (allocated_reg_ptr_temp) {
    FreeTemp(reg_ptr);
  }
  if (card_mark) {
    MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
  }
}
1547 
1548 
/*
 * Generate code for a long (64-bit) shift by a constant amount.  The wide
 * operand lives in a core register pair, so each shift is decomposed into
 * 32-bit operations on the low/high halves.  Falls back to the generic
 * register-amount lowering when the destination partially overlaps the
 * source, since the half-by-half sequences below could clobber an input.
 */
void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift,
                                   int flags) {
  UNUSED(flags);
  rl_src = LoadValueWide(rl_src, kCoreReg);
  // Per spec, we only care about low 6 bits of shift amount.
  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
  if (shift_amount == 0) {
    // Shift by zero degenerates to a plain 64-bit move.
    StoreValueWide(rl_dest, rl_src);
    return;
  }
  if (PartiallyIntersects(rl_src, rl_dest)) {
    // Source and destination share one (but not both) physical registers;
    // use the generic shift lowering, which tolerates overlap.
    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
    return;
  }
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  switch (opcode) {
    case Instruction::SHL_LONG:
    case Instruction::SHL_LONG_2ADDR:
      if (shift_amount == 1) {
        // x << 1 == x + x: ADD low halves, ADC propagates the carry into high.
        OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), rl_src.reg.GetLow());
        OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), rl_src.reg.GetHigh());
      } else if (shift_amount == 32) {
        // Exactly one word: hi = lo, lo = 0.
        // NOTE(review): rl_src.reg is passed as the wide pair here, unlike the
        // SHR/USHR cases below which use GetHigh(); presumably OpRegCopy takes
        // the pair's low half — confirm against OpRegCopy's handling of pairs.
        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg);
        LoadConstant(rl_result.reg.GetLow(), 0);
      } else if (shift_amount > 31) {
        // 33..63: hi = lo << (n - 32), lo = 0.
        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetLow(), shift_amount - 32);
        LoadConstant(rl_result.reg.GetLow(), 0);
      } else {
        // 2..31: hi = (hi << n) | (lo >> (32 - n)); lo = lo << n.
        // The high half is computed first so the (unclobbered) source low half
        // can still feed the OR; overlap was ruled out above.
        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), rl_src.reg.GetLow(),
                         EncodeShift(kArmLsr, 32 - shift_amount));
        OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount);
      }
      break;
    case Instruction::SHR_LONG:
    case Instruction::SHR_LONG_2ADDR:
      // Arithmetic (sign-propagating) right shift.
      if (shift_amount == 32) {
        // lo = hi, hi = sign-extension of hi.
        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
      } else if (shift_amount > 31) {
        // 33..63: lo = hi >> (n - 32) (arithmetic), hi = all sign bits.
        OpRegRegImm(kOpAsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
      } else {
        // 1..31: lo = (lo >>> n) | (hi << (32 - n)); hi = hi >> n (arithmetic).
        RegStorage t_reg = AllocTemp();
        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
                         EncodeShift(kArmLsl, 32 - shift_amount));
        FreeTemp(t_reg);
        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
      }
      break;
    case Instruction::USHR_LONG:
    case Instruction::USHR_LONG_2ADDR:
      // Logical (zero-filling) right shift.
      if (shift_amount == 32) {
        // lo = hi, hi = 0.
        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
        LoadConstant(rl_result.reg.GetHigh(), 0);
      } else if (shift_amount > 31) {
        // 33..63: lo = hi >>> (n - 32), hi = 0.
        OpRegRegImm(kOpLsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
        LoadConstant(rl_result.reg.GetHigh(), 0);
      } else {
        // 1..31: lo = (lo >>> n) | (hi << (32 - n)); hi = hi >>> n.
        RegStorage t_reg = AllocTemp();
        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
                         EncodeShift(kArmLsl, 32 - shift_amount));
        FreeTemp(t_reg);
        OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
      }
      break;
    default:
      LOG(FATAL) << "Unexpected case";
  }
  StoreValueWide(rl_dest, rl_result);
}
1623 
/*
 * Generate code for a long (64-bit) add/sub/and/or/xor where one operand is a
 * constant.  Bails out to the generic register-register path (GenArithOpLong)
 * whenever the immediate form is unavailable or the operands overlap the
 * destination in a way the half-by-half sequences below can't tolerate.
 */
void ArmMir2Lir::GenArithImmOpLong(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                                   int flags) {
  if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
    if (!rl_src2.is_const) {
      // Don't bother with special handling for subtract from immediate.
      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
      return;
    }
  } else {
    // Normalize: the remaining opcodes are commutative, so move the constant
    // operand into rl_src2.
    if (!rl_src2.is_const) {
      DCHECK(rl_src1.is_const);
      std::swap(rl_src1, rl_src2);
    }
  }
  if (PartiallyIntersects(rl_src1, rl_dest)) {
    // Source and destination share one physical register; use the generic
    // path, which tolerates overlap.
    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
    return;
  }
  DCHECK(rl_src2.is_const);
  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
  uint32_t val_lo = Low32Bits(val);
  uint32_t val_hi = High32Bits(val);
  // Thumb2 "modified immediate" encodings of each 32-bit half; a negative
  // result means the value cannot be encoded in that form.
  int32_t mod_imm_lo = ModifiedImmediate(val_lo);
  int32_t mod_imm_hi = ModifiedImmediate(val_hi);

  // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
        return;
      }
      break;
    default:
      // AND/OR/XOR go through OpRegRegImm below, which can materialize any
      // immediate itself, so no encodability bail-out is needed.
      break;
  }
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      // ADD low sets carry; ADC folds it into the high half.
      NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
      NewLIR3(kThumb2AdcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
      break;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      // OR with 0 is a no-op — skip it unless a copy into a different result
      // register is still required (OpRegRegImm with 0 then acts as the copy).
      if ((val_lo != 0) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
        OpRegRegImm(kOpOr, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
      }
      if ((val_hi != 0) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
        OpRegRegImm(kOpOr, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
      }
      break;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      OpRegRegImm(kOpXor, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
      OpRegRegImm(kOpXor, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
      break;
    case Instruction::AND_LONG:
    case Instruction::AND_LONG_2ADDR:
      // AND with all-ones is a no-op — same skip/copy logic as OR above.
      if ((val_lo != 0xffffffff) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
        OpRegRegImm(kOpAnd, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
      }
      if ((val_hi != 0xffffffff) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
        OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
      }
      break;
    case Instruction::SUB_LONG_2ADDR:
    case Instruction::SUB_LONG:
      // SUB low sets borrow (carry); SBC folds it into the high half.
      NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
      NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
      break;
    default:
      LOG(FATAL) << "Unexpected opcode " << opcode;
  }
  StoreValueWide(rl_dest, rl_result);
}
1707 
HandleEasyDivRem(Instruction::Code dalvik_opcode,bool is_div,RegLocation rl_src,RegLocation rl_dest,int lit)1708 bool ArmMir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
1709                                   RegLocation rl_src, RegLocation rl_dest, int lit) {
1710   if (lit < 2) {
1711     return false;
1712   }
1713 
1714   // ARM does either not support a division instruction, or it is potentially expensive. Look for
1715   // more special cases.
1716   if (!IsPowerOfTwo(lit)) {
1717     return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, lit);
1718   }
1719 
1720   return Mir2Lir::HandleEasyDivRem(dalvik_opcode, is_div, rl_src, rl_dest, lit);
1721 }
1722 
1723 }  // namespace art
1724