/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the Thumb2 ISA. */

#include "codegen_arm.h"

#include "arch/instruction_set_features.h"
#include "arm_lir.h"
#include "base/bit_utils.h"
#include "base/logging.h"
#include "dex/compiler_ir.h"
#include "dex/mir_graph.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "driver/compiler_driver.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/array-inl.h"

namespace art {

LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
  OpRegReg(kOpCmp, src1, src2);
  return OpCondBranch(cond, target);
}

/*
 * Generate a Thumb2 IT instruction, which can nullify up to
 * four subsequent instructions based on a condition and its
 * inverse. The condition applies to the first instruction, which
 * is executed if the condition is met. The string "guide" consists
 * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
 * A "T" means the instruction is executed if the condition is
 * met, and an "E" means the instruction is executed if the condition
 * is not met.
 */
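// For example, OpIT(kCondEq, "TE") opens an IT block covering three instructions:
// the first two execute only when EQ holds and the third only when it does not.
// In the mask computed below, the bits mirror the guide and the trailing 1 bit
// marks the end of the block.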
LIR* ArmMir2Lir::OpIT(ConditionCode ccode, const char* guide) {
  int mask;
  int mask3 = 0;
  int mask2 = 0;
  int mask1 = 0;
  ArmConditionCode code = ArmConditionEncoding(ccode);
  int cond_bit = code & 1;
  int alt_bit = cond_bit ^ 1;

  switch (strlen(guide)) {
    case 3:
      mask1 = (guide[2] == 'T') ? cond_bit : alt_bit;
      FALLTHROUGH_INTENDED;
    case 2:
      mask2 = (guide[1] == 'T') ? cond_bit : alt_bit;
      FALLTHROUGH_INTENDED;
    case 1:
      mask3 = (guide[0] == 'T') ? cond_bit : alt_bit;
      break;
    case 0:
      break;
    default:
      LOG(FATAL) << "OAT: bad case in OpIT";
      UNREACHABLE();
  }
  mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
         (1 << (3 - strlen(guide)));
  return NewLIR2(kThumb2It, code, mask);
}

void ArmMir2Lir::UpdateIT(LIR* it, const char* new_guide) {
  int mask;
  int mask3 = 0;
  int mask2 = 0;
  int mask1 = 0;
  ArmConditionCode code = static_cast<ArmConditionCode>(it->operands[0]);
  int cond_bit = code & 1;
  int alt_bit = cond_bit ^ 1;

  switch (strlen(new_guide)) {
    case 3:
      mask1 = (new_guide[2] == 'T') ? cond_bit : alt_bit;
      FALLTHROUGH_INTENDED;
    case 2:
      mask2 = (new_guide[1] == 'T') ? cond_bit : alt_bit;
      FALLTHROUGH_INTENDED;
    case 1:
      mask3 = (new_guide[0] == 'T') ? cond_bit : alt_bit;
      break;
    case 0:
      break;
    default:
      LOG(FATAL) << "OAT: bad case in UpdateIT";
      UNREACHABLE();
  }
  mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
         (1 << (3 - strlen(new_guide)));
  it->operands[1] = mask;
}

void ArmMir2Lir::OpEndIT(LIR* it) {
  // TODO: use the 'it' pointer to do some checks with the LIR, for example
  //       we could check that the number of instructions matches the mask
  //       in the IT instruction.
  CHECK(it != nullptr);
  GenBarrier();
}

/*
 * 64-bit 3way compare function.
 *     mov   rX, #-1
 *     cmp   op1hi, op2hi
 *     blt   done
 *     bgt   flip
 *     sub   rX, op1lo, op2lo (treat as unsigned)
 *     beq   done
 *     ite   hi
 *     mov(hi)   rX, #-1
 *     mov(!hi)  rX, #1
 * flip:
 *     neg   rX
 * done:
 */
void ArmMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  LIR* target1;
  LIR* target2;
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  RegStorage t_reg = AllocTemp();
  LoadConstant(t_reg, -1);
  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
  LIR* branch1 = OpCondBranch(kCondLt, nullptr);
  LIR* branch2 = OpCondBranch(kCondGt, nullptr);
  OpRegRegReg(kOpSub, t_reg, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
  LIR* branch3 = OpCondBranch(kCondEq, nullptr);

  LIR* it = OpIT(kCondHi, "E");
  NewLIR2(kThumb2MovI8M, t_reg.GetReg(), ModifiedImmediate(-1));
  LoadConstant(t_reg, 1);
  OpEndIT(it);

  target2 = NewLIR0(kPseudoTargetLabel);
  OpRegReg(kOpNeg, t_reg, t_reg);

  target1 = NewLIR0(kPseudoTargetLabel);

  RegLocation rl_temp = LocCReturn();  // Just using as template, will change
  rl_temp.reg.SetReg(t_reg.GetReg());
  StoreValue(rl_dest, rl_temp);
  FreeTemp(t_reg);

  branch1->target = target1;
  branch2->target = target2;
  branch3->target = branch1->target;
}

void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                          int64_t val, ConditionCode ccode) {
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  DCHECK_GE(ModifiedImmediate(val_lo), 0);
  DCHECK_GE(ModifiedImmediate(val_hi), 0);
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegStorage low_reg = rl_src1.reg.GetLow();
  RegStorage high_reg = rl_src1.reg.GetHigh();

  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
    RegStorage t_reg = AllocTemp();
    NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), low_reg.GetReg(), high_reg.GetReg(), 0);
    FreeTemp(t_reg);
    OpCondBranch(ccode, taken);
    return;
  }

  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken);
      break;
    case kCondLt:
      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
      ccode = kCondUlt;
      break;
    case kCondLe:
      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
      ccode = kCondLs;
      break;
    case kCondGt:
      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
      ccode = kCondHi;
      break;
    case kCondGe:
      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
}

void ArmMir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
                                  int32_t true_val, int32_t false_val, RegStorage rs_dest,
                                  RegisterClass dest_reg_class) {
  UNUSED(dest_reg_class);
  // TODO: Generalize the IT below to accept more than one-instruction loads.
  DCHECK(InexpensiveConstantInt(true_val));
  DCHECK(InexpensiveConstantInt(false_val));

  if ((true_val == 0 && code == kCondEq) ||
      (false_val == 0 && code == kCondNe)) {
    OpRegRegReg(kOpSub, rs_dest, left_op, right_op);
    DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
    LIR* it = OpIT(kCondNe, "");
    LoadConstant(rs_dest, code == kCondEq ? false_val : true_val);
    OpEndIT(it);
    return;
  }

  OpRegReg(kOpCmp, left_op, right_op);  // Same?
  LIR* it = OpIT(code, "E");            // if-convert the test
  LoadConstant(rs_dest, true_val);      // .eq case - load true
  LoadConstant(rs_dest, false_val);     // .ne case - load false
  OpEndIT(it);
}

void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  UNUSED(bb);
  RegLocation rl_result;
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  // Avoid using float regs here.
  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
  rl_src = LoadValue(rl_src, src_reg_class);
  ConditionCode ccode = mir->meta.ccode;
  if (mir->ssa_rep->num_uses == 1) {
    // CONST case
    int true_val = mir->dalvikInsn.vB;
    int false_val = mir->dalvikInsn.vC;
    rl_result = EvalLoc(rl_dest, result_reg_class, true);
    // Change kCondNe to kCondEq for the special cases below.
    if (ccode == kCondNe) {
      ccode = kCondEq;
      std::swap(true_val, false_val);
    }
    bool cheap_false_val = InexpensiveConstantInt(false_val);
    if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) {
      OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val);
      DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
      LIR* it = OpIT(true_val == 0 ? kCondNe : kCondUge, "");
      LoadConstant(rl_result.reg, false_val);
      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
    } else if (cheap_false_val && ccode == kCondEq && true_val == 1) {
      OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1);
      DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
      LIR* it = OpIT(kCondLs, "");
      LoadConstant(rl_result.reg, false_val);
      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
    } else if (cheap_false_val && InexpensiveConstantInt(true_val)) {
      OpRegImm(kOpCmp, rl_src.reg, 0);
      LIR* it = OpIT(ccode, "E");
      LoadConstant(rl_result.reg, true_val);
      LoadConstant(rl_result.reg, false_val);
      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
    } else {
      // Unlikely case - could be tuned.
      RegStorage t_reg1 = AllocTypedTemp(false, result_reg_class);
      RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
      LoadConstant(t_reg1, true_val);
      LoadConstant(t_reg2, false_val);
      OpRegImm(kOpCmp, rl_src.reg, 0);
      LIR* it = OpIT(ccode, "E");
      OpRegCopy(rl_result.reg, t_reg1);
      OpRegCopy(rl_result.reg, t_reg2);
      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
    }
  } else {
    // MOVE case
    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
    rl_true = LoadValue(rl_true, result_reg_class);
    rl_false = LoadValue(rl_false, result_reg_class);
    rl_result = EvalLoc(rl_dest, result_reg_class, true);
    OpRegImm(kOpCmp, rl_src.reg, 0);
    LIR* it = nullptr;
    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
      it = OpIT(NegateComparison(ccode), "");
      OpRegCopy(rl_result.reg, rl_false.reg);
    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {  // False case in place?
      it = OpIT(ccode, "");
      OpRegCopy(rl_result.reg, rl_true.reg);
    } else {  // Normal - select between the two.
      it = OpIT(ccode, "E");
      OpRegCopy(rl_result.reg, rl_true.reg);
      OpRegCopy(rl_result.reg, rl_false.reg);
    }
    OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
  }
  StoreValue(rl_dest, rl_result);
}

void ArmMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  // Normalize such that if either operand is constant, src2 will be constant.
  ConditionCode ccode = mir->meta.ccode;
  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }
  if (rl_src2.is_const) {
    rl_src2 = UpdateLocWide(rl_src2);
    // Do special compare/branch against simple const operand if not already in registers.
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    if ((rl_src2.location != kLocPhysReg) &&
        ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
      GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
      return;
    }
  }
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
  switch (ccode) {
    case kCondEq:
      OpCondBranch(kCondNe, not_taken);
      break;
    case kCondNe:
      OpCondBranch(kCondNe, taken);
      break;
    case kCondLt:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondUlt;
      break;
    case kCondLe:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondLs;
      break;
    case kCondGt:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondHi;
      break;
    case kCondGe:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpRegReg(kOpCmp, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
  OpCondBranch(ccode, taken);
}

/*
 * Generate a register comparison to an immediate and branch. Caller
 * is responsible for setting branch target field.
 */
LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) {
  LIR* branch = nullptr;
  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
  /*
   * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
   * compare-and-branch if zero is ideal if it will reach. However, because null checks
   * branch forward to a slow path, they will frequently not reach - and thus have to
   * be converted to a long form during assembly (which will trigger another assembly
   * pass). Here we estimate the branch distance for checks, and if large directly
   * generate the long form in an attempt to avoid an extra assembly pass.
   * TODO: consider interspersing slowpaths in code following unconditional branches.
   */
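  // (A cbz/cbnz can only branch forward by roughly 128 bytes, hence the estimate below.)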
  bool skip = ((target != nullptr) && (target->opcode == kPseudoThrowTarget));
  skip &= ((mir_graph_->GetNumDalvikInsns() - current_dalvik_offset_) > 64);
  if (!skip && reg.Low8() && (check_value == 0)) {
    if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) {
      branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
                       reg.GetReg(), 0);
    } else if (arm_cond == kArmCondLs) {
      // kArmCondLs is an unsigned less or equal. A comparison r <= 0 is then the same as cbz.
      // This case happens for a bounds check of array[0].
      branch = NewLIR2(kThumb2Cbz, reg.GetReg(), 0);
    }
  }

  if (branch == nullptr) {
    OpRegImm(kOpCmp, reg, check_value);
    branch = NewLIR2(kThumbBCond, 0, arm_cond);
  }

  branch->target = target;
  return branch;
}

LIR* ArmMir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
  LIR* res;
  int opcode;
  // If src or dest is a pair, we'll be using low reg.
  if (r_dest.IsPair()) {
    r_dest = r_dest.GetLow();
  }
  if (r_src.IsPair()) {
    r_src = r_src.GetLow();
  }
  if (r_dest.IsFloat() || r_src.IsFloat())
    return OpFpRegCopy(r_dest, r_src);
  if (r_dest.Low8() && r_src.Low8())
    opcode = kThumbMovRR;
  else if (!r_dest.Low8() && !r_src.Low8())
    opcode = kThumbMovRR_H2H;
  else if (r_dest.Low8())
    opcode = kThumbMovRR_H2L;
  else
    opcode = kThumbMovRR_L2H;
  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }
  return res;
}

void ArmMir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
    AppendLIR(res);
  }
}

void ArmMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    bool dest_fp = r_dest.IsFloat();
    bool src_fp = r_src.IsFloat();
    DCHECK(r_dest.Is64Bit());
    DCHECK(r_src.Is64Bit());
    // Note: If the register was allocated by the register allocator, it should never be a pair.
    // But some functions in mir_2_lir assume 64-bit registers are 32-bit register pairs.
    // TODO: Rework Mir2Lir::LoadArg() and Mir2Lir::LoadArgDirect().
    if (dest_fp && r_dest.IsPair()) {
      r_dest = As64BitFloatReg(r_dest);
    }
    if (src_fp && r_src.IsPair()) {
      r_src = As64BitFloatReg(r_src);
    }
    if (dest_fp) {
      if (src_fp) {
        OpRegCopy(r_dest, r_src);
      } else {
        NewLIR3(kThumb2Fmdrr, r_dest.GetReg(), r_src.GetLowReg(), r_src.GetHighReg());
      }
    } else {
      if (src_fp) {
        NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_src.GetReg());
      } else {
        // Handle overlap
        if (r_src.GetHighReg() != r_dest.GetLowReg()) {
          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
        } else if (r_src.GetLowReg() != r_dest.GetHighReg()) {
          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
        } else {
          RegStorage r_tmp = AllocTemp();
          OpRegCopy(r_tmp, r_src.GetHigh());
          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
          OpRegCopy(r_dest.GetHigh(), r_tmp);
          FreeTemp(r_tmp);
        }
      }
    }
  }
}

// Table of magic divisors
struct MagicTable {
  uint32_t magic;
  uint32_t shift;
  DividePattern pattern;
};

static const MagicTable magic_table[] = {
  {0, 0, DivideNone},        // 0
  {0, 0, DivideNone},        // 1
  {0, 0, DivideNone},        // 2
  {0x55555556, 0, Divide3},  // 3
  {0, 0, DivideNone},        // 4
  {0x66666667, 1, Divide5},  // 5
  {0x2AAAAAAB, 0, Divide3},  // 6
  {0x92492493, 2, Divide7},  // 7
  {0, 0, DivideNone},        // 8
  {0x38E38E39, 1, Divide5},  // 9
  {0x66666667, 2, Divide5},  // 10
  {0x2E8BA2E9, 1, Divide5},  // 11
  {0x2AAAAAAB, 1, Divide5},  // 12
  {0x4EC4EC4F, 2, Divide5},  // 13
  {0x92492493, 3, Divide7},  // 14
  {0x88888889, 3, Divide7},  // 15
};

// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
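// For example, for lit == 3 (Divide3): smull leaves the 64-bit product src * 0x55555556
// in r_hi:r_lo, and the signed quotient is r_hi - (src >> 31), i.e. the high word
// corrected for a negative dividend.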
bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
  UNUSED(dalvik_opcode);
  if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
    return false;
  }
  DividePattern pattern = magic_table[lit].pattern;
  if (pattern == DivideNone) {
    return false;
  }

  RegStorage r_magic = AllocTemp();
  LoadConstant(r_magic, magic_table[lit].magic);
  rl_src = LoadValue(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage r_hi = AllocTemp();
  RegStorage r_lo = AllocTemp();

  // rl_dest and rl_src might overlap.
  // Reuse r_hi to save the div result for the remainder case.
  RegStorage r_div_result = is_div ? rl_result.reg : r_hi;

  NewLIR4(kThumb2Smull, r_lo.GetReg(), r_hi.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
  switch (pattern) {
    case Divide3:
      OpRegRegRegShift(kOpSub, r_div_result, r_hi, rl_src.reg, EncodeShift(kArmAsr, 31));
      break;
    case Divide5:
      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
      OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi,
                       EncodeShift(kArmAsr, magic_table[lit].shift));
      break;
    case Divide7:
      OpRegReg(kOpAdd, r_hi, rl_src.reg);
      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
      OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi,
                       EncodeShift(kArmAsr, magic_table[lit].shift));
      break;
    default:
      LOG(FATAL) << "Unexpected pattern: " << pattern;
  }

  if (!is_div) {
    // div_result = src / lit
    // tmp1 = div_result * lit
    // dest = src - tmp1
    RegStorage tmp1 = r_lo;
    EasyMultiplyOp ops[2];

    bool canEasyMultiply = GetEasyMultiplyTwoOps(lit, ops);
    DCHECK_NE(canEasyMultiply, false);

    GenEasyMultiplyTwoOps(tmp1, r_div_result, ops);
    OpRegRegReg(kOpSub, rl_result.reg, rl_src.reg, tmp1);
  }

  StoreValue(rl_dest, rl_result);
  return true;
}

// Try to convert *lit to 1 RegRegRegShift/RegRegShift form.
bool ArmMir2Lir::GetEasyMultiplyOp(int lit, ArmMir2Lir::EasyMultiplyOp* op) {
  if (lit == 0) {
    // Special case for *divide-by-zero*. The ops won't actually be used to generate code, as
    // GenArithOpIntLit will directly generate exception-throwing code, and multiply-by-zero will
    // have been optimized away earlier.
    op->op = kOpInvalid;
    op->shift = 0;
    return true;
  }

  if (IsPowerOfTwo(lit)) {
    op->op = kOpLsl;
    op->shift = CTZ(lit);
    return true;
  }

  if (IsPowerOfTwo(lit - 1)) {
    op->op = kOpAdd;
    op->shift = CTZ(lit - 1);
    return true;
  }

  if (IsPowerOfTwo(lit + 1)) {
    op->op = kOpRsub;
    op->shift = CTZ(lit + 1);
    return true;
  }

  op->op = kOpInvalid;
  op->shift = 0;
  return false;
}

// Try to convert *lit to 1~2 RegRegRegShift/RegRegShift forms.
bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) {
  if (GetEasyMultiplyOp(lit, &ops[0])) {
    ops[1].op = kOpInvalid;
    ops[1].shift = 0;
    return true;
  }

  int lit1 = lit;
  uint32_t shift = CTZ(lit1);
  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
    ops[1].op = kOpLsl;
    ops[1].shift = shift;
    return true;
  }

  lit1 = lit - 1;
  shift = CTZ(lit1);
  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
    ops[1].op = kOpAdd;
    ops[1].shift = shift;
    return true;
  }

  lit1 = lit + 1;
  shift = CTZ(lit1);
  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
    ops[1].op = kOpRsub;
    ops[1].shift = shift;
    return true;
  }

  ops[1].op = kOpInvalid;
  ops[1].shift = 0;

  return false;
}

// Generate instructions to do the multiply.
// An additional temporary register is required if two instructions
// are needed and src/dest overlap.
void ArmMir2Lir::GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops) {
  // tmp1 = ( src << shift1) + [ src | -src | 0 ]
  // dest = (tmp1 << shift2) + [ src | -src | 0 ]
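  // For example, lit == 10 yields ops[0] = {kOpAdd, 2} and ops[1] = {kOpLsl, 1}:
  // tmp1 = src + (src << 2) = 5 * src, then dest = tmp1 << 1 = 10 * src.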

  RegStorage r_tmp1;
  if (ops[1].op == kOpInvalid) {
    r_tmp1 = r_dest;
  } else if (r_dest.GetReg() != r_src.GetReg()) {
    r_tmp1 = r_dest;
  } else {
    r_tmp1 = AllocTemp();
  }

  switch (ops[0].op) {
    case kOpLsl:
      OpRegRegImm(kOpLsl, r_tmp1, r_src, ops[0].shift);
      break;
    case kOpAdd:
      OpRegRegRegShift(kOpAdd, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift));
      break;
    case kOpRsub:
      OpRegRegRegShift(kOpRsub, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift));
      break;
    default:
      DCHECK_EQ(ops[0].op, kOpInvalid);
      break;
  }

  switch (ops[1].op) {
    case kOpInvalid:
      return;
    case kOpLsl:
      OpRegRegImm(kOpLsl, r_dest, r_tmp1, ops[1].shift);
      break;
    case kOpAdd:
      OpRegRegRegShift(kOpAdd, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift));
      break;
    case kOpRsub:
      OpRegRegRegShift(kOpRsub, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift));
      break;
    default:
      LOG(FATAL) << "Unexpected opcode passed to GenEasyMultiplyTwoOps";
      break;
  }
}

bool ArmMir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
  EasyMultiplyOp ops[2];

  if (!GetEasyMultiplyTwoOps(lit, ops)) {
    return false;
  }

  rl_src = LoadValue(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  GenEasyMultiplyTwoOps(rl_result.reg, rl_src.reg, ops);
  StoreValue(rl_dest, rl_result);
  return true;
}

RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                                  RegLocation rl_src2, bool is_div, int flags) {
  UNUSED(rl_dest, rl_src1, rl_src2, is_div, flags);
  LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
  UNREACHABLE();
}

RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit,
                                     bool is_div) {
  UNUSED(rl_dest, rl_src1, lit, is_div);
  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
  UNREACHABLE();
}

RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  // Put the literal in a temp.
  RegStorage lit_temp = AllocTemp();
  LoadConstant(lit_temp, lit);
  // Use the generic case for div/rem with arg2 in a register.
  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
  FreeTemp(lit_temp);

  return rl_result;
}

RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2,
                                  bool is_div) {
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (is_div) {
    // Simple case, use sdiv instruction.
    OpRegRegReg(kOpDiv, rl_result.reg, reg1, reg2);
  } else {
    // Remainder case, use the following code:
    // temp = reg1 / reg2      - integer division
    // temp = temp * reg2
    // dest = reg1 - temp

    RegStorage temp = AllocTemp();
    OpRegRegReg(kOpDiv, temp, reg1, reg2);
    OpRegReg(kOpMul, temp, reg2);
    OpRegRegReg(kOpSub, rl_result.reg, reg1, temp);
    FreeTemp(temp);
  }

  return rl_result;
}

bool ArmMir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
  DCHECK_EQ(cu_->instruction_set, kThumb2);
  if (is_long) {
    return false;
  }
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = info->args[1];
  rl_src1 = LoadValue(rl_src1, kCoreReg);
  rl_src2 = LoadValue(rl_src2, kCoreReg);
  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
  LIR* it = OpIT((is_min) ? kCondGt : kCondLt, "E");
  OpRegReg(kOpMov, rl_result.reg, rl_src2.reg);
  OpRegReg(kOpMov, rl_result.reg, rl_src1.reg);
  OpEndIT(it);
  StoreValue(rl_dest, rl_result);
  return true;
}

bool ArmMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (size == k64) {
    // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
    if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
      Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
      Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
    } else {
      Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
      Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
    }
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool ArmMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  if (size == k64) {
    // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
    StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32, kNotVolatile);
    StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32, kNotVolatile);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    // Unaligned store with STR and STRH is allowed on ARMv7 with SCTLR.A set to 0.
    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
  }
  return true;
}

// Generate a CAS with memory_order_seq_cst semantics.
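// The kAnyStore barrier before the ldrex/strex loop and the kLoadAny barrier after it
// provide the ordering required for that.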
bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK_EQ(cu_->instruction_set, kThumb2);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result

  // We have only 5 temporary registers available and actually only 4 if the InlineTarget
  // above locked one of the temps. For a straightforward CAS64 we need 7 registers:
  // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor
  // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop
  // into the same temps, reducing the number of required temps down to 5. We shall work
  // around the potentially locked temp by using LR for r_ptr, unconditionally.
  // TODO: Pass information about the need for more temps to the stack frame generation
  // code so that we can rely on being able to allocate enough temps.
  DCHECK(!GetRegInfo(rs_rARM_LR)->IsTemp());
  MarkTemp(rs_rARM_LR);
  FreeTemp(rs_rARM_LR);
  LockTemp(rs_rARM_LR);
  bool load_early = true;
  if (is_long) {
    RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() :
        rl_src_expected.reg;
    RegStorage new_val_reg = rl_src_new_value.reg.IsPair() ? rl_src_new_value.reg.GetLow() :
        rl_src_new_value.reg;
    bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !expected_reg.IsFloat();
    bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !new_val_reg.IsFloat();
    bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg);
    bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg);

    if (!expected_is_good_reg && !new_value_is_good_reg) {
      // None of expected/new_value is non-temp reg, need to load both late
      load_early = false;
      // Make sure they are not in the temp regs and the load will not be skipped.
      if (expected_is_core_reg) {
        FlushRegWide(rl_src_expected.reg);
        ClobberSReg(rl_src_expected.s_reg_low);
        ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low));
        rl_src_expected.location = kLocDalvikFrame;
      }
      if (new_value_is_core_reg) {
        FlushRegWide(rl_src_new_value.reg);
        ClobberSReg(rl_src_new_value.s_reg_low);
        ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low));
        rl_src_new_value.location = kLocDalvikFrame;
      }
    }
  }

  // Prevent reordering with prior memory operations.
  GenMemBarrier(kAnyStore);

  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
  RegLocation rl_new_value;
  if (!is_long) {
    rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
  } else if (load_early) {
    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
  }

  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
    // Mark card for object assuming new value is stored.
    MarkGCCard(0, rl_new_value.reg, rl_object.reg);
  }

  RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);

  RegStorage r_ptr = rs_rARM_LR;
  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);

  // Free now unneeded rl_object and rl_offset to give more temps.
  ClobberSReg(rl_object.s_reg_low);
  FreeTemp(rl_object.reg);
  ClobberSReg(rl_offset.s_reg_low);
  FreeTemp(rl_offset.reg);

  RegLocation rl_expected;
  if (!is_long) {
    rl_expected = LoadValue(rl_src_expected, is_object ? kRefReg : kCoreReg);
  } else if (load_early) {
    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
  } else {
    // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
    RegStorage low_reg = AllocTemp();
    RegStorage high_reg = AllocTemp();
    rl_new_value.reg = RegStorage::MakeRegPair(low_reg, high_reg);
    rl_expected = rl_new_value;
  }

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = tmp != 0;
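  // On loop exit r_tmp is zero iff the store succeeded with the expected value;
  // a failed strex simply retries the loop.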

  RegStorage r_tmp = AllocTemp();
  LIR* target = NewLIR0(kPseudoTargetLabel);

  LIR* it = nullptr;
  if (is_long) {
    RegStorage r_tmp_high = AllocTemp();
    if (!load_early) {
      LoadValueDirectWide(rl_src_expected, rl_expected.reg);
    }
    NewLIR3(kThumb2Ldrexd, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
    OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow());
    OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh());
    if (!load_early) {
      LoadValueDirectWide(rl_src_new_value, rl_new_value.reg);
    }
    // Make sure we use ORR that sets the ccode
    if (r_tmp.Low8() && r_tmp_high.Low8()) {
      NewLIR2(kThumbOrr, r_tmp.GetReg(), r_tmp_high.GetReg());
    } else {
      NewLIR4(kThumb2OrrRRRs, r_tmp.GetReg(), r_tmp.GetReg(), r_tmp_high.GetReg(), 0);
    }
    FreeTemp(r_tmp_high);  // Now unneeded

    DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
    it = OpIT(kCondEq, "T");
    NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(), rl_new_value.reg.GetHighReg(), r_ptr.GetReg());

  } else {
    NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0);
    OpRegReg(kOpSub, r_tmp, rl_expected.reg);
    DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
    it = OpIT(kCondEq, "T");
    NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
  }

  // Still one conditional left from OpIT(kCondEq, "T") from either branch
  OpRegImm(kOpCmp /* eq */, r_tmp, 1);
  OpEndIT(it);

  OpCondBranch(kCondEq, target);

  if (!load_early) {
    FreeTemp(rl_expected.reg);  // Now unneeded.
  }

  // Prevent reordering with subsequent memory operations.
  GenMemBarrier(kLoadAny);

  // result := (tmp1 != 0) ? 0 : 1;
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
  it = OpIT(kCondUlt, "");
  LoadConstant(rl_result.reg, 0); /* cc */
  FreeTemp(r_tmp);  // Now unneeded.
  OpEndIT(it);      // Barrier to terminate OpIT.

  StoreValue(rl_dest, rl_result);

  // Now, restore lr to its non-temp status.
  Clobber(rs_rARM_LR);
  UnmarkTemp(rs_rARM_LR);
  return true;
}

bool ArmMir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
  constexpr int kLargeArrayThreshold = 256;

  RegLocation rl_src = info->args[0];
  RegLocation rl_src_pos = info->args[1];
  RegLocation rl_dst = info->args[2];
  RegLocation rl_dst_pos = info->args[3];
  RegLocation rl_length = info->args[4];
  // Compile-time check; handle exceptions in the non-inlined method to reduce related metadata.
  if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) ||
      (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) ||
      (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) {
    return false;
  }

  ClobberCallerSave();
  LockCallTemps();  // Prepare for explicit register usage.
  LockTemp(rs_r12);
  RegStorage rs_src = rs_r0;
  RegStorage rs_dst = rs_r1;
  LoadValueDirectFixed(rl_src, rs_src);
  LoadValueDirectFixed(rl_dst, rs_dst);

  // Handle null pointer exception in slow-path.
  LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr);
  LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr);
  // Handle potential overlapping in slow-path.
  LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr);
  // Handle exception or big length in slow-path.
  RegStorage rs_length = rs_r2;
  LoadValueDirectFixed(rl_length, rs_length);
  LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr);
  // Src bounds check.
  RegStorage rs_pos = rs_r3;
  RegStorage rs_arr_length = rs_r12;
  LoadValueDirectFixed(rl_src_pos, rs_pos);
  LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_pos, 0, nullptr);
  Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
  OpRegReg(kOpSub, rs_arr_length, rs_pos);
  LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
  // Dst bounds check.
  LoadValueDirectFixed(rl_dst_pos, rs_pos);
  LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_pos, 0, nullptr);
  Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
  OpRegReg(kOpSub, rs_arr_length, rs_pos);
  LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);

  // Everything is checked now.
  OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value());
  OpRegReg(kOpAdd, rs_dst, rs_pos);
  OpRegReg(kOpAdd, rs_dst, rs_pos);
  OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value());
  LoadValueDirectFixed(rl_src_pos, rs_pos);
  OpRegReg(kOpAdd, rs_src, rs_pos);
  OpRegReg(kOpAdd, rs_src, rs_pos);

  RegStorage rs_tmp = rs_pos;
  OpRegRegImm(kOpLsl, rs_length, rs_length, 1);

  // Copy one element.
  OpRegRegImm(kOpAnd, rs_tmp, rs_length, 2);
  LIR* jmp_to_begin_loop = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
  OpRegImm(kOpSub, rs_length, 2);
  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);

  // Copy two elements.
  LIR* begin_loop = NewLIR0(kPseudoTargetLabel);
  LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr);
  OpRegImm(kOpSub, rs_length, 4);
  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);
  OpUnconditionalBranch(begin_loop);

  LIR* check_failed = NewLIR0(kPseudoTargetLabel);
  LIR* launchpad_branch = OpUnconditionalBranch(nullptr);
  LIR* return_point = NewLIR0(kPseudoTargetLabel);

  src_check_branch->target = check_failed;
  dst_check_branch->target = check_failed;
  src_dst_same->target = check_failed;
  len_neg_or_too_big->target = check_failed;
  src_pos_negative->target = check_failed;
  src_bad_len->target = check_failed;
  dst_pos_negative->target = check_failed;
  dst_bad_len->target = check_failed;
  jmp_to_begin_loop->target = begin_loop;
  jmp_to_ret->target = return_point;

  AddIntrinsicSlowPath(info, launchpad_branch, return_point);
  ClobberCallerSave();  // We must clobber everything because slow path will return here

  return true;
}

void ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* lir = NewLIR2(kThumb2LdrPcRel12, reg.GetReg(), 0);
  lir->target = target;
}

bool ArmMir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
  return dex_cache_arrays_layout_.Valid();
}

void ArmMir2Lir::OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest) {
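  // Emit movw/movt with placeholder immediates; the extra operands record the dex file,
  // the element offset and the anchoring add-pc LIR so that the actual PC-relative
  // address can be patched in later.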
  LIR* movw = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), 0);
  LIR* movt = NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), 0);
  ArmOpcode add_pc_opcode = (r_dest.GetRegNum() < 8) ? kThumbAddRRLH : kThumbAddRRHH;
  LIR* add_pc = NewLIR2(add_pc_opcode, r_dest.GetReg(), rs_rARM_PC.GetReg());
  add_pc->flags.fixup = kFixupLabel;
  movw->operands[2] = WrapPointer(dex_file);
  movw->operands[3] = offset;
  movw->operands[4] = WrapPointer(add_pc);
  movt->operands[2] = movw->operands[2];
  movt->operands[3] = movw->operands[3];
  movt->operands[4] = movw->operands[4];
  dex_cache_access_insns_.push_back(movw);
  dex_cache_access_insns_.push_back(movt);
}

void ArmMir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest,
                                          bool wide) {
  DCHECK(!wide) << "Unsupported";
  if (dex_cache_arrays_base_reg_.Valid()) {
    LoadRefDisp(dex_cache_arrays_base_reg_, offset - dex_cache_arrays_min_offset_,
                r_dest, kNotVolatile);
  } else {
    OpPcRelDexCacheArrayAddr(dex_file, offset, r_dest);
    LoadRefDisp(r_dest, 0, r_dest, kNotVolatile);
  }
}

LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) {
  return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count);
}

LIR* ArmMir2Lir::OpVstm(RegStorage r_base, int count) {
  return NewLIR3(kThumb2Vstms, r_base.GetReg(), rs_fr0.GetReg(), count);
}

void ArmMir2Lir::GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                                RegLocation rl_src3, bool is_sub) {
  rl_src1 = LoadValue(rl_src1, kCoreReg);
  rl_src2 = LoadValue(rl_src2, kCoreReg);
  rl_src3 = LoadValue(rl_src3, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  NewLIR4(is_sub ? kThumb2Mls : kThumb2Mla, rl_result.reg.GetReg(), rl_src1.reg.GetReg(),
          rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
  StoreValue(rl_dest, rl_result);
}

void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                               RegLocation rl_result, int lit,
                                               int first_bit, int second_bit) {
  UNUSED(lit);
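  // For example, lit == 10 (first_bit == 1, second_bit == 3):
  // result = (src + (src << 2)) << 1 = 10 * src.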
1150 OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
1151 EncodeShift(kArmLsl, second_bit - first_bit));
1152 if (first_bit != 0) {
1153 OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
1154 }
1155 }
1156
GenDivZeroCheckWide(RegStorage reg)1157 void ArmMir2Lir::GenDivZeroCheckWide(RegStorage reg) {
1158 DCHECK(reg.IsPair()); // TODO: support k64BitSolo.
1159 RegStorage t_reg = AllocTemp();
1160 NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), reg.GetLowReg(), reg.GetHighReg(), 0);
1161 FreeTemp(t_reg);
1162 GenDivZeroCheck(kCondEq);
1163 }
1164
1165 // Test suspend flag, return target of taken suspend branch
OpTestSuspend(LIR * target)1166 LIR* ArmMir2Lir::OpTestSuspend(LIR* target) {
1167 #ifdef ARM_R4_SUSPEND_FLAG
1168 NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1);
1169 return OpCondBranch((target == nullptr) ? kCondEq : kCondNe, target);
1170 #else
1171 RegStorage t_reg = AllocTemp();
1172 LoadBaseDisp(rs_rARM_SELF, Thread::ThreadFlagsOffset<4>().Int32Value(),
1173 t_reg, kUnsignedHalf, kNotVolatile);
1174 LIR* cmp_branch = OpCmpImmBranch((target == nullptr) ? kCondNe : kCondEq, t_reg,
1175 0, target);
1176 FreeTemp(t_reg);
1177 return cmp_branch;
1178 #endif
1179 }
1180
1181 // Decrement register and branch on condition
OpDecAndBranch(ConditionCode c_code,RegStorage reg,LIR * target)1182 LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
1183 // Combine sub & test using sub setflags encoding here
1184 OpRegRegImm(kOpSub, reg, reg, 1); // For value == 1, this should set flags.
1185 DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
1186 return OpCondBranch(c_code, target);
1187 }
1188
GenMemBarrier(MemBarrierKind barrier_kind)1189 bool ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
1190 if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
1191 return false;
1192 }
1193 // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
1194 LIR* barrier = last_lir_insn_;
1195
1196 int dmb_flavor;
1197 // TODO: revisit Arm barrier kinds
1198 switch (barrier_kind) {
1199 case kAnyStore: dmb_flavor = kISH; break;
1200 case kLoadAny: dmb_flavor = kISH; break;
1201 case kStoreStore: dmb_flavor = kISHST; break;
1202 case kAnyAny: dmb_flavor = kISH; break;
1203 default:
1204 LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
1205 dmb_flavor = kSY; // quiet gcc.
1206 break;
1207 }
1208
1209 bool ret = false;
1210
1211 // If the same barrier already exists, don't generate another.
1212 if (barrier == nullptr
1213 || (barrier != nullptr && (barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor))) {
1214 barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
1215 ret = true;
1216 }
1217
1218 // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
1219 DCHECK(!barrier->flags.use_def_invalid);
1220 barrier->u.m.def_mask = &kEncodeAll;
1221 return ret;
1222 }
1223
GenNegLong(RegLocation rl_dest,RegLocation rl_src)1224 void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
1225 rl_src = LoadValueWide(rl_src, kCoreReg);
1226 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1227 RegStorage z_reg = AllocTemp();
1228 LoadConstantNoClobber(z_reg, 0);
1229 // Check for destructive overlap
1230 if (rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
1231 RegStorage t_reg = AllocTemp();
1232 OpRegCopy(t_reg, rl_result.reg.GetLow());
1233 OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
1234 OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, t_reg);
1235 FreeTemp(t_reg);
1236 } else {
1237 OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
1238 OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, rl_src.reg.GetHigh());
1239 }
1240 FreeTemp(z_reg);
1241 StoreValueWide(rl_dest, rl_result);
1242 }
1243
GenMulLong(Instruction::Code opcode,RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_src2)1244 void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
1245 RegLocation rl_src1, RegLocation rl_src2) {
1246 UNUSED(opcode);
1247 /*
1248 * tmp1 = src1.hi * src2.lo; // src1.hi is no longer needed
1249 * dest = src1.lo * src2.lo;
1250 * tmp1 += src1.lo * src2.hi;
1251 * dest.hi += tmp1;
1252 *
1253 * To pull off inline multiply, we have a worst-case requirement of 7 temporary
1254 * registers. Normally for Arm, we get 5. We can get to 6 by including
1255 * lr in the temp set. The only problematic case is all operands and result are
1256 * distinct, and none have been promoted. In that case, we can succeed by aggressively
1257 * freeing operand temp registers after they are no longer needed. All other cases
1258 * can proceed normally. We'll just punt on the case of the result having a misaligned
1259 * overlap with either operand and send that case to a runtime handler.
1260 */
1261 RegLocation rl_result;
1262 if (PartiallyIntersects(rl_src1, rl_dest) || (PartiallyIntersects(rl_src2, rl_dest))) {
1263 FlushAllRegs();
1264 CallRuntimeHelperRegLocationRegLocation(kQuickLmul, rl_src1, rl_src2, false);
1265 rl_result = GetReturnWide(kCoreReg);
1266 StoreValueWide(rl_dest, rl_result);
1267 return;
1268 }
1269
1270 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1271 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1272
1273 int reg_status = 0;
1274 RegStorage res_lo;
1275 RegStorage res_hi;
1276 bool dest_promoted = rl_dest.location == kLocPhysReg && rl_dest.reg.Valid() &&
1277 !IsTemp(rl_dest.reg.GetLow()) && !IsTemp(rl_dest.reg.GetHigh());
1278 bool src1_promoted = !IsTemp(rl_src1.reg.GetLow()) && !IsTemp(rl_src1.reg.GetHigh());
1279 bool src2_promoted = !IsTemp(rl_src2.reg.GetLow()) && !IsTemp(rl_src2.reg.GetHigh());
1280 // Check if rl_dest is *not* either operand and we have enough temp registers.
1281 if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) &&
1282 (dest_promoted || src1_promoted || src2_promoted)) {
1283 // In this case, we do not need to manually allocate temp registers for result.
1284 rl_result = EvalLoc(rl_dest, kCoreReg, true);
1285 res_lo = rl_result.reg.GetLow();
1286 res_hi = rl_result.reg.GetHigh();
1287 } else {
1288 res_lo = AllocTemp();
1289 if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || src1_promoted || src2_promoted) {
1290 // In this case, we have enough temp registers to be allocated for result.
1291 res_hi = AllocTemp();
1292 reg_status = 1;
1293 } else {
1294 // In this case, all temps are now allocated.
1295 // res_hi will be allocated after we can free src1_hi.
1296 reg_status = 2;
1297 }
1298 }
1299
1300 // Temporarily add LR to the temp pool, and assign it to tmp1
1301 MarkTemp(rs_rARM_LR);
1302 FreeTemp(rs_rARM_LR);
1303 RegStorage tmp1 = rs_rARM_LR;
1304 LockTemp(rs_rARM_LR);
1305
1306 if (rl_src1.reg == rl_src2.reg) {
1307 DCHECK(res_hi.Valid());
1308 DCHECK(res_lo.Valid());
1309 NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg());
1310 NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src1.reg.GetLowReg(),
1311 rl_src1.reg.GetLowReg());
1312 OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1));
1313 } else {
1314 NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetHighReg());
1315 if (reg_status == 2) {
1316 DCHECK(!res_hi.Valid());
1317 DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
1318 DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
1319 // Will force free src1_hi, so must clobber.
1320 Clobber(rl_src1.reg);
1321 FreeTemp(rl_src1.reg.GetHigh());
1322 res_hi = AllocTemp();
1323 }
1324 DCHECK(res_hi.Valid());
1325 DCHECK(res_lo.Valid());
1326 NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src2.reg.GetLowReg(),
1327 rl_src1.reg.GetLowReg());
1328 NewLIR4(kThumb2Mla, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetHighReg(),
1329 tmp1.GetReg());
1330 NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0);
1331 if (reg_status == 2) {
1332 FreeTemp(rl_src1.reg.GetLow());
1333 }
1334 }
1335
1336 if (reg_status != 0) {
1337 // We had manually allocated registers for rl_result.
1338 // Now construct a RegLocation.
1339 rl_result = GetReturnWide(kCoreReg); // Just using as a template.
1340 rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi);
1341 }
1342
1343 // Free tmp1 but keep LR as temp for StoreValueWide() if needed.
1344 FreeTemp(tmp1);
1345
1346 StoreValueWide(rl_dest, rl_result);
1347
1348 // Now, restore lr to its non-temp status.
1349 Clobber(rs_rARM_LR);
1350 UnmarkTemp(rs_rARM_LR);
1351 }
1352
GenArithOpLong(Instruction::Code opcode,RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_src2,int flags)1353 void ArmMir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
1354 RegLocation rl_src2, int flags) {
1355 switch (opcode) {
1356 case Instruction::MUL_LONG:
1357 case Instruction::MUL_LONG_2ADDR:
1358 GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
1359 return;
1360 case Instruction::NEG_LONG:
1361 GenNegLong(rl_dest, rl_src2);
1362 return;
1363
1364 default:
1365 break;
1366 }
1367
1368 // Fallback for all other ops.
1369 Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
1370 }
1371
1372 /*
1373 * Generate array load
1374 */
GenArrayGet(int opt_flags,OpSize size,RegLocation rl_array,RegLocation rl_index,RegLocation rl_dest,int scale)1375 void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
1376 RegLocation rl_index, RegLocation rl_dest, int scale) {
1377 RegisterClass reg_class = RegClassBySize(size);
1378 int len_offset = mirror::Array::LengthOffset().Int32Value();
1379 int data_offset;
1380 RegLocation rl_result;
1381 bool constant_index = rl_index.is_const;
1382 rl_array = LoadValue(rl_array, kRefReg);
1383 if (!constant_index) {
1384 rl_index = LoadValue(rl_index, kCoreReg);
1385 }
1386
1387 if (rl_dest.wide) {
1388 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
1389 } else {
1390 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
1391 }
1392
1393 // If index is constant, just fold it into the data offset
1394 if (constant_index) {
1395 data_offset += mir_graph_->ConstantValue(rl_index) << scale;
1396 }
1397
1398 /* null object? */
1399 GenNullCheck(rl_array.reg, opt_flags);
1400
1401 bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
1402 RegStorage reg_len;
1403 if (needs_range_check) {
1404 reg_len = AllocTemp();
1405 /* Get len */
1406 Load32Disp(rl_array.reg, len_offset, reg_len);
1407 MarkPossibleNullPointerException(opt_flags);
1408 } else {
1409 ForceImplicitNullCheck(rl_array.reg, opt_flags);
1410 }
1411 if (rl_dest.wide || rl_dest.fp || constant_index) {
1412 RegStorage reg_ptr;
1413 if (constant_index) {
1414 reg_ptr = rl_array.reg; // NOTE: must not alter reg_ptr in constant case.
1415 } else {
1416 // No special indexed operation, lea + load w/ displacement
1417 reg_ptr = AllocTempRef();
1418 OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
1419 FreeTemp(rl_index.reg);
1420 }
1421 rl_result = EvalLoc(rl_dest, reg_class, true);
1422
1423 if (needs_range_check) {
1424 if (constant_index) {
1425 GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
1426 } else {
1427 GenArrayBoundsCheck(rl_index.reg, reg_len);
1428 }
1429 FreeTemp(reg_len);
1430 }
1431 LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, kNotVolatile);
1432 if (!constant_index) {
1433 FreeTemp(reg_ptr);
1434 }
1435 if (rl_dest.wide) {
1436 StoreValueWide(rl_dest, rl_result);
1437 } else {
1438 StoreValue(rl_dest, rl_result);
1439 }
1440 } else {
    // Offset base, then use indexed load
    RegStorage reg_ptr = AllocTempRef();
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
    FreeTemp(rl_array.reg);
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      GenArrayBoundsCheck(rl_index.reg, reg_len);
      FreeTemp(reg_len);
    }
    LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
    FreeTemp(reg_ptr);
    StoreValue(rl_dest, rl_result);
  }
}

/*
 * Generate array store
 */
void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
  RegisterClass reg_class = RegClassBySize(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  bool constant_index = rl_index.is_const;

  int data_offset;
  if (size == k64 || size == kDouble) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  // If index is constant, just fold it into the data offset.
  if (constant_index) {
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
  }

  rl_array = LoadValue(rl_array, kRefReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  RegStorage reg_ptr;
  bool allocated_reg_ptr_temp = false;
  if (constant_index) {
    reg_ptr = rl_array.reg;
  } else if (IsTemp(rl_array.reg) && !card_mark) {
    Clobber(rl_array.reg);
    reg_ptr = rl_array.reg;
  } else {
    allocated_reg_ptr_temp = true;
    reg_ptr = AllocTempRef();
  }
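  // With a constant index the array register itself serves as the base. Otherwise the
  // array register is reused (clobbered) as the data pointer only when it is already a
  // temp and is not needed later for the card mark; in all remaining cases a fresh temp
  // holds the computed address and is freed at the end.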

  /* null object? */
  GenNullCheck(rl_array.reg, opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  RegStorage reg_len;
  if (needs_range_check) {
    reg_len = AllocTemp();
    // NOTE: max live temps (4) here.
    /* Get len */
    Load32Disp(rl_array.reg, len_offset, reg_len);
    MarkPossibleNullPointerException(opt_flags);
  } else {
    ForceImplicitNullCheck(rl_array.reg, opt_flags);
  }
  /* at this point, reg_ptr points to array, 2 live temps */
  if (rl_src.wide || rl_src.fp || constant_index) {
    if (rl_src.wide) {
      rl_src = LoadValueWide(rl_src, reg_class);
    } else {
      rl_src = LoadValue(rl_src, reg_class);
    }
    if (!constant_index) {
      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
    }
    if (needs_range_check) {
      if (constant_index) {
        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
      } else {
        GenArrayBoundsCheck(rl_index.reg, reg_len);
      }
      FreeTemp(reg_len);
    }

    StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size, kNotVolatile);
  } else {
    /* reg_ptr -> array data */
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
    rl_src = LoadValue(rl_src, reg_class);
    if (needs_range_check) {
      GenArrayBoundsCheck(rl_index.reg, reg_len);
      FreeTemp(reg_len);
    }
    StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
  }
  if (allocated_reg_ptr_temp) {
    FreeTemp(reg_ptr);
  }
  if (card_mark) {
    MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
  }
}


void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift,
                                   int flags) {
  UNUSED(flags);
  rl_src = LoadValueWide(rl_src, kCoreReg);
  // Per spec, we only care about low 6 bits of shift amount.
  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
  if (shift_amount == 0) {
    StoreValueWide(rl_dest, rl_src);
    return;
  }
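  // If the wide source and destination locations partially overlap, the in-place
  // expansion below could clobber one half of the source before it is read, so fall
  // back to the generic shift implementation.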
  if (PartiallyIntersects(rl_src, rl_dest)) {
    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
    return;
  }
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
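  // Each 64-bit shift is expanded into 32-bit operations: a shift by exactly 32 moves
  // one half into the other, shifts above 32 operate on a single half, and smaller
  // shifts combine bits from both halves. SHL by 1 is emitted as an add/adc doubling.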
  switch (opcode) {
    case Instruction::SHL_LONG:
    case Instruction::SHL_LONG_2ADDR:
      if (shift_amount == 1) {
        OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), rl_src.reg.GetLow());
        OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), rl_src.reg.GetHigh());
      } else if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg);
        LoadConstant(rl_result.reg.GetLow(), 0);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetLow(), shift_amount - 32);
        LoadConstant(rl_result.reg.GetLow(), 0);
      } else {
        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), rl_src.reg.GetLow(),
                         EncodeShift(kArmLsr, 32 - shift_amount));
        OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount);
      }
      break;
    case Instruction::SHR_LONG:
    case Instruction::SHR_LONG_2ADDR:
      if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpAsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
      } else {
        RegStorage t_reg = AllocTemp();
        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
                         EncodeShift(kArmLsl, 32 - shift_amount));
        FreeTemp(t_reg);
        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
      }
      break;
    case Instruction::USHR_LONG:
    case Instruction::USHR_LONG_2ADDR:
      if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
        LoadConstant(rl_result.reg.GetHigh(), 0);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpLsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
        LoadConstant(rl_result.reg.GetHigh(), 0);
      } else {
        RegStorage t_reg = AllocTemp();
        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
                         EncodeShift(kArmLsl, 32 - shift_amount));
        FreeTemp(t_reg);
        OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
      }
      break;
    default:
      LOG(FATAL) << "Unexpected case";
  }
  StoreValueWide(rl_dest, rl_result);
}

void ArmMir2Lir::GenArithImmOpLong(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                                   int flags) {
  if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
    if (!rl_src2.is_const) {
      // Don't bother with special handling for subtract from immediate.
      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
      return;
    }
  } else {
    // Normalize: make sure the constant operand ends up in rl_src2.
    if (!rl_src2.is_const) {
      DCHECK(rl_src1.is_const);
      std::swap(rl_src1, rl_src2);
    }
  }
  if (PartiallyIntersects(rl_src1, rl_dest)) {
    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
    return;
  }
  DCHECK(rl_src2.is_const);
  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
  uint32_t val_lo = Low32Bits(val);
  uint32_t val_hi = High32Bits(val);
  int32_t mod_imm_lo = ModifiedImmediate(val_lo);
  int32_t mod_imm_hi = ModifiedImmediate(val_hi);
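  // ModifiedImmediate() yields the Thumb2 modified-immediate encoding of the constant,
  // or a negative value when the constant cannot be encoded that way.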

  // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit.
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
        return;
      }
      break;
    default:
      break;
  }
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
      NewLIR3(kThumb2AdcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
      break;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
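      // OR with 0 into the same register is a no-op, so only emit the instruction when
      // it has an effect.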
      if ((val_lo != 0) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
        OpRegRegImm(kOpOr, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
      }
      if ((val_hi != 0) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
        OpRegRegImm(kOpOr, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
      }
      break;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      OpRegRegImm(kOpXor, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
      OpRegRegImm(kOpXor, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
      break;
    case Instruction::AND_LONG:
    case Instruction::AND_LONG_2ADDR:
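      // Likewise, AND with 0xffffffff into the same register is a no-op.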
      if ((val_lo != 0xffffffff) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
        OpRegRegImm(kOpAnd, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
      }
      if ((val_hi != 0xffffffff) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
        OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
      }
      break;
    case Instruction::SUB_LONG_2ADDR:
    case Instruction::SUB_LONG:
      NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
      NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
      break;
    default:
      LOG(FATAL) << "Unexpected opcode " << opcode;
  }
  StoreValueWide(rl_dest, rl_result);
}

bool ArmMir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                  RegLocation rl_src, RegLocation rl_dest, int lit) {
  if (lit < 2) {
    return false;
  }

  // ARM either lacks a division instruction or, where one exists, it is potentially
  // expensive, so look for more special cases first.
  if (!IsPowerOfTwo(lit)) {
    return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, lit);
  }
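  // Power-of-two literals fall through to the shared shift/mask-based expansion.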

  return Mir2Lir::HandleEasyDivRem(dalvik_opcode, is_div, rl_src, rl_dest, lit);
}

}  // namespace art