1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "codegen_arm.h"
18
19 #include "arch/arm/instruction_set_features_arm.h"
20 #include "arm_lir.h"
21 #include "base/logging.h"
22 #include "dex/mir_graph.h"
23 #include "dex/quick/mir_to_lir-inl.h"
24 #include "dex/reg_storage_eq.h"
25 #include "driver/compiler_driver.h"
26
27 namespace art {
28
29 /* This file contains codegen for the Thumb ISA. */
30
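// Determine whether 'value' (the raw bit pattern of a 32-bit float) can be encoded as a
// Thumb2/VFP single-precision immediate: sign in bit 31, bit 30 equal to the complement of
// bit 29, bits 29..25 all equal to bit 29, a 6-bit slice in bits 24..19, and zeroes below.
// Returns the 8-bit a:b:slice encoding, or -1 if not representable. For example, +2.0f
// (0x40000000) encodes to 0, which LoadFPConstantValue exploits below.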
31 static int32_t EncodeImmSingle(int32_t value) {
32 int32_t res;
33 int32_t bit_a = (value & 0x80000000) >> 31;
34 int32_t not_bit_b = (value & 0x40000000) >> 30;
35 int32_t bit_b = (value & 0x20000000) >> 29;
36 int32_t b_smear = (value & 0x3e000000) >> 25;
37 int32_t slice = (value & 0x01f80000) >> 19;
38 int32_t zeroes = (value & 0x0007ffff);
39 if (zeroes != 0)
40 return -1;
41 if (bit_b) {
42 if ((not_bit_b != 0) || (b_smear != 0x1f))
43 return -1;
44 } else {
45 if ((not_bit_b != 1) || (b_smear != 0x0))
46 return -1;
47 }
48 res = (bit_a << 7) | (bit_b << 6) | slice;
49 return res;
50 }
51
52 /*
53 * Determine whether value can be encoded as a Thumb2 floating point
54 * immediate. If not, return -1. If so return encoded 8-bit value.
55 */
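// Example: +2.0 has the bit pattern 0x4000000000000000, which passes every check below and
// encodes to 0 - the same "+2.0 encodes to 0" trick LoadConstantWide uses to build +0.0.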
56 static int32_t EncodeImmDouble(int64_t value) {
57 int32_t res;
58 int32_t bit_a = (value & INT64_C(0x8000000000000000)) >> 63;
59 int32_t not_bit_b = (value & INT64_C(0x4000000000000000)) >> 62;
60 int32_t bit_b = (value & INT64_C(0x2000000000000000)) >> 61;
61 int32_t b_smear = (value & INT64_C(0x3fc0000000000000)) >> 54;
62 int32_t slice = (value & INT64_C(0x003f000000000000)) >> 48;
63 uint64_t zeroes = (value & INT64_C(0x0000ffffffffffff));
64 if (zeroes != 0ull)
65 return -1;
66 if (bit_b) {
67 if ((not_bit_b != 0) || (b_smear != 0xff))
68 return -1;
69 } else {
70 if ((not_bit_b != 1) || (b_smear != 0x0))
71 return -1;
72 }
73 res = (bit_a << 7) | (bit_b << 6) | slice;
74 return res;
75 }
76
77 LIR* ArmMir2Lir::LoadFPConstantValue(int r_dest, int value) {
78 DCHECK(RegStorage::IsSingle(r_dest));
79 if (value == 0) {
80 // TODO: we need better info about the target CPU. A vector exclusive-or
81 // would probably be better here if we could rely on its existence.
82 // Load an immediate +2.0 (which encodes to 0)
83 NewLIR2(kThumb2Vmovs_IMM8, r_dest, 0);
84 // +0.0 = +2.0 - +2.0
85 return NewLIR3(kThumb2Vsubs, r_dest, r_dest, r_dest);
86 } else {
87 int encoded_imm = EncodeImmSingle(value);
88 if (encoded_imm >= 0) {
89 return NewLIR2(kThumb2Vmovs_IMM8, r_dest, encoded_imm);
90 }
91 }
92 LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
93 if (data_target == nullptr) {
94 data_target = AddWordData(&literal_list_, value);
95 }
96 ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
97 LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs,
98 r_dest, rs_r15pc.GetReg(), 0, 0, 0, data_target);
99 AppendLIR(load_pc_rel);
100 return load_pc_rel;
101 }
102
103 /*
104 * Determine whether value can be encoded as a Thumb2 modified
105 * immediate. If not, return -1. If so, return i:imm3:a:bcdefgh form.
106 */
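// Worked examples of the checks below: 0x000000ab -> 0x0ab (plain byte),
// 0x00ab00ab -> 0x1ab (pattern 0:001), and 0x000003fc -> 0xf7f (0xff with
// rotation 8 + CLZ(value) = 30 and bcdefgh = 0x7f).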
107 int ArmMir2Lir::ModifiedImmediate(uint32_t value) {
108 uint32_t b0 = value & 0xff;
109
110 /* Note: case of value==0 must use 0:000:0:0000000 encoding */
111 if (value <= 0xFF)
112 return b0; // 0:000:a:bcdefgh
113 if (value == ((b0 << 16) | b0))
114 return (0x1 << 8) | b0; /* 0:001:a:bcdefgh */
115 if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0))
116 return (0x3 << 8) | b0; /* 0:011:a:bcdefgh */
117 b0 = (value >> 8) & 0xff;
118 if (value == ((b0 << 24) | (b0 << 8)))
119 return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */
120 /* Can we do it with rotation? */
121 int z_leading = CLZ(value);
122 int z_trailing = CTZ(value);
123 /* A run of eight or fewer active bits? */
124 if ((z_leading + z_trailing) < 24)
125 return -1; /* No - bail */
126 /* left-justify the constant, discarding msb (known to be 1) */
127 value <<= z_leading + 1;
128 /* Create bcdefgh */
129 value >>= 25;
130 /* Put it all together */
131 return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */
132 }
133
134 bool ArmMir2Lir::InexpensiveConstantInt(int32_t value) {
135 return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
136 }
137
138 bool ArmMir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
139 switch (opcode) {
140 case Instruction::ADD_INT:
141 case Instruction::ADD_INT_2ADDR:
142 case Instruction::SUB_INT:
143 case Instruction::SUB_INT_2ADDR:
144 if ((value >> 12) == (value >> 31)) { // Signed 12-bit, RRI12 versions of ADD/SUB.
145 return true;
146 }
147 FALLTHROUGH_INTENDED;
148 case Instruction::IF_EQ:
149 case Instruction::IF_NE:
150 case Instruction::IF_LT:
151 case Instruction::IF_GE:
152 case Instruction::IF_GT:
153 case Instruction::IF_LE:
154 return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(-value) >= 0);
155 case Instruction::SHL_INT:
156 case Instruction::SHL_INT_2ADDR:
157 case Instruction::SHR_INT:
158 case Instruction::SHR_INT_2ADDR:
159 case Instruction::USHR_INT:
160 case Instruction::USHR_INT_2ADDR:
161 return true;
162 case Instruction::CONST:
163 case Instruction::CONST_4:
164 case Instruction::CONST_16:
165 if ((value >> 16) == 0) {
166 return true; // movw, 16-bit unsigned.
167 }
168 FALLTHROUGH_INTENDED;
169 case Instruction::AND_INT:
170 case Instruction::AND_INT_2ADDR:
171 case Instruction::AND_INT_LIT16:
172 case Instruction::AND_INT_LIT8:
173 case Instruction::OR_INT:
174 case Instruction::OR_INT_2ADDR:
175 case Instruction::OR_INT_LIT16:
176 case Instruction::OR_INT_LIT8:
177 return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
178 case Instruction::XOR_INT:
179 case Instruction::XOR_INT_2ADDR:
180 case Instruction::XOR_INT_LIT16:
181 case Instruction::XOR_INT_LIT8:
182 return (ModifiedImmediate(value) >= 0);
183 case Instruction::MUL_INT:
184 case Instruction::MUL_INT_2ADDR:
185 case Instruction::MUL_INT_LIT8:
186 case Instruction::MUL_INT_LIT16:
187 case Instruction::DIV_INT:
188 case Instruction::DIV_INT_2ADDR:
189 case Instruction::DIV_INT_LIT8:
190 case Instruction::DIV_INT_LIT16:
191 case Instruction::REM_INT:
192 case Instruction::REM_INT_2ADDR:
193 case Instruction::REM_INT_LIT8:
194 case Instruction::REM_INT_LIT16: {
195 EasyMultiplyOp ops[2];
196 return GetEasyMultiplyTwoOps(value, ops);
197 }
198 default:
199 return false;
200 }
201 }
202
203 bool ArmMir2Lir::InexpensiveConstantFloat(int32_t value) {
204 return EncodeImmSingle(value) >= 0;
205 }
206
207 bool ArmMir2Lir::InexpensiveConstantLong(int64_t value) {
208 return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value));
209 }
210
211 bool ArmMir2Lir::InexpensiveConstantDouble(int64_t value) {
212 return EncodeImmDouble(value) >= 0;
213 }
214
215 /*
216 * Load an immediate using a shortcut if possible; otherwise
217 * grab from the per-translation literal pool.
218 *
219 * No additional register-clobbering operation is performed. Use this version when
220 * 1) r_dest is freshly returned from AllocTemp or
221 * 2) The codegen is under fixed register usage
222 */
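// The cases below are tried in order of decreasing density: an 8-bit immediate MOV for a
// low register, a Thumb2 modified-immediate MOV, an MVN of the complemented value when that
// encodes, a MOVW for a 16-bit value, and finally a MOVW/MOVT pair.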
223 LIR* ArmMir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
224 LIR* res;
225 int mod_imm;
226
227 if (r_dest.IsFloat()) {
228 return LoadFPConstantValue(r_dest.GetReg(), value);
229 }
230
231 /* See if the value can be constructed cheaply */
232 if (r_dest.Low8() && (value >= 0) && (value <= 255)) {
233 return NewLIR2(kThumbMovImm, r_dest.GetReg(), value);
234 }
235 /* Check Modified immediate special cases */
236 mod_imm = ModifiedImmediate(value);
237 if (mod_imm >= 0) {
238 res = NewLIR2(kThumb2MovI8M, r_dest.GetReg(), mod_imm);
239 return res;
240 }
241 mod_imm = ModifiedImmediate(~value);
242 if (mod_imm >= 0) {
243 res = NewLIR2(kThumb2MvnI8M, r_dest.GetReg(), mod_imm);
244 return res;
245 }
246 /* 16-bit immediate? */
247 if ((value & 0xffff) == value) {
248 res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), value);
249 return res;
250 }
251 /* Do a low/high pair */
252 res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), Low16Bits(value));
253 NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), High16Bits(value));
254 return res;
255 }
256
257 LIR* ArmMir2Lir::OpUnconditionalBranch(LIR* target) {
258 LIR* res = NewLIR1(kThumbBUncond, 0 /* offset to be patched during assembly */);
259 res->target = target;
260 return res;
261 }
262
263 LIR* ArmMir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
264 LIR* branch = NewLIR2(kThumbBCond, 0 /* offset to be patched */,
265 ArmConditionEncoding(cc));
266 branch->target = target;
267 return branch;
268 }
269
270 LIR* ArmMir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
271 ArmOpcode opcode = kThumbBkpt;
272 switch (op) {
273 case kOpBlx:
274 opcode = kThumbBlxR;
275 break;
276 case kOpBx:
277 opcode = kThumbBx;
278 break;
279 default:
280 LOG(FATAL) << "Bad opcode " << op;
281 }
282 return NewLIR1(opcode, r_dest_src.GetReg());
283 }
284
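// 'thumb_form' selects the 16-bit encodings, which require no shift and both operands in
// r0-r7; otherwise the 32-bit Thumb2 encodings (possibly with a shifted second operand) are used.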
285 LIR* ArmMir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
286 int shift) {
287 bool thumb_form =
288 ((shift == 0) && r_dest_src1.Low8() && r_src2.Low8());
289 ArmOpcode opcode = kThumbBkpt;
290 switch (op) {
291 case kOpAdc:
292 opcode = (thumb_form) ? kThumbAdcRR : kThumb2AdcRRR;
293 break;
294 case kOpAnd:
295 opcode = (thumb_form) ? kThumbAndRR : kThumb2AndRRR;
296 break;
297 case kOpBic:
298 opcode = (thumb_form) ? kThumbBicRR : kThumb2BicRRR;
299 break;
300 case kOpCmn:
301 DCHECK_EQ(shift, 0);
302 opcode = (thumb_form) ? kThumbCmnRR : kThumb2CmnRR;
303 break;
304 case kOpCmp:
305 if (thumb_form)
306 opcode = kThumbCmpRR;
307 else if ((shift == 0) && !r_dest_src1.Low8() && !r_src2.Low8())
308 opcode = kThumbCmpHH;
309 else if ((shift == 0) && r_dest_src1.Low8())
310 opcode = kThumbCmpLH;
311 else if (shift == 0)
312 opcode = kThumbCmpHL;
313 else
314 opcode = kThumb2CmpRR;
315 break;
316 case kOpXor:
317 opcode = (thumb_form) ? kThumbEorRR : kThumb2EorRRR;
318 break;
319 case kOpMov:
320 DCHECK_EQ(shift, 0);
321 if (r_dest_src1.Low8() && r_src2.Low8())
322 opcode = kThumbMovRR;
323 else if (!r_dest_src1.Low8() && !r_src2.Low8())
324 opcode = kThumbMovRR_H2H;
325 else if (r_dest_src1.Low8())
326 opcode = kThumbMovRR_H2L;
327 else
328 opcode = kThumbMovRR_L2H;
329 break;
330 case kOpMul:
331 DCHECK_EQ(shift, 0);
332 opcode = (thumb_form) ? kThumbMul : kThumb2MulRRR;
333 break;
334 case kOpMvn:
335 opcode = (thumb_form) ? kThumbMvn : kThumb2MnvRR;
336 break;
337 case kOpNeg:
338 DCHECK_EQ(shift, 0);
339 opcode = (thumb_form) ? kThumbNeg : kThumb2NegRR;
340 break;
341 case kOpOr:
342 opcode = (thumb_form) ? kThumbOrr : kThumb2OrrRRR;
343 break;
344 case kOpSbc:
345 opcode = (thumb_form) ? kThumbSbc : kThumb2SbcRRR;
346 break;
347 case kOpTst:
348 opcode = (thumb_form) ? kThumbTst : kThumb2TstRR;
349 break;
350 case kOpLsl:
351 DCHECK_EQ(shift, 0);
352 opcode = (thumb_form) ? kThumbLslRR : kThumb2LslRRR;
353 break;
354 case kOpLsr:
355 DCHECK_EQ(shift, 0);
356 opcode = (thumb_form) ? kThumbLsrRR : kThumb2LsrRRR;
357 break;
358 case kOpAsr:
359 DCHECK_EQ(shift, 0);
360 opcode = (thumb_form) ? kThumbAsrRR : kThumb2AsrRRR;
361 break;
362 case kOpRor:
363 DCHECK_EQ(shift, 0);
364 opcode = (thumb_form) ? kThumbRorRR : kThumb2RorRRR;
365 break;
366 case kOpAdd:
367 opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR;
368 break;
369 case kOpSub:
370 opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR;
371 break;
372 case kOpRev:
373 DCHECK_EQ(shift, 0);
374 if (!thumb_form) {
375 // Binary, but rm is encoded twice.
376 return NewLIR3(kThumb2RevRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
377 }
378 opcode = kThumbRev;
379 break;
380 case kOpRevsh:
381 DCHECK_EQ(shift, 0);
382 if (!thumb_form) {
383 // Binary, but rm is encoded twice.
384 return NewLIR3(kThumb2RevshRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
385 }
386 opcode = kThumbRevsh;
387 break;
388 case kOp2Byte:
389 DCHECK_EQ(shift, 0);
390 return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 8);
391 case kOp2Short:
392 DCHECK_EQ(shift, 0);
393 return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16);
394 case kOp2Char:
395 DCHECK_EQ(shift, 0);
396 return NewLIR4(kThumb2Ubfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16);
397 default:
398 LOG(FATAL) << "Bad opcode: " << op;
399 break;
400 }
401 DCHECK(!IsPseudoLirOp(opcode));
402 if (EncodingMap[opcode].flags & IS_BINARY_OP) {
403 return NewLIR2(opcode, r_dest_src1.GetReg(), r_src2.GetReg());
404 } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
405 if (EncodingMap[opcode].field_loc[2].kind == kFmtShift) {
406 return NewLIR3(opcode, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
407 } else {
408 return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg());
409 }
410 } else if (EncodingMap[opcode].flags & IS_QUAD_OP) {
411 return NewLIR4(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg(), shift);
412 } else {
413 LOG(FATAL) << "Unexpected encoding operand count";
414 return nullptr;
415 }
416 }
417
418 LIR* ArmMir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
419 return OpRegRegShift(op, r_dest_src1, r_src2, 0);
420 }
421
422 LIR* ArmMir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
423 UNUSED(r_dest, r_base, offset, move_type);
424 UNIMPLEMENTED(FATAL);
425 UNREACHABLE();
426 }
427
428 LIR* ArmMir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
429 UNUSED(r_base, offset, r_src, move_type);
430 UNIMPLEMENTED(FATAL);
431 UNREACHABLE();
432 }
433
434 LIR* ArmMir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
435 UNUSED(op, cc, r_dest, r_src);
436 LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm";
437 UNREACHABLE();
438 }
439
440 LIR* ArmMir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
441 RegStorage r_src2, int shift) {
442 ArmOpcode opcode = kThumbBkpt;
443 bool thumb_form = (shift == 0) && r_dest.Low8() && r_src1.Low8() && r_src2.Low8();
444 switch (op) {
445 case kOpAdd:
446 opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR;
447 break;
448 case kOpSub:
449 opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR;
450 break;
451 case kOpRsub:
452 opcode = kThumb2RsubRRR;
453 break;
454 case kOpAdc:
455 opcode = kThumb2AdcRRR;
456 break;
457 case kOpAnd:
458 opcode = kThumb2AndRRR;
459 break;
460 case kOpBic:
461 opcode = kThumb2BicRRR;
462 break;
463 case kOpXor:
464 opcode = kThumb2EorRRR;
465 break;
466 case kOpMul:
467 DCHECK_EQ(shift, 0);
468 opcode = kThumb2MulRRR;
469 break;
470 case kOpDiv:
471 DCHECK_EQ(shift, 0);
472 opcode = kThumb2SdivRRR;
473 break;
474 case kOpOr:
475 opcode = kThumb2OrrRRR;
476 break;
477 case kOpSbc:
478 opcode = kThumb2SbcRRR;
479 break;
480 case kOpLsl:
481 DCHECK_EQ(shift, 0);
482 opcode = kThumb2LslRRR;
483 break;
484 case kOpLsr:
485 DCHECK_EQ(shift, 0);
486 opcode = kThumb2LsrRRR;
487 break;
488 case kOpAsr:
489 DCHECK_EQ(shift, 0);
490 opcode = kThumb2AsrRRR;
491 break;
492 case kOpRor:
493 DCHECK_EQ(shift, 0);
494 opcode = kThumb2RorRRR;
495 break;
496 default:
497 LOG(FATAL) << "Bad opcode: " << op;
498 break;
499 }
500 DCHECK(!IsPseudoLirOp(opcode));
501 if (EncodingMap[opcode].flags & IS_QUAD_OP) {
502 return NewLIR4(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
503 } else {
504 DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
505 return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
506 }
507 }
508
509 LIR* ArmMir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
510 return OpRegRegRegShift(op, r_dest, r_src1, r_src2, 0);
511 }
512
513 LIR* ArmMir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
514 bool neg = (value < 0);
515 int32_t abs_value = (neg) ? -value : value;
516 ArmOpcode opcode = kThumbBkpt;
517 ArmOpcode alt_opcode = kThumbBkpt;
518 bool all_low_regs = r_dest.Low8() && r_src1.Low8();
519 int32_t mod_imm = ModifiedImmediate(value);
520
521 switch (op) {
522 case kOpLsl:
523 if (all_low_regs)
524 return NewLIR3(kThumbLslRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
525 else
526 return NewLIR3(kThumb2LslRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
527 case kOpLsr:
528 if (all_low_regs)
529 return NewLIR3(kThumbLsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
530 else
531 return NewLIR3(kThumb2LsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
532 case kOpAsr:
533 if (all_low_regs)
534 return NewLIR3(kThumbAsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
535 else
536 return NewLIR3(kThumb2AsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
537 case kOpRor:
538 return NewLIR3(kThumb2RorRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
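// ADD/SUB try progressively larger encodings: the SP/PC-relative 16-bit forms, a 3-bit
// immediate for low registers, a modified immediate (flipping ADD<->SUB when only the
// negated value encodes), the 12-bit immediate form, and finally a constant materialized
// into a temp register by the common tail of this function.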
539 case kOpAdd:
540 if (r_dest.Low8() && (r_src1 == rs_r13sp) && (value <= 1020) && ((value & 0x3) == 0)) {
541 return NewLIR3(kThumbAddSpRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2);
542 } else if (r_dest.Low8() && (r_src1 == rs_r15pc) &&
543 (value <= 1020) && ((value & 0x3) == 0)) {
544 return NewLIR3(kThumbAddPcRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2);
545 }
546 FALLTHROUGH_INTENDED;
547 case kOpSub:
548 if (all_low_regs && ((abs_value & 0x7) == abs_value)) {
549 if (op == kOpAdd)
550 opcode = (neg) ? kThumbSubRRI3 : kThumbAddRRI3;
551 else
552 opcode = (neg) ? kThumbAddRRI3 : kThumbSubRRI3;
553 return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value);
554 }
555 if (mod_imm < 0) {
556 mod_imm = ModifiedImmediate(-value);
557 if (mod_imm >= 0) {
558 op = (op == kOpAdd) ? kOpSub : kOpAdd;
559 }
560 }
561 if (mod_imm < 0 && (abs_value >> 12) == 0) {
562 // This is deliberately used only if modified immediate encoding is inadequate since
563 // we sometimes actually use the flags for small values but not necessarily low regs.
564 if (op == kOpAdd)
565 opcode = (neg) ? kThumb2SubRRI12 : kThumb2AddRRI12;
566 else
567 opcode = (neg) ? kThumb2AddRRI12 : kThumb2SubRRI12;
568 return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value);
569 }
570 if (op == kOpSub) {
571 opcode = kThumb2SubRRI8M;
572 alt_opcode = kThumb2SubRRR;
573 } else {
574 opcode = kThumb2AddRRI8M;
575 alt_opcode = kThumb2AddRRR;
576 }
577 break;
578 case kOpRsub:
579 opcode = kThumb2RsubRRI8M;
580 alt_opcode = kThumb2RsubRRR;
581 break;
582 case kOpAdc:
583 opcode = kThumb2AdcRRI8M;
584 alt_opcode = kThumb2AdcRRR;
585 break;
586 case kOpSbc:
587 opcode = kThumb2SbcRRI8M;
588 alt_opcode = kThumb2SbcRRR;
589 break;
590 case kOpOr:
591 opcode = kThumb2OrrRRI8M;
592 alt_opcode = kThumb2OrrRRR;
593 if (mod_imm < 0) {
594 mod_imm = ModifiedImmediate(~value);
595 if (mod_imm >= 0) {
596 opcode = kThumb2OrnRRI8M;
597 }
598 }
599 break;
600 case kOpAnd:
601 if (mod_imm < 0) {
602 mod_imm = ModifiedImmediate(~value);
603 if (mod_imm >= 0) {
604 return NewLIR3(kThumb2BicRRI8M, r_dest.GetReg(), r_src1.GetReg(), mod_imm);
605 }
606 }
607 opcode = kThumb2AndRRI8M;
608 alt_opcode = kThumb2AndRRR;
609 break;
610 case kOpXor:
611 opcode = kThumb2EorRRI8M;
612 alt_opcode = kThumb2EorRRR;
613 break;
614 case kOpMul:
615 // TUNING: power of 2, shift & add
616 mod_imm = -1;
617 alt_opcode = kThumb2MulRRR;
618 break;
619 case kOpCmp: {
620 LIR* res;
621 if (mod_imm >= 0) {
622 res = NewLIR2(kThumb2CmpRI8M, r_src1.GetReg(), mod_imm);
623 } else {
624 mod_imm = ModifiedImmediate(-value);
625 if (mod_imm >= 0) {
626 res = NewLIR2(kThumb2CmnRI8M, r_src1.GetReg(), mod_imm);
627 } else {
628 RegStorage r_tmp = AllocTemp();
629 res = LoadConstant(r_tmp, value);
630 OpRegReg(kOpCmp, r_src1, r_tmp);
631 FreeTemp(r_tmp);
632 }
633 }
634 return res;
635 }
636 default:
637 LOG(FATAL) << "Bad opcode: " << op;
638 }
639
640 if (mod_imm >= 0) {
641 return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), mod_imm);
642 } else {
643 RegStorage r_scratch = AllocTemp();
644 LoadConstant(r_scratch, value);
645 LIR* res;
646 if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
647 res = NewLIR4(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0);
648 else
649 res = NewLIR3(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
650 FreeTemp(r_scratch);
651 return res;
652 }
653 }
654
655 /* Handle Thumb-only variants here - otherwise punt to OpRegRegImm */
656 LIR* ArmMir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
657 bool neg = (value < 0);
658 int32_t abs_value = (neg) ? -value : value;
659 bool short_form = (((abs_value & 0xff) == abs_value) && r_dest_src1.Low8());
660 ArmOpcode opcode = kThumbBkpt;
661 switch (op) {
662 case kOpAdd:
663 if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */
664 DCHECK_EQ((value & 0x3), 0);
665 return NewLIR1(kThumbAddSpI7, value >> 2);
666 } else if (short_form) {
667 opcode = (neg) ? kThumbSubRI8 : kThumbAddRI8;
668 }
669 break;
670 case kOpSub:
671 if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */
672 DCHECK_EQ((value & 0x3), 0);
673 return NewLIR1(kThumbSubSpI7, value >> 2);
674 } else if (short_form) {
675 opcode = (neg) ? kThumbAddRI8 : kThumbSubRI8;
676 }
677 break;
678 case kOpCmp:
679 if (!neg && short_form) {
680 opcode = kThumbCmpRI8;
681 } else {
682 short_form = false;
683 }
684 break;
685 default:
686 /* Punt to OpRegRegImm - a bad case will be caught there */
687 short_form = false;
688 break;
689 }
690 if (short_form) {
691 return NewLIR2(opcode, r_dest_src1.GetReg(), abs_value);
692 } else {
693 return OpRegRegImm(op, r_dest_src1, r_dest_src1, value);
694 }
695 }
696
697 LIR* ArmMir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
698 LIR* res = nullptr;
699 int32_t val_lo = Low32Bits(value);
700 int32_t val_hi = High32Bits(value);
701 if (r_dest.IsFloat()) {
702 DCHECK(!r_dest.IsPair());
703 if ((val_lo == 0) && (val_hi == 0)) {
704 // TODO: we need better info about the target CPU. A vector exclusive-or
705 // would probably be better here if we could rely on its existence.
706 // Load an immediate +2.0 (which encodes to 0)
707 NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), 0);
708 // +0.0 = +2.0 - +2.0
709 res = NewLIR3(kThumb2Vsubd, r_dest.GetReg(), r_dest.GetReg(), r_dest.GetReg());
710 } else {
711 int encoded_imm = EncodeImmDouble(value);
712 if (encoded_imm >= 0) {
713 res = NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), encoded_imm);
714 }
715 }
716 } else {
717 // NOTE: Arm32 assumption here.
718 DCHECK(r_dest.IsPair());
719 if ((InexpensiveConstantInt(val_lo) && (InexpensiveConstantInt(val_hi)))) {
720 res = LoadConstantNoClobber(r_dest.GetLow(), val_lo);
721 LoadConstantNoClobber(r_dest.GetHigh(), val_hi);
722 }
723 }
724 if (res == nullptr) {
725 // No short form - load from the literal pool.
726 LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
727 if (data_target == nullptr) {
728 data_target = AddWideData(&literal_list_, val_lo, val_hi);
729 }
730 ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
731 if (r_dest.IsFloat()) {
732 res = RawLIR(current_dalvik_offset_, kThumb2Vldrd,
733 r_dest.GetReg(), rs_r15pc.GetReg(), 0, 0, 0, data_target);
734 } else {
735 DCHECK(r_dest.IsPair());
736 res = RawLIR(current_dalvik_offset_, kThumb2LdrdPcRel8,
737 r_dest.GetLowReg(), r_dest.GetHighReg(), rs_r15pc.GetReg(), 0, 0, data_target);
738 }
739 AppendLIR(res);
740 }
741 return res;
742 }
743
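// Pack a shift kind (e.g. kArmLsl) and a 5-bit amount into the single 'kFmtShift' operand
// used by the Thumb2 three-register encodings: the kind occupies the low two bits and the
// amount the bits above it.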
744 int ArmMir2Lir::EncodeShift(int code, int amount) {
745 return ((amount & 0x1f) << 2) | code;
746 }
747
748 LIR* ArmMir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
749 int scale, OpSize size) {
750 bool all_low_regs = r_base.Low8() && r_index.Low8() && r_dest.Low8();
751 LIR* load;
752 ArmOpcode opcode = kThumbBkpt;
753 bool thumb_form = (all_low_regs && (scale == 0));
754 RegStorage reg_ptr;
755
756 if (r_dest.IsFloat()) {
757 if (r_dest.IsSingle()) {
758 DCHECK((size == k32) || (size == kSingle) || (size == kReference));
759 opcode = kThumb2Vldrs;
760 size = kSingle;
761 } else {
762 DCHECK(r_dest.IsDouble());
763 DCHECK((size == k64) || (size == kDouble));
764 opcode = kThumb2Vldrd;
765 size = kDouble;
766 }
767 } else {
768 if (size == kSingle)
769 size = k32;
770 }
771
772 switch (size) {
773 case kDouble: // fall-through
774 // Intentional fall-through.
775 case kSingle:
776 reg_ptr = AllocTemp();
777 if (scale) {
778 NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(),
779 EncodeShift(kArmLsl, scale));
780 } else {
781 OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index);
782 }
783 load = NewLIR3(opcode, r_dest.GetReg(), reg_ptr.GetReg(), 0);
784 FreeTemp(reg_ptr);
785 return load;
786 case k32:
787 // Intentional fall-through.
788 case kReference:
789 opcode = (thumb_form) ? kThumbLdrRRR : kThumb2LdrRRR;
790 break;
791 case kUnsignedHalf:
792 opcode = (thumb_form) ? kThumbLdrhRRR : kThumb2LdrhRRR;
793 break;
794 case kSignedHalf:
795 opcode = (thumb_form) ? kThumbLdrshRRR : kThumb2LdrshRRR;
796 break;
797 case kUnsignedByte:
798 opcode = (thumb_form) ? kThumbLdrbRRR : kThumb2LdrbRRR;
799 break;
800 case kSignedByte:
801 opcode = (thumb_form) ? kThumbLdrsbRRR : kThumb2LdrsbRRR;
802 break;
803 default:
804 LOG(FATAL) << "Bad size: " << size;
805 }
806 if (thumb_form)
807 load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
808 else
809 load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
810
811 return load;
812 }
813
814 LIR* ArmMir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
815 int scale, OpSize size) {
816 bool all_low_regs = r_base.Low8() && r_index.Low8() && r_src.Low8();
817 LIR* store = nullptr;
818 ArmOpcode opcode = kThumbBkpt;
819 bool thumb_form = (all_low_regs && (scale == 0));
820 RegStorage reg_ptr;
821
822 if (r_src.IsFloat()) {
823 if (r_src.IsSingle()) {
824 DCHECK((size == k32) || (size == kSingle) || (size == kReference));
825 opcode = kThumb2Vstrs;
826 size = kSingle;
827 } else {
828 DCHECK(r_src.IsDouble());
829 DCHECK((size == k64) || (size == kDouble));
830 DCHECK_EQ((r_src.GetReg() & 0x1), 0);
831 opcode = kThumb2Vstrd;
832 size = kDouble;
833 }
834 } else {
835 if (size == kSingle)
836 size = k32;
837 }
838
839 switch (size) {
840 case kDouble: // fall-through
841 // Intentional fall-through.
842 case kSingle:
843 reg_ptr = AllocTemp();
844 if (scale) {
845 NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(),
846 EncodeShift(kArmLsl, scale));
847 } else {
848 OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index);
849 }
850 store = NewLIR3(opcode, r_src.GetReg(), reg_ptr.GetReg(), 0);
851 FreeTemp(reg_ptr);
852 return store;
853 case k32:
854 // Intentional fall-through.
855 case kReference:
856 opcode = (thumb_form) ? kThumbStrRRR : kThumb2StrRRR;
857 break;
858 case kUnsignedHalf:
859 // Intentional fall-through.
860 case kSignedHalf:
861 opcode = (thumb_form) ? kThumbStrhRRR : kThumb2StrhRRR;
862 break;
863 case kUnsignedByte:
864 // Intentional fall-through.
865 case kSignedByte:
866 opcode = (thumb_form) ? kThumbStrbRRR : kThumb2StrbRRR;
867 break;
868 default:
869 LOG(FATAL) << "Bad size: " << size;
870 }
871 if (thumb_form)
872 store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
873 else
874 store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
875
876 return store;
877 }
878
879 // Helper function for LoadBaseDispBody()/StoreBaseDispBody().
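// The opcodes handled here take an 8-bit offset implicitly shifted left by 2 (0..1020 in
// steps of 4). Higher displacement bits are peeled off into a separate ADD to a temporary
// pointer, e.g. displacement 0x1230 becomes ADD r_ptr, r_base, #0x1000 followed by an
// access with encoded offset 0x8c (= 0x230 >> 2).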
880 LIR* ArmMir2Lir::LoadStoreUsingInsnWithOffsetImm8Shl2(ArmOpcode opcode, RegStorage r_base,
881 int displacement, RegStorage r_src_dest,
882 RegStorage r_work) {
883 DCHECK_EQ(displacement & 3, 0);
884 constexpr int kOffsetMask = 0xff << 2;
885 int encoded_disp = (displacement & kOffsetMask) >> 2; // Within range of the instruction.
886 RegStorage r_ptr = r_base;
887 if ((displacement & ~kOffsetMask) != 0) {
888 r_ptr = r_work.Valid() ? r_work : AllocTemp();
889 // Add displacement & ~kOffsetMask to the base; this is a single instruction for offsets up to +-256KiB.
890 OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~kOffsetMask);
891 }
892 LIR* lir = nullptr;
893 if (!r_src_dest.IsPair()) {
894 lir = NewLIR3(opcode, r_src_dest.GetReg(), r_ptr.GetReg(), encoded_disp);
895 } else {
896 lir = NewLIR4(opcode, r_src_dest.GetLowReg(), r_src_dest.GetHighReg(), r_ptr.GetReg(),
897 encoded_disp);
898 }
899 if ((displacement & ~kOffsetMask) != 0 && !r_work.Valid()) {
900 FreeTemp(r_ptr);
901 }
902 return lir;
903 }
904
905 /*
906 * Load value from base + displacement. Optionally perform null check
907 * on base (which must have an associated s_reg and MIR). If not
908 * performing null check, incoming MIR can be null.
909 */
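// Per size, the body below picks a 16-bit encoding (low registers with a small scaled
// displacement, or the PC/SP-relative forms), the Thumb2 12-bit-displacement encoding, or,
// when neither fits, either a single ADD plus a short-offset access or an indexed load
// with the offset materialized in a temporary register.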
910 LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
911 OpSize size) {
912 LIR* load = nullptr;
913 ArmOpcode opcode16 = kThumbBkpt; // 16-bit Thumb opcode.
914 ArmOpcode opcode32 = kThumbBkpt; // 32-bit Thumb2 opcode.
915 bool short_form = false;
916 bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8();
917 int scale = 0; // Used for opcode16 and some indexed loads.
918 bool already_generated = false;
919 switch (size) {
920 case kDouble:
921 // Intentional fall-through.
922 case k64:
923 if (r_dest.IsFloat()) {
924 DCHECK(!r_dest.IsPair());
925 load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vldrd, r_base, displacement, r_dest);
926 } else {
927 DCHECK(r_dest.IsPair());
928 // Use r_dest.GetLow() as the temporary pointer if needed.
929 load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2LdrdI8, r_base, displacement, r_dest,
930 r_dest.GetLow());
931 }
932 already_generated = true;
933 break;
934 case kSingle:
935 // Intentional fall-through.
936 case k32:
937 // Intentional fall-through.
938 case kReference:
939 if (r_dest.IsFloat()) {
940 DCHECK(r_dest.IsSingle());
941 load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vldrs, r_base, displacement, r_dest);
942 already_generated = true;
943 break;
944 }
945 DCHECK_EQ((displacement & 0x3), 0);
946 scale = 2;
947 if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) &&
948 (displacement >= 0)) {
949 short_form = true;
950 opcode16 = kThumbLdrPcRel;
951 } else if (r_dest.Low8() && (r_base == rs_rARM_SP) && (displacement <= 1020) &&
952 (displacement >= 0)) {
953 short_form = true;
954 opcode16 = kThumbLdrSpRel;
955 } else {
956 short_form = all_low && (displacement >> (5 + scale)) == 0;
957 opcode16 = kThumbLdrRRI5;
958 opcode32 = kThumb2LdrRRI12;
959 }
960 break;
961 case kUnsignedHalf:
962 DCHECK_EQ((displacement & 0x1), 0);
963 scale = 1;
964 short_form = all_low && (displacement >> (5 + scale)) == 0;
965 opcode16 = kThumbLdrhRRI5;
966 opcode32 = kThumb2LdrhRRI12;
967 break;
968 case kSignedHalf:
969 DCHECK_EQ((displacement & 0x1), 0);
970 scale = 1;
971 DCHECK_EQ(opcode16, kThumbBkpt); // Not available.
972 opcode32 = kThumb2LdrshRRI12;
973 break;
974 case kUnsignedByte:
975 DCHECK_EQ(scale, 0); // Keep scale = 0.
976 short_form = all_low && (displacement >> (5 + scale)) == 0;
977 opcode16 = kThumbLdrbRRI5;
978 opcode32 = kThumb2LdrbRRI12;
979 break;
980 case kSignedByte:
981 DCHECK_EQ(scale, 0); // Keep scale = 0.
982 DCHECK_EQ(opcode16, kThumbBkpt); // Not available.
983 opcode32 = kThumb2LdrsbRRI12;
984 break;
985 default:
986 LOG(FATAL) << "Bad size: " << size;
987 }
988
989 if (!already_generated) {
990 if (short_form) {
991 load = NewLIR3(opcode16, r_dest.GetReg(), r_base.GetReg(), displacement >> scale);
992 } else if ((displacement >> 12) == 0) { // Thumb2 form.
993 load = NewLIR3(opcode32, r_dest.GetReg(), r_base.GetReg(), displacement);
994 } else if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) &&
995 InexpensiveConstantInt(displacement & ~0x00000fff, Instruction::ADD_INT)) {
996 // In this case, using LoadIndexed would emit 3 insns (movw+movt+ldr) but we can
997 // actually do it in two because we know that the kOpAdd is a single insn. On the
998 // other hand, we introduce an extra dependency, so this is not necessarily faster.
999 if (opcode16 != kThumbBkpt && r_dest.Low8() &&
1000 InexpensiveConstantInt(displacement & ~(0x1f << scale), Instruction::ADD_INT)) {
1001 // We can use the 16-bit Thumb opcode for the load.
1002 OpRegRegImm(kOpAdd, r_dest, r_base, displacement & ~(0x1f << scale));
1003 load = NewLIR3(opcode16, r_dest.GetReg(), r_dest.GetReg(), (displacement >> scale) & 0x1f);
1004 } else {
1005 DCHECK_NE(opcode32, kThumbBkpt);
1006 OpRegRegImm(kOpAdd, r_dest, r_base, displacement & ~0x00000fff);
1007 load = NewLIR3(opcode32, r_dest.GetReg(), r_dest.GetReg(), displacement & 0x00000fff);
1008 }
1009 } else {
1010 if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) ||
1011 (scale != 0 && InexpensiveConstantInt(displacement, Instruction::CONST))) {
1012 scale = 0; // Prefer unscaled indexing if the same number of insns.
1013 }
1014 RegStorage reg_offset = AllocTemp();
1015 LoadConstant(reg_offset, displacement >> scale);
1016 DCHECK(!r_dest.IsFloat());
1017 load = LoadBaseIndexed(r_base, reg_offset, r_dest, scale, size);
1018 FreeTemp(reg_offset);
1019 }
1020 }
1021
1022 // TODO: in future may need to differentiate Dalvik accesses w/ spills
1023 if (mem_ref_type_ == ResourceMask::kDalvikReg) {
1024 DCHECK_EQ(r_base, rs_rARM_SP);
1025 AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
1026 }
1027 return load;
1028 }
1029
1030 LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
1031 OpSize size, VolatileKind is_volatile) {
1032 // TODO: base this on target.
1033 if (size == kWord) {
1034 size = k32;
1035 }
1036 LIR* load;
1037 if (is_volatile == kVolatile && (size == k64 || size == kDouble) &&
1038 !cu_->compiler_driver->GetInstructionSetFeatures()->
1039 AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd()) {
1040 // Only 64-bit load needs special handling.
1041 // If the CPU supports LPAE, aligned LDRD is atomic - fall through to the plain LoadBaseDispBody() path.
1042 DCHECK(!r_dest.IsFloat()); // See RegClassForFieldLoadSave().
1043 // Use LDREXD for the atomic load. (Expect displacement > 0, don't optimize for == 0.)
1044 RegStorage r_ptr = AllocTemp();
1045 OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
1046 load = NewLIR3(kThumb2Ldrexd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg());
1047 FreeTemp(r_ptr);
1048 } else {
1049 load = LoadBaseDispBody(r_base, displacement, r_dest, size);
1050 }
1051
1052 if (UNLIKELY(is_volatile == kVolatile)) {
1053 GenMemBarrier(kLoadAny);
1054 }
1055
1056 return load;
1057 }
1058
1059
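// Store counterpart of LoadBaseDispBody(): chooses among the 16-bit, 12-bit-displacement,
// split-ADD and indexed store forms in the same way as the load path above.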
1060 LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
1061 OpSize size) {
1062 LIR* store = nullptr;
1063 ArmOpcode opcode16 = kThumbBkpt; // 16-bit Thumb opcode.
1064 ArmOpcode opcode32 = kThumbBkpt; // 32-bit Thumb2 opcode.
1065 bool short_form = false;
1066 bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8();
1067 int scale = 0; // Used for opcode16 and some indexed stores.
1068 bool already_generated = false;
1069 switch (size) {
1070 case kDouble:
1071 // Intentional fall-through.
1072 case k64:
1073 if (r_src.IsFloat()) {
1074 // Note: If the register is retrieved by register allocator, it should never be a pair.
1075 // But some functions in mir2lir assume 64-bit registers are 32-bit register pairs.
1076 // TODO: Rework Mir2Lir::LoadArg() and Mir2Lir::LoadArgDirect().
1077 if (r_src.IsPair()) {
1078 r_src = As64BitFloatReg(r_src);
1079 }
1080 DCHECK(!r_src.IsPair());
1081 store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vstrd, r_base, displacement, r_src);
1082 } else {
1083 DCHECK(r_src.IsPair());
1084 store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2StrdI8, r_base, displacement, r_src);
1085 }
1086 already_generated = true;
1087 break;
1088 case kSingle:
1089 // Intentional fall-through.
1090 case k32:
1091 // Intentional fall-through.
1092 case kReference:
1093 if (r_src.IsFloat()) {
1094 DCHECK(r_src.IsSingle());
1095 store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vstrs, r_base, displacement, r_src);
1096 already_generated = true;
1097 break;
1098 }
1099 DCHECK_EQ((displacement & 0x3), 0);
1100 scale = 2;
1101 if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) {
1102 short_form = true;
1103 opcode16 = kThumbStrSpRel;
1104 } else {
1105 short_form = all_low && (displacement >> (5 + scale)) == 0;
1106 opcode16 = kThumbStrRRI5;
1107 opcode32 = kThumb2StrRRI12;
1108 }
1109 break;
1110 case kUnsignedHalf:
1111 case kSignedHalf:
1112 DCHECK_EQ((displacement & 0x1), 0);
1113 scale = 1;
1114 short_form = all_low && (displacement >> (5 + scale)) == 0;
1115 opcode16 = kThumbStrhRRI5;
1116 opcode32 = kThumb2StrhRRI12;
1117 break;
1118 case kUnsignedByte:
1119 case kSignedByte:
1120 DCHECK_EQ(scale, 0); // Keep scale = 0.
1121 short_form = all_low && (displacement >> (5 + scale)) == 0;
1122 opcode16 = kThumbStrbRRI5;
1123 opcode32 = kThumb2StrbRRI12;
1124 break;
1125 default:
1126 LOG(FATAL) << "Bad size: " << size;
1127 }
1128 if (!already_generated) {
1129 if (short_form) {
1130 store = NewLIR3(opcode16, r_src.GetReg(), r_base.GetReg(), displacement >> scale);
1131 } else if ((displacement >> 12) == 0) {
1132 store = NewLIR3(opcode32, r_src.GetReg(), r_base.GetReg(), displacement);
1133 } else if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) &&
1134 InexpensiveConstantInt(displacement & ~0x00000fff, Instruction::ADD_INT)) {
1135 // In this case, using StoreIndexed would emit 3 insns (movw+movt+str) but we can
1136 // actually do it in two because we know that the kOpAdd is a single insn. On the
1137 // other hand, we introduce an extra dependency, so this is not necessarily faster.
1138 RegStorage r_scratch = AllocTemp();
1139 if (opcode16 != kThumbBkpt && r_src.Low8() && r_scratch.Low8() &&
1140 InexpensiveConstantInt(displacement & ~(0x1f << scale), Instruction::ADD_INT)) {
1141 // We can use the 16-bit Thumb opcode for the store.
1142 OpRegRegImm(kOpAdd, r_scratch, r_base, displacement & ~(0x1f << scale));
1143 store = NewLIR3(opcode16, r_src.GetReg(), r_scratch.GetReg(),
1144 (displacement >> scale) & 0x1f);
1145 } else {
1146 DCHECK_NE(opcode32, kThumbBkpt);
1147 OpRegRegImm(kOpAdd, r_scratch, r_base, displacement & ~0x00000fff);
1148 store = NewLIR3(opcode32, r_src.GetReg(), r_scratch.GetReg(), displacement & 0x00000fff);
1149 }
1150 FreeTemp(r_scratch);
1151 } else {
1152 if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) ||
1153 (scale != 0 && InexpensiveConstantInt(displacement, Instruction::CONST))) {
1154 scale = 0; // Prefer unscaled indexing if the same number of insns.
1155 }
1156 RegStorage r_scratch = AllocTemp();
1157 LoadConstant(r_scratch, displacement >> scale);
1158 DCHECK(!r_src.IsFloat());
1159 store = StoreBaseIndexed(r_base, r_scratch, r_src, scale, size);
1160 FreeTemp(r_scratch);
1161 }
1162 }
1163
1164 // TODO: In future, may need to differentiate Dalvik & spill accesses
1165 if (mem_ref_type_ == ResourceMask::kDalvikReg) {
1166 DCHECK_EQ(r_base, rs_rARM_SP);
1167 AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
1168 }
1169 return store;
1170 }
1171
1172 LIR* ArmMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
1173 OpSize size, VolatileKind is_volatile) {
1174 if (UNLIKELY(is_volatile == kVolatile)) {
1175 // Ensure that prior accesses become visible to other threads first.
1176 GenMemBarrier(kAnyStore);
1177 }
1178
1179 LIR* null_ck_insn;
1180 if (is_volatile == kVolatile && (size == k64 || size == kDouble) &&
1181 !cu_->compiler_driver->GetInstructionSetFeatures()->
1182 AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd()) {
1183 // Only 64-bit store needs special handling.
1184 // If the CPU supports LPAE, aligned STRD is atomic - fall through to the plain StoreBaseDispBody() path.
1185 // Use STREXD for the atomic store. (Expect displacement > 0, don't optimize for == 0.)
1186 DCHECK(!r_src.IsFloat()); // See RegClassForFieldLoadSave().
1187 RegStorage r_ptr = AllocTemp();
1188 OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
1189 LIR* fail_target = NewLIR0(kPseudoTargetLabel);
1190 // We have only 5 temporary registers available and if r_base, r_src and r_ptr already
1191 // take 4, we can't directly allocate 2 more for LDREXD temps. In that case clobber r_ptr
1192 // in LDREXD and recalculate it from r_base.
1193 RegStorage r_temp = AllocTemp();
1194 RegStorage r_temp_high = AllocTemp(false); // We may not have another temp.
1195 if (r_temp_high.Valid()) {
1196 null_ck_insn = NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_temp_high.GetReg(), r_ptr.GetReg());
1197 FreeTemp(r_temp_high);
1198 FreeTemp(r_temp);
1199 } else {
1200 // If we don't have another temp, clobber r_ptr in LDREXD and reload it.
1201 null_ck_insn = NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_ptr.GetReg(), r_ptr.GetReg());
1202 FreeTemp(r_temp); // May need the temp for kOpAdd.
1203 OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
1204 }
1205 NewLIR4(kThumb2Strexd, r_temp.GetReg(), r_src.GetLowReg(), r_src.GetHighReg(), r_ptr.GetReg());
1206 OpCmpImmBranch(kCondNe, r_temp, 0, fail_target);
1207 FreeTemp(r_ptr);
1208 } else {
1209 // TODO: base this on target.
1210 if (size == kWord) {
1211 size = k32;
1212 }
1213
1214 null_ck_insn = StoreBaseDispBody(r_base, displacement, r_src, size);
1215 }
1216
1217 if (UNLIKELY(is_volatile == kVolatile)) {
1218 // Preserve order with respect to any subsequent volatile loads.
1219 // We need StoreLoad, but that generally requires the most expensive barrier.
1220 GenMemBarrier(kAnyAny);
1221 }
1222
1223 return null_ck_insn;
1224 }
1225
1226 LIR* ArmMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
1227 int opcode;
1228 DCHECK_EQ(r_dest.IsDouble(), r_src.IsDouble());
1229 if (r_dest.IsDouble()) {
1230 opcode = kThumb2Vmovd;
1231 } else {
1232 if (r_dest.IsSingle()) {
1233 opcode = r_src.IsSingle() ? kThumb2Vmovs : kThumb2Fmsr;
1234 } else {
1235 DCHECK(r_src.IsSingle());
1236 opcode = kThumb2Fmrs;
1237 }
1238 }
1239 LIR* res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
1240 if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
1241 res->flags.is_nop = true;
1242 }
1243 return res;
1244 }
1245
1246 LIR* ArmMir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
1247 UNUSED(op, r_base, disp);
1248 LOG(FATAL) << "Unexpected use of OpMem for Arm";
1249 UNREACHABLE();
1250 }
1251
1252 LIR* ArmMir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) {
1253 UNUSED(trampoline); // The address of the trampoline is already loaded into r_tgt.
1254 return OpReg(op, r_tgt);
1255 }
1256
1257 size_t ArmMir2Lir::GetInstructionOffset(LIR* lir) {
1258 uint64_t check_flags = GetTargetInstFlags(lir->opcode);
1259 DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
1260 size_t offset = (check_flags & IS_TERTIARY_OP) ? lir->operands[2] : 0;
1261
1262 if (check_flags & SCALED_OFFSET_X2) {
1263 offset = offset * 2;
1264 } else if (check_flags & SCALED_OFFSET_X4) {
1265 offset = offset * 4;
1266 }
1267 return offset;
1268 }
1269
1270 void ArmMir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
1271 // Start with the default counts.
1272 Mir2Lir::CountRefs(core_counts, fp_counts, num_regs);
1273
1274 if (pc_rel_temp_ != nullptr) {
1275 // Now, if the dex cache array base temp is used only once outside any loops (weight = 1),
1276 // avoid the promotion, otherwise boost the weight by factor 3 because the full PC-relative
1277 // load sequence is 4 instructions long and by promoting the PC base we save up to 3
1278 // instructions per use.
1279 int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low);
1280 if (core_counts[p_map_idx].count == 1) {
1281 core_counts[p_map_idx].count = 0;
1282 } else {
1283 core_counts[p_map_idx].count *= 3;
1284 }
1285 }
1286 }
1287
1288 void ArmMir2Lir::DoPromotion() {
1289 if (CanUseOpPcRelDexCacheArrayLoad()) {
1290 pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false);
1291 }
1292
1293 Mir2Lir::DoPromotion();
1294
1295 if (pc_rel_temp_ != nullptr) {
1296 // Now, if the dex cache array base temp is promoted, remember the register but
1297 // always remove the temp's stack location to avoid unnecessarily bloating the stack.
1298 dex_cache_arrays_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg;
1299 DCHECK(!dex_cache_arrays_base_reg_.Valid() || !dex_cache_arrays_base_reg_.IsFloat());
1300 mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_);
1301 pc_rel_temp_ = nullptr;
1302 }
1303 }
1304
1305 } // namespace art
1306