1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /* This file contains codegen for the X86 ISA */
18
19 #include "codegen_x86.h"
20
21 #include "art_method.h"
22 #include "base/bit_utils.h"
23 #include "base/logging.h"
24 #include "dex/quick/mir_to_lir-inl.h"
25 #include "dex/reg_storage_eq.h"
26 #include "mirror/array-inl.h"
27 #include "x86_lir.h"
28
29 namespace art {
30
31 /*
32 * Compare two 64-bit values
33 * x = y return 0
34 * x < y return -1
35 * x > y return 1
36 */
37 void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
38 RegLocation rl_src2) {
39 if (cu_->target64) {
40 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
41 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
42 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
43 RegStorage temp_reg = AllocTemp();
44 OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
45 NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG); // result = (src1 > src2) ? 1 : 0
46 NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL); // temp = (src1 < src2) ? 1 : 0
47 NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg());
48 NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
49
50 StoreValue(rl_dest, rl_result);
51 FreeTemp(temp_reg);
52 return;
53 }
54
55 // Prepare for explicit register usage
56 ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
57 RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
58 RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
59 LoadValueDirectWideFixed(rl_src1, r_tmp1);
60 LoadValueDirectWideFixed(rl_src2, r_tmp2);
61 // Compute (r1:r0) = (r1:r0) - (r3:r2)
62 OpRegReg(kOpSub, rs_r0, rs_r2); // r0 = r0 - r2
63 OpRegReg(kOpSbc, rs_r1, rs_r3); // r1 = r1 - r3 - CF
64 NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL); // r2 = (r1:r0) < (r3:r2) ? 1 : 0
65 NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg());
66 OpReg(kOpNeg, rs_r2); // r2 = -r2
67 OpRegReg(kOpOr, rs_r0, rs_r1); // r0 = high | low - sets ZF
68 NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz); // r0 = (r1:r0) != (r3:r2) ? 1 : 0
69 NewLIR2(kX86Movzx8RR, r0, r0);
70 OpRegReg(kOpOr, rs_r0, rs_r2); // r0 = r0 | r2
71 RegLocation rl_result = LocCReturn();
72 StoreValue(rl_dest, rl_result);
73 }
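// A minimal C-level sketch (illustrative only, not compiled as part of this file) of what the
// 64-bit path above computes; the setg/setl pair yields the usual branch-free three-way compare:
//
//   int64_t CmpLong(int64_t x, int64_t y) {
//     int gt = (x > y) ? 1 : 0;  // setg
//     int lt = (x < y) ? 1 : 0;  // setl
//     return gt - lt;            // -1, 0 or 1, then sign-extended by the movsx
//   }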
74
75 X86ConditionCode X86ConditionEncoding(ConditionCode cond) {
76 switch (cond) {
77 case kCondEq: return kX86CondEq;
78 case kCondNe: return kX86CondNe;
79 case kCondCs: return kX86CondC;
80 case kCondCc: return kX86CondNc;
81 case kCondUlt: return kX86CondC;
82 case kCondUge: return kX86CondNc;
83 case kCondMi: return kX86CondS;
84 case kCondPl: return kX86CondNs;
85 case kCondVs: return kX86CondO;
86 case kCondVc: return kX86CondNo;
87 case kCondHi: return kX86CondA;
88 case kCondLs: return kX86CondBe;
89 case kCondGe: return kX86CondGe;
90 case kCondLt: return kX86CondL;
91 case kCondGt: return kX86CondG;
92 case kCondLe: return kX86CondLe;
93 case kCondAl:
94 case kCondNv: LOG(FATAL) << "Should not reach here";
95 }
96 return kX86CondO;
97 }
98
99 LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
100 NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg());
101 X86ConditionCode cc = X86ConditionEncoding(cond);
102 LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ ,
103 cc);
104 branch->target = target;
105 return branch;
106 }
107
108 LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg,
109 int check_value, LIR* target) {
110 if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) {
111 // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode
112 NewLIR2(reg.Is64Bit() ? kX86Test64RR: kX86Test32RR, reg.GetReg(), reg.GetReg());
113 } else {
114 if (reg.Is64Bit()) {
115 NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value);
116 } else {
117 NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
118 }
119 }
120 X86ConditionCode cc = X86ConditionEncoding(cond);
121 LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc);
122 branch->target = target;
123 return branch;
124 }
125
126 LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
127 // If src or dest is a pair, we'll be using low reg.
128 if (r_dest.IsPair()) {
129 r_dest = r_dest.GetLow();
130 }
131 if (r_src.IsPair()) {
132 r_src = r_src.GetLow();
133 }
134 if (r_dest.IsFloat() || r_src.IsFloat())
135 return OpFpRegCopy(r_dest, r_src);
136 LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR,
137 r_dest.GetReg(), r_src.GetReg());
138 if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
139 res->flags.is_nop = true;
140 }
141 return res;
142 }
143
144 void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
145 if (r_dest != r_src) {
146 LIR *res = OpRegCopyNoInsert(r_dest, r_src);
147 AppendLIR(res);
148 }
149 }
150
151 void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
152 if (r_dest != r_src) {
153 bool dest_fp = r_dest.IsFloat();
154 bool src_fp = r_src.IsFloat();
155 if (dest_fp) {
156 if (src_fp) {
157 OpRegCopy(r_dest, r_src);
158 } else {
159 // TODO: Prevent this from happening in the code. The result is often
160 // unused or could have been loaded more easily from memory.
161 if (!r_src.IsPair()) {
162 DCHECK(!r_dest.IsPair());
163 NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg());
164 } else {
165 NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
166 RegStorage r_tmp = AllocTempDouble();
167 NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
168 NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
169 FreeTemp(r_tmp);
170 }
171 }
172 } else {
173 if (src_fp) {
174 if (!r_dest.IsPair()) {
175 DCHECK(!r_src.IsPair());
176 NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg());
177 } else {
178 NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
179 RegStorage temp_reg = AllocTempDouble();
180 NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
181 NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
182 NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
183 }
184 } else {
185 DCHECK_EQ(r_dest.IsPair(), r_src.IsPair());
186 if (!r_src.IsPair()) {
187 // Just copy the register directly.
188 OpRegCopy(r_dest, r_src);
189 } else {
190 // Handle overlap
191 if (r_src.GetHighReg() == r_dest.GetLowReg() &&
192 r_src.GetLowReg() == r_dest.GetHighReg()) {
193 // Deal with cycles.
194 RegStorage temp_reg = AllocTemp();
195 OpRegCopy(temp_reg, r_dest.GetHigh());
196 OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
197 OpRegCopy(r_dest.GetLow(), temp_reg);
198 FreeTemp(temp_reg);
199 } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
200 OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
201 OpRegCopy(r_dest.GetLow(), r_src.GetLow());
202 } else {
203 OpRegCopy(r_dest.GetLow(), r_src.GetLow());
204 OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
205 }
206 }
207 }
208 }
209 }
210 }
211
212 void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
213 int32_t true_val, int32_t false_val, RegStorage rs_dest,
214 RegisterClass dest_reg_class) {
215 DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair());
216 DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat());
217
218 // We really need this check for correctness; otherwise we would need extra checks in the
219 // non-zero/one case below.
220 if (true_val == false_val) {
221 LoadConstantNoClobber(rs_dest, true_val);
222 return;
223 }
224
225 const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op);
226
227 const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0);
228 if (zero_one_case && IsByteRegister(rs_dest)) {
229 if (!dest_intersect) {
230 LoadConstantNoClobber(rs_dest, 0);
231 }
232 OpRegReg(kOpCmp, left_op, right_op);
233 // Set the low byte of the result to 0 or 1 from the compare condition code.
234 NewLIR2(kX86Set8R, rs_dest.GetReg(),
235 X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code)));
236 if (dest_intersect) {
237 NewLIR2(rs_dest.Is64Bit() ? kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg());
238 }
239 } else {
240 // Be careful: rs_dest can be changed only after the cmp because it may be the same as one of
241 // the operands, and we cannot zero it with xor because that would dirty the condition flags.
242 RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false);
243 if (temp_reg.Valid()) {
244 if (false_val == 0 && dest_intersect) {
245 code = FlipComparisonOrder(code);
246 std::swap(true_val, false_val);
247 }
248 if (!dest_intersect) {
249 LoadConstantNoClobber(rs_dest, false_val);
250 }
251 LoadConstantNoClobber(temp_reg, true_val);
252 OpRegReg(kOpCmp, left_op, right_op);
253 if (dest_intersect) {
254 LoadConstantNoClobber(rs_dest, false_val);
255 DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
256 }
257 OpCondRegReg(kOpCmov, code, rs_dest, temp_reg);
258 FreeTemp(temp_reg);
259 } else {
260 // slow path
261 LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr);
262 LoadConstantNoClobber(rs_dest, false_val);
263 LIR* that_is_it = NewLIR1(kX86Jmp8, 0);
264 LIR* true_case = NewLIR0(kPseudoTargetLabel);
265 cmp_branch->target = true_case;
266 LoadConstantNoClobber(rs_dest, true_val);
267 LIR* end = NewLIR0(kPseudoTargetLabel);
268 that_is_it->target = end;
269 }
270 }
271 }
272
273 void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
274 UNUSED(bb);
275 RegLocation rl_result;
276 RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
277 RegLocation rl_dest = mir_graph_->GetDest(mir);
278 // Avoid using float regs here.
279 RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
280 RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
281 ConditionCode ccode = mir->meta.ccode;
282
283 // The kMirOpSelect has two variants, one for constants and one for moves.
284 const bool is_constant_case = (mir->ssa_rep->num_uses == 1);
285
286 if (is_constant_case) {
287 int true_val = mir->dalvikInsn.vB;
288 int false_val = mir->dalvikInsn.vC;
289
290 // Simplest (degenerate) case: both constants are equal.
291 if (true_val == false_val) {
292 rl_result = EvalLoc(rl_dest, result_reg_class, true);
293 LoadConstantNoClobber(rl_result.reg, true_val);
294 } else {
295 // TODO: use GenSelectConst32 and handle additional opcode patterns such as
296 // "cmp; setcc; movzx" or "cmp; sbb r0,r0; and r0,$mask; add r0,$literal".
297 rl_src = LoadValue(rl_src, src_reg_class);
298 rl_result = EvalLoc(rl_dest, result_reg_class, true);
299 /*
300 * For ccode == kCondEq:
301 *
302 * 1) When the true case is zero and result_reg is not the same as src_reg:
303 * xor result_reg, result_reg
304 * cmp $0, src_reg
305 * mov t1, $false_case
306 * cmovnz result_reg, t1
307 * 2) When the false case is zero and result_reg is not the same as src_reg:
308 * xor result_reg, result_reg
309 * cmp $0, src_reg
310 * mov t1, $true_case
311 * cmovz result_reg, t1
312 * 3) All other cases (we do compare first to set eflags):
313 * cmp $0, src_reg
314 * mov result_reg, $false_case
315 * mov t1, $true_case
316 * cmovz result_reg, t1
317 */
318 // FIXME: depending on how you use registers you could get a false != mismatch when dealing
319 // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
320 const bool result_reg_same_as_src =
321 (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum());
322 const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
323 const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
324 const bool catch_all_case = !(true_zero_case || false_zero_case);
325
326 if (true_zero_case || false_zero_case) {
327 OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
328 }
329
330 if (true_zero_case || false_zero_case || catch_all_case) {
331 OpRegImm(kOpCmp, rl_src.reg, 0);
332 }
333
334 if (catch_all_case) {
335 OpRegImm(kOpMov, rl_result.reg, false_val);
336 }
337
338 if (true_zero_case || false_zero_case || catch_all_case) {
339 ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
340 int immediateForTemp = true_zero_case ? false_val : true_val;
341 RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
342 OpRegImm(kOpMov, temp1_reg, immediateForTemp);
343
344 OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);
345
346 FreeTemp(temp1_reg);
347 }
348 }
349 } else {
350 rl_src = LoadValue(rl_src, src_reg_class);
351 RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
352 RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
353 rl_true = LoadValue(rl_true, result_reg_class);
354 rl_false = LoadValue(rl_false, result_reg_class);
355 rl_result = EvalLoc(rl_dest, result_reg_class, true);
356
357 /*
358 * For ccode == kCondEq:
359 *
360 * 1) When true case is already in place:
361 * cmp $0, src_reg
362 * cmovnz result_reg, false_reg
363 * 2) When false case is already in place:
364 * cmp $0, src_reg
365 * cmovz result_reg, true_reg
366 * 3) When neither case is in place:
367 * cmp $0, src_reg
368 * mov result_reg, false_reg
369 * cmovz result_reg, true_reg
370 */
371
372 // kMirOpSelect is generated just for conditional cases when comparison is done with zero.
373 OpRegImm(kOpCmp, rl_src.reg, 0);
374
375 if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {
376 OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg);
377 } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {
378 OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
379 } else {
380 OpRegCopy(rl_result.reg, rl_false.reg);
381 OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
382 }
383 }
384
385 StoreValue(rl_dest, rl_result);
386 }
387
388 void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
389 LIR* taken = &block_label_list_[bb->taken];
390 RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
391 RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
392 ConditionCode ccode = mir->meta.ccode;
393
394 if (rl_src1.is_const) {
395 std::swap(rl_src1, rl_src2);
396 ccode = FlipComparisonOrder(ccode);
397 }
398 if (rl_src2.is_const) {
399 // Do special compare/branch against simple const operand
400 int64_t val = mir_graph_->ConstantValueWide(rl_src2);
401 GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
402 return;
403 }
404
405 if (cu_->target64) {
406 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
407 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
408
409 OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
410 OpCondBranch(ccode, taken);
411 return;
412 }
413
414 // Prepare for explicit register usage
415 ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
416 RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
417 RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
418 LoadValueDirectWideFixed(rl_src1, r_tmp1);
419 LoadValueDirectWideFixed(rl_src2, r_tmp2);
420
421 // Swap operands and condition code to prevent use of zero flag.
422 if (ccode == kCondLe || ccode == kCondGt) {
423 // Compute (r3:r2) = (r3:r2) - (r1:r0)
424 OpRegReg(kOpSub, rs_r2, rs_r0); // r2 = r2 - r0
425 OpRegReg(kOpSbc, rs_r3, rs_r1); // r3 = r3 - r1 - CF
426 } else {
427 // Compute (r1:r0) = (r1:r0) - (r3:r2)
428 OpRegReg(kOpSub, rs_r0, rs_r2); // r0 = r0 - r2
429 OpRegReg(kOpSbc, rs_r1, rs_r3); // r1 = r1 - r3 - CF
430 }
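  // Note: after the sub/sbb pair only SF, OF and CF reliably reflect the full 64-bit difference;
  // ZF only covers the high word. That is why kCondLe/kCondGt (which need ZF) are rewritten below
  // as kCondGe/kCondLt on the swapped subtraction.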
431 switch (ccode) {
432 case kCondEq:
433 case kCondNe:
434 OpRegReg(kOpOr, rs_r0, rs_r1); // r0 = r0 | r1
435 break;
436 case kCondLe:
437 ccode = kCondGe;
438 break;
439 case kCondGt:
440 ccode = kCondLt;
441 break;
442 case kCondLt:
443 case kCondGe:
444 break;
445 default:
446 LOG(FATAL) << "Unexpected ccode: " << ccode;
447 }
448 OpCondBranch(ccode, taken);
449 }
450
451 void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
452 int64_t val, ConditionCode ccode) {
453 int32_t val_lo = Low32Bits(val);
454 int32_t val_hi = High32Bits(val);
455 LIR* taken = &block_label_list_[bb->taken];
456 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
457 bool is_equality_test = ccode == kCondEq || ccode == kCondNe;
458
459 if (cu_->target64) {
460 if (is_equality_test && val == 0) {
461 // We can simplify the comparison against 0 for == and !=.
462 NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
463 } else if (is_equality_test && val_hi == 0 && val_lo > 0) {
464 OpRegImm(kOpCmp, rl_src1.reg, val_lo);
465 } else {
466 RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
467 LoadConstantWide(tmp, val);
468 OpRegReg(kOpCmp, rl_src1.reg, tmp);
469 FreeTemp(tmp);
470 }
471 OpCondBranch(ccode, taken);
472 return;
473 }
474
475 if (is_equality_test && val != 0) {
476 rl_src1 = ForceTempWide(rl_src1);
477 }
478 RegStorage low_reg = rl_src1.reg.GetLow();
479 RegStorage high_reg = rl_src1.reg.GetHigh();
480
481 if (is_equality_test) {
482 // We can simplify the comparison against 0 for == and !=.
483 if (val == 0) {
484 if (IsTemp(low_reg)) {
485 OpRegReg(kOpOr, low_reg, high_reg);
486 // We have now changed it; ignore the old values.
487 Clobber(rl_src1.reg);
488 } else {
489 RegStorage t_reg = AllocTemp();
490 OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
491 FreeTemp(t_reg);
492 }
493 OpCondBranch(ccode, taken);
494 return;
495 }
496
497 // Need to compute the actual value for ==, !=.
498 OpRegImm(kOpSub, low_reg, val_lo);
499 NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
500 OpRegReg(kOpOr, high_reg, low_reg);
501 Clobber(rl_src1.reg);
502 } else if (ccode == kCondLe || ccode == kCondGt) {
503 // Swap operands and condition code to prevent use of zero flag.
504 RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
505 LoadConstantWide(tmp, val);
506 OpRegReg(kOpSub, tmp.GetLow(), low_reg);
507 OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
508 ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
509 FreeTemp(tmp);
510 } else {
511 // We can use a compare for the low word to set CF.
512 OpRegImm(kOpCmp, low_reg, val_lo);
513 if (IsTemp(high_reg)) {
514 NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
515 // We have now changed it; ignore the old values.
516 Clobber(rl_src1.reg);
517 } else {
518 // mov temp_reg, high_reg; sbb temp_reg, high_constant
519 RegStorage t_reg = AllocTemp();
520 OpRegCopy(t_reg, high_reg);
521 NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
522 FreeTemp(t_reg);
523 }
524 }
525
526 OpCondBranch(ccode, taken);
527 }
528
529 void X86Mir2Lir::CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long) {
530 // It does not make sense to calculate magic and shift for zero divisor.
531 DCHECK_NE(divisor, 0);
532
533 /* According to H. S. Warren's Hacker's Delight, Chapter 10, and
534 * T. Granlund, P. L. Montgomery's "Division by invariant integers using multiplication":
535 * The magic number M and shift S can be calculated in the following way:
536 * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
537 * where divisor(d) >=2.
538 * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
539 * where divisor(d) <= -2.
540 * Thus nc can be calculated like:
541 * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long
542 * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long
543 *
544 * So the shift p is the smallest p satisfying
545 * 2^p > nc * (d - 2^p % d), where d >= 2
546 * 2^p > nc * (d + 2^p % d), where d <= -2.
547 *
548 * The magic number M is calculated by
549 * M = (2^p + d - 2^p % d) / d, where d >= 2
550 * M = (2^p - d - 2^p % d) / d, where d <= -2.
551 *
552 * Notice that p is always greater than or equal to 32/64, so we just return p-32/p-64 as
553 * the shift amount S.
554 */
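  /*
   * Worked example (a sanity check, not taken from this file): for a 32-bit divisor d = 7 the
   * procedure below should produce M = 0x92492493 and S = 2. M is negative as a 32-bit value
   * even though d > 0, which is exactly the case GenDivRemLit later fixes up by adding the
   * numerator back in.
   */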
555
556 int64_t p = (is_long) ? 63 : 31;
557 const uint64_t exp = (is_long) ? 0x8000000000000000ULL : 0x80000000U;
558
559 // Initialize the computations.
560 uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
561 uint64_t tmp = exp + ((is_long) ? static_cast<uint64_t>(divisor) >> 63 :
562 static_cast<uint32_t>(divisor) >> 31);
563 uint64_t abs_nc = tmp - 1 - tmp % abs_d;
564 uint64_t quotient1 = exp / abs_nc;
565 uint64_t remainder1 = exp % abs_nc;
566 uint64_t quotient2 = exp / abs_d;
567 uint64_t remainder2 = exp % abs_d;
568
569 /*
570 * To avoid handling both positive and negative divisor, Hacker's Delight
571 * introduces a method to handle these 2 cases together to avoid duplication.
572 */
573 uint64_t delta;
574 do {
575 p++;
576 quotient1 = 2 * quotient1;
577 remainder1 = 2 * remainder1;
578 if (remainder1 >= abs_nc) {
579 quotient1++;
580 remainder1 = remainder1 - abs_nc;
581 }
582 quotient2 = 2 * quotient2;
583 remainder2 = 2 * remainder2;
584 if (remainder2 >= abs_d) {
585 quotient2++;
586 remainder2 = remainder2 - abs_d;
587 }
588 delta = abs_d - remainder2;
589 } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));
590
591 magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
592
593 if (!is_long) {
594 magic = static_cast<int>(magic);
595 }
596
597 shift = (is_long) ? p - 64 : p - 32;
598 }
599
600 RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) {
601 UNUSED(rl_dest, reg_lo, lit, is_div);
602 LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
603 UNREACHABLE();
604 }
605
606 RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
607 int imm, bool is_div) {
608 // Use a multiply (and fixup) to perform an int div/rem by a constant.
609 RegLocation rl_result;
610
611 if (imm == 1) {
612 rl_result = EvalLoc(rl_dest, kCoreReg, true);
613 if (is_div) {
614 // x / 1 == x.
615 LoadValueDirectFixed(rl_src, rl_result.reg);
616 } else {
617 // x % 1 == 0.
618 LoadConstantNoClobber(rl_result.reg, 0);
619 }
620 } else if (imm == -1) { // handle 0x80000000 / -1 special case.
621 rl_result = EvalLoc(rl_dest, kCoreReg, true);
622 if (is_div) {
623 LoadValueDirectFixed(rl_src, rl_result.reg);
624
625 // Check if numerator is 0
626 OpRegImm(kOpCmp, rl_result.reg, 0);
627 LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
628
629 // handle 0x80000000 / -1
630 OpRegImm(kOpCmp, rl_result.reg, 0x80000000);
631 LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
632
633 // for x != MIN_INT, x / -1 == -x.
634 NewLIR1(kX86Neg32R, rl_result.reg.GetReg());
635
636 // EAX already contains the right value (0x80000000).
637 minint_branch->target = NewLIR0(kPseudoTargetLabel);
638 branch->target = NewLIR0(kPseudoTargetLabel);
639 } else {
640 // x % -1 == 0.
641 LoadConstantNoClobber(rl_result.reg, 0);
642 }
643 } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
644 // Division using shifting.
645 rl_src = LoadValue(rl_src, kCoreReg);
646 rl_result = EvalLoc(rl_dest, kCoreReg, true);
647 if (IsSameReg(rl_result.reg, rl_src.reg)) {
648 RegStorage rs_temp = AllocTypedTemp(false, kCoreReg);
649 rl_result.reg.SetReg(rs_temp.GetReg());
650 }
651
652 // Check if numerator is 0
653 OpRegImm(kOpCmp, rl_src.reg, 0);
654 LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
655 LoadConstantNoClobber(rl_result.reg, 0);
656 LIR* done = NewLIR1(kX86Jmp8, 0);
657 branch->target = NewLIR0(kPseudoTargetLabel);
658
659 NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1);
660 NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
661 OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
662 int shift_amount = CTZ(imm);
663 OpRegImm(kOpAsr, rl_result.reg, shift_amount);
664 if (imm < 0) {
665 OpReg(kOpNeg, rl_result.reg);
666 }
667 done->target = NewLIR0(kPseudoTargetLabel);
668 } else {
669 CHECK(imm <= -2 || imm >= 2);
670
671 // Use H. S. Warren's Hacker's Delight, Chapter 10, and
672 // T. Granlund, P. L. Montgomery's "Division by invariant integers using multiplication".
673 int64_t magic;
674 int shift;
675 CalculateMagicAndShift((int64_t)imm, magic, shift, false /* is_long */);
676
677 /*
678 * For imm >= 2,
679 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
680 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0.
681 * For imm <= -2,
682 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0
683 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
684 * We implement this algorithm in the following way:
685 * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX
686 * 2. if imm > 0 and magic < 0, add numerator to EDX
687 * if imm < 0 and magic > 0, sub numerator from EDX
688 * 3. if S !=0, SAR S bits for EDX
689 * 4. add 1 to EDX if EDX < 0
690 * 5. Thus, EDX is the quotient
691 */
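    // A rough C-level sketch of steps 1-5 above (illustrative only, 32-bit operands assumed):
    //   int64_t prod = (int64_t) magic * n;             // imul: EDX:EAX = magic * n
    //   int32_t hi = (int32_t) (prod >> 32);            // high half ends up in EDX
    //   if (imm > 0 && magic < 0) hi += n;              // step 2, first case
    //   if (imm < 0 && magic > 0) hi -= n;              // step 2, second case
    //   hi >>= shift;                                   // step 3: sar
    //   int32_t quotient = hi + ((uint32_t) hi >> 31);  // step 4: add 1 if hi < 0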
692
693 FlushReg(rs_r0);
694 Clobber(rs_r0);
695 LockTemp(rs_r0);
696 FlushReg(rs_r2);
697 Clobber(rs_r2);
698 LockTemp(rs_r2);
699
700 // Assume that the result will be in EDX for divide, and EAX for remainder.
701 rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, is_div ? rs_r2 : rs_r0,
702 INVALID_SREG, INVALID_SREG};
703
704 // We need the value at least twice. Load into a temp.
705 rl_src = LoadValue(rl_src, kCoreReg);
706 RegStorage numerator_reg = rl_src.reg;
707
708 // Check if numerator is 0.
709 OpRegImm(kOpCmp, numerator_reg, 0);
710 LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
711 // Return result 0 if numerator was 0.
712 LoadConstantNoClobber(rl_result.reg, 0);
713 LIR* done = NewLIR1(kX86Jmp8, 0);
714 branch->target = NewLIR0(kPseudoTargetLabel);
715
716 // EAX = magic.
717 LoadConstant(rs_r0, magic);
718
719 // EDX:EAX = magic * numerator.
720 NewLIR1(kX86Imul32DaR, numerator_reg.GetReg());
721
722 if (imm > 0 && magic < 0) {
723 // Add numerator to EDX.
724 DCHECK(numerator_reg.Valid());
725 NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg());
726 } else if (imm < 0 && magic > 0) {
727 DCHECK(numerator_reg.Valid());
728 NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg());
729 }
730
731 // Do we need the shift?
732 if (shift != 0) {
733 // Shift EDX by 'shift' bits.
734 NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift);
735 }
736
737 // Add 1 to EDX if EDX < 0.
738
739 // Move EDX to EAX.
740 OpRegCopy(rs_r0, rs_r2);
741
742 // Move sign bit to bit 0, zeroing the rest.
743 NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31);
744
745 // EDX = EDX + EAX.
746 NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg());
747
748 // Quotient is in EDX.
749 if (!is_div) {
750 // We need to compute the remainder.
751 // Remainder is numerator - (quotient * imm).
752 DCHECK(numerator_reg.Valid());
753 OpRegCopy(rs_r0, numerator_reg);
754
755 // EDX = quotient * imm.
756 OpRegRegImm(kOpMul, rs_r2, rs_r2, imm);
757
758 // EAX -= EDX.
759 NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg());
760
761 // For this case, return the result in EAX.
762 }
763 done->target = NewLIR0(kPseudoTargetLabel);
764 }
765
766 return rl_result;
767 }
768
769 RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
770 bool is_div) {
771 UNUSED(rl_dest, reg_lo, reg_hi, is_div);
772 LOG(FATAL) << "Unexpected use of GenDivRem for x86";
773 UNREACHABLE();
774 }
775
776 RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
777 RegLocation rl_src2, bool is_div, int flags) {
778 UNUSED(rl_dest);
779 // We have to use fixed registers, so flush all the temps.
780
781 // Prepare for explicit register usage.
782 ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
783
784 // Load LHS into EAX.
785 LoadValueDirectFixed(rl_src1, rs_r0);
786
787 // Load RHS into ECX.
788 LoadValueDirectFixed(rl_src2, rs_r1);
789
790 // Copy LHS sign bit into EDX.
791 NewLIR0(kx86Cdq32Da);
792
793 if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
794 // Handle division by zero case.
795 GenDivZeroCheck(rs_r1);
796 }
797
798 // Check if numerator is 0
799 OpRegImm(kOpCmp, rs_r0, 0);
800 LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
801
802 // Have to catch 0x80000000/-1 case, or we will get an exception!
803 OpRegImm(kOpCmp, rs_r1, -1);
804 LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
805
806 // RHS is -1.
807 OpRegImm(kOpCmp, rs_r0, 0x80000000);
808 LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
809
810 branch->target = NewLIR0(kPseudoTargetLabel);
811
812 // In 0x80000000/-1 case.
813 if (!is_div) {
814 // For DIV, EAX already holds the right value. For REM, we need EDX to be 0.
815 LoadConstantNoClobber(rs_r2, 0);
816 }
817 LIR* done = NewLIR1(kX86Jmp8, 0);
818
819 // Expected case.
820 minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
821 minint_branch->target = minus_one_branch->target;
822 NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg());
823 done->target = NewLIR0(kPseudoTargetLabel);
824
825 // Result is in EAX for div and EDX for rem.
826 RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
827 if (!is_div) {
828 rl_result.reg.SetReg(r2);
829 }
830 return rl_result;
831 }
832
833 static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) {
834 return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num);
835 }
836
837 bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
838 DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
839
840 if (is_long && !cu_->target64) {
841 /*
842 * We want to implement the following algorithm
843 * mov eax, low part of arg1
844 * mov edx, high part of arg1
845 * mov ebx, low part of arg2
846 * mov ecx, high part of arg2
847 * mov edi, eax
848 * sub edi, ebx
849 * mov edi, edx
850 * sbb edi, ecx
851 * is_min ? "cmovgel eax, ebx" : "cmovll eax, ebx"
852 * is_min ? "cmovgel edx, ecx" : "cmovll edx, ecx"
853 *
854 * The algorithm above needs 5 registers: a pair for the first operand
855 * (which later will be used as result), a pair for the second operand
856 * and a temp register (e.g. 'edi') for intermediate calculations.
857 * Ideally we have 6 GP caller-save registers in 32-bit mode. They are:
858 * 'eax', 'ebx', 'ecx', 'edx', 'esi' and 'edi'. So there should be
859 * always enough registers to operate on. Practically, there is a pair
860 * of registers 'edi' and 'esi' which holds promoted values and
861 * sometimes should be treated as 'callee save'. If one of the operands
862 * is in the promoted registers then we have enough register to
863 * operate on. Otherwise there is lack of resources and we have to
864 * save 'edi' before calculations and restore after.
865 */
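    // In C terms the sequence above computes roughly (illustrative only):
    //   result = arg1;
    //   if (is_min ? (arg1 >= arg2) : (arg1 < arg2))  // cmovge / cmovl on the flags of arg1 - arg2
    //     result = arg2;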
866
867 RegLocation rl_src1 = info->args[0];
868 RegLocation rl_src2 = info->args[2];
869 RegLocation rl_dest = InlineTargetWide(info);
870
871 if (rl_dest.s_reg_low == INVALID_SREG) {
872 // Result is unused, the code is dead. Inlining successful, no code generated.
873 return true;
874 }
875
876 if (PartiallyIntersects(rl_src1, rl_dest) &&
877 PartiallyIntersects(rl_src2, rl_dest)) {
878 // A special case which we don't want to handle.
879 // This is when src1 is mapped on v0 and v1,
880 // src2 is mapped on v2, v3,
881 // result is mapped on v1, v2
882 return false;
883 }
884
885
886 /*
887 * If the result register is the same as the second element, then we
888 * need to be careful. The reason is that the first copy will
889 * inadvertently clobber the second element with the first one thus
890 * yielding the wrong result. Thus we do a swap in that case.
891 */
892 if (Intersects(rl_src2, rl_dest)) {
893 std::swap(rl_src1, rl_src2);
894 }
895
896 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
897 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
898
899 // Pick the first integer as min/max.
900 OpRegCopyWide(rl_result.reg, rl_src1.reg);
901
902 /*
903 * If the integers are both in the same register, then there is
904 * nothing else to do because they are equal and we have already
905 * moved one into the result.
906 */
907 if (mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
908 mir_graph_->SRegToVReg(rl_src2.s_reg_low)) {
909 StoreValueWide(rl_dest, rl_result);
910 return true;
911 }
912
913 // Free registers to make some room for the second operand.
914 // But don't try to free part of a source which intersects
915 // part of result or promoted registers.
916
917 if (IsTemp(rl_src1.reg.GetLow()) &&
918 (rl_src1.reg.GetLowReg() != rl_result.reg.GetHighReg()) &&
919 (rl_src1.reg.GetLowReg() != rl_result.reg.GetLowReg())) {
920 // Is low part temporary and doesn't intersect any parts of result?
921 FreeTemp(rl_src1.reg.GetLow());
922 }
923
924 if (IsTemp(rl_src1.reg.GetHigh()) &&
925 (rl_src1.reg.GetHighReg() != rl_result.reg.GetLowReg()) &&
926 (rl_src1.reg.GetHighReg() != rl_result.reg.GetHighReg())) {
927 // Is high part temporary and doesn't intersect any parts of result?
928 FreeTemp(rl_src1.reg.GetHigh());
929 }
930
931 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
932
933 // Do we have a free register for intermediate calculations?
934 RegStorage tmp = AllocTemp(false);
935 const int kRegSize = cu_->target64 ? 8 : 4;
936 if (tmp == RegStorage::InvalidReg()) {
937 /*
938 * No, will use 'edi'.
939 *
940 * As mentioned above we have 4 temporary and 2 promotable
941 * caller-save registers. Therefore, we assume that a free
942 * register can be allocated only if 'esi' and 'edi' are
943 * already used as operands. If number of promotable registers
944 * increases from 2 to 4 then our assumption fails and operand
945 * data is corrupted.
946 * Let's DCHECK it.
947 */
948 DCHECK(IsTemp(rl_src2.reg.GetLow()) &&
949 IsTemp(rl_src2.reg.GetHigh()) &&
950 IsTemp(rl_result.reg.GetLow()) &&
951 IsTemp(rl_result.reg.GetHigh()));
952 tmp = rs_rDI;
953 NewLIR1(kX86Push32R, tmp.GetReg());
954 cfi_.AdjustCFAOffset(kRegSize);
955 // Record cfi only if it is not already spilled.
956 if (!CoreSpillMaskContains(tmp.GetReg())) {
957 cfi_.RelOffset(DwarfCoreReg(cu_->target64, tmp.GetReg()), 0);
958 }
959 }
960
961 // Now we are ready to do calculations.
962 OpRegReg(kOpMov, tmp, rl_result.reg.GetLow());
963 OpRegReg(kOpSub, tmp, rl_src2.reg.GetLow());
964 OpRegReg(kOpMov, tmp, rl_result.reg.GetHigh());
965 OpRegReg(kOpSbc, tmp, rl_src2.reg.GetHigh());
966
967 // Let's put pop 'edi' here to break a bit the dependency chain.
968 if (tmp == rs_rDI) {
969 NewLIR1(kX86Pop32R, tmp.GetReg());
970 cfi_.AdjustCFAOffset(-kRegSize);
971 if (!CoreSpillMaskContains(tmp.GetReg())) {
972 cfi_.Restore(DwarfCoreReg(cu_->target64, tmp.GetReg()));
973 }
974 } else {
975 FreeTemp(tmp);
976 }
977
978 // Conditionally move the other integer into the destination register.
979 ConditionCode cc = is_min ? kCondGe : kCondLt;
980 OpCondRegReg(kOpCmov, cc, rl_result.reg.GetLow(), rl_src2.reg.GetLow());
981 OpCondRegReg(kOpCmov, cc, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh());
982 FreeTemp(rl_src2.reg);
983 StoreValueWide(rl_dest, rl_result);
984 return true;
985 }
986
987 // Get the two arguments to the invoke and place them in GP registers.
988 RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
989 if (rl_dest.s_reg_low == INVALID_SREG) {
990 // Result is unused, the code is dead. Inlining successful, no code generated.
991 return true;
992 }
993 RegLocation rl_src1 = info->args[0];
994 RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
995 rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
996 rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
997
998 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
999
1000 /*
1001 * If the result register is the same as the second element, then we need to be careful.
1002 * The reason is that the first copy will inadvertently clobber the second element with
1003 * the first one thus yielding the wrong result. Thus we do a swap in that case.
1004 */
1005 if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
1006 std::swap(rl_src1, rl_src2);
1007 }
1008
1009 // Pick the first integer as min/max.
1010 OpRegCopy(rl_result.reg, rl_src1.reg);
1011
1012 // If the integers are both in the same register, then there is nothing else to do
1013 // because they are equal and we have already moved one into the result.
1014 if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) {
1015 // It is possible we didn't pick correctly so do the actual comparison now.
1016 OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
1017
1018 // Conditionally move the other integer into the destination register.
1019 ConditionCode condition_code = is_min ? kCondGt : kCondLt;
1020 OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg);
1021 }
1022
1023 if (is_long) {
1024 StoreValueWide(rl_dest, rl_result);
1025 } else {
1026 StoreValue(rl_dest, rl_result);
1027 }
1028 return true;
1029 }
1030
1031 bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
1032 RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
1033 if (rl_dest.s_reg_low == INVALID_SREG) {
1034 // Result is unused, the code is dead. Inlining successful, no code generated.
1035 return true;
1036 }
1037 RegLocation rl_src_address = info->args[0]; // long address
1038 RegLocation rl_address;
1039 if (!cu_->target64) {
1040 rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0]
1041 rl_address = LoadValue(rl_src_address, kCoreReg);
1042 } else {
1043 rl_address = LoadValueWide(rl_src_address, kCoreReg);
1044 }
1045 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1046 // Unaligned access is allowed on x86.
1047 LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
1048 if (size == k64) {
1049 StoreValueWide(rl_dest, rl_result);
1050 } else {
1051 DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
1052 StoreValue(rl_dest, rl_result);
1053 }
1054 return true;
1055 }
1056
1057 bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
1058 RegLocation rl_src_address = info->args[0]; // long address
1059 RegLocation rl_address;
1060 if (!cu_->target64) {
1061 rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0]
1062 rl_address = LoadValue(rl_src_address, kCoreReg);
1063 } else {
1064 rl_address = LoadValueWide(rl_src_address, kCoreReg);
1065 }
1066 RegLocation rl_src_value = info->args[2]; // [size] value
1067 RegLocation rl_value;
1068 if (size == k64) {
1069 // Unaligned access is allowed on x86.
1070 rl_value = LoadValueWide(rl_src_value, kCoreReg);
1071 } else {
1072 DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
1073 // In 32-bit mode, only the EAX..EDX registers can be used with Mov8MR.
1074 if (!cu_->target64 && size == kSignedByte) {
1075 rl_src_value = UpdateLocTyped(rl_src_value);
1076 if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
1077 RegStorage temp = AllocateByteRegister();
1078 OpRegCopy(temp, rl_src_value.reg);
1079 rl_value.reg = temp;
1080 } else {
1081 rl_value = LoadValue(rl_src_value, kCoreReg);
1082 }
1083 } else {
1084 rl_value = LoadValue(rl_src_value, kCoreReg);
1085 }
1086 }
1087 StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
1088 return true;
1089 }
1090
1091 void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
1092 NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset);
1093 }
1094
1095 void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
1096 DCHECK_EQ(kX86, cu_->instruction_set);
1097 NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
1098 }
1099
1100 void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
1101 DCHECK_EQ(kX86_64, cu_->instruction_set);
1102 NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
1103 }
1104
1105 static bool IsInReg(X86Mir2Lir *pMir2Lir, const RegLocation &rl, RegStorage reg) {
1106 return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home);
1107 }
1108
1109 bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
1110 DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
1111 // Unused - RegLocation rl_src_unsafe = info->args[0];
1112 RegLocation rl_src_obj = info->args[1]; // Object - known non-null
1113 RegLocation rl_src_offset = info->args[2]; // long low
1114 if (!cu_->target64) {
1115 rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3]
1116 }
1117 RegLocation rl_src_expected = info->args[4]; // int, long or Object
1118 // If is_long, high half is in info->args[5]
1119 RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object
1120 // If is_long, high half is in info->args[7]
1121 const int kRegSize = cu_->target64 ? 8 : 4;
1122
1123 if (is_long && cu_->target64) {
1124 // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX.
1125 FlushReg(rs_r0q);
1126 Clobber(rs_r0q);
1127 LockTemp(rs_r0q);
1128
1129 RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
1130 RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
1131 RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
1132 LoadValueDirectWide(rl_src_expected, rs_r0q);
1133 NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
1134 rl_new_value.reg.GetReg());
1135
1136 // After a store we need to insert barrier in case of potential load. Since the
1137 // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
1138 GenMemBarrier(kAnyAny);
1139
1140 FreeTemp(rs_r0q);
1141 } else if (is_long) {
1142 // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
1143 FlushAllRegs();
1144 LockCallTemps();
1145 RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
1146 RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
1147 LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
1148 LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
1149 // FIXME: needs 64-bit update.
1150 const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
1151 const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
1152 DCHECK(!obj_in_si || !obj_in_di);
1153 const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
1154 const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
1155 DCHECK(!off_in_si || !off_in_di);
1156 // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
1157 RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
1158 RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
1159 bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
1160 bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
1161 if (push_di) {
1162 NewLIR1(kX86Push32R, rs_rDI.GetReg());
1163 MarkTemp(rs_rDI);
1164 LockTemp(rs_rDI);
1165 cfi_.AdjustCFAOffset(kRegSize);
1166 // Record cfi only if it is not already spilled.
1167 if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
1168 cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0);
1169 }
1170 }
1171 if (push_si) {
1172 NewLIR1(kX86Push32R, rs_rSI.GetReg());
1173 MarkTemp(rs_rSI);
1174 LockTemp(rs_rSI);
1175 cfi_.AdjustCFAOffset(kRegSize);
1176 // Record cfi only if it is not already spilled.
1177 if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
1178 cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetReg()), 0);
1179 }
1180 }
1181 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1182 const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
1183 const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
1184 if (!obj_in_si && !obj_in_di) {
1185 LoadWordDisp(rs_rSP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
1186 // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
1187 DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
1188 int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
1189 AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
1190 }
1191 if (!off_in_si && !off_in_di) {
1192 LoadWordDisp(rs_rSP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
1193 // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
1194 DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
1195 int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
1196 AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
1197 }
1198 NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);
1199
1200 // After a store we need to insert barrier to prevent reordering with either
1201 // earlier or later memory accesses. Since
1202 // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
1203 // and it will be associated with the cmpxchg instruction, preventing both.
1204 GenMemBarrier(kAnyAny);
1205
1206 if (push_si) {
1207 FreeTemp(rs_rSI);
1208 UnmarkTemp(rs_rSI);
1209 NewLIR1(kX86Pop32R, rs_rSI.GetReg());
1210 cfi_.AdjustCFAOffset(-kRegSize);
1211 if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
1212 cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()));
1213 }
1214 }
1215 if (push_di) {
1216 FreeTemp(rs_rDI);
1217 UnmarkTemp(rs_rDI);
1218 NewLIR1(kX86Pop32R, rs_rDI.GetReg());
1219 cfi_.AdjustCFAOffset(-kRegSize);
1220 if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
1221 cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()));
1222 }
1223 }
1224 FreeCallTemps();
1225 } else {
1226 // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX.
1227 FlushReg(rs_r0);
1228 Clobber(rs_r0);
1229 LockTemp(rs_r0);
1230
1231 RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
1232 RegLocation rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
1233
1234 if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
1235 // Mark card for object assuming new value is stored.
1236 FreeTemp(rs_r0); // Temporarily release EAX for MarkGCCard().
1237 MarkGCCard(0, rl_new_value.reg, rl_object.reg);
1238 LockTemp(rs_r0);
1239 }
1240
1241 RegLocation rl_offset;
1242 if (cu_->target64) {
1243 rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
1244 } else {
1245 rl_offset = LoadValue(rl_src_offset, kCoreReg);
1246 }
1247 LoadValueDirect(rl_src_expected, rs_r0);
1248 NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
1249 rl_new_value.reg.GetReg());
1250
1251 // After a store we need to insert barrier to prevent reordering with either
1252 // earlier or later memory accesses. Since
1253 // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
1254 // and it will be associated with the cmpxchg instruction, preventing both.
1255 GenMemBarrier(kAnyAny);
1256
1257 FreeTemp(rs_r0);
1258 }
1259
1260 // Convert ZF to boolean
1261 RegLocation rl_dest = InlineTarget(info); // boolean place for result
1262 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1263 RegStorage result_reg = rl_result.reg;
1264
1265 // For 32-bit, SETcc only works with EAX..EDX.
1266 if (!IsByteRegister(result_reg)) {
1267 result_reg = AllocateByteRegister();
1268 }
1269 NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
1270 NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
1271 if (IsTemp(result_reg)) {
1272 FreeTemp(result_reg);
1273 }
1274 StoreValue(rl_dest, rl_result);
1275 return true;
1276 }
1277
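// A short sketch of what SwapBits below computes (descriptive comment only): one round of the
// classic bit-reversal step with mask = value,
//   x = ((x >> shift) & mask) | ((x & mask) << shift);
// e.g. shift = 1 with mask = 0x55555555 swaps every pair of adjacent bits.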
1278 void X86Mir2Lir::SwapBits(RegStorage result_reg, int shift, int32_t value) {
1279 RegStorage r_temp = AllocTemp();
1280 OpRegCopy(r_temp, result_reg);
1281 OpRegImm(kOpLsr, result_reg, shift);
1282 OpRegImm(kOpAnd, r_temp, value);
1283 OpRegImm(kOpAnd, result_reg, value);
1284 OpRegImm(kOpLsl, r_temp, shift);
1285 OpRegReg(kOpOr, result_reg, r_temp);
1286 FreeTemp(r_temp);
1287 }
1288
1289 void X86Mir2Lir::SwapBits64(RegStorage result_reg, int shift, int64_t value) {
1290 RegStorage r_temp = AllocTempWide();
1291 OpRegCopy(r_temp, result_reg);
1292 OpRegImm(kOpLsr, result_reg, shift);
1293 RegStorage r_value = AllocTempWide();
1294 LoadConstantWide(r_value, value);
1295 OpRegReg(kOpAnd, r_temp, r_value);
1296 OpRegReg(kOpAnd, result_reg, r_value);
1297 OpRegImm(kOpLsl, r_temp, shift);
1298 OpRegReg(kOpOr, result_reg, r_temp);
1299 FreeTemp(r_temp);
1300 FreeTemp(r_value);
1301 }
1302
1303 bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
1304 RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
1305 if (rl_dest.s_reg_low == INVALID_SREG) {
1306 // Result is unused, the code is dead. Inlining successful, no code generated.
1307 return true;
1308 }
1309 RegLocation rl_src_i = info->args[0];
1310 RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg)
1311 : LoadValue(rl_src_i, kCoreReg);
1312 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1313 if (size == k64) {
1314 if (cu_->instruction_set == kX86_64) {
1315 /* Use one bswap instruction to reverse byte order first and then use 3 rounds of
1316 swapping bits to reverse the bits of a long number x. Using bswap saves instructions
1317 compared to the generic luni implementation, which needs 5 rounds of bit swapping.
1318 x = bswap x
1319 x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
1320 x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
1321 x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
1322 */
1323 OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
1324 SwapBits64(rl_result.reg, 1, 0x5555555555555555);
1325 SwapBits64(rl_result.reg, 2, 0x3333333333333333);
1326 SwapBits64(rl_result.reg, 4, 0x0f0f0f0f0f0f0f0f);
1327 StoreValueWide(rl_dest, rl_result);
1328 return true;
1329 }
1330 RegStorage r_i_low = rl_i.reg.GetLow();
1331 if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
1332 // The first REV will clobber rl_result.reg.GetLowReg(), so save the value in a temp for the
1333 // second REV.
1334 r_i_low = AllocTemp();
1335 OpRegCopy(r_i_low, rl_i.reg);
1336 }
1337 OpRegReg(kOpRev, rl_result.reg.GetLow(), rl_i.reg.GetHigh());
1338 OpRegReg(kOpRev, rl_result.reg.GetHigh(), r_i_low);
1339 // Free up at least one input register if it was a temp. Otherwise we may be in the bad
1340 // situation of not having a temp available for SwapBits. Make sure it's not overlapping
1341 // with the output, though.
1342 if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
1343 // There's definitely a free temp after this.
1344 FreeTemp(r_i_low);
1345 } else {
1346 // We opportunistically release both here. That saves duplication of the register state
1347 // lookup (to see if it's actually a temp).
1348 if (rl_i.reg.GetLowReg() != rl_result.reg.GetHighReg()) {
1349 FreeTemp(rl_i.reg.GetLow());
1350 }
1351 if (rl_i.reg.GetHighReg() != rl_result.reg.GetLowReg() &&
1352 rl_i.reg.GetHighReg() != rl_result.reg.GetHighReg()) {
1353 FreeTemp(rl_i.reg.GetHigh());
1354 }
1355 }
1356
1357 SwapBits(rl_result.reg.GetLow(), 1, 0x55555555);
1358 SwapBits(rl_result.reg.GetLow(), 2, 0x33333333);
1359 SwapBits(rl_result.reg.GetLow(), 4, 0x0f0f0f0f);
1360 SwapBits(rl_result.reg.GetHigh(), 1, 0x55555555);
1361 SwapBits(rl_result.reg.GetHigh(), 2, 0x33333333);
1362 SwapBits(rl_result.reg.GetHigh(), 4, 0x0f0f0f0f);
1363 StoreValueWide(rl_dest, rl_result);
1364 } else {
1365 OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
1366 SwapBits(rl_result.reg, 1, 0x55555555);
1367 SwapBits(rl_result.reg, 2, 0x33333333);
1368 SwapBits(rl_result.reg, 4, 0x0f0f0f0f);
1369 StoreValue(rl_dest, rl_result);
1370 }
1371 return true;
1372 }
1373
1374 void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
1375 if (cu_->target64) {
1376 // We can do this directly using RIP addressing.
1377 ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
1378 LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, kDummy32BitOffset);
1379 res->target = target;
1380 res->flags.fixup = kFixupLoad;
1381 return;
1382 }
1383
1384 // Get the PC to a register and get the anchor.
1385 LIR* anchor;
1386 RegStorage r_pc = GetPcAndAnchor(&anchor);
1387
1388 // Load the proper value from the literal area.
1389 ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
1390 LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
1391 res->operands[4] = WrapPointer(anchor);
1392 res->target = target;
1393 res->flags.fixup = kFixupLoad;
1394 }
1395
1396 bool X86Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
1397 return dex_cache_arrays_layout_.Valid();
1398 }
1399
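// OpLoadPc below materializes the PC on 32-bit x86 with the usual call/pop idiom (sketch of the
// emitted sequence, illustrative only):
//   call +0        ; pushes the address of the next instruction
//   pop  r_dest    ; r_dest now holds the address of this pop, used as the PC-relative anchor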
1400 LIR* X86Mir2Lir::OpLoadPc(RegStorage r_dest) {
1401 DCHECK(!cu_->target64);
1402 LIR* call = NewLIR1(kX86CallI, 0);
1403 call->flags.fixup = kFixupLabel;
1404 LIR* pop = NewLIR1(kX86Pop32R, r_dest.GetReg());
1405 pop->flags.fixup = kFixupLabel;
1406 DCHECK(NEXT_LIR(call) == pop);
1407 return call;
1408 }
1409
1410 RegStorage X86Mir2Lir::GetPcAndAnchor(LIR** anchor, RegStorage r_tmp) {
1411 if (pc_rel_base_reg_.Valid()) {
1412 DCHECK(setup_pc_rel_base_reg_ != nullptr);
1413 *anchor = NEXT_LIR(setup_pc_rel_base_reg_);
1414 DCHECK(*anchor != nullptr);
1415 DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
1416 pc_rel_base_reg_used_ = true;
1417 return pc_rel_base_reg_;
1418 } else {
1419 RegStorage r_pc = r_tmp.Valid() ? r_tmp : AllocTempRef();
1420 LIR* load_pc = OpLoadPc(r_pc);
1421 *anchor = NEXT_LIR(load_pc);
1422 DCHECK(*anchor != nullptr);
1423 DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
1424 return r_pc;
1425 }
1426 }
1427
1428 void X86Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest,
1429 bool wide) {
1430 if (cu_->target64) {
1431 LIR* mov = NewLIR3(wide ? kX86Mov64RM : kX86Mov32RM, r_dest.GetReg(), kRIPReg,
1432 kDummy32BitOffset);
1433 mov->flags.fixup = kFixupLabel;
1434 mov->operands[3] = WrapPointer(dex_file);
1435 mov->operands[4] = offset;
1436 mov->target = mov; // Used for pc_insn_offset (not used by x86-64 relative patcher).
1437 dex_cache_access_insns_.push_back(mov);
1438 } else {
1439 CHECK(!wide) << "Unsupported";
1440 // Get the PC to a register and get the anchor. Use r_dest for the temp if needed.
1441 LIR* anchor;
1442 RegStorage r_pc = GetPcAndAnchor(&anchor, r_dest);
1443 LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
1444 mov->flags.fixup = kFixupLabel;
1445 mov->operands[3] = WrapPointer(dex_file);
1446 mov->operands[4] = offset;
1447 mov->target = anchor; // Used for pc_insn_offset.
1448 dex_cache_access_insns_.push_back(mov);
1449 }
1450 }
1451
1452 LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
1453 UNUSED(r_base, count);
1454 LOG(FATAL) << "Unexpected use of OpVldm for x86";
1455 UNREACHABLE();
1456 }
1457
1458 LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) {
1459 UNUSED(r_base, count);
1460 LOG(FATAL) << "Unexpected use of OpVstm for x86";
1461 UNREACHABLE();
1462 }
1463
1464 void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
1465 RegLocation rl_result, int lit,
1466 int first_bit, int second_bit) {
1467 UNUSED(lit);
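  // lit has exactly two bits set, lit == (1 << first_bit) + (1 << second_bit), so
  // src * lit == (src + (src << (second_bit - first_bit))) << first_bit.
  // E.g. lit = 10 (bits 1 and 3): t = src << 2; (src + t) << 1 == src * 10.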
1468 RegStorage t_reg = AllocTemp();
1469 OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
1470 OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
1471 FreeTemp(t_reg);
1472 if (first_bit != 0) {
1473 OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
1474 }
1475 }
1476
1477 void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
1478 if (cu_->target64) {
1479 DCHECK(reg.Is64Bit());
1480
1481 NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
1482 } else {
1483 DCHECK(reg.IsPair());
1484
1485 // We are not supposed to clobber the incoming storage, so allocate a temporary.
1486 RegStorage t_reg = AllocTemp();
1487 // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
1488 OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
1489 // The temp is no longer needed so free it at this time.
1490 FreeTemp(t_reg);
1491 }
1492
1493 // In case of zero, throw ArithmeticException.
1494 GenDivZeroCheck(kCondEq);
1495 }
1496
1497 void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index,
1498 RegStorage array_base,
1499 int len_offset) {
1500 class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
1501 public:
1502 ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
1503 RegStorage index_in, RegStorage array_base_in, int32_t len_offset_in)
1504 : LIRSlowPath(m2l, branch_in),
1505 index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
1506 }
1507
1508 void Compile() OVERRIDE {
1509 m2l_->ResetRegPool();
1510 m2l_->ResetDefTracking();
1511 GenerateTargetLabel(kPseudoThrowTarget);
1512
1513 RegStorage new_index = index_;
1514 // Move index out of kArg1, either directly to kArg0, or to kArg2.
1515         // TODO: clean up to check by register type rather than by register number.
1516 if (index_ == m2l_->TargetReg(kArg1, kNotWide)) {
1517 if (array_base_ == m2l_->TargetReg(kArg0, kRef)) {
1518 m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_);
1519 new_index = m2l_->TargetReg(kArg2, kNotWide);
1520 } else {
1521 m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_);
1522 new_index = m2l_->TargetReg(kArg0, kNotWide);
1523 }
1524 }
1525 // Load array length to kArg1.
1526 X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
1527 x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
1528 x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, new_index,
1529 m2l_->TargetReg(kArg1, kNotWide), true);
1530 }
1531
1532 private:
1533 const RegStorage index_;
1534 const RegStorage array_base_;
1535 const int32_t len_offset_;
1536 };
1537
1538 OpRegMem(kOpCmp, index, array_base, len_offset);
1539 MarkPossibleNullPointerException(0);
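  // One unsigned compare covers both failure modes: a negative index becomes a
  // huge unsigned value, so branching on "index >= length (unsigned)" catches
  // both index < 0 and index >= length.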
1540 LIR* branch = OpCondBranch(kCondUge, nullptr);
1541 AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
1542 index, array_base, len_offset));
1543 }
1544
1545 void X86Mir2Lir::GenArrayBoundsCheck(int32_t index,
1546 RegStorage array_base,
1547 int32_t len_offset) {
1548 class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
1549 public:
1550 ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
1551 int32_t index_in, RegStorage array_base_in, int32_t len_offset_in)
1552 : LIRSlowPath(m2l, branch_in),
1553 index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
1554 }
1555
1556 void Compile() OVERRIDE {
1557 m2l_->ResetRegPool();
1558 m2l_->ResetDefTracking();
1559 GenerateTargetLabel(kPseudoThrowTarget);
1560
1561 // Load array length to kArg1.
1562 X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
1563 x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
1564 x86_m2l->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_);
1565 x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, m2l_->TargetReg(kArg0, kNotWide),
1566 m2l_->TargetReg(kArg1, kNotWide), true);
1567 }
1568
1569 private:
1570 const int32_t index_;
1571 const RegStorage array_base_;
1572 const int32_t len_offset_;
1573 };
1574
1575 NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
1576 MarkPossibleNullPointerException(0);
1577 LIR* branch = OpCondBranch(kCondLs, nullptr);
1578 AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
1579 index, array_base, len_offset));
1580 }
1581
1582 // Test suspend flag, return target of taken suspend branch
1583 LIR* X86Mir2Lir::OpTestSuspend(LIR* target) {
1584 if (cu_->target64) {
1585 OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0);
1586 } else {
1587 OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0);
1588 }
1589 return OpCondBranch((target == nullptr) ? kCondNe : kCondEq, target);
1590 }
1591
1592 // Decrement register and branch on condition
1593 LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
1594 OpRegImm(kOpSub, reg, 1);
1595 return OpCondBranch(c_code, target);
1596 }
1597
1598 bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
1599 RegLocation rl_src, RegLocation rl_dest, int lit) {
1600 UNUSED(dalvik_opcode, is_div, rl_src, rl_dest, lit);
1601   LOG(FATAL) << "Unexpected use of SmallLiteralDivRem in x86";
1602 UNREACHABLE();
1603 }
1604
1605 bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
1606 UNUSED(rl_src, rl_dest, lit);
1607 LOG(FATAL) << "Unexpected use of easyMultiply in x86";
1608 UNREACHABLE();
1609 }
1610
1611 LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
1612 UNUSED(cond, guide);
1613 LOG(FATAL) << "Unexpected use of OpIT in x86";
1614 UNREACHABLE();
1615 }
1616
1617 void X86Mir2Lir::OpEndIT(LIR* it) {
1618 UNUSED(it);
1619 LOG(FATAL) << "Unexpected use of OpEndIT in x86";
1620 UNREACHABLE();
1621 }
1622
1623 void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) {
1624 switch (val) {
1625 case 0:
1626 NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
1627 break;
1628 case 1:
1629 OpRegCopy(dest, src);
1630 break;
1631 default:
1632 OpRegRegImm(kOpMul, dest, src, val);
1633 break;
1634 }
1635 }
1636
1637 void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
1638 UNUSED(sreg);
1639 // All memory accesses below reference dalvik regs.
1640 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1641
1642 LIR *m;
1643 switch (val) {
1644 case 0:
1645 NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
1646 break;
1647 case 1: {
1648 const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
1649 LoadBaseDisp(rs_rSP, displacement, dest, k32, kNotVolatile);
1650 break;
1651 }
1652 default:
1653 m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
1654 rs_rX86_SP_32.GetReg(), displacement, val);
1655 AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
1656 break;
1657 }
1658 }
1659
1660 void X86Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
1661 RegLocation rl_src2, int flags) {
1662 if (!cu_->target64) {
1663     // Some 32-bit x86 ops fall back to the generic implementation.
1664 switch (opcode) {
1665 case Instruction::NOT_LONG:
1666 case Instruction::DIV_LONG:
1667 case Instruction::DIV_LONG_2ADDR:
1668 case Instruction::REM_LONG:
1669 case Instruction::REM_LONG_2ADDR:
1670 Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
1671 return;
1672
1673 default:
1674 // Everything else we can handle.
1675 break;
1676 }
1677 }
1678
1679 switch (opcode) {
1680 case Instruction::NOT_LONG:
1681 GenNotLong(rl_dest, rl_src2);
1682 return;
1683
1684 case Instruction::ADD_LONG:
1685 case Instruction::ADD_LONG_2ADDR:
1686 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1687 return;
1688
1689 case Instruction::SUB_LONG:
1690 case Instruction::SUB_LONG_2ADDR:
1691 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
1692 return;
1693
1694 case Instruction::MUL_LONG:
1695 case Instruction::MUL_LONG_2ADDR:
1696 GenMulLong(opcode, rl_dest, rl_src1, rl_src2, flags);
1697 return;
1698
1699 case Instruction::DIV_LONG:
1700 case Instruction::DIV_LONG_2ADDR:
1701 GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags);
1702 return;
1703
1704 case Instruction::REM_LONG:
1705 case Instruction::REM_LONG_2ADDR:
1706 GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags);
1707 return;
1708
1709 case Instruction::AND_LONG_2ADDR:
1710 case Instruction::AND_LONG:
1711 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1712 return;
1713
1714 case Instruction::OR_LONG:
1715 case Instruction::OR_LONG_2ADDR:
1716 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1717 return;
1718
1719 case Instruction::XOR_LONG:
1720 case Instruction::XOR_LONG_2ADDR:
1721 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1722 return;
1723
1724 case Instruction::NEG_LONG:
1725 GenNegLong(rl_dest, rl_src2);
1726 return;
1727
1728 default:
1729 LOG(FATAL) << "Invalid long arith op";
1730 return;
1731 }
1732 }
1733
1734 bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val, int flags) {
1735 // All memory accesses below reference dalvik regs.
1736 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1737
1738 if (val == 0) {
1739 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1740 if (cu_->target64) {
1741 OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
1742 } else {
1743 OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow());
1744 OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());
1745 }
1746 StoreValueWide(rl_dest, rl_result);
1747 return true;
1748 } else if (val == 1) {
1749 StoreValueWide(rl_dest, rl_src1);
1750 return true;
1751 } else if (val == 2) {
1752 GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1, flags);
1753 return true;
1754 } else if (IsPowerOfTwo(val)) {
1755 int shift_amount = CTZ(val);
1756 if (!PartiallyIntersects(rl_src1, rl_dest)) {
1757 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1758 RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1,
1759 shift_amount, flags);
1760 StoreValueWide(rl_dest, rl_result);
1761 return true;
1762 }
1763 }
1764
1765 // Okay, on 32b just bite the bullet and do it, still better than the general case.
1766 if (!cu_->target64) {
1767 int32_t val_lo = Low32Bits(val);
1768 int32_t val_hi = High32Bits(val);
1769 // Prepare for explicit register usage.
1770 ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
1771 rl_src1 = UpdateLocWideTyped(rl_src1);
1772 bool src1_in_reg = rl_src1.location == kLocPhysReg;
1773 int displacement = SRegOffset(rl_src1.s_reg_low);
1774
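    // Writing src1 = 2^32 * 1H + 1L and val = 2^32 * 2H + 2L, the low 64 bits of
    // the product are (1L * 2L) + 2^32 * (1H * 2L + 1L * 2H); the 1H * 2H term
    // only affects bits 64 and up, so three 32-bit multiplies suffice.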
1775 // ECX <- 1H * 2L
1776 // EAX <- 1L * 2H
1777 if (src1_in_reg) {
1778 GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo);
1779 GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi);
1780 } else {
1781 GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
1782 GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
1783 }
1784
1785 // ECX <- ECX + EAX (2H * 1L) + (1H * 2L)
1786 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
1787
1788 // EAX <- 2L
1789 LoadConstantNoClobber(rs_r0, val_lo);
1790
1791 // EDX:EAX <- 2L * 1L (double precision)
1792 if (src1_in_reg) {
1793 NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
1794 } else {
1795 LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET);
1796 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1797 true /* is_load */, true /* is_64bit */);
1798 }
1799
1800 // EDX <- EDX + ECX (add high words)
1801 NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
1802
1803 // Result is EDX:EAX
1804 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
1805 RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
1806 StoreValueWide(rl_dest, rl_result);
1807 return true;
1808 }
1809 return false;
1810 }
1811
1812 void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
1813 RegLocation rl_src2, int flags) {
1814 if (rl_src1.is_const) {
1815 std::swap(rl_src1, rl_src2);
1816 }
1817
1818 if (rl_src2.is_const) {
1819 if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2), flags)) {
1820 return;
1821 }
1822 }
1823
1824 // All memory accesses below reference dalvik regs.
1825 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1826
1827 if (cu_->target64) {
1828 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1829 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1830 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1831 if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
1832 rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
1833 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
1834 } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() &&
1835 rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
1836 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg());
1837 } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
1838 rl_result.reg.GetReg() != rl_src2.reg.GetReg()) {
1839 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
1840 } else {
1841 OpRegCopy(rl_result.reg, rl_src1.reg);
1842 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
1843 }
1844 StoreValueWide(rl_dest, rl_result);
1845 return;
1846 }
1847
1848 // Not multiplying by a constant. Do it the hard way
1849 // Check for V*V. We can eliminate a multiply in that case, as 2L*1H == 2H*1L.
1850 bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
1851 mir_graph_->SRegToVReg(rl_src2.s_reg_low);
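  // When squaring, the two cross terms 1H*2L and 2H*1L are identical, so one of
  // the three 32-bit multiplies is replaced by a doubling (the ADD ECX, ECX below).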
1852
1853 // Prepare for explicit register usage.
1854 ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
1855 rl_src1 = UpdateLocWideTyped(rl_src1);
1856 rl_src2 = UpdateLocWideTyped(rl_src2);
1857
1858 // At this point, the VRs are in their home locations.
1859 bool src1_in_reg = rl_src1.location == kLocPhysReg;
1860 bool src2_in_reg = rl_src2.location == kLocPhysReg;
1861 const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
1862
1863 // ECX <- 1H
1864 if (src1_in_reg) {
1865 NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
1866 } else {
1867 LoadBaseDisp(rs_rSP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32,
1868 kNotVolatile);
1869 }
1870
1871 if (is_square) {
1872 // Take advantage of the fact that the values are the same.
1873 // ECX <- ECX * 2L (1H * 2L)
1874 if (src2_in_reg) {
1875 NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
1876 } else {
1877 int displacement = SRegOffset(rl_src2.s_reg_low);
1878 LIR* m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP_32.GetReg(),
1879 displacement + LOWORD_OFFSET);
1880 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1881 true /* is_load */, true /* is_64bit */);
1882 }
1883
1884 // ECX <- 2*ECX (2H * 1L) + (1H * 2L)
1885 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg());
1886 } else {
1887 // EAX <- 2H
1888 if (src2_in_reg) {
1889 NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
1890 } else {
1891 LoadBaseDisp(rs_rSP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32,
1892 kNotVolatile);
1893 }
1894
1895 // EAX <- EAX * 1L (2H * 1L)
1896 if (src1_in_reg) {
1897 NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg());
1898 } else {
1899 int displacement = SRegOffset(rl_src1.s_reg_low);
1900 LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP_32.GetReg(),
1901 displacement + LOWORD_OFFSET);
1902 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1903 true /* is_load */, true /* is_64bit */);
1904 }
1905
1906 // ECX <- ECX * 2L (1H * 2L)
1907 if (src2_in_reg) {
1908 NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
1909 } else {
1910 int displacement = SRegOffset(rl_src2.s_reg_low);
1911 LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP_32.GetReg(),
1912 displacement + LOWORD_OFFSET);
1913 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1914 true /* is_load */, true /* is_64bit */);
1915 }
1916
1917 // ECX <- ECX + EAX (2H * 1L) + (1H * 2L)
1918 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
1919 }
1920
1921 // EAX <- 2L
1922 if (src2_in_reg) {
1923 NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
1924 } else {
1925 LoadBaseDisp(rs_rSP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32,
1926 kNotVolatile);
1927 }
1928
1929 // EDX:EAX <- 2L * 1L (double precision)
1930 if (src1_in_reg) {
1931 NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
1932 } else {
1933 int displacement = SRegOffset(rl_src1.s_reg_low);
1934 LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET);
1935 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1936 true /* is_load */, true /* is_64bit */);
1937 }
1938
1939 // EDX <- EDX + ECX (add high words)
1940 NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
1941
1942 // Result is EDX:EAX
1943 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
1944 RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
1945 StoreValueWide(rl_dest, rl_result);
1946 }
1947
1948 void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
1949 Instruction::Code op) {
1950 DCHECK_EQ(rl_dest.location, kLocPhysReg);
1951 X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
1952 if (rl_src.location == kLocPhysReg) {
1953 // Both operands are in registers.
1954     // But we must ensure that rl_src is in a register pair.
1955 if (cu_->target64) {
1956 NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
1957 } else {
1958 rl_src = LoadValueWide(rl_src, kCoreReg);
1959 if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
1960 // The registers are the same, so we would clobber it before the use.
1961 RegStorage temp_reg = AllocTemp();
1962 OpRegCopy(temp_reg, rl_dest.reg);
1963 rl_src.reg.SetHighReg(temp_reg.GetReg());
1964 }
1965 NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
1966
1967 x86op = GetOpcode(op, rl_dest, rl_src, true);
1968 NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
1969 }
1970 return;
1971 }
1972
1973 // RHS is in memory.
1974 DCHECK((rl_src.location == kLocDalvikFrame) ||
1975 (rl_src.location == kLocCompilerTemp));
1976 int r_base = rs_rX86_SP_32.GetReg();
1977 int displacement = SRegOffset(rl_src.s_reg_low);
1978
1979 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1980 LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
1981 r_base, displacement + LOWORD_OFFSET);
1982 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1983 true /* is_load */, true /* is64bit */);
1984 if (!cu_->target64) {
1985 x86op = GetOpcode(op, rl_dest, rl_src, true);
1986 lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
1987 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1988 true /* is_load */, true /* is64bit */);
1989 }
1990 }
1991
1992 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
1993 rl_dest = UpdateLocWideTyped(rl_dest);
1994 if (rl_dest.location == kLocPhysReg) {
1995 // Ensure we are in a register pair
1996 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1997
1998 rl_src = UpdateLocWideTyped(rl_src);
1999 GenLongRegOrMemOp(rl_result, rl_src, op);
2000 StoreFinalValueWide(rl_dest, rl_result);
2001 return;
2002 } else if (!cu_->target64 && Intersects(rl_src, rl_dest)) {
2003     // Handle the case when src and dest intersect.
2004 rl_src = LoadValueWide(rl_src, kCoreReg);
2005 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2006 rl_src = UpdateLocWideTyped(rl_src);
2007 GenLongRegOrMemOp(rl_result, rl_src, op);
2008 StoreFinalValueWide(rl_dest, rl_result);
2009 return;
2010 }
2011
2012 // It wasn't in registers, so it better be in memory.
2013 DCHECK((rl_dest.location == kLocDalvikFrame) ||
2014 (rl_dest.location == kLocCompilerTemp));
2015 rl_src = LoadValueWide(rl_src, kCoreReg);
2016
2017 // Operate directly into memory.
2018 X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
2019 int r_base = rs_rX86_SP_32.GetReg();
2020 int displacement = SRegOffset(rl_dest.s_reg_low);
2021
2022 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2023 LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
2024 cu_->target64 ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
2025 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2026 true /* is_load */, true /* is64bit */);
2027 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2028 false /* is_load */, true /* is64bit */);
2029 if (!cu_->target64) {
2030 x86op = GetOpcode(op, rl_dest, rl_src, true);
2031 lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
2032 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2033 true /* is_load */, true /* is64bit */);
2034 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2035 false /* is_load */, true /* is64bit */);
2036 }
2037
2038 int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
2039 int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
2040
2041 // If the left operand is in memory and the right operand is in a register
2042 // and both belong to the same dalvik register then we should clobber the
2043 // right one because it doesn't hold valid data anymore.
2044 if (v_src_reg == v_dst_reg) {
2045 Clobber(rl_src.reg);
2046 }
2047 }
2048
2049 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
2050 RegLocation rl_src2, Instruction::Code op,
2051 bool is_commutative) {
2052 // Is this really a 2 operand operation?
2053 switch (op) {
2054 case Instruction::ADD_LONG_2ADDR:
2055 case Instruction::SUB_LONG_2ADDR:
2056 case Instruction::AND_LONG_2ADDR:
2057 case Instruction::OR_LONG_2ADDR:
2058 case Instruction::XOR_LONG_2ADDR:
2059 if (GenerateTwoOperandInstructions()) {
2060 GenLongArith(rl_dest, rl_src2, op);
2061 return;
2062 }
2063 break;
2064
2065 default:
2066 break;
2067 }
2068
2069 if (rl_dest.location == kLocPhysReg) {
2070 RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
2071
2072 // We are about to clobber the LHS, so it needs to be a temp.
2073 rl_result = ForceTempWide(rl_result);
2074
2075 // Perform the operation using the RHS.
2076 rl_src2 = UpdateLocWideTyped(rl_src2);
2077 GenLongRegOrMemOp(rl_result, rl_src2, op);
2078
2079 // And now record that the result is in the temp.
2080 StoreFinalValueWide(rl_dest, rl_result);
2081 return;
2082 }
2083
2084 // It wasn't in registers, so it better be in memory.
2085 DCHECK((rl_dest.location == kLocDalvikFrame) || (rl_dest.location == kLocCompilerTemp));
2086 rl_src1 = UpdateLocWideTyped(rl_src1);
2087 rl_src2 = UpdateLocWideTyped(rl_src2);
2088
2089 // Get one of the source operands into temporary register.
2090 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2091 if (cu_->target64) {
2092 if (IsTemp(rl_src1.reg)) {
2093 GenLongRegOrMemOp(rl_src1, rl_src2, op);
2094 } else if (is_commutative) {
2095 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
2096 // We need at least one of them to be a temporary.
2097 if (!IsTemp(rl_src2.reg)) {
2098 rl_src1 = ForceTempWide(rl_src1);
2099 GenLongRegOrMemOp(rl_src1, rl_src2, op);
2100 } else {
2101 GenLongRegOrMemOp(rl_src2, rl_src1, op);
2102 StoreFinalValueWide(rl_dest, rl_src2);
2103 return;
2104 }
2105 } else {
2106 // Need LHS to be the temp.
2107 rl_src1 = ForceTempWide(rl_src1);
2108 GenLongRegOrMemOp(rl_src1, rl_src2, op);
2109 }
2110 } else {
2111 if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
2112 GenLongRegOrMemOp(rl_src1, rl_src2, op);
2113 } else if (is_commutative) {
2114 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
2115 // We need at least one of them to be a temporary.
2116 if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
2117 rl_src1 = ForceTempWide(rl_src1);
2118 GenLongRegOrMemOp(rl_src1, rl_src2, op);
2119 } else {
2120 GenLongRegOrMemOp(rl_src2, rl_src1, op);
2121 StoreFinalValueWide(rl_dest, rl_src2);
2122 return;
2123 }
2124 } else {
2125 // Need LHS to be the temp.
2126 rl_src1 = ForceTempWide(rl_src1);
2127 GenLongRegOrMemOp(rl_src1, rl_src2, op);
2128 }
2129 }
2130
2131 StoreFinalValueWide(rl_dest, rl_src1);
2132 }
2133
2134 void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
2135 if (cu_->target64) {
2136 rl_src = LoadValueWide(rl_src, kCoreReg);
2137 RegLocation rl_result;
2138 rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2139 OpRegCopy(rl_result.reg, rl_src.reg);
2140 OpReg(kOpNot, rl_result.reg);
2141 StoreValueWide(rl_dest, rl_result);
2142 } else {
2143     LOG(FATAL) << "Unexpected use of GenNotLong()";
2144 }
2145 }
2146
2147 void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src,
2148 int64_t imm, bool is_div) {
2149 if (imm == 0) {
2150 GenDivZeroException();
2151 } else if (imm == 1) {
2152 if (is_div) {
2153 // x / 1 == x.
2154 StoreValueWide(rl_dest, rl_src);
2155 } else {
2156 // x % 1 == 0.
2157 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2158 LoadConstantWide(rl_result.reg, 0);
2159 StoreValueWide(rl_dest, rl_result);
2160 }
2161 } else if (imm == -1) { // handle 0x8000000000000000 / -1 special case.
2162 if (is_div) {
2163 rl_src = LoadValueWide(rl_src, kCoreReg);
2164 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2165 RegStorage rs_temp = AllocTempWide();
2166
2167 OpRegCopy(rl_result.reg, rl_src.reg);
2168 LoadConstantWide(rs_temp, 0x8000000000000000);
2169
2170 // If x == MIN_LONG, return MIN_LONG.
2171 OpRegReg(kOpCmp, rl_src.reg, rs_temp);
2172 LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
2173
2174 // For x != MIN_LONG, x / -1 == -x.
2175 OpReg(kOpNeg, rl_result.reg);
2176
2177 minint_branch->target = NewLIR0(kPseudoTargetLabel);
2178 FreeTemp(rs_temp);
2179 StoreValueWide(rl_dest, rl_result);
2180 } else {
2181 // x % -1 == 0.
2182 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2183 LoadConstantWide(rl_result.reg, 0);
2184 StoreValueWide(rl_dest, rl_result);
2185 }
2186 } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
2187 // Division using shifting.
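    // An arithmetic right shift rounds toward negative infinity while Java
    // division truncates toward zero, so negative dividends are biased first:
    // compute src + (|imm| - 1), CMOV the unbiased src back in when src >= 0,
    // shift right by CTZ(|imm|), then negate if imm < 0.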
2188 rl_src = LoadValueWide(rl_src, kCoreReg);
2189 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2190 if (IsSameReg(rl_result.reg, rl_src.reg)) {
2191 RegStorage rs_temp = AllocTypedTempWide(false, kCoreReg);
2192 rl_result.reg.SetReg(rs_temp.GetReg());
2193 }
2194 LoadConstantWide(rl_result.reg, std::abs(imm) - 1);
2195 OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
2196 NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
2197 OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
2198 int shift_amount = CTZ(imm);
2199 OpRegImm(kOpAsr, rl_result.reg, shift_amount);
2200 if (imm < 0) {
2201 OpReg(kOpNeg, rl_result.reg);
2202 }
2203 StoreValueWide(rl_dest, rl_result);
2204 } else {
2205 CHECK(imm <= -2 || imm >= 2);
2206
2207 FlushReg(rs_r0q);
2208 Clobber(rs_r0q);
2209 LockTemp(rs_r0q);
2210 FlushReg(rs_r2q);
2211 Clobber(rs_r2q);
2212 LockTemp(rs_r2q);
2213
2214 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
2215 is_div ? rs_r2q : rs_r0q, INVALID_SREG, INVALID_SREG};
2216
2217     // Use H. S. Warren's Hacker's Delight Chapter 10 and
2218     // T. Granlund, P. L. Montgomery's "Division by invariant integers using multiplication".
2219 int64_t magic;
2220 int shift;
2221 CalculateMagicAndShift(imm, magic, shift, true /* is_long */);
2222
2223 /*
2224 * For imm >= 2,
2225 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
2226 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0.
2227 * For imm <= -2,
2228 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0
2229 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
2230 * We implement this algorithm in the following way:
2231 * 1. multiply magic number m and numerator n, get the higher 64bit result in RDX
2232 * 2. if imm > 0 and magic < 0, add numerator to RDX
2233 * if imm < 0 and magic > 0, sub numerator from RDX
2234 * 3. if S !=0, SAR S bits for RDX
2235 * 4. add 1 to RDX if RDX < 0
2236 * 5. Thus, RDX is the quotient
2237 */
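    // Worked example (a sketch, assuming CalculateMagicAndShift returns the usual
    // Hacker's Delight constants): for imm = 3 it would give M = (2^64 + 2) / 3 =
    // 0x5555555555555556 and S = 0. For n = 7 the high 64 bits of M * n are
    // 2 = 7 / 3; for n = -7 they are -3, and step 4 corrects this to -2, i.e.
    // -7 / 3 truncated toward zero.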
2238
2239 // RAX = magic.
2240 LoadConstantWide(rs_r0q, magic);
2241
2242 // Multiply by numerator.
2243 RegStorage numerator_reg;
2244 if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
2245 // We will need the value later.
2246 rl_src = LoadValueWide(rl_src, kCoreReg);
2247 numerator_reg = rl_src.reg;
2248
2249 // RDX:RAX = magic * numerator.
2250 NewLIR1(kX86Imul64DaR, numerator_reg.GetReg());
2251 } else {
2252 // Only need this once. Multiply directly from the value.
2253 rl_src = UpdateLocWideTyped(rl_src);
2254 if (rl_src.location != kLocPhysReg) {
2255 // Okay, we can do this from memory.
2256 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2257 int displacement = SRegOffset(rl_src.s_reg_low);
2258 // RDX:RAX = magic * numerator.
2259 LIR *m = NewLIR2(kX86Imul64DaM, rs_rX86_SP_32.GetReg(), displacement);
2260 AnnotateDalvikRegAccess(m, displacement >> 2,
2261 true /* is_load */, true /* is_64bit */);
2262 } else {
2263 // RDX:RAX = magic * numerator.
2264 NewLIR1(kX86Imul64DaR, rl_src.reg.GetReg());
2265 }
2266 }
2267
2268 if (imm > 0 && magic < 0) {
2269 // Add numerator to RDX.
2270 DCHECK(numerator_reg.Valid());
2271 OpRegReg(kOpAdd, rs_r2q, numerator_reg);
2272 } else if (imm < 0 && magic > 0) {
2273 DCHECK(numerator_reg.Valid());
2274 OpRegReg(kOpSub, rs_r2q, numerator_reg);
2275 }
2276
2277 // Do we need the shift?
2278 if (shift != 0) {
2279 // Shift RDX by 'shift' bits.
2280 OpRegImm(kOpAsr, rs_r2q, shift);
2281 }
2282
2283 // Move RDX to RAX.
2284 OpRegCopyWide(rs_r0q, rs_r2q);
2285
2286 // Move sign bit to bit 0, zeroing the rest.
2287 OpRegImm(kOpLsr, rs_r2q, 63);
2288
2289 // RDX = RDX + RAX.
2290 OpRegReg(kOpAdd, rs_r2q, rs_r0q);
2291
2292 // Quotient is in RDX.
2293 if (!is_div) {
2294 // We need to compute the remainder.
2295       // Remainder is numerator - (quotient * imm).
2296 DCHECK(numerator_reg.Valid());
2297 OpRegCopyWide(rs_r0q, numerator_reg);
2298
2299 // Imul doesn't support 64-bit imms.
2300 if (imm > std::numeric_limits<int32_t>::max() ||
2301 imm < std::numeric_limits<int32_t>::min()) {
2302 RegStorage rs_temp = AllocTempWide();
2303 LoadConstantWide(rs_temp, imm);
2304
2305         // RDX = quotient * imm.
2306 NewLIR2(kX86Imul64RR, rs_r2q.GetReg(), rs_temp.GetReg());
2307
2308 FreeTemp(rs_temp);
2309 } else {
2310         // RDX = quotient * imm.
2311 int short_imm = static_cast<int>(imm);
2312 NewLIR3(kX86Imul64RRI, rs_r2q.GetReg(), rs_r2q.GetReg(), short_imm);
2313 }
2314
2315 // RAX -= RDX.
2316 OpRegReg(kOpSub, rs_r0q, rs_r2q);
2317
2318 // Result in RAX.
2319 } else {
2320 // Result in RDX.
2321 }
2322 StoreValueWide(rl_dest, rl_result);
2323 FreeTemp(rs_r0q);
2324 FreeTemp(rs_r2q);
2325 }
2326 }
2327
2328 void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
2329 RegLocation rl_src2, bool is_div, int flags) {
2330 if (!cu_->target64) {
2331     LOG(FATAL) << "Unexpected use of GenDivRemLong()";
2332 return;
2333 }
2334
2335 if (rl_src2.is_const) {
2336 DCHECK(rl_src2.wide);
2337 int64_t imm = mir_graph_->ConstantValueWide(rl_src2);
2338 GenDivRemLongLit(rl_dest, rl_src1, imm, is_div);
2339 return;
2340 }
2341
2342 // We have to use fixed registers, so flush all the temps.
2343 // Prepare for explicit register usage.
2344 ExplicitTempRegisterLock(this, 4, &rs_r0q, &rs_r1q, &rs_r2q, &rs_r6q);
2345
2346 // Load LHS into RAX.
2347 LoadValueDirectWideFixed(rl_src1, rs_r0q);
2348
2349 // Load RHS into RCX.
2350 LoadValueDirectWideFixed(rl_src2, rs_r1q);
2351
2352 // Copy LHS sign bit into RDX.
2353 NewLIR0(kx86Cqo64Da);
2354
2355 // Handle division by zero case.
2356 if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
2357 GenDivZeroCheckWide(rs_r1q);
2358 }
2359
2360 // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
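  // Hardware idiv raises a divide-error fault when the quotient overflows
  // (LONG_MIN / -1 would be +2^63), whereas Java requires LONG_MIN / -1 == LONG_MIN
  // and LONG_MIN % -1 == 0, so that operand pair is filtered out before the idiv.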
2361 NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
2362 LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
2363
2364 // RHS is -1.
2365 LoadConstantWide(rs_r6q, 0x8000000000000000);
2366 NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
2367 LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
2368
2369 // In 0x8000000000000000/-1 case.
2370 if (!is_div) {
2371     // For DIV, RAX is already right. For REM, we need RDX to be 0.
2372 NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
2373 }
2374 LIR* done = NewLIR1(kX86Jmp8, 0);
2375
2376 // Expected case.
2377 minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
2378 minint_branch->target = minus_one_branch->target;
2379 NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
2380 done->target = NewLIR0(kPseudoTargetLabel);
2381
2382 // Result is in RAX for div and RDX for rem.
2383 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
2384 if (!is_div) {
2385 rl_result.reg.SetReg(r2q);
2386 }
2387
2388 StoreValueWide(rl_dest, rl_result);
2389 }
2390
2391 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
2392 rl_src = LoadValueWide(rl_src, kCoreReg);
2393 RegLocation rl_result;
2394 if (cu_->target64) {
2395 rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2396 OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
2397 } else {
2398 rl_result = ForceTempWide(rl_src);
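    // 64-bit negation on a register pair: NEG sets CF iff its operand was non-zero,
    // so "neg low; adc high, 0; neg high" leaves -(high + borrow) in the high word,
    // which together with -low is the two's complement of the original pair.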
2399 OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow()); // rLow = -rLow
2400 OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0); // rHigh = rHigh + CF
2401 OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); // rHigh = -rHigh
2402 }
2403 StoreValueWide(rl_dest, rl_result);
2404 }
2405
2406 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) {
2407 DCHECK_EQ(kX86, cu_->instruction_set);
2408 X86OpCode opcode = kX86Bkpt;
2409 switch (op) {
2410 case kOpCmp: opcode = kX86Cmp32RT; break;
2411 case kOpMov: opcode = kX86Mov32RT; break;
2412 default:
2413 LOG(FATAL) << "Bad opcode: " << op;
2414 break;
2415 }
2416 NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
2417 }
2418
2419 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
2420 DCHECK_EQ(kX86_64, cu_->instruction_set);
2421 X86OpCode opcode = kX86Bkpt;
2422 if (cu_->target64 && r_dest.Is64BitSolo()) {
2423 switch (op) {
2424 case kOpCmp: opcode = kX86Cmp64RT; break;
2425 case kOpMov: opcode = kX86Mov64RT; break;
2426 default:
2427 LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
2428 break;
2429 }
2430 } else {
2431 switch (op) {
2432 case kOpCmp: opcode = kX86Cmp32RT; break;
2433 case kOpMov: opcode = kX86Mov32RT; break;
2434 default:
2435 LOG(FATAL) << "Bad opcode: " << op;
2436 break;
2437 }
2438 }
2439 NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
2440 }
2441
2442 /*
2443 * Generate array load
2444 */
2445 void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
2446 RegLocation rl_index, RegLocation rl_dest, int scale) {
2447 RegisterClass reg_class = RegClassForFieldLoadStore(size, false);
2448 int len_offset = mirror::Array::LengthOffset().Int32Value();
2449 RegLocation rl_result;
2450 rl_array = LoadValue(rl_array, kRefReg);
2451
2452 int data_offset;
2453 if (size == k64 || size == kDouble) {
2454 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
2455 } else {
2456 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
2457 }
2458
2459 bool constant_index = rl_index.is_const;
2460 int32_t constant_index_value = 0;
2461 if (!constant_index) {
2462 rl_index = LoadValue(rl_index, kCoreReg);
2463 } else {
2464 constant_index_value = mir_graph_->ConstantValue(rl_index);
2465 // If index is constant, just fold it into the data offset
2466 data_offset += constant_index_value << scale;
2467     // Treat as a non-array access below.
2468 rl_index.reg = RegStorage::InvalidReg();
2469 }
2470
2471 /* null object? */
2472 GenNullCheck(rl_array.reg, opt_flags);
2473
2474 if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
2475 if (constant_index) {
2476 GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
2477 } else {
2478 GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
2479 }
2480 }
2481 rl_result = EvalLoc(rl_dest, reg_class, true);
2482 LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
2483 if ((size == k64) || (size == kDouble)) {
2484 StoreValueWide(rl_dest, rl_result);
2485 } else {
2486 StoreValue(rl_dest, rl_result);
2487 }
2488 }
2489
2490 /*
2491 * Generate array store
2492 *
2493 */
2494 void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
2495 RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
2496 RegisterClass reg_class = RegClassForFieldLoadStore(size, false);
2497 int len_offset = mirror::Array::LengthOffset().Int32Value();
2498 int data_offset;
2499
2500 if (size == k64 || size == kDouble) {
2501 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
2502 } else {
2503 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
2504 }
2505
2506 rl_array = LoadValue(rl_array, kRefReg);
2507 bool constant_index = rl_index.is_const;
2508 int32_t constant_index_value = 0;
2509 if (!constant_index) {
2510 rl_index = LoadValue(rl_index, kCoreReg);
2511 } else {
2512 // If index is constant, just fold it into the data offset
2513 constant_index_value = mir_graph_->ConstantValue(rl_index);
2514 data_offset += constant_index_value << scale;
2515       // Treat as a non-array access below.
2516 rl_index.reg = RegStorage::InvalidReg();
2517 }
2518
2519 /* null object? */
2520 GenNullCheck(rl_array.reg, opt_flags);
2521
2522 if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
2523 if (constant_index) {
2524 GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
2525 } else {
2526 GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
2527 }
2528 }
2529 if ((size == k64) || (size == kDouble)) {
2530 rl_src = LoadValueWide(rl_src, reg_class);
2531 } else {
2532 rl_src = LoadValue(rl_src, reg_class);
2533 }
2534 // If the src reg can't be byte accessed, move it to a temp first.
2535 if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
2536 RegStorage temp = AllocTemp();
2537 OpRegCopy(temp, rl_src.reg);
2538 StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size, opt_flags);
2539 } else {
2540 StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size, opt_flags);
2541 }
2542 if (card_mark) {
2543     // Free rl_index if it's a temp. Ensures there are 2 free regs for card mark.
2544 if (!constant_index) {
2545 FreeTemp(rl_index.reg);
2546 }
2547 MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
2548 }
2549 }
2550
2551 RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
2552 RegLocation rl_src, int shift_amount, int flags) {
2553 UNUSED(flags);
2554 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2555 if (cu_->target64) {
2556 OpKind op = static_cast<OpKind>(0); /* Make gcc happy */
2557 switch (opcode) {
2558 case Instruction::SHL_LONG:
2559 case Instruction::SHL_LONG_2ADDR:
2560 op = kOpLsl;
2561 break;
2562 case Instruction::SHR_LONG:
2563 case Instruction::SHR_LONG_2ADDR:
2564 op = kOpAsr;
2565 break;
2566 case Instruction::USHR_LONG:
2567 case Instruction::USHR_LONG_2ADDR:
2568 op = kOpLsr;
2569 break;
2570 default:
2571 LOG(FATAL) << "Unexpected case";
2572 }
2573 OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
2574 } else {
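    // 32-bit path: the value lives in a register pair. A shift of 32 or more
    // reduces to moving one half and filling the other (with zero or the sign bit);
    // a shift below 32 uses SHLD/SHRD, which shift one register while pulling in
    // the bits that cross the 32-bit boundary from the other register of the pair.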
2575 switch (opcode) {
2576 case Instruction::SHL_LONG:
2577 case Instruction::SHL_LONG_2ADDR:
2578 DCHECK_NE(shift_amount, 1); // Prevent a double store from happening.
2579 if (shift_amount == 32) {
2580 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
2581 LoadConstant(rl_result.reg.GetLow(), 0);
2582 } else if (shift_amount > 31) {
2583 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
2584 NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
2585 LoadConstant(rl_result.reg.GetLow(), 0);
2586 } else {
2587 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2588 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2589 NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
2590 shift_amount);
2591 NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
2592 }
2593 break;
2594 case Instruction::SHR_LONG:
2595 case Instruction::SHR_LONG_2ADDR:
2596 if (shift_amount == 32) {
2597 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2598 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2599 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
2600 } else if (shift_amount > 31) {
2601 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2602 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2603 NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
2604 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
2605 } else {
2606 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2607 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2608 NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
2609 shift_amount);
2610 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
2611 }
2612 break;
2613 case Instruction::USHR_LONG:
2614 case Instruction::USHR_LONG_2ADDR:
2615 if (shift_amount == 32) {
2616 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2617 LoadConstant(rl_result.reg.GetHigh(), 0);
2618 } else if (shift_amount > 31) {
2619 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2620 NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
2621 LoadConstant(rl_result.reg.GetHigh(), 0);
2622 } else {
2623 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2624 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2625 NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
2626 shift_amount);
2627 NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
2628 }
2629 break;
2630 default:
2631 LOG(FATAL) << "Unexpected case";
2632 }
2633 }
2634 return rl_result;
2635 }
2636
2637 void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
2638 RegLocation rl_src, RegLocation rl_shift, int flags) {
2639 // Per spec, we only care about low 6 bits of shift amount.
2640 int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
2641 if (shift_amount == 0) {
2642 rl_src = LoadValueWide(rl_src, kCoreReg);
2643 StoreValueWide(rl_dest, rl_src);
2644 return;
2645 } else if (shift_amount == 1 &&
2646 (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
2647 // Need to handle this here to avoid calling StoreValueWide twice.
2648 GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src, flags);
2649 return;
2650 }
2651 if (PartiallyIntersects(rl_src, rl_dest)) {
2652 GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
2653 return;
2654 }
2655 rl_src = LoadValueWide(rl_src, kCoreReg);
2656 RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount, flags);
2657 StoreValueWide(rl_dest, rl_result);
2658 }
2659
2660 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
2661 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
2662 int flags) {
2663 bool isConstSuccess = false;
2664 switch (opcode) {
2665 case Instruction::ADD_LONG:
2666 case Instruction::AND_LONG:
2667 case Instruction::OR_LONG:
2668 case Instruction::XOR_LONG:
2669 if (rl_src2.is_const) {
2670 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2671 } else {
2672 DCHECK(rl_src1.is_const);
2673 isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
2674 }
2675 break;
2676 case Instruction::SUB_LONG:
2677 case Instruction::SUB_LONG_2ADDR:
2678 if (rl_src2.is_const) {
2679 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2680 } else {
2681 GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
2682 isConstSuccess = true;
2683 }
2684 break;
2685 case Instruction::ADD_LONG_2ADDR:
2686 case Instruction::OR_LONG_2ADDR:
2687 case Instruction::XOR_LONG_2ADDR:
2688 case Instruction::AND_LONG_2ADDR:
2689 if (rl_src2.is_const) {
2690 if (GenerateTwoOperandInstructions()) {
2691 isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode);
2692 } else {
2693 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2694 }
2695 } else {
2696 DCHECK(rl_src1.is_const);
2697 isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
2698 }
2699 break;
2700 default:
2701 isConstSuccess = false;
2702 break;
2703 }
2704
2705 if (!isConstSuccess) {
2706 // Default - bail to non-const handler.
2707 GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
2708 }
2709 }
2710
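// On 32-bit targets a long immediate is applied as two separate 32-bit halves; a
// half is a no-op when it cannot change its operand (AND with -1, OR/XOR with 0).
// E.g. "x | 0x100000000" only needs a single OR on the high word.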
2711 bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
2712 switch (op) {
2713 case Instruction::AND_LONG_2ADDR:
2714 case Instruction::AND_LONG:
2715 return value == -1;
2716 case Instruction::OR_LONG:
2717 case Instruction::OR_LONG_2ADDR:
2718 case Instruction::XOR_LONG:
2719 case Instruction::XOR_LONG_2ADDR:
2720 return value == 0;
2721 default:
2722 return false;
2723 }
2724 }
2725
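// Pick the x86 opcode for one half of a wide arithmetic op. On 64-bit targets a
// single 64-bit instruction is used; on 32-bit targets the op is split into low and
// high halves, and the high half of ADD/SUB must use the carry-propagating ADC/SBB
// forms so the carry or borrow from the low half is folded in.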
2726 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
2727 bool is_high_op) {
2728 bool rhs_in_mem = rhs.location != kLocPhysReg;
2729 bool dest_in_mem = dest.location != kLocPhysReg;
2730 bool is64Bit = cu_->target64;
2731 DCHECK(!rhs_in_mem || !dest_in_mem);
2732 switch (op) {
2733 case Instruction::ADD_LONG:
2734 case Instruction::ADD_LONG_2ADDR:
2735 if (dest_in_mem) {
2736 return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR;
2737 } else if (rhs_in_mem) {
2738 return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM;
2739 }
2740 return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR;
2741 case Instruction::SUB_LONG:
2742 case Instruction::SUB_LONG_2ADDR:
2743 if (dest_in_mem) {
2744 return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR;
2745 } else if (rhs_in_mem) {
2746 return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM;
2747 }
2748 return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR;
2749 case Instruction::AND_LONG_2ADDR:
2750 case Instruction::AND_LONG:
2751 if (dest_in_mem) {
2752 return is64Bit ? kX86And64MR : kX86And32MR;
2753 }
2754 if (is64Bit) {
2755 return rhs_in_mem ? kX86And64RM : kX86And64RR;
2756 }
2757 return rhs_in_mem ? kX86And32RM : kX86And32RR;
2758 case Instruction::OR_LONG:
2759 case Instruction::OR_LONG_2ADDR:
2760 if (dest_in_mem) {
2761 return is64Bit ? kX86Or64MR : kX86Or32MR;
2762 }
2763 if (is64Bit) {
2764 return rhs_in_mem ? kX86Or64RM : kX86Or64RR;
2765 }
2766 return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
2767 case Instruction::XOR_LONG:
2768 case Instruction::XOR_LONG_2ADDR:
2769 if (dest_in_mem) {
2770 return is64Bit ? kX86Xor64MR : kX86Xor32MR;
2771 }
2772 if (is64Bit) {
2773 return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR;
2774 }
2775 return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
2776 default:
2777 LOG(FATAL) << "Unexpected opcode: " << op;
2778 return kX86Add32RR;
2779 }
2780 }
2781
2782 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
2783 int32_t value) {
2784 bool in_mem = loc.location != kLocPhysReg;
2785 bool is64Bit = cu_->target64;
2786 bool byte_imm = IS_SIMM8(value);
2787 DCHECK(in_mem || !loc.reg.IsFloat());
2788 switch (op) {
2789 case Instruction::ADD_LONG:
2790 case Instruction::ADD_LONG_2ADDR:
2791 if (byte_imm) {
2792 if (in_mem) {
2793 return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
2794 }
2795 return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
2796 }
2797 if (in_mem) {
2798 return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI;
2799 }
2800 return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI;
2801 case Instruction::SUB_LONG:
2802 case Instruction::SUB_LONG_2ADDR:
2803 if (byte_imm) {
2804 if (in_mem) {
2805 return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
2806 }
2807 return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
2808 }
2809 if (in_mem) {
2810 return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI;
2811 }
2812 return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI;
2813 case Instruction::AND_LONG_2ADDR:
2814 case Instruction::AND_LONG:
2815 if (byte_imm) {
2816 if (is64Bit) {
2817 return in_mem ? kX86And64MI8 : kX86And64RI8;
2818 }
2819 return in_mem ? kX86And32MI8 : kX86And32RI8;
2820 }
2821 if (is64Bit) {
2822 return in_mem ? kX86And64MI : kX86And64RI;
2823 }
2824 return in_mem ? kX86And32MI : kX86And32RI;
2825 case Instruction::OR_LONG:
2826 case Instruction::OR_LONG_2ADDR:
2827 if (byte_imm) {
2828 if (is64Bit) {
2829 return in_mem ? kX86Or64MI8 : kX86Or64RI8;
2830 }
2831 return in_mem ? kX86Or32MI8 : kX86Or32RI8;
2832 }
2833 if (is64Bit) {
2834 return in_mem ? kX86Or64MI : kX86Or64RI;
2835 }
2836 return in_mem ? kX86Or32MI : kX86Or32RI;
2837 case Instruction::XOR_LONG:
2838 case Instruction::XOR_LONG_2ADDR:
2839 if (byte_imm) {
2840 if (is64Bit) {
2841 return in_mem ? kX86Xor64MI8 : kX86Xor64RI8;
2842 }
2843 return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
2844 }
2845 if (is64Bit) {
2846 return in_mem ? kX86Xor64MI : kX86Xor64RI;
2847 }
2848 return in_mem ? kX86Xor32MI : kX86Xor32RI;
2849 default:
2850 LOG(FATAL) << "Unexpected opcode: " << op;
2851 UNREACHABLE();
2852 }
2853 }
2854
2855 bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
2856 DCHECK(rl_src.is_const);
2857 int64_t val = mir_graph_->ConstantValueWide(rl_src);
2858
2859 if (cu_->target64) {
2860     // We can use an immediate only if it fits in 32 bits.
2861 if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
2862 return false;
2863 }
2864
2865 rl_dest = UpdateLocWideTyped(rl_dest);
2866
2867 if ((rl_dest.location == kLocDalvikFrame) ||
2868 (rl_dest.location == kLocCompilerTemp)) {
2869 int r_base = rs_rX86_SP_32.GetReg();
2870 int displacement = SRegOffset(rl_dest.s_reg_low);
2871
2872 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2873 X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
2874 LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
2875 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2876 true /* is_load */, true /* is64bit */);
2877 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2878 false /* is_load */, true /* is64bit */);
2879 return true;
2880 }
2881
2882 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2883 DCHECK_EQ(rl_result.location, kLocPhysReg);
2884 DCHECK(!rl_result.reg.IsFloat());
2885
2886 X86OpCode x86op = GetOpcode(op, rl_result, false, val);
2887 NewLIR2(x86op, rl_result.reg.GetReg(), val);
2888
2889 StoreValueWide(rl_dest, rl_result);
2890 return true;
2891 }
2892
2893 int32_t val_lo = Low32Bits(val);
2894 int32_t val_hi = High32Bits(val);
2895 rl_dest = UpdateLocWideTyped(rl_dest);
2896
2897 // Can we just do this into memory?
2898 if ((rl_dest.location == kLocDalvikFrame) ||
2899 (rl_dest.location == kLocCompilerTemp)) {
2900 int r_base = rs_rX86_SP_32.GetReg();
2901 int displacement = SRegOffset(rl_dest.s_reg_low);
2902
2903 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2904 if (!IsNoOp(op, val_lo)) {
2905 X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
2906 LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
2907 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2908 true /* is_load */, true /* is64bit */);
2909 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2910 false /* is_load */, true /* is64bit */);
2911 }
2912 if (!IsNoOp(op, val_hi)) {
2913 X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
2914 LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi);
2915 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2916 true /* is_load */, true /* is64bit */);
2917 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2918 false /* is_load */, true /* is64bit */);
2919 }
2920 return true;
2921 }
2922
2923 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2924 DCHECK_EQ(rl_result.location, kLocPhysReg);
2925 DCHECK(!rl_result.reg.IsFloat());
2926
2927 if (!IsNoOp(op, val_lo)) {
2928 X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
2929 NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
2930 }
2931 if (!IsNoOp(op, val_hi)) {
2932 X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
2933 NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
2934 }
2935 StoreValueWide(rl_dest, rl_result);
2936 return true;
2937 }
2938
2939 bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
2940 RegLocation rl_src2, Instruction::Code op) {
2941 DCHECK(rl_src2.is_const);
2942 int64_t val = mir_graph_->ConstantValueWide(rl_src2);
2943
2944 if (cu_->target64) {
2945     // We can use an immediate only if it fits in 32 bits.
2946 if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
2947 return false;
2948 }
    if (rl_dest.location == kLocPhysReg &&
        rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
      X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
      OpRegCopy(rl_dest.reg, rl_src1.reg);
      NewLIR2(x86op, rl_dest.reg.GetReg(), val);
      StoreFinalValueWide(rl_dest, rl_dest);
      return true;
    }

    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    // We need the values to be in a temporary
    RegLocation rl_result = ForceTempWide(rl_src1);

    X86OpCode x86op = GetOpcode(op, rl_result, false, val);
    NewLIR2(x86op, rl_result.reg.GetReg(), val);

    StoreFinalValueWide(rl_dest, rl_result);
    return true;
  }

  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  rl_dest = UpdateLocWideTyped(rl_dest);
  rl_src1 = UpdateLocWideTyped(rl_src1);

  // Can we do this directly into the destination registers?
  if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
      rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() &&
      rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) {
    if (!IsNoOp(op, val_lo)) {
      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
      NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo);
    }
    if (!IsNoOp(op, val_hi)) {
      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
      NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi);
    }

    StoreFinalValueWide(rl_dest, rl_dest);
    return true;
  }

  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  DCHECK_EQ(rl_src1.location, kLocPhysReg);

  // We need the values to be in a temporary
  RegLocation rl_result = ForceTempWide(rl_src1);
  if (!IsNoOp(op, val_lo)) {
    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
    NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
  }
  if (!IsNoOp(op, val_hi)) {
    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
    NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
  }

  StoreFinalValueWide(rl_dest, rl_result);
  return true;
}

// For final classes there are no sub-classes to check, so we can answer the instance-of
// question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
                                    RegLocation rl_dest, RegLocation rl_src) {
  RegLocation object = LoadValue(rl_src, kRefReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage result_reg = rl_result.reg;

  // For 32-bit, SETcc only works with EAX..EDX.
  RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg;
  if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) {
    result_reg = AllocateByteRegister();
  }

  // Assume that there is no match.
  LoadConstant(result_reg, 0);
  LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, nullptr);

  // We will use this register to compare to memory below.
  // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode).
  // For this reason, force allocation of a 32 bit register to use, so that the
  // compare to memory will be done using a 32 bit comparison.
  // The LoadRefDisp(s) below will work normally, even in 64 bit mode.
  RegStorage check_class = AllocTemp();

  // If Method* is already in a register, we can save a copy.
  RegLocation rl_method = mir_graph_->GetMethodLoc();
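  // Byte offset of the type_idx-th entry in the dex cache's resolved-types array:
  // the array data offset plus type_idx elements of HeapReference<Class>.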
  int32_t offset_of_type = mirror::Array::DataOffset(
      sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
      (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);

  if (rl_method.location == kLocPhysReg) {
    if (use_declaring_class) {
      LoadRefDisp(rl_method.reg, ArtMethod::DeclaringClassOffset().Int32Value(),
                  check_class, kNotVolatile);
    } else {
      LoadRefDisp(rl_method.reg, ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                  check_class, kNotVolatile);
      LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
    }
  } else {
    LoadCurrMethodDirect(check_class);
    if (use_declaring_class) {
      LoadRefDisp(check_class, ArtMethod::DeclaringClassOffset().Int32Value(),
                  check_class, kNotVolatile);
    } else {
      LoadRefDisp(check_class, ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                  check_class, kNotVolatile);
      LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
    }
  }

  // Compare the computed class to the class in the object.
  DCHECK_EQ(object.location, kLocPhysReg);
  OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value());

  // Set the low byte of the result to 0 or 1 from the compare condition code.
  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq);

  LIR* target = NewLIR0(kPseudoTargetLabel);
  null_branchover->target = target;
  FreeTemp(check_class);
  if (IsTemp(result_reg)) {
    OpRegCopy(rl_result.reg, result_reg);
    FreeTemp(result_reg);
  }
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
                               RegLocation rl_lhs, RegLocation rl_rhs, int flags) {
  OpKind op = kOpBkpt;
  bool is_div_rem = false;
  bool unary = false;
  bool shift_op = false;
  bool is_two_addr = false;
  RegLocation rl_result;
  switch (opcode) {
    case Instruction::NEG_INT:
      op = kOpNeg;
      unary = true;
      break;
    case Instruction::NOT_INT:
      op = kOpMvn;
      unary = true;
      break;
    case Instruction::ADD_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::ADD_INT:
      op = kOpAdd;
      break;
    case Instruction::SUB_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SUB_INT:
      op = kOpSub;
      break;
    case Instruction::MUL_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::MUL_INT:
      op = kOpMul;
      break;
    case Instruction::DIV_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::DIV_INT:
      op = kOpDiv;
      is_div_rem = true;
      break;
    /* NOTE: returns in kArg1 */
    case Instruction::REM_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::REM_INT:
      op = kOpRem;
      is_div_rem = true;
      break;
    case Instruction::AND_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::AND_INT:
      op = kOpAnd;
      break;
    case Instruction::OR_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::OR_INT:
      op = kOpOr;
      break;
    case Instruction::XOR_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::XOR_INT:
      op = kOpXor;
      break;
    case Instruction::SHL_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SHL_INT:
      shift_op = true;
      op = kOpLsl;
      break;
    case Instruction::SHR_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SHR_INT:
      shift_op = true;
      op = kOpAsr;
      break;
    case Instruction::USHR_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::USHR_INT:
      shift_op = true;
      op = kOpLsr;
      break;
    default:
      LOG(FATAL) << "Invalid word arith op: " << opcode;
  }

  // Can we convert to a two address instruction?
  if (!is_two_addr &&
      (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
       mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
    is_two_addr = true;
  }

  if (!GenerateTwoOperandInstructions()) {
    is_two_addr = false;
  }

  // Get the div/rem stuff out of the way.
  if (is_div_rem) {
    rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, flags);
    StoreValue(rl_dest, rl_result);
    return;
  }

  // If we generate any memory access below, it will reference a dalvik reg.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  if (unary) {
    rl_lhs = LoadValue(rl_lhs, kCoreReg);
    rl_result = UpdateLocTyped(rl_dest);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegReg(op, rl_result.reg, rl_lhs.reg);
  } else {
    if (shift_op) {
      // X86 masks the shift count in hardware, so no explicit masking is needed,
      // but variable-count shifts must take the count in ECX.
      RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
      LoadValueDirectFixed(rl_rhs, t_reg);
      if (is_two_addr) {
        // Can we do this directly into memory?
        rl_result = UpdateLocTyped(rl_dest);
        if (rl_result.location != kLocPhysReg) {
          // Okay, we can do this into memory
          OpMemReg(op, rl_result, t_reg.GetReg());
          FreeTemp(t_reg);
          return;
        } else if (!rl_result.reg.IsFloat()) {
          // Can do this directly into the result register
          OpRegReg(op, rl_result.reg, t_reg);
          FreeTemp(t_reg);
          StoreFinalValue(rl_dest, rl_result);
          return;
        }
      }
      // Three address form, or we can't do directly.
      rl_lhs = LoadValue(rl_lhs, kCoreReg);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg);
      FreeTemp(t_reg);
    } else {
      // Multiply is 3 operand only (sort of).
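      // (imul has no memory-destination form, so the in-memory two-address path
      // below is reserved for the other ALU ops.)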
      if (is_two_addr && op != kOpMul) {
        // Can we do this directly into memory?
        rl_result = UpdateLocTyped(rl_dest);
        if (rl_result.location == kLocPhysReg) {
          // Ensure res is in a core reg
          rl_result = EvalLoc(rl_dest, kCoreReg, true);
          // Can we do this from memory directly?
          rl_rhs = UpdateLocTyped(rl_rhs);
          if (rl_rhs.location != kLocPhysReg) {
            OpRegMem(op, rl_result.reg, rl_rhs);
            StoreFinalValue(rl_dest, rl_result);
            return;
          } else if (!rl_rhs.reg.IsFloat()) {
            OpRegReg(op, rl_result.reg, rl_rhs.reg);
            StoreFinalValue(rl_dest, rl_result);
            return;
          }
        }
        rl_rhs = LoadValue(rl_rhs, kCoreReg);
        // rl_rhs and rl_dest may refer to the same VR; in that case rl_dest is in a
        // register after the LoadValue above, but rl_result has not been refreshed yet,
        // so update it here.
        rl_result = UpdateLocTyped(rl_dest);
        if (rl_result.location != kLocPhysReg) {
          // Okay, we can do this into memory.
          OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
          return;
        } else if (!rl_result.reg.IsFloat()) {
          // Can do this directly into the result register.
          OpRegReg(op, rl_result.reg, rl_rhs.reg);
          StoreFinalValue(rl_dest, rl_result);
          return;
        } else {
          rl_lhs = LoadValue(rl_lhs, kCoreReg);
          rl_result = EvalLoc(rl_dest, kCoreReg, true);
          OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
        }
      } else {
        // Try to use reg/memory instructions.
        rl_lhs = UpdateLocTyped(rl_lhs);
        rl_rhs = UpdateLocTyped(rl_rhs);
        // We can't optimize with FP registers.
        if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
          // Something is difficult, so fall back to the standard case.
          rl_lhs = LoadValue(rl_lhs, kCoreReg);
          rl_rhs = LoadValue(rl_rhs, kCoreReg);
          rl_result = EvalLoc(rl_dest, kCoreReg, true);
          OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
        } else {
          // We can optimize by moving to result and using memory operands.
          if (rl_rhs.location != kLocPhysReg) {
            // Force LHS into result.
            // We should be careful with the order here:
            // if rl_dest and rl_lhs point to the same VR we should load first;
            // if they are different we should find a register first for dest.
            if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
                mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
              rl_lhs = LoadValue(rl_lhs, kCoreReg);
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              // No-op if these are the same.
              OpRegCopy(rl_result.reg, rl_lhs.reg);
            } else {
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              LoadValueDirect(rl_lhs, rl_result.reg);
            }
            OpRegMem(op, rl_result.reg, rl_rhs);
          } else if (rl_lhs.location != kLocPhysReg) {
            // RHS is in a register; LHS is in memory.
            if (op != kOpSub) {
              // Force RHS into result and operate on memory.
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              OpRegCopy(rl_result.reg, rl_rhs.reg);
              OpRegMem(op, rl_result.reg, rl_lhs);
            } else {
              // Subtraction isn't commutative.
              rl_lhs = LoadValue(rl_lhs, kCoreReg);
              rl_rhs = LoadValue(rl_rhs, kCoreReg);
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
            }
          } else {
            // Both are in registers.
            rl_lhs = LoadValue(rl_lhs, kCoreReg);
            rl_rhs = LoadValue(rl_rhs, kCoreReg);
            rl_result = EvalLoc(rl_dest, kCoreReg, true);
            OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
          }
        }
      }
    }
  }
  StoreValue(rl_dest, rl_result);
}

bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
  // If we have non-core registers, then we can't do good things.
  if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) {
    return false;
  }
  if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) {
    return false;
  }

  // Everything will be fine :-).
  return true;
}

void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
  if (!cu_->target64) {
    Mir2Lir::GenIntToLong(rl_dest, rl_src);
    return;
  }
  rl_src = UpdateLocTyped(rl_src);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
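  // movsxd sign-extends the 32-bit source into the 64-bit result register, either
  // register-to-register or straight from the value's slot in the Dalvik frame.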
  if (rl_src.location == kLocPhysReg) {
    NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
  } else {
    int displacement = SRegOffset(rl_src.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP_32.GetReg(),
                     displacement + LOWORD_OFFSET);
    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                            true /* is_load */, true /* is_64bit */);
  }
  StoreValueWide(rl_dest, rl_result);
}

void X86Mir2Lir::GenLongToInt(RegLocation rl_dest, RegLocation rl_src) {
  rl_src = UpdateLocWide(rl_src);
  rl_src = NarrowRegLoc(rl_src);
  StoreValue(rl_dest, rl_src);

  if (cu_->target64) {
    // If src and dest are in the same physical register, StoreValue generates no
    // operation, but we still need an explicit 32-bit mov R, R to clear the
    // upper 32 bits.
    rl_dest = UpdateLoc(rl_dest);
    if (rl_src.location == kLocPhysReg && rl_dest.location == kLocPhysReg
        && IsSameReg(rl_src.reg, rl_dest.reg)) {
      LIR* copy_lir = OpRegCopyNoInsert(rl_dest.reg, rl_dest.reg);
      // Remove the nop flag set by OpRegCopyNoInsert since src == dest.
      copy_lir->flags.is_nop = false;
      AppendLIR(copy_lir);
    }
  }
}

void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                RegLocation rl_src1, RegLocation rl_shift) {
  if (!cu_->target64) {
    // Long shift operations in 32-bit mode. Use shld or shrd to create a 32-bit register
    // filled from the other half, then shift the other half. If the shift amount is less
    // than 32 we're done; otherwise move one register to the other and fill the vacated
    // register with zero or sign bits.
    LIR* branch;
    FlushAllRegs();
    LockCallTemps();
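    // The wide operand is pinned to the EDX:EAX pair and the shift count to CL, matching
    // the operand constraints of shld/shrd and the variable-count shift instructions.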
    LoadValueDirectFixed(rl_shift, rs_rCX);
    RegStorage r_tmp = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
    LoadValueDirectWideFixed(rl_src1, r_tmp);
    switch (opcode) {
      case Instruction::SHL_LONG:
      case Instruction::SHL_LONG_2ADDR:
        NewLIR3(kX86Shld32RRC, r_tmp.GetHighReg(), r_tmp.GetLowReg(), rs_rCX.GetReg());
        NewLIR2(kX86Sal32RC, r_tmp.GetLowReg(), rs_rCX.GetReg());
        NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
        branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
        OpRegCopy(r_tmp.GetHigh(), r_tmp.GetLow());
        LoadConstant(r_tmp.GetLow(), 0);
        branch->target = NewLIR0(kPseudoTargetLabel);
        break;
      case Instruction::SHR_LONG:
      case Instruction::SHR_LONG_2ADDR:
        NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(), rs_rCX.GetReg());
        NewLIR2(kX86Sar32RC, r_tmp.GetHighReg(), rs_rCX.GetReg());
        NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
        branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
        OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh());
        NewLIR2(kX86Sar32RI, r_tmp.GetHighReg(), 31);
        branch->target = NewLIR0(kPseudoTargetLabel);
        break;
      case Instruction::USHR_LONG:
      case Instruction::USHR_LONG_2ADDR:
        NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(),
                rs_rCX.GetReg());
        NewLIR2(kX86Shr32RC, r_tmp.GetHighReg(), rs_rCX.GetReg());
        NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
        branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
        OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh());
        LoadConstant(r_tmp.GetHigh(), 0);
        branch->target = NewLIR0(kPseudoTargetLabel);
        break;
      default:
        LOG(FATAL) << "Unexpected case: " << opcode;
        return;
    }
    RegLocation rl_result = LocCReturnWide();
    StoreValueWide(rl_dest, rl_result);
    return;
  }

  bool is_two_addr = false;
  OpKind op = kOpBkpt;
  RegLocation rl_result;

  switch (opcode) {
    case Instruction::SHL_LONG_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SHL_LONG:
      op = kOpLsl;
      break;
    case Instruction::SHR_LONG_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SHR_LONG:
      op = kOpAsr;
      break;
    case Instruction::USHR_LONG_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::USHR_LONG:
      op = kOpLsr;
      break;
    default:
      op = kOpBkpt;
  }

  // X86 masks the shift count in hardware, so no explicit masking is needed, but
  // variable-count shifts must take the count in ECX.
  RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
  LoadValueDirectFixed(rl_shift, t_reg);
  if (is_two_addr) {
    // Can we do this directly into memory?
    rl_result = UpdateLocWideTyped(rl_dest);
    if (rl_result.location != kLocPhysReg) {
      // Okay, we can do this into memory
      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
      OpMemReg(op, rl_result, t_reg.GetReg());
    } else if (!rl_result.reg.IsFloat()) {
      // Can do this directly into the result register
      OpRegReg(op, rl_result.reg, t_reg);
      StoreFinalValueWide(rl_dest, rl_result);
    }
  } else {
    // Three address form, or we can't do directly.
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
    StoreFinalValueWide(rl_dest, rl_result);
  }

  FreeTemp(t_reg);
}

}  // namespace art