1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /* This file contains codegen for the X86 ISA */
18
19 #include "codegen_x86.h"
20 #include "dex/quick/mir_to_lir-inl.h"
21 #include "dex/reg_storage_eq.h"
22 #include "mirror/art_method.h"
23 #include "mirror/array.h"
24 #include "x86_lir.h"
25
26 namespace art {
27
28 /*
29 * Compare two 64-bit values
30 * x = y return 0
31 * x < y return -1
32 * x > y return 1
33 */
34 void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
35 RegLocation rl_src2) {
36 if (cu_->target64) {
37 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
38 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
39 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
40 RegStorage temp_reg = AllocTemp();
41 OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
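// The next three instructions compute (src1 > src2) - (src1 < src2), i.e. 1, 0 or -1
// in the low byte; the movsx then sign-extends that byte to the full 64-bit register.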
42 NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG); // result = (src1 > src2) ? 1 : 0
43 NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL); // temp = (src1 < src2) ? 1 : 0
44 NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg());
45 NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
46
47 StoreValue(rl_dest, rl_result);
48 FreeTemp(temp_reg);
49 return;
50 }
51
52 FlushAllRegs();
53 LockCallTemps(); // Prepare for explicit register usage
54 RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
55 RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
56 LoadValueDirectWideFixed(rl_src1, r_tmp1);
57 LoadValueDirectWideFixed(rl_src2, r_tmp2);
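// Below, the result is assembled as ((x != y) ? 1 : 0) | ((x < y) ? -1 : 0),
// which yields 1, 0 or -1 in r0.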
58 // Compute (r1:r0) = (r1:r0) - (r3:r2)
59 OpRegReg(kOpSub, rs_r0, rs_r2); // r0 = r0 - r2
60 OpRegReg(kOpSbc, rs_r1, rs_r3); // r1 = r1 - r3 - CF
61 NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL); // r2 = (r1:r0) < (r3:r2) ? 1 : 0
62 NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg());
63 OpReg(kOpNeg, rs_r2); // r2 = -r2
64 OpRegReg(kOpOr, rs_r0, rs_r1); // r0 = high | low - sets ZF
65 NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz); // r0 = (r1:r0) != (r3:r2) ? 1 : 0
66 NewLIR2(kX86Movzx8RR, rs_r0.GetReg(), rs_r0.GetReg());
67 OpRegReg(kOpOr, rs_r0, rs_r2); // r0 = r0 | r2
68 RegLocation rl_result = LocCReturn();
69 StoreValue(rl_dest, rl_result);
70 }
71
72 X86ConditionCode X86ConditionEncoding(ConditionCode cond) {
73 switch (cond) {
74 case kCondEq: return kX86CondEq;
75 case kCondNe: return kX86CondNe;
76 case kCondCs: return kX86CondC;
77 case kCondCc: return kX86CondNc;
78 case kCondUlt: return kX86CondC;
79 case kCondUge: return kX86CondNc;
80 case kCondMi: return kX86CondS;
81 case kCondPl: return kX86CondNs;
82 case kCondVs: return kX86CondO;
83 case kCondVc: return kX86CondNo;
84 case kCondHi: return kX86CondA;
85 case kCondLs: return kX86CondBe;
86 case kCondGe: return kX86CondGe;
87 case kCondLt: return kX86CondL;
88 case kCondGt: return kX86CondG;
89 case kCondLe: return kX86CondLe;
90 case kCondAl:
91 case kCondNv: LOG(FATAL) << "Should not reach here";
92 }
93 return kX86CondO;
94 }
95
96 LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
97 NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg());
98 X86ConditionCode cc = X86ConditionEncoding(cond);
99 LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ ,
100 cc);
101 branch->target = target;
102 return branch;
103 }
104
105 LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg,
106 int check_value, LIR* target) {
107 if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) {
108 // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode
109 NewLIR2(reg.Is64Bit() ? kX86Test64RR: kX86Test32RR, reg.GetReg(), reg.GetReg());
110 } else {
111 if (reg.Is64Bit()) {
112 NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value);
113 } else {
114 NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
115 }
116 }
117 X86ConditionCode cc = X86ConditionEncoding(cond);
118 LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc);
119 branch->target = target;
120 return branch;
121 }
122
123 LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
124 // If src or dest is a pair, we'll be using low reg.
125 if (r_dest.IsPair()) {
126 r_dest = r_dest.GetLow();
127 }
128 if (r_src.IsPair()) {
129 r_src = r_src.GetLow();
130 }
131 if (r_dest.IsFloat() || r_src.IsFloat())
132 return OpFpRegCopy(r_dest, r_src);
133 LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR,
134 r_dest.GetReg(), r_src.GetReg());
135 if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
136 res->flags.is_nop = true;
137 }
138 return res;
139 }
140
141 void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
142 if (r_dest != r_src) {
143 LIR *res = OpRegCopyNoInsert(r_dest, r_src);
144 AppendLIR(res);
145 }
146 }
147
148 void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
149 if (r_dest != r_src) {
150 bool dest_fp = r_dest.IsFloat();
151 bool src_fp = r_src.IsFloat();
152 if (dest_fp) {
153 if (src_fp) {
154 OpRegCopy(r_dest, r_src);
155 } else {
156 // TODO: Prevent this from happening in the code. The result is often
157 // unused or could have been loaded more easily from memory.
158 if (!r_src.IsPair()) {
159 DCHECK(!r_dest.IsPair());
160 NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg());
161 } else {
162 NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
163 RegStorage r_tmp = AllocTempDouble();
164 NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
165 NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
166 FreeTemp(r_tmp);
167 }
168 }
169 } else {
170 if (src_fp) {
171 if (!r_dest.IsPair()) {
172 DCHECK(!r_src.IsPair());
173 NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg());
174 } else {
175 NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
176 RegStorage temp_reg = AllocTempDouble();
177 NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
178 NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
179 NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
180 }
181 } else {
182 DCHECK_EQ(r_dest.IsPair(), r_src.IsPair());
183 if (!r_src.IsPair()) {
184 // Just copy the register directly.
185 OpRegCopy(r_dest, r_src);
186 } else {
187 // Handle overlap
188 if (r_src.GetHighReg() == r_dest.GetLowReg() &&
189 r_src.GetLowReg() == r_dest.GetHighReg()) {
190 // Deal with cycles.
191 RegStorage temp_reg = AllocTemp();
192 OpRegCopy(temp_reg, r_dest.GetHigh());
193 OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
194 OpRegCopy(r_dest.GetLow(), temp_reg);
195 FreeTemp(temp_reg);
196 } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
197 OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
198 OpRegCopy(r_dest.GetLow(), r_src.GetLow());
199 } else {
200 OpRegCopy(r_dest.GetLow(), r_src.GetLow());
201 OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
202 }
203 }
204 }
205 }
206 }
207 }
208
209 void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
210 int32_t true_val, int32_t false_val, RegStorage rs_dest,
211 int dest_reg_class) {
212 DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair());
213 DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat());
214
215 // We really need this check for correctness; otherwise we would have to do more
216 // checks in the non-zero/one case below.
217 if (true_val == false_val) {
218 LoadConstantNoClobber(rs_dest, true_val);
219 return;
220 }
221
222 const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op);
223
224 const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0);
225 if (zero_one_case && IsByteRegister(rs_dest)) {
226 if (!dest_intersect) {
227 LoadConstantNoClobber(rs_dest, 0);
228 }
229 OpRegReg(kOpCmp, left_op, right_op);
230 // Set the low byte of the result to 0 or 1 from the compare condition code.
231 NewLIR2(kX86Set8R, rs_dest.GetReg(),
232 X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code)));
233 if (dest_intersect) {
234 NewLIR2(rs_dest.Is64Bit() ? kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg());
235 }
236 } else {
237 // Be careful: rs_dest may be the same as one of the operands, so it can only be written
238 // after the cmp, and we cannot zero it with xor because that would dirty the condition flags.
239 RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false);
240 if (temp_reg.Valid()) {
241 if (false_val == 0 && dest_intersect) {
242 code = FlipComparisonOrder(code);
243 std::swap(true_val, false_val);
244 }
245 if (!dest_intersect) {
246 LoadConstantNoClobber(rs_dest, false_val);
247 }
248 LoadConstantNoClobber(temp_reg, true_val);
249 OpRegReg(kOpCmp, left_op, right_op);
250 if (dest_intersect) {
251 LoadConstantNoClobber(rs_dest, false_val);
252 DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
253 }
254 OpCondRegReg(kOpCmov, code, rs_dest, temp_reg);
255 FreeTemp(temp_reg);
256 } else {
257 // slow path
258 LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr);
259 LoadConstantNoClobber(rs_dest, false_val);
260 LIR* that_is_it = NewLIR1(kX86Jmp8, 0);
261 LIR* true_case = NewLIR0(kPseudoTargetLabel);
262 cmp_branch->target = true_case;
263 LoadConstantNoClobber(rs_dest, true_val);
264 LIR* end = NewLIR0(kPseudoTargetLabel);
265 that_is_it->target = end;
266 }
267 }
268 }
269
270 void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
271 RegLocation rl_result;
272 RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
273 RegLocation rl_dest = mir_graph_->GetDest(mir);
274 // Avoid using float regs here.
275 RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
276 RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
277 ConditionCode ccode = mir->meta.ccode;
278
279 // The kMirOpSelect has two variants, one for constants and one for moves.
280 const bool is_constant_case = (mir->ssa_rep->num_uses == 1);
281
282 if (is_constant_case) {
283 int true_val = mir->dalvikInsn.vB;
284 int false_val = mir->dalvikInsn.vC;
285
286 // Degenerate case: both constants are equal, so the condition does not matter.
287 if (true_val == false_val) {
288 rl_result = EvalLoc(rl_dest, result_reg_class, true);
289 LoadConstantNoClobber(rl_result.reg, true_val);
290 } else {
291 // TODO: use GenSelectConst32 and handle additional opcode patterns such as
292 // "cmp; setcc; movzx" or "cmp; sbb r0,r0; and r0,$mask; add r0,$literal".
293 rl_src = LoadValue(rl_src, src_reg_class);
294 rl_result = EvalLoc(rl_dest, result_reg_class, true);
295 /*
296 * For ccode == kCondEq:
297 *
298 * 1) When the true case is zero and result_reg is not same as src_reg:
299 * xor result_reg, result_reg
300 * cmp $0, src_reg
301 * mov t1, $false_case
302 * cmovnz result_reg, t1
303 * 2) When the false case is zero and result_reg is not same as src_reg:
304 * xor result_reg, result_reg
305 * cmp $0, src_reg
306 * mov t1, $true_case
307 * cmovz result_reg, t1
308 * 3) All other cases (we do compare first to set eflags):
309 * cmp $0, src_reg
310 * mov result_reg, $false_case
311 * mov t1, $true_case
312 * cmovz result_reg, t1
313 */
314 // FIXME: depending on how you use registers you could get a false != mismatch when dealing
315 // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
316 const bool result_reg_same_as_src =
317 (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum());
318 const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
319 const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
320 const bool catch_all_case = !(true_zero_case || false_zero_case);
321
322 if (true_zero_case || false_zero_case) {
323 OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
324 }
325
326 if (true_zero_case || false_zero_case || catch_all_case) {
327 OpRegImm(kOpCmp, rl_src.reg, 0);
328 }
329
330 if (catch_all_case) {
331 OpRegImm(kOpMov, rl_result.reg, false_val);
332 }
333
334 if (true_zero_case || false_zero_case || catch_all_case) {
335 ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
336 int immediateForTemp = true_zero_case ? false_val : true_val;
337 RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
338 OpRegImm(kOpMov, temp1_reg, immediateForTemp);
339
340 OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);
341
342 FreeTemp(temp1_reg);
343 }
344 }
345 } else {
346 rl_src = LoadValue(rl_src, src_reg_class);
347 RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
348 RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
349 rl_true = LoadValue(rl_true, result_reg_class);
350 rl_false = LoadValue(rl_false, result_reg_class);
351 rl_result = EvalLoc(rl_dest, result_reg_class, true);
352
353 /*
354 * For ccode == kCondEq:
355 *
356 * 1) When true case is already in place:
357 * cmp $0, src_reg
358 * cmovnz result_reg, false_reg
359 * 2) When false case is already in place:
360 * cmp $0, src_reg
361 * cmovz result_reg, true_reg
362 * 3) When neither cases are in place:
363 * cmp $0, src_reg
364 * mov result_reg, false_reg
365 * cmovz result_reg, true_reg
366 */
367
368 // kMirOpSelect is generated just for conditional cases when comparison is done with zero.
369 OpRegImm(kOpCmp, rl_src.reg, 0);
370
371 if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {
372 OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg);
373 } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {
374 OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
375 } else {
376 OpRegCopy(rl_result.reg, rl_false.reg);
377 OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
378 }
379 }
380
381 StoreValue(rl_dest, rl_result);
382 }
383
384 void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
385 LIR* taken = &block_label_list_[bb->taken];
386 RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
387 RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
388 ConditionCode ccode = mir->meta.ccode;
389
390 if (rl_src1.is_const) {
391 std::swap(rl_src1, rl_src2);
392 ccode = FlipComparisonOrder(ccode);
393 }
394 if (rl_src2.is_const) {
395 // Do special compare/branch against simple const operand
396 int64_t val = mir_graph_->ConstantValueWide(rl_src2);
397 GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
398 return;
399 }
400
401 if (cu_->target64) {
402 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
403 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
404
405 OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
406 OpCondBranch(ccode, taken);
407 return;
408 }
409
410 FlushAllRegs();
411 LockCallTemps(); // Prepare for explicit register usage
412 RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
413 RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
414 LoadValueDirectWideFixed(rl_src1, r_tmp1);
415 LoadValueDirectWideFixed(rl_src2, r_tmp2);
416
417 // Swap operands and condition code to prevent use of zero flag.
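// After sub/sbb, SF and OF reflect the full 64-bit difference, but ZF only reflects the
// high word; kCondLe/kCondGt would need ZF, so they are rewritten as kCondGe/kCondLt below.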
418 if (ccode == kCondLe || ccode == kCondGt) {
419 // Compute (r3:r2) = (r3:r2) - (r1:r0)
420 OpRegReg(kOpSub, rs_r2, rs_r0); // r2 = r2 - r0
421 OpRegReg(kOpSbc, rs_r3, rs_r1); // r3 = r3 - r1 - CF
422 } else {
423 // Compute (r1:r0) = (r1:r0) - (r3:r2)
424 OpRegReg(kOpSub, rs_r0, rs_r2); // r0 = r0 - r2
425 OpRegReg(kOpSbc, rs_r1, rs_r3); // r1 = r1 - r3 - CF
426 }
427 switch (ccode) {
428 case kCondEq:
429 case kCondNe:
430 OpRegReg(kOpOr, rs_r0, rs_r1); // r0 = r0 | r1
431 break;
432 case kCondLe:
433 ccode = kCondGe;
434 break;
435 case kCondGt:
436 ccode = kCondLt;
437 break;
438 case kCondLt:
439 case kCondGe:
440 break;
441 default:
442 LOG(FATAL) << "Unexpected ccode: " << ccode;
443 }
444 OpCondBranch(ccode, taken);
445 }
446
447 void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
448 int64_t val, ConditionCode ccode) {
449 int32_t val_lo = Low32Bits(val);
450 int32_t val_hi = High32Bits(val);
451 LIR* taken = &block_label_list_[bb->taken];
452 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
453 bool is_equality_test = ccode == kCondEq || ccode == kCondNe;
454
455 if (cu_->target64) {
456 if (is_equality_test && val == 0) {
457 // We can simplify the comparison for ==, != against 0.
458 NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
459 } else if (is_equality_test && val_hi == 0 && val_lo > 0) {
460 OpRegImm(kOpCmp, rl_src1.reg, val_lo);
461 } else {
462 RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
463 LoadConstantWide(tmp, val);
464 OpRegReg(kOpCmp, rl_src1.reg, tmp);
465 FreeTemp(tmp);
466 }
467 OpCondBranch(ccode, taken);
468 return;
469 }
470
471 if (is_equality_test && val != 0) {
472 rl_src1 = ForceTempWide(rl_src1);
473 }
474 RegStorage low_reg = rl_src1.reg.GetLow();
475 RegStorage high_reg = rl_src1.reg.GetHigh();
476
477 if (is_equality_test) {
478 // We can simplify the comparison for ==, != against 0.
479 if (val == 0) {
480 if (IsTemp(low_reg)) {
481 OpRegReg(kOpOr, low_reg, high_reg);
482 // We have now changed it; ignore the old values.
483 Clobber(rl_src1.reg);
484 } else {
485 RegStorage t_reg = AllocTemp();
486 OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
487 FreeTemp(t_reg);
488 }
489 OpCondBranch(ccode, taken);
490 return;
491 }
492
493 // Need to compute the actual value for ==, !=.
494 OpRegImm(kOpSub, low_reg, val_lo);
495 NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
496 OpRegReg(kOpOr, high_reg, low_reg);
497 Clobber(rl_src1.reg);
498 } else if (ccode == kCondLe || ccode == kCondGt) {
499 // Swap operands and condition code to prevent use of zero flag.
500 RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
501 LoadConstantWide(tmp, val);
502 OpRegReg(kOpSub, tmp.GetLow(), low_reg);
503 OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
504 ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
505 FreeTemp(tmp);
506 } else {
507 // We can use a compare for the low word to set CF.
508 OpRegImm(kOpCmp, low_reg, val_lo);
509 if (IsTemp(high_reg)) {
510 NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
511 // We have now changed it; ignore the old values.
512 Clobber(rl_src1.reg);
513 } else {
514 // mov temp_reg, high_reg; sbb temp_reg, high_constant
515 RegStorage t_reg = AllocTemp();
516 OpRegCopy(t_reg, high_reg);
517 NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
518 FreeTemp(t_reg);
519 }
520 }
521
522 OpCondBranch(ccode, taken);
523 }
524
525 void X86Mir2Lir::CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long) {
526 // It does not make sense to calculate magic and shift for zero divisor.
527 DCHECK_NE(divisor, 0);
528
529 /* According to H. S. Warren's Hacker's Delight Chapter 10 and
530 * T. Granlund, P. L. Montgomery's "Division by Invariant Integers Using Multiplication".
531 * The magic number M and shift S can be calculated in the following way:
532 * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
533 * where divisor(d) >=2.
534 * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
535 * where divisor(d) <= -2.
536 * Thus nc can be calculated like:
537 * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long
538 * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long
539 *
540 * So the shift p is the smallest p satisfying
541 * 2^p > nc * (d - 2^p % d), where d >= 2
542 * 2^p > nc * (d + 2^p % d), where d <= -2.
543 *
544 * the magic number M is calculated by
545 * M = (2^p + d - 2^p % d) / d, where d >= 2
546 * M = (2^p - d - 2^p % d) / d, where d <= -2.
547 *
548 * Notice that p is always bigger than or equal to 32/64, so we just return p-32/p-64 as
549 * the shift number S.
550 */
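// For example, Hacker's Delight gives magic = 0x92492493 and shift = 2 for a 32-bit divide
// by 7; since that magic is negative and the divisor positive, the caller adds the dividend
// back before the arithmetic shift (see GenDivRemLit below).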
551
552 int64_t p = (is_long) ? 63 : 31;
553 const uint64_t exp = (is_long) ? 0x8000000000000000ULL : 0x80000000U;
554
555 // Initialize the computations.
556 uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
557 uint64_t tmp = exp + ((is_long) ? static_cast<uint64_t>(divisor) >> 63 :
558 static_cast<uint32_t>(divisor) >> 31);
559 uint64_t abs_nc = tmp - 1 - tmp % abs_d;
560 uint64_t quotient1 = exp / abs_nc;
561 uint64_t remainder1 = exp % abs_nc;
562 uint64_t quotient2 = exp / abs_d;
563 uint64_t remainder2 = exp % abs_d;
564
565 /*
566 * To avoid handling both positive and negative divisor, Hacker's Delight
567 * introduces a method to handle these 2 cases together to avoid duplication.
568 */
569 uint64_t delta;
570 do {
571 p++;
572 quotient1 = 2 * quotient1;
573 remainder1 = 2 * remainder1;
574 if (remainder1 >= abs_nc) {
575 quotient1++;
576 remainder1 = remainder1 - abs_nc;
577 }
578 quotient2 = 2 * quotient2;
579 remainder2 = 2 * remainder2;
580 if (remainder2 >= abs_d) {
581 quotient2++;
582 remainder2 = remainder2 - abs_d;
583 }
584 delta = abs_d - remainder2;
585 } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));
586
587 magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
588
589 if (!is_long) {
590 magic = static_cast<int>(magic);
591 }
592
593 shift = (is_long) ? p - 64 : p - 32;
594 }
595
596 RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) {
597 LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
598 return rl_dest;
599 }
600
601 RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
602 int imm, bool is_div) {
603 // Use a multiply (and fixup) to perform an int div/rem by a constant.
604 RegLocation rl_result;
605
606 if (imm == 1) {
607 rl_result = EvalLoc(rl_dest, kCoreReg, true);
608 if (is_div) {
609 // x / 1 == x.
610 LoadValueDirectFixed(rl_src, rl_result.reg);
611 } else {
612 // x % 1 == 0.
613 LoadConstantNoClobber(rl_result.reg, 0);
614 }
615 } else if (imm == -1) { // handle 0x80000000 / -1 special case.
616 rl_result = EvalLoc(rl_dest, kCoreReg, true);
617 if (is_div) {
618 LoadValueDirectFixed(rl_src, rl_result.reg);
619 OpRegImm(kOpCmp, rl_result.reg, 0x80000000);
620 LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
621
622 // for x != MIN_INT, x / -1 == -x.
623 NewLIR1(kX86Neg32R, rl_result.reg.GetReg());
624
625 // The result register already contains the right value (0x80000000).
626 minint_branch->target = NewLIR0(kPseudoTargetLabel);
627 } else {
628 // x % -1 == 0.
629 LoadConstantNoClobber(rl_result.reg, 0);
630 }
631 } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
632 // Division using shifting.
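// For a negative dividend we must add (|imm| - 1) before the arithmetic shift so that the
// quotient rounds toward zero; the test/cmov keeps the unadjusted value when the dividend
// is non-negative.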
633 rl_src = LoadValue(rl_src, kCoreReg);
634 rl_result = EvalLoc(rl_dest, kCoreReg, true);
635 if (IsSameReg(rl_result.reg, rl_src.reg)) {
636 RegStorage rs_temp = AllocTypedTemp(false, kCoreReg);
637 rl_result.reg.SetReg(rs_temp.GetReg());
638 }
639 NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1);
640 NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
641 OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
642 int shift_amount = LowestSetBit(imm);
643 OpRegImm(kOpAsr, rl_result.reg, shift_amount);
644 if (imm < 0) {
645 OpReg(kOpNeg, rl_result.reg);
646 }
647 } else {
648 CHECK(imm <= -2 || imm >= 2);
649
650 // Use H. S. Warren's Hacker's Delight Chapter 10 and
651 // T. Granlund, P. L. Montgomery's "Division by Invariant Integers Using Multiplication".
652 int64_t magic;
653 int shift;
654 CalculateMagicAndShift((int64_t)imm, magic, shift, false /* is_long */);
655
656 /*
657 * For imm >= 2,
658 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
659 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0.
660 * For imm <= -2,
661 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0
662 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
663 * We implement this algorithm in the following way:
664 * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX
665 * 2. if imm > 0 and magic < 0, add numerator to EDX
666 * if imm < 0 and magic > 0, sub numerator from EDX
667 * 3. if S !=0, SAR S bits for EDX
668 * 4. add 1 to EDX if EDX < 0
669 * 5. Thus, EDX is the quotient
670 */
671
672 FlushReg(rs_r0);
673 Clobber(rs_r0);
674 LockTemp(rs_r0);
675 FlushReg(rs_r2);
676 Clobber(rs_r2);
677 LockTemp(rs_r2);
678
679 // Assume that the result will be in EDX.
680 rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r2, INVALID_SREG, INVALID_SREG};
681
682 // Numerator into EAX.
683 RegStorage numerator_reg;
684 if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
685 // We will need the value later.
686 rl_src = LoadValue(rl_src, kCoreReg);
687 numerator_reg = rl_src.reg;
688 OpRegCopy(rs_r0, numerator_reg);
689 } else {
690 // Only need this once. Just put it into EAX.
691 LoadValueDirectFixed(rl_src, rs_r0);
692 }
693
694 // EDX = magic.
695 LoadConstantNoClobber(rs_r2, magic);
696
697 // EDX:EAX = magic * dividend.
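// One-operand imul multiplies EAX by the given register and leaves the 64-bit product in EDX:EAX.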
698 NewLIR1(kX86Imul32DaR, rs_r2.GetReg());
699
700 if (imm > 0 && magic < 0) {
701 // Add numerator to EDX.
702 DCHECK(numerator_reg.Valid());
703 NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg());
704 } else if (imm < 0 && magic > 0) {
705 DCHECK(numerator_reg.Valid());
706 NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg());
707 }
708
709 // Do we need the shift?
710 if (shift != 0) {
711 // Shift EDX by 'shift' bits.
712 NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift);
713 }
714
715 // Add 1 to EDX if EDX < 0.
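// This is done branch-free: copy the quotient, shift its sign bit down to bit 0, and add it back.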
716
717 // Move EDX to EAX.
718 OpRegCopy(rs_r0, rs_r2);
719
720 // Move sign bit to bit 0, zeroing the rest.
721 NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31);
722
723 // EDX = EDX + EAX.
724 NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg());
725
726 // Quotient is in EDX.
727 if (!is_div) {
728 // We need to compute the remainder.
729 // Remainder is numerator - (quotient * imm).
730 DCHECK(numerator_reg.Valid());
731 OpRegCopy(rs_r0, numerator_reg);
732
733 // EDX = quotient * imm.
734 OpRegRegImm(kOpMul, rs_r2, rs_r2, imm);
735
736 // EAX -= EDX, i.e. numerator - (quotient * imm).
737 NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg());
738
739 // For this case, return the result in EAX.
740 rl_result.reg.SetReg(r0);
741 }
742 }
743
744 return rl_result;
745 }
746
747 RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
748 bool is_div) {
749 LOG(FATAL) << "Unexpected use of GenDivRem for x86";
750 return rl_dest;
751 }
752
753 RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
754 RegLocation rl_src2, bool is_div, bool check_zero) {
755 // We have to use fixed registers, so flush all the temps.
756 FlushAllRegs();
757 LockCallTemps(); // Prepare for explicit register usage.
758
759 // Load LHS into EAX.
760 LoadValueDirectFixed(rl_src1, rs_r0);
761
762 // Load RHS into EBX.
763 LoadValueDirectFixed(rl_src2, rs_r1);
764
765 // Copy LHS sign bit into EDX.
766 NewLIR0(kx86Cdq32Da);
767
768 if (check_zero) {
769 // Handle division by zero case.
770 GenDivZeroCheck(rs_r1);
771 }
772
773 // Have to catch 0x80000000/-1 case, or we will get an exception!
774 OpRegImm(kOpCmp, rs_r1, -1);
775 LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
776
777 // RHS is -1.
778 OpRegImm(kOpCmp, rs_r0, 0x80000000);
779 LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
780
781 // In 0x80000000/-1 case.
782 if (!is_div) {
783 // For DIV, EAX is already right. For REM, we need EDX to be 0.
784 LoadConstantNoClobber(rs_r2, 0);
785 }
786 LIR* done = NewLIR1(kX86Jmp8, 0);
787
788 // Expected case.
789 minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
790 minint_branch->target = minus_one_branch->target;
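// One-operand idiv divides EDX:EAX by the given register, leaving the quotient in EAX
// and the remainder in EDX.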
791 NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg());
792 done->target = NewLIR0(kPseudoTargetLabel);
793
794 // Result is in EAX for div and EDX for rem.
795 RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
796 if (!is_div) {
797 rl_result.reg.SetReg(r2);
798 }
799 return rl_result;
800 }
801
802 bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
803 DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
804
805 if (is_long && cu_->instruction_set == kX86) {
806 return false;
807 }
808
809 // Get the two arguments to the invoke and place them in GP registers.
810 RegLocation rl_src1 = info->args[0];
811 RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
812 rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
813 rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
814
815 RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
816 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
817
818 /*
819 * If the result register is the same as the second element, then we need to be careful.
820 * The reason is that the first copy will inadvertently clobber the second element with
821 * the first one thus yielding the wrong result. Thus we do a swap in that case.
822 */
823 if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
824 std::swap(rl_src1, rl_src2);
825 }
826
827 // Pick the first integer as min/max.
828 OpRegCopy(rl_result.reg, rl_src1.reg);
829
830 // If the integers are both in the same register, then there is nothing else to do
831 // because they are equal and we have already moved one into the result.
832 if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) {
833 // It is possible we didn't pick correctly so do the actual comparison now.
834 OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
835
836 // Conditionally move the other integer into the destination register.
837 ConditionCode condition_code = is_min ? kCondGt : kCondLt;
838 OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg);
839 }
840
841 if (is_long) {
842 StoreValueWide(rl_dest, rl_result);
843 } else {
844 StoreValue(rl_dest, rl_result);
845 }
846 return true;
847 }
848
849 bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
850 RegLocation rl_src_address = info->args[0]; // long address
851 RegLocation rl_address;
852 if (!cu_->target64) {
853 rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0]
854 rl_address = LoadValue(rl_src_address, kCoreReg);
855 } else {
856 rl_address = LoadValueWide(rl_src_address, kCoreReg);
857 }
858 RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
859 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
860 // Unaligned access is allowed on x86.
861 LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
862 if (size == k64) {
863 StoreValueWide(rl_dest, rl_result);
864 } else {
865 DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
866 StoreValue(rl_dest, rl_result);
867 }
868 return true;
869 }
870
871 bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
872 RegLocation rl_src_address = info->args[0]; // long address
873 RegLocation rl_address;
874 if (!cu_->target64) {
875 rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0]
876 rl_address = LoadValue(rl_src_address, kCoreReg);
877 } else {
878 rl_address = LoadValueWide(rl_src_address, kCoreReg);
879 }
880 RegLocation rl_src_value = info->args[2]; // [size] value
881 RegLocation rl_value;
882 if (size == k64) {
883 // Unaligned access is allowed on x86.
884 rl_value = LoadValueWide(rl_src_value, kCoreReg);
885 } else {
886 DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
887 // In 32-bit mode, only registers with byte forms (EAX, ECX, EDX, EBX) can be used with Mov8MR.
888 if (!cu_->target64 && size == kSignedByte) {
889 rl_src_value = UpdateLocTyped(rl_src_value, kCoreReg);
890 if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
891 RegStorage temp = AllocateByteRegister();
892 OpRegCopy(temp, rl_src_value.reg);
893 rl_value.reg = temp;
894 } else {
895 rl_value = LoadValue(rl_src_value, kCoreReg);
896 }
897 } else {
898 rl_value = LoadValue(rl_src_value, kCoreReg);
899 }
900 }
901 StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
902 return true;
903 }
904
905 void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
906 NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset);
907 }
908
909 void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
910 DCHECK_EQ(kX86, cu_->instruction_set);
911 NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
912 }
913
914 void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
915 DCHECK_EQ(kX86_64, cu_->instruction_set);
916 NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
917 }
918
919 static bool IsInReg(X86Mir2Lir *pMir2Lir, const RegLocation &rl, RegStorage reg) {
920 return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home);
921 }
922
923 bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
924 DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
925 // Unused - RegLocation rl_src_unsafe = info->args[0];
926 RegLocation rl_src_obj = info->args[1]; // Object - known non-null
927 RegLocation rl_src_offset = info->args[2]; // long low
928 if (!cu_->target64) {
929 rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3]
930 }
931 RegLocation rl_src_expected = info->args[4]; // int, long or Object
932 // If is_long, high half is in info->args[5]
933 RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object
934 // If is_long, high half is in info->args[7]
935
936 if (is_long && cu_->target64) {
937 // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX.
938 FlushReg(rs_r0q);
939 Clobber(rs_r0q);
940 LockTemp(rs_r0q);
941
942 RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
943 RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
944 RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
945 LoadValueDirectWide(rl_src_expected, rs_r0q);
946 NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
947 rl_new_value.reg.GetReg());
948
949 // After a store we need to insert a barrier in case of a potential load. Since the
950 // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
951 GenMemBarrier(kAnyAny);
952
953 FreeTemp(rs_r0q);
954 } else if (is_long) {
955 // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
956 // TODO: CFI support.
957 FlushAllRegs();
958 LockCallTemps();
959 RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
960 RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
961 LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
962 LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
963 // FIXME: needs 64-bit update.
964 const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
965 const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
966 DCHECK(!obj_in_si || !obj_in_di);
967 const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
968 const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
969 DCHECK(!off_in_si || !off_in_di);
970 // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
971 RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
972 RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
973 bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
974 bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
975 if (push_di) {
976 NewLIR1(kX86Push32R, rs_rDI.GetReg());
977 MarkTemp(rs_rDI);
978 LockTemp(rs_rDI);
979 }
980 if (push_si) {
981 NewLIR1(kX86Push32R, rs_rSI.GetReg());
982 MarkTemp(rs_rSI);
983 LockTemp(rs_rSI);
984 }
985 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
986 const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
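// The pushes above moved ESP, so the stack-relative home-location offsets used below
// must be biased by the number of bytes pushed.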
987 if (!obj_in_si && !obj_in_di) {
988 LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
989 // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
990 DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
991 int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
992 AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
993 }
994 if (!off_in_si && !off_in_di) {
995 LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
996 // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
997 DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
998 int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
999 AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
1000 }
1001 NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);
1002
1003 // After a store we need to insert barrier to prevent reordering with either
1004 // earlier or later memory accesses. Since
1005 // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
1006 // and it will be associated with the cmpxchg instruction, preventing both.
1007 GenMemBarrier(kAnyAny);
1008
1009 if (push_si) {
1010 FreeTemp(rs_rSI);
1011 UnmarkTemp(rs_rSI);
1012 NewLIR1(kX86Pop32R, rs_rSI.GetReg());
1013 }
1014 if (push_di) {
1015 FreeTemp(rs_rDI);
1016 UnmarkTemp(rs_rDI);
1017 NewLIR1(kX86Pop32R, rs_rDI.GetReg());
1018 }
1019 FreeCallTemps();
1020 } else {
1021 // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX.
1022 FlushReg(rs_r0);
1023 Clobber(rs_r0);
1024 LockTemp(rs_r0);
1025
1026 RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
1027 RegLocation rl_new_value = LoadValue(rl_src_new_value);
1028
1029 if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
1030 // Mark card for object assuming new value is stored.
1031 FreeTemp(rs_r0); // Temporarily release EAX for MarkGCCard().
1032 MarkGCCard(rl_new_value.reg, rl_object.reg);
1033 LockTemp(rs_r0);
1034 }
1035
1036 RegLocation rl_offset;
1037 if (cu_->target64) {
1038 rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
1039 } else {
1040 rl_offset = LoadValue(rl_src_offset, kCoreReg);
1041 }
1042 LoadValueDirect(rl_src_expected, rs_r0);
1043 NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
1044 rl_new_value.reg.GetReg());
1045
1046 // After a store we need to insert barrier to prevent reordering with either
1047 // earlier or later memory accesses. Since
1048 // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
1049 // and it will be associated with the cmpxchg instruction, preventing both.
1050 GenMemBarrier(kAnyAny);
1051
1052 FreeTemp(rs_r0);
1053 }
1054
1055 // Convert ZF to boolean
1056 RegLocation rl_dest = InlineTarget(info); // boolean place for result
1057 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1058 RegStorage result_reg = rl_result.reg;
1059
1060 // For 32-bit, SETcc only works with registers that have byte forms (EAX, ECX, EDX, EBX).
1061 if (!IsByteRegister(result_reg)) {
1062 result_reg = AllocateByteRegister();
1063 }
1064 NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
1065 NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
1066 if (IsTemp(result_reg)) {
1067 FreeTemp(result_reg);
1068 }
1069 StoreValue(rl_dest, rl_result);
1070 return true;
1071 }
1072
1073 LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
1074 CHECK(base_of_code_ != nullptr);
1075
1076 // Get the address of the start of the method.
1077 RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
1078 if (rl_method.wide) {
1079 LoadValueDirectWideFixed(rl_method, reg);
1080 } else {
1081 LoadValueDirectFixed(rl_method, reg);
1082 }
1083 store_method_addr_used_ = true;
1084
1085 // Load the proper value from the literal area.
1086 // We don't know the proper offset for the value, so pick one that will force
1087 // a 4-byte offset. We will fix this up in the assembler later to have the right
1088 // value.
1089 ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
1090 LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256,
1091 0, 0, target);
1092 res->target = target;
1093 res->flags.fixup = kFixupLoad;
1094 store_method_addr_used_ = true;
1095 return res;
1096 }
1097
1098 LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
1099 LOG(FATAL) << "Unexpected use of OpVldm for x86";
1100 return NULL;
1101 }
1102
1103 LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) {
1104 LOG(FATAL) << "Unexpected use of OpVstm for x86";
1105 return NULL;
1106 }
1107
1108 void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
1109 RegLocation rl_result, int lit,
1110 int first_bit, int second_bit) {
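// lit has exactly two bits set, at positions first_bit and second_bit, so src * lit is
// computed as ((src << (second_bit - first_bit)) + src) << first_bit.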
1111 RegStorage t_reg = AllocTemp();
1112 OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
1113 OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
1114 FreeTemp(t_reg);
1115 if (first_bit != 0) {
1116 OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
1117 }
1118 }
1119
1120 void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
1121 if (cu_->target64) {
1122 DCHECK(reg.Is64Bit());
1123
1124 NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
1125 } else {
1126 DCHECK(reg.IsPair());
1127
1128 // We are not supposed to clobber the incoming storage, so allocate a temporary.
1129 RegStorage t_reg = AllocTemp();
1130 // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
1131 OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
1132 // The temp is no longer needed so free it at this time.
1133 FreeTemp(t_reg);
1134 }
1135
1136 // In case of zero, throw ArithmeticException.
1137 GenDivZeroCheck(kCondEq);
1138 }
1139
1140 void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index,
1141 RegStorage array_base,
1142 int len_offset) {
1143 class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
1144 public:
1145 ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch,
1146 RegStorage index, RegStorage array_base, int32_t len_offset)
1147 : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch),
1148 index_(index), array_base_(array_base), len_offset_(len_offset) {
1149 }
1150
1151 void Compile() OVERRIDE {
1152 m2l_->ResetRegPool();
1153 m2l_->ResetDefTracking();
1154 GenerateTargetLabel(kPseudoThrowTarget);
1155
1156 RegStorage new_index = index_;
1157 // Move index out of kArg1, either directly to kArg0, or to kArg2.
1158 // TODO: clean-up to check not a number but with type
1159 if (index_ == m2l_->TargetReg(kArg1, kNotWide)) {
1160 if (array_base_ == m2l_->TargetReg(kArg0, kRef)) {
1161 m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_);
1162 new_index = m2l_->TargetReg(kArg2, kNotWide);
1163 } else {
1164 m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_);
1165 new_index = m2l_->TargetReg(kArg0, kNotWide);
1166 }
1167 }
1168 // Load array length to kArg1.
1169 X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
1170 x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
1171 x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, new_index,
1172 m2l_->TargetReg(kArg1, kNotWide), true);
1173 }
1174
1175 private:
1176 const RegStorage index_;
1177 const RegStorage array_base_;
1178 const int32_t len_offset_;
1179 };
1180
1181 OpRegMem(kOpCmp, index, array_base, len_offset);
1182 MarkPossibleNullPointerException(0);
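// Unsigned comparison: a negative index appears as a huge unsigned value, so the
// kCondUge branch below also catches it.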
1183 LIR* branch = OpCondBranch(kCondUge, nullptr);
1184 AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
1185 index, array_base, len_offset));
1186 }
1187
1188 void X86Mir2Lir::GenArrayBoundsCheck(int32_t index,
1189 RegStorage array_base,
1190 int32_t len_offset) {
1191 class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
1192 public:
1193 ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch,
1194 int32_t index, RegStorage array_base, int32_t len_offset)
1195 : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch),
1196 index_(index), array_base_(array_base), len_offset_(len_offset) {
1197 }
1198
1199 void Compile() OVERRIDE {
1200 m2l_->ResetRegPool();
1201 m2l_->ResetDefTracking();
1202 GenerateTargetLabel(kPseudoThrowTarget);
1203
1204 // Load array length to kArg1.
1205 X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
1206 x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
1207 x86_m2l->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_);
1208 x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, m2l_->TargetReg(kArg0, kNotWide),
1209 m2l_->TargetReg(kArg1, kNotWide), true);
1210 }
1211
1212 private:
1213 const int32_t index_;
1214 const RegStorage array_base_;
1215 const int32_t len_offset_;
1216 };
1217
1218 NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
1219 MarkPossibleNullPointerException(0);
1220 LIR* branch = OpCondBranch(kCondLs, nullptr);
1221 AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
1222 index, array_base, len_offset));
1223 }
1224
1225 // Test suspend flag, return target of taken suspend branch
1226 LIR* X86Mir2Lir::OpTestSuspend(LIR* target) {
1227 if (cu_->target64) {
1228 OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0);
1229 } else {
1230 OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0);
1231 }
1232 return OpCondBranch((target == NULL) ? kCondNe : kCondEq, target);
1233 }
1234
1235 // Decrement register and branch on condition
1236 LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
1237 OpRegImm(kOpSub, reg, 1);
1238 return OpCondBranch(c_code, target);
1239 }
1240
1241 bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
1242 RegLocation rl_src, RegLocation rl_dest, int lit) {
1243 LOG(FATAL) << "Unexpected use of SmallLiteralDivRem in x86";
1244 return false;
1245 }
1246
1247 bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
1248 LOG(FATAL) << "Unexpected use of easyMultiply in x86";
1249 return false;
1250 }
1251
1252 LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
1253 LOG(FATAL) << "Unexpected use of OpIT in x86";
1254 return NULL;
1255 }
1256
1257 void X86Mir2Lir::OpEndIT(LIR* it) {
1258 LOG(FATAL) << "Unexpected use of OpEndIT in x86";
1259 }
1260
1261 void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) {
1262 switch (val) {
1263 case 0:
1264 NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
1265 break;
1266 case 1:
1267 OpRegCopy(dest, src);
1268 break;
1269 default:
1270 OpRegRegImm(kOpMul, dest, src, val);
1271 break;
1272 }
1273 }
1274
1275 void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
1276 // All memory accesses below reference dalvik regs.
1277 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1278
1279 LIR *m;
1280 switch (val) {
1281 case 0:
1282 NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
1283 break;
1284 case 1:
1285 LoadBaseDisp(rs_rX86_SP, displacement, dest, k32, kNotVolatile);
1286 break;
1287 default:
1288 m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
1289 rs_rX86_SP.GetReg(), displacement, val);
1290 AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
1291 break;
1292 }
1293 }
1294
1295 void X86Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
1296 RegLocation rl_src2) {
1297 if (!cu_->target64) {
1298 // Some x86 32-bit ops fall back to the generic Mir2Lir implementation.
1299 switch (opcode) {
1300 case Instruction::NOT_LONG:
1301 case Instruction::DIV_LONG:
1302 case Instruction::DIV_LONG_2ADDR:
1303 case Instruction::REM_LONG:
1304 case Instruction::REM_LONG_2ADDR:
1305 Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
1306 return;
1307
1308 default:
1309 // Everything else we can handle.
1310 break;
1311 }
1312 }
1313
1314 switch (opcode) {
1315 case Instruction::NOT_LONG:
1316 GenNotLong(rl_dest, rl_src2);
1317 return;
1318
1319 case Instruction::ADD_LONG:
1320 case Instruction::ADD_LONG_2ADDR:
1321 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1322 return;
1323
1324 case Instruction::SUB_LONG:
1325 case Instruction::SUB_LONG_2ADDR:
1326 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
1327 return;
1328
1329 case Instruction::MUL_LONG:
1330 case Instruction::MUL_LONG_2ADDR:
1331 GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
1332 return;
1333
1334 case Instruction::DIV_LONG:
1335 case Instruction::DIV_LONG_2ADDR:
1336 GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true);
1337 return;
1338
1339 case Instruction::REM_LONG:
1340 case Instruction::REM_LONG_2ADDR:
1341 GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false);
1342 return;
1343
1344 case Instruction::AND_LONG_2ADDR:
1345 case Instruction::AND_LONG:
1346 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1347 return;
1348
1349 case Instruction::OR_LONG:
1350 case Instruction::OR_LONG_2ADDR:
1351 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1352 return;
1353
1354 case Instruction::XOR_LONG:
1355 case Instruction::XOR_LONG_2ADDR:
1356 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1357 return;
1358
1359 case Instruction::NEG_LONG:
1360 GenNegLong(rl_dest, rl_src2);
1361 return;
1362
1363 default:
1364 LOG(FATAL) << "Invalid long arith op";
1365 return;
1366 }
1367 }
1368
1369 bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val) {
1370 // All memory accesses below reference dalvik regs.
1371 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1372
1373 if (val == 0) {
1374 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1375 if (cu_->target64) {
1376 OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
1377 } else {
1378 OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow());
1379 OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());
1380 }
1381 StoreValueWide(rl_dest, rl_result);
1382 return true;
1383 } else if (val == 1) {
1384 StoreValueWide(rl_dest, rl_src1);
1385 return true;
1386 } else if (val == 2) {
1387 GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
1388 return true;
1389 } else if (IsPowerOfTwo(val)) {
1390 int shift_amount = LowestSetBit(val);
1391 if (!BadOverlap(rl_src1, rl_dest)) {
1392 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1393 RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1,
1394 shift_amount);
1395 StoreValueWide(rl_dest, rl_result);
1396 return true;
1397 }
1398 }
1399
1400 // Okay, on 32b just bite the bullet and do it, still better than the general case.
1401 if (!cu_->target64) {
1402 int32_t val_lo = Low32Bits(val);
1403 int32_t val_hi = High32Bits(val);
1404 FlushAllRegs();
1405 LockCallTemps(); // Prepare for explicit register usage.
1406 rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
1407 bool src1_in_reg = rl_src1.location == kLocPhysReg;
1408 int displacement = SRegOffset(rl_src1.s_reg_low);
1409
1410 // ECX <- 1H * 2L
1411 // EAX <- 1L * 2H
1412 if (src1_in_reg) {
1413 GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo);
1414 GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi);
1415 } else {
1416 GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
1417 GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
1418 }
1419
1420 // ECX <- ECX + EAX (2H * 1L) + (1H * 2L)
1421 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
1422
1423 // EAX <- 2L
1424 LoadConstantNoClobber(rs_r0, val_lo);
1425
1426 // EDX:EAX <- 2L * 1L (double precision)
1427 if (src1_in_reg) {
1428 NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
1429 } else {
1430 LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET);
1431 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1432 true /* is_load */, true /* is_64bit */);
1433 }
1434
1435 // EDX <- EDX + ECX (add high words)
1436 NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
1437
1438 // Result is EDX:EAX
1439 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
1440 RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
1441 StoreValueWide(rl_dest, rl_result);
1442 return true;
1443 }
1444 return false;
1445 }
1446
1447 void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
1448 RegLocation rl_src2) {
1449 if (rl_src1.is_const) {
1450 std::swap(rl_src1, rl_src2);
1451 }
1452
1453 if (rl_src2.is_const) {
1454 if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2))) {
1455 return;
1456 }
1457 }
1458
1459 // All memory accesses below reference dalvik regs.
1460 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1461
1462 if (cu_->target64) {
1463 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1464 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1465 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1466 if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
1467 rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
1468 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
1469 } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() &&
1470 rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
1471 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg());
1472 } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
1473 rl_result.reg.GetReg() != rl_src2.reg.GetReg()) {
1474 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
1475 } else {
1476 OpRegCopy(rl_result.reg, rl_src1.reg);
1477 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
1478 }
1479 StoreValueWide(rl_dest, rl_result);
1480 return;
1481 }
1482
1483 // Not multiplying by a constant. Do it the hard way
1484 // Check for V*V. We can eliminate a multiply in that case, as 2L*1H == 2H*1L.
1485 bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
1486 mir_graph_->SRegToVReg(rl_src2.s_reg_low);
1487
1488 FlushAllRegs();
1489 LockCallTemps(); // Prepare for explicit register usage.
1490 rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
1491 rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
1492
1493 // At this point, the VRs are in their home locations.
1494 bool src1_in_reg = rl_src1.location == kLocPhysReg;
1495 bool src2_in_reg = rl_src2.location == kLocPhysReg;
1496
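// Schoolbook decomposition, keeping only what survives mod 2^64:
//   result = widen(1L * 2L) + ((1H * 2L + 1L * 2H) << 32)
// EDX:EAX receives the widening low*low product; ECX accumulates the cross terms.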
1497 // ECX <- 1H
1498 if (src1_in_reg) {
1499 NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
1500 } else {
1501 LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32,
1502 kNotVolatile);
1503 }
1504
1505 if (is_square) {
1506 // Take advantage of the fact that the values are the same.
1507 // ECX <- ECX * 2L (1H * 2L)
1508 if (src2_in_reg) {
1509 NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
1510 } else {
1511 int displacement = SRegOffset(rl_src2.s_reg_low);
1512 LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
1513 displacement + LOWORD_OFFSET);
1514 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1515 true /* is_load */, true /* is_64bit */);
1516 }
1517
1518 // ECX <- 2*ECX (2H * 1L) + (1H * 2L)
1519 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg());
1520 } else {
1521 // EAX <- 2H
1522 if (src2_in_reg) {
1523 NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
1524 } else {
1525 LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32,
1526 kNotVolatile);
1527 }
1528
1529 // EAX <- EAX * 1L (2H * 1L)
1530 if (src1_in_reg) {
1531 NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg());
1532 } else {
1533 int displacement = SRegOffset(rl_src1.s_reg_low);
1534 LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP.GetReg(),
1535 displacement + LOWORD_OFFSET);
1536 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1537 true /* is_load */, true /* is_64bit */);
1538 }
1539
1540 // ECX <- ECX * 2L (1H * 2L)
1541 if (src2_in_reg) {
1542 NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
1543 } else {
1544 int displacement = SRegOffset(rl_src2.s_reg_low);
1545 LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
1546 displacement + LOWORD_OFFSET);
1547 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1548 true /* is_load */, true /* is_64bit */);
1549 }
1550
1551 // ECX <- ECX + EAX (2H * 1L) + (1H * 2L)
1552 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
1553 }
1554
1555 // EAX <- 2L
1556 if (src2_in_reg) {
1557 NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
1558 } else {
1559 LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32,
1560 kNotVolatile);
1561 }
1562
1563 // EDX:EAX <- 2L * 1L (double precision)
1564 if (src1_in_reg) {
1565 NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
1566 } else {
1567 int displacement = SRegOffset(rl_src1.s_reg_low);
1568 LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET);
1569 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1570 true /* is_load */, true /* is_64bit */);
1571 }
1572
1573 // EDX <- EDX + ECX (add high words)
1574 NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
1575
1576 // Result is EDX:EAX
1577 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
1578 RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
1579 StoreValueWide(rl_dest, rl_result);
1580 }
1581
1582 void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
1583 Instruction::Code op) {
1584 DCHECK_EQ(rl_dest.location, kLocPhysReg);
1585 X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
1586 if (rl_src.location == kLocPhysReg) {
1587 // Both operands are in registers.
1588 // But on the 32-bit path we must ensure that rl_src is in a register pair.
1589 if (cu_->target64) {
1590 NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
1591 } else {
1592 rl_src = LoadValueWide(rl_src, kCoreReg);
1593 if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
1594 // The registers are the same, so we would clobber it before the use.
1595 RegStorage temp_reg = AllocTemp();
1596 OpRegCopy(temp_reg, rl_dest.reg);
1597 rl_src.reg.SetHighReg(temp_reg.GetReg());
1598 }
1599 NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
1600
1601 x86op = GetOpcode(op, rl_dest, rl_src, true);
1602 NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
1603 FreeTemp(rl_src.reg); // ???
1604 }
1605 return;
1606 }
1607
1608 // RHS is in memory.
1609 DCHECK((rl_src.location == kLocDalvikFrame) ||
1610 (rl_src.location == kLocCompilerTemp));
1611 int r_base = rs_rX86_SP.GetReg();
1612 int displacement = SRegOffset(rl_src.s_reg_low);
1613
1614 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1615 LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
1616 r_base, displacement + LOWORD_OFFSET);
1617 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1618 true /* is_load */, true /* is64bit */);
1619 if (!cu_->target64) {
1620 x86op = GetOpcode(op, rl_dest, rl_src, true);
1621 lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
1622 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1623 true /* is_load */, true /* is64bit */);
1624 }
1625 }
1626
1627 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
1628 rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
1629 if (rl_dest.location == kLocPhysReg) {
1630 // Ensure we are in a register pair
1631 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1632
1633 rl_src = UpdateLocWideTyped(rl_src, kCoreReg);
1634 GenLongRegOrMemOp(rl_result, rl_src, op);
1635 StoreFinalValueWide(rl_dest, rl_result);
1636 return;
1637 }
1638
1639 // It wasn't in registers, so it better be in memory.
1640 DCHECK((rl_dest.location == kLocDalvikFrame) ||
1641 (rl_dest.location == kLocCompilerTemp));
1642 rl_src = LoadValueWide(rl_src, kCoreReg);
1643
1644 // Operate directly into memory.
1645 X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
1646 int r_base = rs_rX86_SP.GetReg();
1647 int displacement = SRegOffset(rl_dest.s_reg_low);
1648
1649 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1650 LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
1651 cu_->target64 ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
1652 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1653 true /* is_load */, true /* is64bit */);
1654 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1655 false /* is_load */, true /* is64bit */);
1656 if (!cu_->target64) {
1657 x86op = GetOpcode(op, rl_dest, rl_src, true);
1658 lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
1659 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1660 true /* is_load */, true /* is64bit */);
1661 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1662 false /* is_load */, true /* is64bit */);
1663 }
1664
1665 int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
1666 int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
1667
1668 // If the left operand is in memory and the right operand is in a register
1669 // and both belong to the same dalvik register then we should clobber the
1670 // right one because it doesn't hold valid data anymore.
1671 if (v_src_reg == v_dst_reg) {
1672 Clobber(rl_src.reg);
1673 }
1674 }
1675
1676 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
1677 RegLocation rl_src2, Instruction::Code op,
1678 bool is_commutative) {
1679 // Is this really a 2 operand operation?
1680 switch (op) {
1681 case Instruction::ADD_LONG_2ADDR:
1682 case Instruction::SUB_LONG_2ADDR:
1683 case Instruction::AND_LONG_2ADDR:
1684 case Instruction::OR_LONG_2ADDR:
1685 case Instruction::XOR_LONG_2ADDR:
1686 if (GenerateTwoOperandInstructions()) {
1687 GenLongArith(rl_dest, rl_src2, op);
1688 return;
1689 }
1690 break;
1691
1692 default:
1693 break;
1694 }
1695
1696 if (rl_dest.location == kLocPhysReg) {
1697 RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
1698
1699 // We are about to clobber the LHS, so it needs to be a temp.
1700 rl_result = ForceTempWide(rl_result);
1701
1702 // Perform the operation using the RHS.
1703 rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
1704 GenLongRegOrMemOp(rl_result, rl_src2, op);
1705
1706 // And now record that the result is in the temp.
1707 StoreFinalValueWide(rl_dest, rl_result);
1708 return;
1709 }
1710
1711 // It wasn't in registers, so it better be in memory.
1712 DCHECK((rl_dest.location == kLocDalvikFrame) ||
1713 (rl_dest.location == kLocCompilerTemp));
1714 rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
1715 rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
1716
1717 // Get one of the source operands into temporary register.
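// The two-operand forms used below overwrite their left operand, so whichever
// source ends up on the left must be (or be copied into) a compiler temp rather
// than a promoted register holding a live Dalvik VR.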
1718 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1719 if (cu_->target64) {
1720 if (IsTemp(rl_src1.reg)) {
1721 GenLongRegOrMemOp(rl_src1, rl_src2, op);
1722 } else if (is_commutative) {
1723 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1724 // We need at least one of them to be a temporary.
1725 if (!IsTemp(rl_src2.reg)) {
1726 rl_src1 = ForceTempWide(rl_src1);
1727 GenLongRegOrMemOp(rl_src1, rl_src2, op);
1728 } else {
1729 GenLongRegOrMemOp(rl_src2, rl_src1, op);
1730 StoreFinalValueWide(rl_dest, rl_src2);
1731 return;
1732 }
1733 } else {
1734 // Need LHS to be the temp.
1735 rl_src1 = ForceTempWide(rl_src1);
1736 GenLongRegOrMemOp(rl_src1, rl_src2, op);
1737 }
1738 } else {
1739 if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
1740 GenLongRegOrMemOp(rl_src1, rl_src2, op);
1741 } else if (is_commutative) {
1742 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1743 // We need at least one of them to be a temporary.
1744 if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
1745 rl_src1 = ForceTempWide(rl_src1);
1746 GenLongRegOrMemOp(rl_src1, rl_src2, op);
1747 } else {
1748 GenLongRegOrMemOp(rl_src2, rl_src1, op);
1749 StoreFinalValueWide(rl_dest, rl_src2);
1750 return;
1751 }
1752 } else {
1753 // Need LHS to be the temp.
1754 rl_src1 = ForceTempWide(rl_src1);
1755 GenLongRegOrMemOp(rl_src1, rl_src2, op);
1756 }
1757 }
1758
1759 StoreFinalValueWide(rl_dest, rl_src1);
1760 }
1761
1762 void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
1763 if (cu_->target64) {
1764 rl_src = LoadValueWide(rl_src, kCoreReg);
1765 RegLocation rl_result;
1766 rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1767 OpRegCopy(rl_result.reg, rl_src.reg);
1768 OpReg(kOpNot, rl_result.reg);
1769 StoreValueWide(rl_dest, rl_result);
1770 } else {
1771 LOG(FATAL) << "Unexpected use of GenNotLong()";
1772 }
1773 }
1774
1775 void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src,
1776 int64_t imm, bool is_div) {
1777 if (imm == 0) {
1778 GenDivZeroException();
1779 } else if (imm == 1) {
1780 if (is_div) {
1781 // x / 1 == x.
1782 StoreValueWide(rl_dest, rl_src);
1783 } else {
1784 // x % 1 == 0.
1785 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1786 LoadConstantWide(rl_result.reg, 0);
1787 StoreValueWide(rl_dest, rl_result);
1788 }
1789 } else if (imm == -1) { // handle 0x8000000000000000 / -1 special case.
1790 if (is_div) {
1791 rl_src = LoadValueWide(rl_src, kCoreReg);
1792 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1793 RegStorage rs_temp = AllocTempWide();
1794
1795 OpRegCopy(rl_result.reg, rl_src.reg);
1796 LoadConstantWide(rs_temp, 0x8000000000000000);
1797
1798 // If x == MIN_LONG, return MIN_LONG.
1799 OpRegReg(kOpCmp, rl_src.reg, rs_temp);
1800 LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
1801
1802 // For x != MIN_LONG, x / -1 == -x.
1803 OpReg(kOpNeg, rl_result.reg);
1804
1805 minint_branch->target = NewLIR0(kPseudoTargetLabel);
1806 FreeTemp(rs_temp);
1807 StoreValueWide(rl_dest, rl_result);
1808 } else {
1809 // x % -1 == 0.
1810 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1811 LoadConstantWide(rl_result.reg, 0);
1812 StoreValueWide(rl_dest, rl_result);
1813 }
1814 } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
1815 // Division using shifting.
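// A plain arithmetic shift would round toward negative infinity. Adding
// (2^k - 1) to negative dividends first (selected branchlessly via CMOV below)
// makes the shift round toward zero, as Java requires; the final negate
// handles a negative divisor.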
1816 rl_src = LoadValueWide(rl_src, kCoreReg);
1817 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1818 if (IsSameReg(rl_result.reg, rl_src.reg)) {
1819 RegStorage rs_temp = AllocTypedTempWide(false, kCoreReg);
1820 rl_result.reg.SetReg(rs_temp.GetReg());
1821 }
1822 LoadConstantWide(rl_result.reg, std::abs(imm) - 1);
1823 OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
1824 NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
1825 OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
1826 int shift_amount = LowestSetBit(imm);
1827 OpRegImm(kOpAsr, rl_result.reg, shift_amount);
1828 if (imm < 0) {
1829 OpReg(kOpNeg, rl_result.reg);
1830 }
1831 StoreValueWide(rl_dest, rl_result);
1832 } else {
1833 CHECK(imm <= -2 || imm >= 2);
1834
1835 FlushReg(rs_r0q);
1836 Clobber(rs_r0q);
1837 LockTemp(rs_r0q);
1838 FlushReg(rs_r2q);
1839 Clobber(rs_r2q);
1840 LockTemp(rs_r2q);
1841
1842 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r2q, INVALID_SREG, INVALID_SREG};
1843
1844 // Use H. S. Warren's Hacker's Delight, Chapter 10, and
1845 // T. Granlund and P. L. Montgomery's "Division by Invariant Integers Using Multiplication".
1846 int64_t magic;
1847 int shift;
1848 CalculateMagicAndShift(imm, magic, shift, true /* is_long */);
1849
1850 /*
1851 * For imm >= 2,
1852 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n > 0
1853 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n < 0.
1854 * For imm <= -2,
1855 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n > 0
1856 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n < 0.
1857 * We implement this algorithm as follows:
1858 * 1. multiply the magic number M and the numerator n, leaving the high 64 bits of the product in RDX
1859 * 2. if imm > 0 and M < 0, add the numerator to RDX;
1860 * if imm < 0 and M > 0, subtract the numerator from RDX
1861 * 3. if S != 0, arithmetically shift RDX right by S bits
1862 * 4. add 1 to RDX if RDX < 0
1863 * 5. RDX now holds the quotient
1864 */
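// Illustrative only (constants here are for exposition, not taken from
// CalculateMagicAndShift): for imm == 3 one valid pair is
// M = 0x5555555555555556, S = 0, giving quotient = high64(M * n) + (n < 0 ? 1 : 0);
// e.g. n = 7 yields high64 = 2, and n = -7 yields high64 = -3, corrected to -2.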
1865
1866 // Numerator into RAX.
1867 RegStorage numerator_reg;
1868 if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
1869 // We will need the value later.
1870 rl_src = LoadValueWide(rl_src, kCoreReg);
1871 numerator_reg = rl_src.reg;
1872 OpRegCopyWide(rs_r0q, numerator_reg);
1873 } else {
1874 // Only need this once. Just put it into RAX.
1875 LoadValueDirectWideFixed(rl_src, rs_r0q);
1876 }
1877
1878 // RDX = magic.
1879 LoadConstantWide(rs_r2q, magic);
1880
1881 // RDX:RAX = magic & dividend.
1882 NewLIR1(kX86Imul64DaR, rs_r2q.GetReg());
1883
1884 if (imm > 0 && magic < 0) {
1885 // Add numerator to RDX.
1886 DCHECK(numerator_reg.Valid());
1887 OpRegReg(kOpAdd, rs_r2q, numerator_reg);
1888 } else if (imm < 0 && magic > 0) {
1889 DCHECK(numerator_reg.Valid());
1890 OpRegReg(kOpSub, rs_r2q, numerator_reg);
1891 }
1892
1893 // Do we need the shift?
1894 if (shift != 0) {
1895 // Shift RDX by 'shift' bits.
1896 OpRegImm(kOpAsr, rs_r2q, shift);
1897 }
1898
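// Step 4 ("add 1 to RDX if RDX < 0") is done without a branch: copy the
// quotient, shift its sign bit down into bit 0, and add it back in.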
1899 // Move RDX to RAX.
1900 OpRegCopyWide(rs_r0q, rs_r2q);
1901
1902 // Move sign bit to bit 0, zeroing the rest.
1903 OpRegImm(kOpLsr, rs_r2q, 63);
1904
1905 // RDX = RDX + RAX.
1906 OpRegReg(kOpAdd, rs_r2q, rs_r0q);
1907
1908 // Quotient is in RDX.
1909 if (!is_div) {
1910 // We need to compute the remainder.
1911 // Remainder is numerator - (quotient * imm).
1912 DCHECK(numerator_reg.Valid());
1913 OpRegCopyWide(rs_r0q, numerator_reg);
1914
1915 // Imul doesn't support 64-bit imms.
1916 if (imm > std::numeric_limits<int32_t>::max() ||
1917 imm < std::numeric_limits<int32_t>::min()) {
1918 RegStorage rs_temp = AllocTempWide();
1919 LoadConstantWide(rs_temp, imm);
1920
1921 // RDX = quotient * imm.
1922 NewLIR2(kX86Imul64RR, rs_r2q.GetReg(), rs_temp.GetReg());
1923
1924 FreeTemp(rs_temp);
1925 } else {
1926 // RDX = quotient * imm.
1927 int short_imm = static_cast<int>(imm);
1928 NewLIR3(kX86Imul64RRI, rs_r2q.GetReg(), rs_r2q.GetReg(), short_imm);
1929 }
1930
1931 // RAX -= RDX, i.e. numerator - quotient * imm.
1932 OpRegReg(kOpSub, rs_r0q, rs_r2q);
1933
1934 // Store result.
1935 OpRegCopyWide(rl_result.reg, rs_r0q);
1936 } else {
1937 // Store result.
1938 OpRegCopyWide(rl_result.reg, rs_r2q);
1939 }
1940 StoreValueWide(rl_dest, rl_result);
1941 FreeTemp(rs_r0q);
1942 FreeTemp(rs_r2q);
1943 }
1944 }
1945
1946 void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
1947 RegLocation rl_src2, bool is_div) {
1948 if (!cu_->target64) {
1949 LOG(FATAL) << "Unexpected use of GenDivRemLong()";
1950 return;
1951 }
1952
1953 if (rl_src2.is_const) {
1954 DCHECK(rl_src2.wide);
1955 int64_t imm = mir_graph_->ConstantValueWide(rl_src2);
1956 GenDivRemLongLit(rl_dest, rl_src1, imm, is_div);
1957 return;
1958 }
1959
1960 // We have to use fixed registers, so flush all the temps.
1961 FlushAllRegs();
1962 LockCallTemps(); // Prepare for explicit register usage.
1963
1964 // Load LHS into RAX.
1965 LoadValueDirectWideFixed(rl_src1, rs_r0q);
1966
1967 // Load RHS into RCX.
1968 LoadValueDirectWideFixed(rl_src2, rs_r1q);
1969
1970 // Copy LHS sign bit into RDX.
1971 NewLIR0(kx86Cqo64Da);
1972
1973 // Handle division by zero case.
1974 GenDivZeroCheckWide(rs_r1q);
1975
1976 // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
1977 NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
1978 LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
1979
1980 // RHS is -1.
1981 LoadConstantWide(rs_r6q, 0x8000000000000000);
1982 NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
1983 LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
1984
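// idiv would fault here: the quotient +2^63 is not representable. Java defines
// MIN_LONG / -1 == MIN_LONG with remainder 0, so produce that result directly
// without executing idiv (RAX already holds the dividend, i.e. MIN_LONG).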
1985 // In 0x8000000000000000/-1 case.
1986 if (!is_div) {
1987 // For DIV, RAX already holds the right value. For REM, we need RDX to be 0.
1988 NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
1989 }
1990 LIR* done = NewLIR1(kX86Jmp8, 0);
1991
1992 // Expected case.
1993 minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
1994 minint_branch->target = minus_one_branch->target;
1995 NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
1996 done->target = NewLIR0(kPseudoTargetLabel);
1997
1998 // Result is in RAX for div and RDX for rem.
1999 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
2000 if (!is_div) {
2001 rl_result.reg.SetReg(r2q);
2002 }
2003
2004 StoreValueWide(rl_dest, rl_result);
2005 }
2006
2007 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
2008 rl_src = LoadValueWide(rl_src, kCoreReg);
2009 RegLocation rl_result;
2010 if (cu_->target64) {
2011 rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2012 OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
2013 } else {
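// 64-bit negation from 32-bit halves: the low word becomes -lo, and the high
// word becomes -(hi + borrow), where the borrow (the carry flag NEG sets when
// the low word was non-zero) is folded in with ADC before the final NEG.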
2014 rl_result = ForceTempWide(rl_src);
2015 OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow()); // rLow = -rLow
2016 OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0); // rHigh = rHigh + CF
2017 OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); // rHigh = -rHigh
2018 }
2019 StoreValueWide(rl_dest, rl_result);
2020 }
2021
2022 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) {
2023 DCHECK_EQ(kX86, cu_->instruction_set);
2024 X86OpCode opcode = kX86Bkpt;
2025 switch (op) {
2026 case kOpCmp: opcode = kX86Cmp32RT; break;
2027 case kOpMov: opcode = kX86Mov32RT; break;
2028 default:
2029 LOG(FATAL) << "Bad opcode: " << op;
2030 break;
2031 }
2032 NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
2033 }
2034
2035 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
2036 DCHECK_EQ(kX86_64, cu_->instruction_set);
2037 X86OpCode opcode = kX86Bkpt;
2038 if (cu_->target64 && r_dest.Is64BitSolo()) {
2039 switch (op) {
2040 case kOpCmp: opcode = kX86Cmp64RT; break;
2041 case kOpMov: opcode = kX86Mov64RT; break;
2042 default:
2043 LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
2044 break;
2045 }
2046 } else {
2047 switch (op) {
2048 case kOpCmp: opcode = kX86Cmp32RT; break;
2049 case kOpMov: opcode = kX86Mov32RT; break;
2050 default:
2051 LOG(FATAL) << "Bad opcode: " << op;
2052 break;
2053 }
2054 }
2055 NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
2056 }
2057
2058 /*
2059 * Generate array load
2060 */
2061 void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
2062 RegLocation rl_index, RegLocation rl_dest, int scale) {
2063 RegisterClass reg_class = RegClassBySize(size);
2064 int len_offset = mirror::Array::LengthOffset().Int32Value();
2065 RegLocation rl_result;
2066 rl_array = LoadValue(rl_array, kRefReg);
2067
2068 int data_offset;
2069 if (size == k64 || size == kDouble) {
2070 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
2071 } else {
2072 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
2073 }
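// Wide (8-byte) elements start at an 8-byte-aligned data offset; all narrower
// component sizes share the 4-byte-aligned offset.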
2074
2075 bool constant_index = rl_index.is_const;
2076 int32_t constant_index_value = 0;
2077 if (!constant_index) {
2078 rl_index = LoadValue(rl_index, kCoreReg);
2079 } else {
2080 constant_index_value = mir_graph_->ConstantValue(rl_index);
2081 // If index is constant, just fold it into the data offset
2082 data_offset += constant_index_value << scale;
2083 // Treat it as a non-indexed access below.
2084 rl_index.reg = RegStorage::InvalidReg();
2085 }
2086
2087 /* null object? */
2088 GenNullCheck(rl_array.reg, opt_flags);
2089
2090 if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
2091 if (constant_index) {
2092 GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
2093 } else {
2094 GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
2095 }
2096 }
2097 rl_result = EvalLoc(rl_dest, reg_class, true);
2098 LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
2099 if ((size == k64) || (size == kDouble)) {
2100 StoreValueWide(rl_dest, rl_result);
2101 } else {
2102 StoreValue(rl_dest, rl_result);
2103 }
2104 }
2105
2106 /*
2107 * Generate array store
2108 *
2109 */
2110 void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
2111 RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
2112 RegisterClass reg_class = RegClassBySize(size);
2113 int len_offset = mirror::Array::LengthOffset().Int32Value();
2114 int data_offset;
2115
2116 if (size == k64 || size == kDouble) {
2117 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
2118 } else {
2119 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
2120 }
2121
2122 rl_array = LoadValue(rl_array, kRefReg);
2123 bool constant_index = rl_index.is_const;
2124 int32_t constant_index_value = 0;
2125 if (!constant_index) {
2126 rl_index = LoadValue(rl_index, kCoreReg);
2127 } else {
2128 // If index is constant, just fold it into the data offset
2129 constant_index_value = mir_graph_->ConstantValue(rl_index);
2130 data_offset += constant_index_value << scale;
2131 // Treat it as a non-indexed access below.
2132 rl_index.reg = RegStorage::InvalidReg();
2133 }
2134
2135 /* null object? */
2136 GenNullCheck(rl_array.reg, opt_flags);
2137
2138 if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
2139 if (constant_index) {
2140 GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
2141 } else {
2142 GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
2143 }
2144 }
2145 if ((size == k64) || (size == kDouble)) {
2146 rl_src = LoadValueWide(rl_src, reg_class);
2147 } else {
2148 rl_src = LoadValue(rl_src, reg_class);
2149 }
2150 // If the src reg can't be byte accessed, move it to a temp first.
2151 if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
2152 RegStorage temp = AllocTemp();
2153 OpRegCopy(temp, rl_src.reg);
2154 StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size);
2155 } else {
2156 StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size);
2157 }
2158 if (card_mark) {
2159 // Free rl_index if it's a temp. Ensures there are 2 free regs for the card mark.
2160 if (!constant_index) {
2161 FreeTemp(rl_index.reg);
2162 }
2163 MarkGCCard(rl_src.reg, rl_array.reg);
2164 }
2165 }
2166
2167 RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
2168 RegLocation rl_src, int shift_amount) {
2169 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2170 if (cu_->target64) {
2171 OpKind op = static_cast<OpKind>(0); /* Make gcc happy */
2172 switch (opcode) {
2173 case Instruction::SHL_LONG:
2174 case Instruction::SHL_LONG_2ADDR:
2175 op = kOpLsl;
2176 break;
2177 case Instruction::SHR_LONG:
2178 case Instruction::SHR_LONG_2ADDR:
2179 op = kOpAsr;
2180 break;
2181 case Instruction::USHR_LONG:
2182 case Instruction::USHR_LONG_2ADDR:
2183 op = kOpLsr;
2184 break;
2185 default:
2186 LOG(FATAL) << "Unexpected case";
2187 }
2188 OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
2189 } else {
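// 32-bit path: synthesize the 64-bit shift from 32-bit operations. For amounts
// below 32, SHLD/SHRD carry bits between the two halves; for amounts of 32 or
// more, one half is simply a copy of the other shifted by (amount - 32).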
2190 switch (opcode) {
2191 case Instruction::SHL_LONG:
2192 case Instruction::SHL_LONG_2ADDR:
2193 DCHECK_NE(shift_amount, 1); // Prevent a double store from happening.
2194 if (shift_amount == 32) {
2195 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
2196 LoadConstant(rl_result.reg.GetLow(), 0);
2197 } else if (shift_amount > 31) {
2198 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
2199 NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
2200 LoadConstant(rl_result.reg.GetLow(), 0);
2201 } else {
2202 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2203 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2204 NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
2205 shift_amount);
2206 NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
2207 }
2208 break;
2209 case Instruction::SHR_LONG:
2210 case Instruction::SHR_LONG_2ADDR:
2211 if (shift_amount == 32) {
2212 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2213 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2214 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
2215 } else if (shift_amount > 31) {
2216 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2217 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2218 NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
2219 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
2220 } else {
2221 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2222 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2223 NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
2224 shift_amount);
2225 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
2226 }
2227 break;
2228 case Instruction::USHR_LONG:
2229 case Instruction::USHR_LONG_2ADDR:
2230 if (shift_amount == 32) {
2231 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2232 LoadConstant(rl_result.reg.GetHigh(), 0);
2233 } else if (shift_amount > 31) {
2234 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2235 NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
2236 LoadConstant(rl_result.reg.GetHigh(), 0);
2237 } else {
2238 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2239 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2240 NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
2241 shift_amount);
2242 NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
2243 }
2244 break;
2245 default:
2246 LOG(FATAL) << "Unexpected case";
2247 }
2248 }
2249 return rl_result;
2250 }
2251
2252 void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
2253 RegLocation rl_src, RegLocation rl_shift) {
2254 // Per spec, we only care about low 6 bits of shift amount.
2255 int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
2256 if (shift_amount == 0) {
2257 rl_src = LoadValueWide(rl_src, kCoreReg);
2258 StoreValueWide(rl_dest, rl_src);
2259 return;
2260 } else if (shift_amount == 1 &&
2261 (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
2262 // Need to handle this here to avoid calling StoreValueWide twice.
2263 GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src);
2264 return;
2265 }
2266 if (BadOverlap(rl_src, rl_dest)) {
2267 GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
2268 return;
2269 }
2270 rl_src = LoadValueWide(rl_src, kCoreReg);
2271 RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount);
2272 StoreValueWide(rl_dest, rl_result);
2273 }
2274
2275 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
2276 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
2277 bool isConstSuccess = false;
2278 switch (opcode) {
2279 case Instruction::ADD_LONG:
2280 case Instruction::AND_LONG:
2281 case Instruction::OR_LONG:
2282 case Instruction::XOR_LONG:
2283 if (rl_src2.is_const) {
2284 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2285 } else {
2286 DCHECK(rl_src1.is_const);
2287 isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
2288 }
2289 break;
2290 case Instruction::SUB_LONG:
2291 case Instruction::SUB_LONG_2ADDR:
2292 if (rl_src2.is_const) {
2293 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2294 } else {
2295 GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
2296 isConstSuccess = true;
2297 }
2298 break;
2299 case Instruction::ADD_LONG_2ADDR:
2300 case Instruction::OR_LONG_2ADDR:
2301 case Instruction::XOR_LONG_2ADDR:
2302 case Instruction::AND_LONG_2ADDR:
2303 if (rl_src2.is_const) {
2304 if (GenerateTwoOperandInstructions()) {
2305 isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode);
2306 } else {
2307 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2308 }
2309 } else {
2310 DCHECK(rl_src1.is_const);
2311 isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
2312 }
2313 break;
2314 default:
2315 isConstSuccess = false;
2316 break;
2317 }
2318
2319 if (!isConstSuccess) {
2320 // Default - bail to non-const handler.
2321 GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
2322 }
2323 }
2324
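// Returns true when applying |op| with |value| to one half of a wide value
// cannot change it (x & -1 == x, x | 0 == x, x ^ 0 == x), so the instruction
// for that half can be skipped entirely.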
2325 bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
2326 switch (op) {
2327 case Instruction::AND_LONG_2ADDR:
2328 case Instruction::AND_LONG:
2329 return value == -1;
2330 case Instruction::OR_LONG:
2331 case Instruction::OR_LONG_2ADDR:
2332 case Instruction::XOR_LONG:
2333 case Instruction::XOR_LONG_2ADDR:
2334 return value == 0;
2335 default:
2336 return false;
2337 }
2338 }
2339
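// Maps a Dalvik long arithmetic opcode to the x86 opcode for one half of the
// operation. On 32-bit targets the high half of ADD/SUB must use the
// carry-propagating forms (ADC/SBB); 64-bit targets use a single full-width op.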
2340 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
2341 bool is_high_op) {
2342 bool rhs_in_mem = rhs.location != kLocPhysReg;
2343 bool dest_in_mem = dest.location != kLocPhysReg;
2344 bool is64Bit = cu_->target64;
2345 DCHECK(!rhs_in_mem || !dest_in_mem);
2346 switch (op) {
2347 case Instruction::ADD_LONG:
2348 case Instruction::ADD_LONG_2ADDR:
2349 if (dest_in_mem) {
2350 return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR;
2351 } else if (rhs_in_mem) {
2352 return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM;
2353 }
2354 return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR;
2355 case Instruction::SUB_LONG:
2356 case Instruction::SUB_LONG_2ADDR:
2357 if (dest_in_mem) {
2358 return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR;
2359 } else if (rhs_in_mem) {
2360 return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM;
2361 }
2362 return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR;
2363 case Instruction::AND_LONG_2ADDR:
2364 case Instruction::AND_LONG:
2365 if (dest_in_mem) {
2366 return is64Bit ? kX86And64MR : kX86And32MR;
2367 }
2368 if (is64Bit) {
2369 return rhs_in_mem ? kX86And64RM : kX86And64RR;
2370 }
2371 return rhs_in_mem ? kX86And32RM : kX86And32RR;
2372 case Instruction::OR_LONG:
2373 case Instruction::OR_LONG_2ADDR:
2374 if (dest_in_mem) {
2375 return is64Bit ? kX86Or64MR : kX86Or32MR;
2376 }
2377 if (is64Bit) {
2378 return rhs_in_mem ? kX86Or64RM : kX86Or64RR;
2379 }
2380 return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
2381 case Instruction::XOR_LONG:
2382 case Instruction::XOR_LONG_2ADDR:
2383 if (dest_in_mem) {
2384 return is64Bit ? kX86Xor64MR : kX86Xor32MR;
2385 }
2386 if (is64Bit) {
2387 return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR;
2388 }
2389 return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
2390 default:
2391 LOG(FATAL) << "Unexpected opcode: " << op;
2392 return kX86Add32RR;
2393 }
2394 }
2395
2396 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
2397 int32_t value) {
2398 bool in_mem = loc.location != kLocPhysReg;
2399 bool is64Bit = cu_->target64;
2400 bool byte_imm = IS_SIMM8(value);
2401 DCHECK(in_mem || !loc.reg.IsFloat());
2402 switch (op) {
2403 case Instruction::ADD_LONG:
2404 case Instruction::ADD_LONG_2ADDR:
2405 if (byte_imm) {
2406 if (in_mem) {
2407 return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
2408 }
2409 return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
2410 }
2411 if (in_mem) {
2412 return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI;
2413 }
2414 return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI;
2415 case Instruction::SUB_LONG:
2416 case Instruction::SUB_LONG_2ADDR:
2417 if (byte_imm) {
2418 if (in_mem) {
2419 return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
2420 }
2421 return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
2422 }
2423 if (in_mem) {
2424 return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI;
2425 }
2426 return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI;
2427 case Instruction::AND_LONG_2ADDR:
2428 case Instruction::AND_LONG:
2429 if (byte_imm) {
2430 if (is64Bit) {
2431 return in_mem ? kX86And64MI8 : kX86And64RI8;
2432 }
2433 return in_mem ? kX86And32MI8 : kX86And32RI8;
2434 }
2435 if (is64Bit) {
2436 return in_mem ? kX86And64MI : kX86And64RI;
2437 }
2438 return in_mem ? kX86And32MI : kX86And32RI;
2439 case Instruction::OR_LONG:
2440 case Instruction::OR_LONG_2ADDR:
2441 if (byte_imm) {
2442 if (is64Bit) {
2443 return in_mem ? kX86Or64MI8 : kX86Or64RI8;
2444 }
2445 return in_mem ? kX86Or32MI8 : kX86Or32RI8;
2446 }
2447 if (is64Bit) {
2448 return in_mem ? kX86Or64MI : kX86Or64RI;
2449 }
2450 return in_mem ? kX86Or32MI : kX86Or32RI;
2451 case Instruction::XOR_LONG:
2452 case Instruction::XOR_LONG_2ADDR:
2453 if (byte_imm) {
2454 if (is64Bit) {
2455 return in_mem ? kX86Xor64MI8 : kX86Xor64RI8;
2456 }
2457 return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
2458 }
2459 if (is64Bit) {
2460 return in_mem ? kX86Xor64MI : kX86Xor64RI;
2461 }
2462 return in_mem ? kX86Xor32MI : kX86Xor32RI;
2463 default:
2464 LOG(FATAL) << "Unexpected opcode: " << op;
2465 return kX86Add32MI;
2466 }
2467 }
2468
2469 bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
2470 DCHECK(rl_src.is_const);
2471 int64_t val = mir_graph_->ConstantValueWide(rl_src);
2472
2473 if (cu_->target64) {
2474 // We can use an immediate operand only if the value fits in 32 bits.
2475 if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
2476 return false;
2477 }
2478
2479 rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
2480
2481 if ((rl_dest.location == kLocDalvikFrame) ||
2482 (rl_dest.location == kLocCompilerTemp)) {
2483 int r_base = rs_rX86_SP.GetReg();
2484 int displacement = SRegOffset(rl_dest.s_reg_low);
2485
2486 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2487 X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
2488 LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
2489 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2490 true /* is_load */, true /* is64bit */);
2491 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2492 false /* is_load */, true /* is64bit */);
2493 return true;
2494 }
2495
2496 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2497 DCHECK_EQ(rl_result.location, kLocPhysReg);
2498 DCHECK(!rl_result.reg.IsFloat());
2499
2500 X86OpCode x86op = GetOpcode(op, rl_result, false, val);
2501 NewLIR2(x86op, rl_result.reg.GetReg(), val);
2502
2503 StoreValueWide(rl_dest, rl_result);
2504 return true;
2505 }
2506
2507 int32_t val_lo = Low32Bits(val);
2508 int32_t val_hi = High32Bits(val);
2509 rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
2510
2511 // Can we just do this into memory?
2512 if ((rl_dest.location == kLocDalvikFrame) ||
2513 (rl_dest.location == kLocCompilerTemp)) {
2514 int r_base = rs_rX86_SP.GetReg();
2515 int displacement = SRegOffset(rl_dest.s_reg_low);
2516
2517 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2518 if (!IsNoOp(op, val_lo)) {
2519 X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
2520 LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
2521 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2522 true /* is_load */, true /* is64bit */);
2523 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2524 false /* is_load */, true /* is64bit */);
2525 }
2526 if (!IsNoOp(op, val_hi)) {
2527 X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
2528 LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi);
2529 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2530 true /* is_load */, true /* is64bit */);
2531 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2532 false /* is_load */, true /* is64bit */);
2533 }
2534 return true;
2535 }
2536
2537 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2538 DCHECK_EQ(rl_result.location, kLocPhysReg);
2539 DCHECK(!rl_result.reg.IsFloat());
2540
2541 if (!IsNoOp(op, val_lo)) {
2542 X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
2543 NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
2544 }
2545 if (!IsNoOp(op, val_hi)) {
2546 X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
2547 NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
2548 }
2549 StoreValueWide(rl_dest, rl_result);
2550 return true;
2551 }
2552
2553 bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
2554 RegLocation rl_src2, Instruction::Code op) {
2555 DCHECK(rl_src2.is_const);
2556 int64_t val = mir_graph_->ConstantValueWide(rl_src2);
2557
2558 if (cu_->target64) {
2559 // We can use an immediate operand only if the value fits in 32 bits.
2560 if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
2561 return false;
2562 }
2563 if (rl_dest.location == kLocPhysReg &&
2564 rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
2565 X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
2566 OpRegCopy(rl_dest.reg, rl_src1.reg);
2567 NewLIR2(x86op, rl_dest.reg.GetReg(), val);
2568 StoreFinalValueWide(rl_dest, rl_dest);
2569 return true;
2570 }
2571
2572 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2573 // We need the values to be in a temporary
2574 RegLocation rl_result = ForceTempWide(rl_src1);
2575
2576 X86OpCode x86op = GetOpcode(op, rl_result, false, val);
2577 NewLIR2(x86op, rl_result.reg.GetReg(), val);
2578
2579 StoreFinalValueWide(rl_dest, rl_result);
2580 return true;
2581 }
2582
2583 int32_t val_lo = Low32Bits(val);
2584 int32_t val_hi = High32Bits(val);
2585 rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
2586 rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
2587
2588 // Can we do this directly into the destination registers?
2589 if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
2590 rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() &&
2591 rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) {
2592 if (!IsNoOp(op, val_lo)) {
2593 X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
2594 NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo);
2595 }
2596 if (!IsNoOp(op, val_hi)) {
2597 X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
2598 NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi);
2599 }
2600
2601 StoreFinalValueWide(rl_dest, rl_dest);
2602 return true;
2603 }
2604
2605 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2606 DCHECK_EQ(rl_src1.location, kLocPhysReg);
2607
2608 // We need the values to be in a temporary
2609 RegLocation rl_result = ForceTempWide(rl_src1);
2610 if (!IsNoOp(op, val_lo)) {
2611 X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
2612 NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
2613 }
2614 if (!IsNoOp(op, val_hi)) {
2615 X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
2616 NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
2617 }
2618
2619 StoreFinalValueWide(rl_dest, rl_result);
2620 return true;
2621 }
2622
2623 // For final classes there are no sub-classes to check and so we can answer the instance-of
2624 // question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
2625 void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
2626 RegLocation rl_dest, RegLocation rl_src) {
2627 RegLocation object = LoadValue(rl_src, kRefReg);
2628 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
2629 RegStorage result_reg = rl_result.reg;
2630
2631 // For 32-bit, SETcc only works with EAX..EDX.
2632 RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg;
2633 if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) {
2634 result_reg = AllocateByteRegister();
2635 }
2636
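// The sequence below computes, without a runtime call:
//   result = (obj != null && obj->klass_ == resolved_type) ? 1 : 0
// using one branch for the null case and a SETcc on the class comparison.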
2637 // Assume that there is no match.
2638 LoadConstant(result_reg, 0);
2639 LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
2640
2641 // We will use this register to compare to memory below.
2642 // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode).
2643 // For this reason, force allocation of a 32 bit register to use, so that the
2644 // compare to memory will be done using a 32 bit comparison.
2645 // The LoadRefDisp(s) below will work normally, even in 64 bit mode.
2646 RegStorage check_class = AllocTemp();
2647
2648 // If Method* is already in a register, we can save a copy.
2649 RegLocation rl_method = mir_graph_->GetMethodLoc();
2650 int32_t offset_of_type = mirror::Array::DataOffset(
2651 sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
2652 (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
2653
2654 if (rl_method.location == kLocPhysReg) {
2655 if (use_declaring_class) {
2656 LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
2657 check_class, kNotVolatile);
2658 } else {
2659 LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
2660 check_class, kNotVolatile);
2661 LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
2662 }
2663 } else {
2664 LoadCurrMethodDirect(check_class);
2665 if (use_declaring_class) {
2666 LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
2667 check_class, kNotVolatile);
2668 } else {
2669 LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
2670 check_class, kNotVolatile);
2671 LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
2672 }
2673 }
2674
2675 // Compare the computed class to the class in the object.
2676 DCHECK_EQ(object.location, kLocPhysReg);
2677 OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value());
2678
2679 // Set the low byte of the result to 0 or 1 from the compare condition code.
2680 NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq);
2681
2682 LIR* target = NewLIR0(kPseudoTargetLabel);
2683 null_branchover->target = target;
2684 FreeTemp(check_class);
2685 if (IsTemp(result_reg)) {
2686 OpRegCopy(rl_result.reg, result_reg);
2687 FreeTemp(result_reg);
2688 }
2689 StoreValue(rl_dest, rl_result);
2690 }
2691
2692 void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
2693 RegLocation rl_lhs, RegLocation rl_rhs) {
2694 OpKind op = kOpBkpt;
2695 bool is_div_rem = false;
2696 bool unary = false;
2697 bool shift_op = false;
2698 bool is_two_addr = false;
2699 RegLocation rl_result;
2700 switch (opcode) {
2701 case Instruction::NEG_INT:
2702 op = kOpNeg;
2703 unary = true;
2704 break;
2705 case Instruction::NOT_INT:
2706 op = kOpMvn;
2707 unary = true;
2708 break;
2709 case Instruction::ADD_INT_2ADDR:
2710 is_two_addr = true;
2711 // Fallthrough
2712 case Instruction::ADD_INT:
2713 op = kOpAdd;
2714 break;
2715 case Instruction::SUB_INT_2ADDR:
2716 is_two_addr = true;
2717 // Fallthrough
2718 case Instruction::SUB_INT:
2719 op = kOpSub;
2720 break;
2721 case Instruction::MUL_INT_2ADDR:
2722 is_two_addr = true;
2723 // Fallthrough
2724 case Instruction::MUL_INT:
2725 op = kOpMul;
2726 break;
2727 case Instruction::DIV_INT_2ADDR:
2728 is_two_addr = true;
2729 // Fallthrough
2730 case Instruction::DIV_INT:
2731 op = kOpDiv;
2732 is_div_rem = true;
2733 break;
2734 /* NOTE: returns in kArg1 */
2735 case Instruction::REM_INT_2ADDR:
2736 is_two_addr = true;
2737 // Fallthrough
2738 case Instruction::REM_INT:
2739 op = kOpRem;
2740 is_div_rem = true;
2741 break;
2742 case Instruction::AND_INT_2ADDR:
2743 is_two_addr = true;
2744 // Fallthrough
2745 case Instruction::AND_INT:
2746 op = kOpAnd;
2747 break;
2748 case Instruction::OR_INT_2ADDR:
2749 is_two_addr = true;
2750 // Fallthrough
2751 case Instruction::OR_INT:
2752 op = kOpOr;
2753 break;
2754 case Instruction::XOR_INT_2ADDR:
2755 is_two_addr = true;
2756 // Fallthrough
2757 case Instruction::XOR_INT:
2758 op = kOpXor;
2759 break;
2760 case Instruction::SHL_INT_2ADDR:
2761 is_two_addr = true;
2762 // Fallthrough
2763 case Instruction::SHL_INT:
2764 shift_op = true;
2765 op = kOpLsl;
2766 break;
2767 case Instruction::SHR_INT_2ADDR:
2768 is_two_addr = true;
2769 // Fallthrough
2770 case Instruction::SHR_INT:
2771 shift_op = true;
2772 op = kOpAsr;
2773 break;
2774 case Instruction::USHR_INT_2ADDR:
2775 is_two_addr = true;
2776 // Fallthrough
2777 case Instruction::USHR_INT:
2778 shift_op = true;
2779 op = kOpLsr;
2780 break;
2781 default:
2782 LOG(FATAL) << "Invalid word arith op: " << opcode;
2783 }
2784
2785 // Can we convert to a two address instruction?
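// If the destination and the LHS live in the same Dalvik VR, the operation can
// be done in place (x = x op y), which maps directly onto x86's two-operand
// instruction forms and avoids an extra copy.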
2786 if (!is_two_addr &&
2787 (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
2788 mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
2789 is_two_addr = true;
2790 }
2791
2792 if (!GenerateTwoOperandInstructions()) {
2793 is_two_addr = false;
2794 }
2795
2796 // Get the div/rem stuff out of the way.
2797 if (is_div_rem) {
2798 rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true);
2799 StoreValue(rl_dest, rl_result);
2800 return;
2801 }
2802
2803 // If we generate any memory access below, it will reference a dalvik reg.
2804 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2805
2806 if (unary) {
2807 rl_lhs = LoadValue(rl_lhs, kCoreReg);
2808 rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2809 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2810 OpRegReg(op, rl_result.reg, rl_lhs.reg);
2811 } else {
2812 if (shift_op) {
2813 // X86 doesn't require masking and must use ECX.
2814 RegStorage t_reg = TargetReg(kCount, kNotWide); // rCX
2815 LoadValueDirectFixed(rl_rhs, t_reg);
2816 if (is_two_addr) {
2817 // Can we do this directly into memory?
2818 rl_rhs = LoadValue(rl_rhs, kCoreReg);
2819 rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2820 if (rl_result.location != kLocPhysReg) {
2821 // Okay, we can do this into memory
2822 OpMemReg(op, rl_result, t_reg.GetReg());
2823 FreeTemp(t_reg);
2824 return;
2825 } else if (!rl_result.reg.IsFloat()) {
2826 // Can do this directly into the result register
2827 OpRegReg(op, rl_result.reg, t_reg);
2828 FreeTemp(t_reg);
2829 StoreFinalValue(rl_dest, rl_result);
2830 return;
2831 }
2832 }
2833 // Three address form, or we can't do directly.
2834 rl_lhs = LoadValue(rl_lhs, kCoreReg);
2835 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2836 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg);
2837 FreeTemp(t_reg);
2838 } else {
2839 // Multiply is 3 operand only (sort of).
2840 if (is_two_addr && op != kOpMul) {
2841 // Can we do this directly into memory?
2842 rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2843 if (rl_result.location == kLocPhysReg) {
2844 // Ensure res is in a core reg
2845 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2846 // Can we do this from memory directly?
2847 rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
2848 if (rl_rhs.location != kLocPhysReg) {
2849 OpRegMem(op, rl_result.reg, rl_rhs);
2850 StoreFinalValue(rl_dest, rl_result);
2851 return;
2852 } else if (!rl_rhs.reg.IsFloat()) {
2853 OpRegReg(op, rl_result.reg, rl_rhs.reg);
2854 StoreFinalValue(rl_dest, rl_result);
2855 return;
2856 }
2857 }
2858 rl_rhs = LoadValue(rl_rhs, kCoreReg);
2859 // rl_rhs and rl_dest may refer to the same VR; in that case rl_dest is
2860 // already in a register after LoadValue, but rl_result has not been
2861 // updated yet, so refresh it here.
2862 rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2863 if (rl_result.location != kLocPhysReg) {
2864 // Okay, we can do this into memory.
2865 OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
2866 return;
2867 } else if (!rl_result.reg.IsFloat()) {
2868 // Can do this directly into the result register.
2869 OpRegReg(op, rl_result.reg, rl_rhs.reg);
2870 StoreFinalValue(rl_dest, rl_result);
2871 return;
2872 } else {
2873 rl_lhs = LoadValue(rl_lhs, kCoreReg);
2874 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2875 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2876 }
2877 } else {
2878 // Try to use reg/memory instructions.
2879 rl_lhs = UpdateLocTyped(rl_lhs, kCoreReg);
2880 rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
2881 // We can't optimize with FP registers.
2882 if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
2883 // Something is difficult, so fall back to the standard case.
2884 rl_lhs = LoadValue(rl_lhs, kCoreReg);
2885 rl_rhs = LoadValue(rl_rhs, kCoreReg);
2886 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2887 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2888 } else {
2889 // We can optimize by moving to result and using memory operands.
2890 if (rl_rhs.location != kLocPhysReg) {
2891 // Force LHS into result.
2892 // Be careful with the order here:
2893 // if rl_dest and rl_lhs point to the same VR, load the LHS first;
2894 // if they are different, find a register for the destination first.
2895 if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
2896 mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
2897 rl_lhs = LoadValue(rl_lhs, kCoreReg);
2898 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2899 // No-op if these are the same.
2900 OpRegCopy(rl_result.reg, rl_lhs.reg);
2901 } else {
2902 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2903 LoadValueDirect(rl_lhs, rl_result.reg);
2904 }
2905 OpRegMem(op, rl_result.reg, rl_rhs);
2906 } else if (rl_lhs.location != kLocPhysReg) {
2907 // RHS is in a register; LHS is in memory.
2908 if (op != kOpSub) {
2909 // Force RHS into result and operate on memory.
2910 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2911 OpRegCopy(rl_result.reg, rl_rhs.reg);
2912 OpRegMem(op, rl_result.reg, rl_lhs);
2913 } else {
2914 // Subtraction isn't commutative.
2915 rl_lhs = LoadValue(rl_lhs, kCoreReg);
2916 rl_rhs = LoadValue(rl_rhs, kCoreReg);
2917 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2918 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2919 }
2920 } else {
2921 // Both are in registers.
2922 rl_lhs = LoadValue(rl_lhs, kCoreReg);
2923 rl_rhs = LoadValue(rl_rhs, kCoreReg);
2924 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2925 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2926 }
2927 }
2928 }
2929 }
2930 }
2931 StoreValue(rl_dest, rl_result);
2932 }
2933
2934 bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
2935 // If either operand lives in a non-core (floating-point) register, the reg/mem integer forms can't be used.
2936 if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) {
2937 return false;
2938 }
2939 if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) {
2940 return false;
2941 }
2942
2943 // Everything will be fine :-).
2944 return true;
2945 }
2946
2947 void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
2948 if (!cu_->target64) {
2949 Mir2Lir::GenIntToLong(rl_dest, rl_src);
2950 return;
2951 }
2952 rl_src = UpdateLocTyped(rl_src, kCoreReg);
2953 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
2954 if (rl_src.location == kLocPhysReg) {
2955 NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
2956 } else {
2957 int displacement = SRegOffset(rl_src.s_reg_low);
2958 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2959 LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(),
2960 displacement + LOWORD_OFFSET);
2961 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
2962 true /* is_load */, true /* is_64bit */);
2963 }
2964 StoreValueWide(rl_dest, rl_result);
2965 }
2966
2967 void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
2968 RegLocation rl_src1, RegLocation rl_shift) {
2969 if (!cu_->target64) {
2970 Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
2971 return;
2972 }
2973
2974 bool is_two_addr = false;
2975 OpKind op = kOpBkpt;
2976 RegLocation rl_result;
2977
2978 switch (opcode) {
2979 case Instruction::SHL_LONG_2ADDR:
2980 is_two_addr = true;
2981 // Fallthrough
2982 case Instruction::SHL_LONG:
2983 op = kOpLsl;
2984 break;
2985 case Instruction::SHR_LONG_2ADDR:
2986 is_two_addr = true;
2987 // Fallthrough
2988 case Instruction::SHR_LONG:
2989 op = kOpAsr;
2990 break;
2991 case Instruction::USHR_LONG_2ADDR:
2992 is_two_addr = true;
2993 // Fallthrough
2994 case Instruction::USHR_LONG:
2995 op = kOpLsr;
2996 break;
2997 default:
2998 op = kOpBkpt;
2999 }
3000
3001 // X86 doesn't require masking and must use ECX.
3002 RegStorage t_reg = TargetReg(kCount, kNotWide); // rCX
3003 LoadValueDirectFixed(rl_shift, t_reg);
3004 if (is_two_addr) {
3005 // Can we do this directly into memory?
3006 rl_result = UpdateLocWideTyped(rl_dest, kCoreReg);
3007 if (rl_result.location != kLocPhysReg) {
3008 // Okay, we can do this into memory
3009 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
3010 OpMemReg(op, rl_result, t_reg.GetReg());
3011 } else if (!rl_result.reg.IsFloat()) {
3012 // Can do this directly into the result register
3013 OpRegReg(op, rl_result.reg, t_reg);
3014 StoreFinalValueWide(rl_dest, rl_result);
3015 }
3016 } else {
3017 // Three address form, or we can't do directly.
3018 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
3019 rl_result = EvalLocWide(rl_dest, kCoreReg, true);
3020 OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
3021 StoreFinalValueWide(rl_dest, rl_result);
3022 }
3023
3024 FreeTemp(t_reg);
3025 }
3026
3027 } // namespace art
3028