/* * Copyright (C) 2012 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* This file contains codegen for the X86 ISA */ #include "codegen_x86.h" #include "art_method.h" #include "base/bit_utils.h" #include "base/logging.h" #include "dex/quick/mir_to_lir-inl.h" #include "dex/reg_storage_eq.h" #include "mirror/array-inl.h" #include "x86_lir.h" namespace art { /* * Compare two 64-bit values * x = y return 0 * x < y return -1 * x > y return 1 */ void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { if (cu_->target64) { rl_src1 = LoadValueWide(rl_src1, kCoreReg); rl_src2 = LoadValueWide(rl_src2, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage temp_reg = AllocTemp(); OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG); // result = (src1 > src2) ? 1 : 0 NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL); // temp = (src1 >= src2) ? 
0 : 1 NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg()); NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); StoreValue(rl_dest, rl_result); FreeTemp(temp_reg); return; } // Prepare for explicit register usage ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3); RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1); RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3); LoadValueDirectWideFixed(rl_src1, r_tmp1); LoadValueDirectWideFixed(rl_src2, r_tmp2); // Compute (r1:r0) = (r1:r0) - (r3:r2) OpRegReg(kOpSub, rs_r0, rs_r2); // r0 = r0 - r2 OpRegReg(kOpSbc, rs_r1, rs_r3); // r1 = r1 - r3 - CF NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL); // r2 = (r1:r0) < (r3:r2) ? 1 : 0 NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg()); OpReg(kOpNeg, rs_r2); // r2 = -r2 OpRegReg(kOpOr, rs_r0, rs_r1); // r0 = high | low - sets ZF NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz); // r0 = (r1:r0) != (r3:r2) ? 1 : 0 NewLIR2(kX86Movzx8RR, r0, r0); OpRegReg(kOpOr, rs_r0, rs_r2); // r0 = r0 | r2 RegLocation rl_result = LocCReturn(); StoreValue(rl_dest, rl_result); } X86ConditionCode X86ConditionEncoding(ConditionCode cond) { switch (cond) { case kCondEq: return kX86CondEq; case kCondNe: return kX86CondNe; case kCondCs: return kX86CondC; case kCondCc: return kX86CondNc; case kCondUlt: return kX86CondC; case kCondUge: return kX86CondNc; case kCondMi: return kX86CondS; case kCondPl: return kX86CondNs; case kCondVs: return kX86CondO; case kCondVc: return kX86CondNo; case kCondHi: return kX86CondA; case kCondLs: return kX86CondBe; case kCondGe: return kX86CondGe; case kCondLt: return kX86CondL; case kCondGt: return kX86CondG; case kCondLe: return kX86CondLe; case kCondAl: case kCondNv: LOG(FATAL) << "Should not reach here"; } return kX86CondO; } LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) { NewLIR2(src1.Is64Bit() ? 
kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg()); X86ConditionCode cc = X86ConditionEncoding(cond); LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc); branch->target = target; return branch; } LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) { if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) { // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode NewLIR2(reg.Is64Bit() ? kX86Test64RR: kX86Test32RR, reg.GetReg(), reg.GetReg()); } else { if (reg.Is64Bit()) { NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value); } else { NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value); } } X86ConditionCode cc = X86ConditionEncoding(cond); LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc); branch->target = target; return branch; } LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) { // If src or dest is a pair, we'll be using low reg. if (r_dest.IsPair()) { r_dest = r_dest.GetLow(); } if (r_src.IsPair()) { r_src = r_src.GetLow(); } if (r_dest.IsFloat() || r_src.IsFloat()) return OpFpRegCopy(r_dest, r_src); LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR, r_dest.GetReg(), r_src.GetReg()); if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { res->flags.is_nop = true; } return res; } void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) { if (r_dest != r_src) { LIR *res = OpRegCopyNoInsert(r_dest, r_src); AppendLIR(res); } } void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { if (r_dest != r_src) { bool dest_fp = r_dest.IsFloat(); bool src_fp = r_src.IsFloat(); if (dest_fp) { if (src_fp) { OpRegCopy(r_dest, r_src); } else { // TODO: Prevent this from happening in the code. The result is often // unused or could have been loaded more easily from memory. 
if (!r_src.IsPair()) { DCHECK(!r_dest.IsPair()); NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg()); } else { NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg()); RegStorage r_tmp = AllocTempDouble(); NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg()); NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg()); FreeTemp(r_tmp); } } } else { if (src_fp) { if (!r_dest.IsPair()) { DCHECK(!r_src.IsPair()); NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg()); } else { NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg()); RegStorage temp_reg = AllocTempDouble(); NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg()); NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32); NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg()); } } else { DCHECK_EQ(r_dest.IsPair(), r_src.IsPair()); if (!r_src.IsPair()) { // Just copy the register directly. OpRegCopy(r_dest, r_src); } else { // Handle overlap if (r_src.GetHighReg() == r_dest.GetLowReg() && r_src.GetLowReg() == r_dest.GetHighReg()) { // Deal with cycles. 
RegStorage temp_reg = AllocTemp(); OpRegCopy(temp_reg, r_dest.GetHigh()); OpRegCopy(r_dest.GetHigh(), r_dest.GetLow()); OpRegCopy(r_dest.GetLow(), temp_reg); FreeTemp(temp_reg); } else if (r_src.GetHighReg() == r_dest.GetLowReg()) { OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); OpRegCopy(r_dest.GetLow(), r_src.GetLow()); } else { OpRegCopy(r_dest.GetLow(), r_src.GetLow()); OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); } } } } } } void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code, int32_t true_val, int32_t false_val, RegStorage rs_dest, RegisterClass dest_reg_class) { DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair()); DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat()); // We really need this check for correctness, otherwise we will need to do more checks in // non zero/one case if (true_val == false_val) { LoadConstantNoClobber(rs_dest, true_val); return; } const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op); const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0); if (zero_one_case && IsByteRegister(rs_dest)) { if (!dest_intersect) { LoadConstantNoClobber(rs_dest, 0); } OpRegReg(kOpCmp, left_op, right_op); // Set the low byte of the result to 0 or 1 from the compare condition code. NewLIR2(kX86Set8R, rs_dest.GetReg(), X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code))); if (dest_intersect) { NewLIR2(rs_dest.Is64Bit() ? 
kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg()); } } else { // Be careful rs_dest can be changed only after cmp because it can be the same as one of ops // and it cannot use xor because it makes cc flags to be dirty RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false); if (temp_reg.Valid()) { if (false_val == 0 && dest_intersect) { code = FlipComparisonOrder(code); std::swap(true_val, false_val); } if (!dest_intersect) { LoadConstantNoClobber(rs_dest, false_val); } LoadConstantNoClobber(temp_reg, true_val); OpRegReg(kOpCmp, left_op, right_op); if (dest_intersect) { LoadConstantNoClobber(rs_dest, false_val); DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); } OpCondRegReg(kOpCmov, code, rs_dest, temp_reg); FreeTemp(temp_reg); } else { // slow path LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr); LoadConstantNoClobber(rs_dest, false_val); LIR* that_is_it = NewLIR1(kX86Jmp8, 0); LIR* true_case = NewLIR0(kPseudoTargetLabel); cmp_branch->target = true_case; LoadConstantNoClobber(rs_dest, true_val); LIR* end = NewLIR0(kPseudoTargetLabel); that_is_it->target = end; } } } void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { UNUSED(bb); RegLocation rl_result; RegLocation rl_src = mir_graph_->GetSrc(mir, 0); RegLocation rl_dest = mir_graph_->GetDest(mir); // Avoid using float regs here. RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg; RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg; ConditionCode ccode = mir->meta.ccode; // The kMirOpSelect has two variants, one for constants and one for moves. 
const bool is_constant_case = (mir->ssa_rep->num_uses == 1); if (is_constant_case) { int true_val = mir->dalvikInsn.vB; int false_val = mir->dalvikInsn.vC; // simplest strange case if (true_val == false_val) { rl_result = EvalLoc(rl_dest, result_reg_class, true); LoadConstantNoClobber(rl_result.reg, true_val); } else { // TODO: use GenSelectConst32 and handle additional opcode patterns such as // "cmp; setcc; movzx" or "cmp; sbb r0,r0; and r0,$mask; add r0,$literal". rl_src = LoadValue(rl_src, src_reg_class); rl_result = EvalLoc(rl_dest, result_reg_class, true); /* * For ccode == kCondEq: * * 1) When the true case is zero and result_reg is not same as src_reg: * xor result_reg, result_reg * cmp $0, src_reg * mov t1, $false_case * cmovnz result_reg, t1 * 2) When the false case is zero and result_reg is not same as src_reg: * xor result_reg, result_reg * cmp $0, src_reg * mov t1, $true_case * cmovz result_reg, t1 * 3) All other cases (we do compare first to set eflags): * cmp $0, src_reg * mov result_reg, $false_case * mov t1, $true_case * cmovz result_reg, t1 */ // FIXME: depending on how you use registers you could get a false != mismatch when dealing // with different views of the same underlying physical resource (i.e. solo32 vs. solo64). const bool result_reg_same_as_src = (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum()); const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src); const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src); const bool catch_all_case = !(true_zero_case || false_zero_case); if (true_zero_case || false_zero_case) { OpRegReg(kOpXor, rl_result.reg, rl_result.reg); } if (true_zero_case || false_zero_case || catch_all_case) { OpRegImm(kOpCmp, rl_src.reg, 0); } if (catch_all_case) { OpRegImm(kOpMov, rl_result.reg, false_val); } if (true_zero_case || false_zero_case || catch_all_case) { ConditionCode cc = true_zero_case ? 
NegateComparison(ccode) : ccode; int immediateForTemp = true_zero_case ? false_val : true_val; RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class); OpRegImm(kOpMov, temp1_reg, immediateForTemp); OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg); FreeTemp(temp1_reg); } } } else { rl_src = LoadValue(rl_src, src_reg_class); RegLocation rl_true = mir_graph_->GetSrc(mir, 1); RegLocation rl_false = mir_graph_->GetSrc(mir, 2); rl_true = LoadValue(rl_true, result_reg_class); rl_false = LoadValue(rl_false, result_reg_class); rl_result = EvalLoc(rl_dest, result_reg_class, true); /* * For ccode == kCondEq: * * 1) When true case is already in place: * cmp $0, src_reg * cmovnz result_reg, false_reg * 2) When false case is already in place: * cmp $0, src_reg * cmovz result_reg, true_reg * 3) When neither cases are in place: * cmp $0, src_reg * mov result_reg, false_reg * cmovz result_reg, true_reg */ // kMirOpSelect is generated just for conditional cases when comparison is done with zero. 
OpRegImm(kOpCmp, rl_src.reg, 0); if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) { OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg); } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) { OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg); } else { OpRegCopy(rl_result.reg, rl_false.reg); OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg); } } StoreValue(rl_dest, rl_result); } void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { LIR* taken = &block_label_list_[bb->taken]; RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); ConditionCode ccode = mir->meta.ccode; if (rl_src1.is_const) { std::swap(rl_src1, rl_src2); ccode = FlipComparisonOrder(ccode); } if (rl_src2.is_const) { // Do special compare/branch against simple const operand int64_t val = mir_graph_->ConstantValueWide(rl_src2); GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode); return; } if (cu_->target64) { rl_src1 = LoadValueWide(rl_src1, kCoreReg); rl_src2 = LoadValueWide(rl_src2, kCoreReg); OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); OpCondBranch(ccode, taken); return; } // Prepare for explicit register usage ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3); RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1); RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3); LoadValueDirectWideFixed(rl_src1, r_tmp1); LoadValueDirectWideFixed(rl_src2, r_tmp2); // Swap operands and condition code to prevent use of zero flag. 
if (ccode == kCondLe || ccode == kCondGt) { // Compute (r3:r2) = (r3:r2) - (r1:r0) OpRegReg(kOpSub, rs_r2, rs_r0); // r2 = r2 - r0 OpRegReg(kOpSbc, rs_r3, rs_r1); // r3 = r3 - r1 - CF } else { // Compute (r1:r0) = (r1:r0) - (r3:r2) OpRegReg(kOpSub, rs_r0, rs_r2); // r0 = r0 - r2 OpRegReg(kOpSbc, rs_r1, rs_r3); // r1 = r1 - r3 - CF } switch (ccode) { case kCondEq: case kCondNe: OpRegReg(kOpOr, rs_r0, rs_r1); // r0 = r0 | r1 break; case kCondLe: ccode = kCondGe; break; case kCondGt: ccode = kCondLt; break; case kCondLt: case kCondGe: break; default: LOG(FATAL) << "Unexpected ccode: " << ccode; } OpCondBranch(ccode, taken); } void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, ConditionCode ccode) { int32_t val_lo = Low32Bits(val); int32_t val_hi = High32Bits(val); LIR* taken = &block_label_list_[bb->taken]; rl_src1 = LoadValueWide(rl_src1, kCoreReg); bool is_equality_test = ccode == kCondEq || ccode == kCondNe; if (cu_->target64) { if (is_equality_test && val == 0) { // We can simplify of comparing for ==, != to 0. NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg()); } else if (is_equality_test && val_hi == 0 && val_lo > 0) { OpRegImm(kOpCmp, rl_src1.reg, val_lo); } else { RegStorage tmp = AllocTypedTempWide(false, kCoreReg); LoadConstantWide(tmp, val); OpRegReg(kOpCmp, rl_src1.reg, tmp); FreeTemp(tmp); } OpCondBranch(ccode, taken); return; } if (is_equality_test && val != 0) { rl_src1 = ForceTempWide(rl_src1); } RegStorage low_reg = rl_src1.reg.GetLow(); RegStorage high_reg = rl_src1.reg.GetHigh(); if (is_equality_test) { // We can simplify of comparing for ==, != to 0. if (val == 0) { if (IsTemp(low_reg)) { OpRegReg(kOpOr, low_reg, high_reg); // We have now changed it; ignore the old values. Clobber(rl_src1.reg); } else { RegStorage t_reg = AllocTemp(); OpRegRegReg(kOpOr, t_reg, low_reg, high_reg); FreeTemp(t_reg); } OpCondBranch(ccode, taken); return; } // Need to compute the actual value for ==, !=. 
OpRegImm(kOpSub, low_reg, val_lo); NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi); OpRegReg(kOpOr, high_reg, low_reg); Clobber(rl_src1.reg); } else if (ccode == kCondLe || ccode == kCondGt) { // Swap operands and condition code to prevent use of zero flag. RegStorage tmp = AllocTypedTempWide(false, kCoreReg); LoadConstantWide(tmp, val); OpRegReg(kOpSub, tmp.GetLow(), low_reg); OpRegReg(kOpSbc, tmp.GetHigh(), high_reg); ccode = (ccode == kCondLe) ? kCondGe : kCondLt; FreeTemp(tmp); } else { // We can use a compare for the low word to set CF. OpRegImm(kOpCmp, low_reg, val_lo); if (IsTemp(high_reg)) { NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi); // We have now changed it; ignore the old values. Clobber(rl_src1.reg); } else { // mov temp_reg, high_reg; sbb temp_reg, high_constant RegStorage t_reg = AllocTemp(); OpRegCopy(t_reg, high_reg); NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi); FreeTemp(t_reg); } } OpCondBranch(ccode, taken); } void X86Mir2Lir::CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long) { // It does not make sense to calculate magic and shift for zero divisor. DCHECK_NE(divisor, 0); /* According to H.S.Warren's Hacker's Delight Chapter 10 and * T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication. * The magic number M and shift S can be calculated in the following way: * Let nc be the most positive value of numerator(n) such that nc = kd - 1, * where divisor(d) >=2. * Let nc be the most negative value of numerator(n) such that nc = kd + 1, * where divisor(d) <= -2. * Thus nc can be calculated like: * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long * nc = -exp + (exp + 1) % d, where d >= 2 and exp = 2^31 for int or 2^63 for long * * So the shift p is the smallest p satisfying * 2^p > nc * (d - 2^p % d), where d >= 2 * 2^p > nc * (d + 2^p % d), where d <= -2. 
* * the magic number M is calcuated by * M = (2^p + d - 2^p % d) / d, where d >= 2 * M = (2^p - d - 2^p % d) / d, where d <= -2. * * Notice that p is always bigger than or equal to 32/64, so we just return 32-p/64-p as * the shift number S. */ int64_t p = (is_long) ? 63 : 31; const uint64_t exp = (is_long) ? 0x8000000000000000ULL : 0x80000000U; // Initialize the computations. uint64_t abs_d = (divisor >= 0) ? divisor : -divisor; uint64_t tmp = exp + ((is_long) ? static_cast(divisor) >> 63 : static_cast(divisor) >> 31); uint64_t abs_nc = tmp - 1 - tmp % abs_d; uint64_t quotient1 = exp / abs_nc; uint64_t remainder1 = exp % abs_nc; uint64_t quotient2 = exp / abs_d; uint64_t remainder2 = exp % abs_d; /* * To avoid handling both positive and negative divisor, Hacker's Delight * introduces a method to handle these 2 cases together to avoid duplication. */ uint64_t delta; do { p++; quotient1 = 2 * quotient1; remainder1 = 2 * remainder1; if (remainder1 >= abs_nc) { quotient1++; remainder1 = remainder1 - abs_nc; } quotient2 = 2 * quotient2; remainder2 = 2 * remainder2; if (remainder2 >= abs_d) { quotient2++; remainder2 = remainder2 - abs_d; } delta = abs_d - remainder2; } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0)); magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1); if (!is_long) { magic = static_cast(magic); } shift = (is_long) ? p - 64 : p - 32; } RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) { UNUSED(rl_dest, reg_lo, lit, is_div); LOG(FATAL) << "Unexpected use of GenDivRemLit for x86"; UNREACHABLE(); } RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, int imm, bool is_div) { // Use a multiply (and fixup) to perform an int div/rem by a constant. RegLocation rl_result; if (imm == 1) { rl_result = EvalLoc(rl_dest, kCoreReg, true); if (is_div) { // x / 1 == x. LoadValueDirectFixed(rl_src, rl_result.reg); } else { // x % 1 == 0. 
LoadConstantNoClobber(rl_result.reg, 0); } } else if (imm == -1) { // handle 0x80000000 / -1 special case. rl_result = EvalLoc(rl_dest, kCoreReg, true); if (is_div) { LoadValueDirectFixed(rl_src, rl_result.reg); // Check if numerator is 0 OpRegImm(kOpCmp, rl_result.reg, 0); LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); // handle 0x80000000 / -1 OpRegImm(kOpCmp, rl_result.reg, 0x80000000); LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); // for x != MIN_INT, x / -1 == -x. NewLIR1(kX86Neg32R, rl_result.reg.GetReg()); // EAX already contains the right value (0x80000000), minint_branch->target = NewLIR0(kPseudoTargetLabel); branch->target = NewLIR0(kPseudoTargetLabel); } else { // x % -1 == 0. LoadConstantNoClobber(rl_result.reg, 0); } } else if (is_div && IsPowerOfTwo(std::abs(imm))) { // Division using shifting. rl_src = LoadValue(rl_src, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); if (IsSameReg(rl_result.reg, rl_src.reg)) { RegStorage rs_temp = AllocTypedTemp(false, kCoreReg); rl_result.reg.SetReg(rs_temp.GetReg()); } // Check if numerator is 0 OpRegImm(kOpCmp, rl_src.reg, 0); LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); LoadConstantNoClobber(rl_result.reg, 0); LIR* done = NewLIR1(kX86Jmp8, 0); branch->target = NewLIR0(kPseudoTargetLabel); NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1); NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg()); OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg); int shift_amount = CTZ(imm); OpRegImm(kOpAsr, rl_result.reg, shift_amount); if (imm < 0) { OpReg(kOpNeg, rl_result.reg); } done->target = NewLIR0(kPseudoTargetLabel); } else { CHECK(imm <= -2 || imm >= 2); // Use H.S.Warren's Hacker's Delight Chapter 10 and // T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication. 
int64_t magic; int shift; CalculateMagicAndShift((int64_t)imm, magic, shift, false /* is_long */); /* * For imm >= 2, * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0. * For imm <= -2, * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0. * We implement this algorithm in the following way: * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX * 2. if imm > 0 and magic < 0, add numerator to EDX * if imm < 0 and magic > 0, sub numerator from EDX * 3. if S !=0, SAR S bits for EDX * 4. add 1 to EDX if EDX < 0 * 5. Thus, EDX is the quotient */ FlushReg(rs_r0); Clobber(rs_r0); LockTemp(rs_r0); FlushReg(rs_r2); Clobber(rs_r2); LockTemp(rs_r2); // Assume that the result will be in EDX for divide, and EAX for remainder. rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, is_div ? rs_r2 : rs_r0, INVALID_SREG, INVALID_SREG}; // We need the value at least twice. Load into a temp. rl_src = LoadValue(rl_src, kCoreReg); RegStorage numerator_reg = rl_src.reg; // Check if numerator is 0. OpRegImm(kOpCmp, numerator_reg, 0); LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); // Return result 0 if numerator was 0. LoadConstantNoClobber(rl_result.reg, 0); LIR* done = NewLIR1(kX86Jmp8, 0); branch->target = NewLIR0(kPseudoTargetLabel); // EAX = magic. LoadConstant(rs_r0, magic); // EDX:EAX = magic * numerator. NewLIR1(kX86Imul32DaR, numerator_reg.GetReg()); if (imm > 0 && magic < 0) { // Add numerator to EDX. DCHECK(numerator_reg.Valid()); NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg()); } else if (imm < 0 && magic > 0) { DCHECK(numerator_reg.Valid()); NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg()); } // Do we need the shift? if (shift != 0) { // Shift EDX by 'shift' bits. NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift); } // Add 1 to EDX if EDX < 0. // Move EDX to EAX. 
OpRegCopy(rs_r0, rs_r2); // Move sign bit to bit 0, zeroing the rest. NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31); // EDX = EDX + EAX. NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg()); // Quotient is in EDX. if (!is_div) { // We need to compute the remainder. // Remainder is divisor - (quotient * imm). DCHECK(numerator_reg.Valid()); OpRegCopy(rs_r0, numerator_reg); // EAX = numerator * imm. OpRegRegImm(kOpMul, rs_r2, rs_r2, imm); // EAX -= EDX. NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg()); // For this case, return the result in EAX. } done->target = NewLIR0(kPseudoTargetLabel); } return rl_result; } RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div) { UNUSED(rl_dest, reg_lo, reg_hi, is_div); LOG(FATAL) << "Unexpected use of GenDivRem for x86"; UNREACHABLE(); } RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div, int flags) { UNUSED(rl_dest); // We have to use fixed registers, so flush all the temps. // Prepare for explicit register usage. ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2); // Load LHS into EAX. LoadValueDirectFixed(rl_src1, rs_r0); // Load RHS into EBX. LoadValueDirectFixed(rl_src2, rs_r1); // Copy LHS sign bit into EDX. NewLIR0(kx86Cdq32Da); if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) { // Handle division by zero case. GenDivZeroCheck(rs_r1); } // Check if numerator is 0 OpRegImm(kOpCmp, rs_r0, 0); LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); // Have to catch 0x80000000/-1 case, or we will get an exception! OpRegImm(kOpCmp, rs_r1, -1); LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); // RHS is -1. OpRegImm(kOpCmp, rs_r0, 0x80000000); LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); branch->target = NewLIR0(kPseudoTargetLabel); // In 0x80000000/-1 case. if (!is_div) { // For DIV, EAX is already right. For REM, we need EDX 0. 
LoadConstantNoClobber(rs_r2, 0); } LIR* done = NewLIR1(kX86Jmp8, 0); // Expected case. minus_one_branch->target = NewLIR0(kPseudoTargetLabel); minint_branch->target = minus_one_branch->target; NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg()); done->target = NewLIR0(kPseudoTargetLabel); // Result is in EAX for div and EDX for rem. RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG}; if (!is_div) { rl_result.reg.SetReg(r2); } return rl_result; } static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); } bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64); if (is_long && !cu_->target64) { /* * We want to implement the following algorithm * mov eax, low part of arg1 * mov edx, high part of arg1 * mov ebx, low part of arg2 * mov ecx, high part of arg2 * mov edi, eax * sub edi, ebx * mov edi, edx * sbb edi, ecx * is_min ? "cmovgel eax, ebx" : "cmovll eax, ebx" * is_min ? "cmovgel edx, ecx" : "cmovll edx, ecx" * * The algorithm above needs 5 registers: a pair for the first operand * (which later will be used as result), a pair for the second operand * and a temp register (e.g. 'edi') for intermediate calculations. * Ideally we have 6 GP caller-save registers in 32-bit mode. They are: * 'eax', 'ebx', 'ecx', 'edx', 'esi' and 'edi'. So there should be * always enough registers to operate on. Practically, there is a pair * of registers 'edi' and 'esi' which holds promoted values and * sometimes should be treated as 'callee save'. If one of the operands * is in the promoted registers then we have enough register to * operate on. Otherwise there is lack of resources and we have to * save 'edi' before calculations and restore after. 
*/ RegLocation rl_src1 = info->args[0]; RegLocation rl_src2 = info->args[2]; RegLocation rl_dest = InlineTargetWide(info); if (rl_dest.s_reg_low == INVALID_SREG) { // Result is unused, the code is dead. Inlining successful, no code generated. return true; } if (PartiallyIntersects(rl_src1, rl_dest) && PartiallyIntersects(rl_src2, rl_dest)) { // A special case which we don't want to handle. // This is when src1 is mapped on v0 and v1, // src2 is mapped on v2, v3, // result is mapped on v1, v2 return false; } /* * If the result register is the same as the second element, then we * need to be careful. The reason is that the first copy will * inadvertently clobber the second element with the first one thus * yielding the wrong result. Thus we do a swap in that case. */ if (Intersects(rl_src2, rl_dest)) { std::swap(rl_src1, rl_src2); } rl_src1 = LoadValueWide(rl_src1, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); // Pick the first integer as min/max. OpRegCopyWide(rl_result.reg, rl_src1.reg); /* * If the integers are both in the same register, then there is * nothing else to do because they are equal and we have already * moved one into the result. */ if (mir_graph_->SRegToVReg(rl_src1.s_reg_low) == mir_graph_->SRegToVReg(rl_src2.s_reg_low)) { StoreValueWide(rl_dest, rl_result); return true; } // Free registers to make some room for the second operand. // But don't try to free part of a source which intersects // part of result or promoted registers. if (IsTemp(rl_src1.reg.GetLow()) && (rl_src1.reg.GetLowReg() != rl_result.reg.GetHighReg()) && (rl_src1.reg.GetLowReg() != rl_result.reg.GetLowReg())) { // Is low part temporary and doesn't intersect any parts of result? FreeTemp(rl_src1.reg.GetLow()); } if (IsTemp(rl_src1.reg.GetHigh()) && (rl_src1.reg.GetHighReg() != rl_result.reg.GetLowReg()) && (rl_src1.reg.GetHighReg() != rl_result.reg.GetHighReg())) { // Is high part temporary and doesn't intersect any parts of result? 
FreeTemp(rl_src1.reg.GetHigh()); } rl_src2 = LoadValueWide(rl_src2, kCoreReg); // Do we have a free register for intermediate calculations? RegStorage tmp = AllocTemp(false); const int kRegSize = cu_->target64 ? 8 : 4; if (tmp == RegStorage::InvalidReg()) { /* * No, will use 'edi'. * * As mentioned above we have 4 temporary and 2 promotable * caller-save registers. Therefore, we assume that a free * register can be allocated only if 'esi' and 'edi' are * already used as operands. If number of promotable registers * increases from 2 to 4 then our assumption fails and operand * data is corrupted. * Let's DCHECK it. */ DCHECK(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()) && IsTemp(rl_result.reg.GetLow()) && IsTemp(rl_result.reg.GetHigh())); tmp = rs_rDI; NewLIR1(kX86Push32R, tmp.GetReg()); cfi_.AdjustCFAOffset(kRegSize); // Record cfi only if it is not already spilled. if (!CoreSpillMaskContains(tmp.GetReg())) { cfi_.RelOffset(DwarfCoreReg(cu_->target64, tmp.GetReg()), 0); } } // Now we are ready to do calculations. OpRegReg(kOpMov, tmp, rl_result.reg.GetLow()); OpRegReg(kOpSub, tmp, rl_src2.reg.GetLow()); OpRegReg(kOpMov, tmp, rl_result.reg.GetHigh()); OpRegReg(kOpSbc, tmp, rl_src2.reg.GetHigh()); // Let's put pop 'edi' here to break a bit the dependency chain. if (tmp == rs_rDI) { NewLIR1(kX86Pop32R, tmp.GetReg()); cfi_.AdjustCFAOffset(-kRegSize); if (!CoreSpillMaskContains(tmp.GetReg())) { cfi_.Restore(DwarfCoreReg(cu_->target64, tmp.GetReg())); } } else { FreeTemp(tmp); } // Conditionally move the other integer into the destination register. ConditionCode cc = is_min ? kCondGe : kCondLt; OpCondRegReg(kOpCmov, cc, rl_result.reg.GetLow(), rl_src2.reg.GetLow()); OpCondRegReg(kOpCmov, cc, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh()); FreeTemp(rl_src2.reg); StoreValueWide(rl_dest, rl_result); return true; } // Get the two arguments to the invoke and place them in GP registers. RegLocation rl_dest = (is_long) ? 
InlineTargetWide(info) : InlineTarget(info); if (rl_dest.s_reg_low == INVALID_SREG) { // Result is unused, the code is dead. Inlining successful, no code generated. return true; } RegLocation rl_src1 = info->args[0]; RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1]; rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg); rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); /* * If the result register is the same as the second element, then we need to be careful. * The reason is that the first copy will inadvertently clobber the second element with * the first one thus yielding the wrong result. Thus we do a swap in that case. */ if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) { std::swap(rl_src1, rl_src2); } // Pick the first integer as min/max. OpRegCopy(rl_result.reg, rl_src1.reg); // If the integers are both in the same register, then there is nothing else to do // because they are equal and we have already moved one into the result. if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) { // It is possible we didn't pick correctly so do the actual comparison now. OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); // Conditionally move the other integer into the destination register. ConditionCode condition_code = is_min ? kCondGt : kCondLt; OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg); } if (is_long) { StoreValueWide(rl_dest, rl_result); } else { StoreValue(rl_dest, rl_result); } return true; } bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info); if (rl_dest.s_reg_low == INVALID_SREG) { // Result is unused, the code is dead. Inlining successful, no code generated. 
return true; } RegLocation rl_src_address = info->args[0]; // long address RegLocation rl_address; if (!cu_->target64) { rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0] rl_address = LoadValue(rl_src_address, kCoreReg); } else { rl_address = LoadValueWide(rl_src_address, kCoreReg); } RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); // Unaligned access is allowed on x86. LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile); if (size == k64) { StoreValueWide(rl_dest, rl_result); } else { DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); StoreValue(rl_dest, rl_result); } return true; } bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { RegLocation rl_src_address = info->args[0]; // long address RegLocation rl_address; if (!cu_->target64) { rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0] rl_address = LoadValue(rl_src_address, kCoreReg); } else { rl_address = LoadValueWide(rl_src_address, kCoreReg); } RegLocation rl_src_value = info->args[2]; // [size] value RegLocation rl_value; if (size == k64) { // Unaligned access is allowed on x86. rl_value = LoadValueWide(rl_src_value, kCoreReg); } else { DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); // In 32-bit mode the only EAX..EDX registers can be used with Mov8MR. 
if (!cu_->target64 && size == kSignedByte) {
      rl_src_value = UpdateLocTyped(rl_src_value);
      if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
        // Value lives in a register that cannot be byte-addressed: copy it to one that can.
        RegStorage temp = AllocateByteRegister();
        OpRegCopy(temp, rl_src_value.reg);
        rl_value.reg = temp;
      } else {
        rl_value = LoadValue(rl_src_value, kCoreReg);
      }
    } else {
      rl_value = LoadValue(rl_src_value, kCoreReg);
    }
  }
  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
  return true;
}

// Emits a kX86Lea32RA instruction with r_base as its first operand, followed by
// reg1, reg2, scale and offset.
void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale,
                       int offset) {
  NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset);
}

// Emits a kX86Cmp16TI8 instruction with the given thread offset and immediate `val`
// as operands. This overload asserts that the instruction set is kX86.
void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
  DCHECK_EQ(kX86, cu_->instruction_set);
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

// Same as the ThreadOffset<4> overload, but asserts that the instruction set is kX86_64.
void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
  DCHECK_EQ(kX86_64, cu_->instruction_set);
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

// Returns true when `rl` has a valid register equal to `reg` and either `reg` is live
// or `rl` is marked as home.
static bool IsInReg(X86Mir2Lir *pMir2Lir, const RegLocation &rl, RegStorage reg) {
  return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() &&
         (pMir2Lir->IsLive(reg) || rl.home);
}

// Inlines a compare-and-swap using a locked CMPXCHG. Three code paths exist:
//   - 64-bit value on a 64-bit target (kX86LockCmpxchg64AR, expected value in rs_r0q),
//   - 64-bit value on a 32-bit target (kX86LockCmpxchg64A with fixed EAX:EDX / EBX:ECX pairs
//     and ESI/EDI for object and offset),
//   - everything else (kX86LockCmpxchgAR, expected value in rs_r0).
// The resulting ZF is converted to a boolean and stored to the inline target.
// Always returns true.
bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  if (!cu_->target64) {
    rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
  }
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]
  // Size of one pushed register; used for the CFA adjustments around push/pop below.
  const int kRegSize = cu_->target64 ? 8 : 4;

  if (is_long && cu_->target64) {
    // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX.
    FlushReg(rs_r0q);
    Clobber(rs_r0q);
    LockTemp(rs_r0q);

    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
    RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
    RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
    LoadValueDirectWide(rl_src_expected, rs_r0q);
    NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
            rl_new_value.reg.GetReg());

    // After a store we need to insert barrier in case of potential load. Since the
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
    GenMemBarrier(kAnyAny);

    FreeTemp(rs_r0q);
  } else if (is_long) {
    // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
    FlushAllRegs();
    LockCallTemps();
    // Expected value goes to the EAX/EDX pair, new value to the EBX/ECX pair.
    RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
    RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
    LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
    LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
    // FIXME: needs 64-bit update.
    const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
    const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
    DCHECK(!obj_in_si || !obj_in_di);
    const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
    const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
    DCHECK(!off_in_si || !off_in_di);
    // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
    RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
    RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
    // A register is pushed only when it is about to be used and holds neither input already.
    bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
    bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
    if (push_di) {
      NewLIR1(kX86Push32R, rs_rDI.GetReg());
      MarkTemp(rs_rDI);
      LockTemp(rs_rDI);
      cfi_.AdjustCFAOffset(kRegSize);
      // Record cfi only if it is not already spilled.
      if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
        cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0);
      }
    }
    if (push_si) {
      NewLIR1(kX86Push32R, rs_rSI.GetReg());
      MarkTemp(rs_rSI);
      LockTemp(rs_rSI);
      cfi_.AdjustCFAOffset(kRegSize);
      // Record cfi only if it is not already spilled.
      if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
        cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetReg()), 0);
      }
    }
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    // Each push above moved the stack pointer by 4 bytes, so SP-relative frame offsets
    // must be compensated by that amount.
    const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
    const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
    if (!obj_in_si && !obj_in_di) {
      LoadWordDisp(rs_rSP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
    }
    if (!off_in_si && !off_in_di) {
      LoadWordDisp(rs_rSP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
    }
    NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);

    // After a store we need to insert barrier to prevent reordering with either
    // earlier or later memory accesses.  Since
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
    // and it will be associated with the cmpxchg instruction, preventing both.
    GenMemBarrier(kAnyAny);

    // Pops happen in the reverse order of the pushes above (SI first, then DI).
    if (push_si) {
      FreeTemp(rs_rSI);
      UnmarkTemp(rs_rSI);
      NewLIR1(kX86Pop32R, rs_rSI.GetReg());
      cfi_.AdjustCFAOffset(-kRegSize);
      if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
        cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()));
      }
    }
    if (push_di) {
      FreeTemp(rs_rDI);
      UnmarkTemp(rs_rDI);
      NewLIR1(kX86Pop32R, rs_rDI.GetReg());
      cfi_.AdjustCFAOffset(-kRegSize);
      if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
        cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()));
      }
    }
    FreeCallTemps();
  } else {
    // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX.
    FlushReg(rs_r0);
    Clobber(rs_r0);
    LockTemp(rs_r0);

    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
    RegLocation rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);

    if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
      // Mark card for object assuming new value is stored.
      FreeTemp(rs_r0);  // Temporarily release EAX for MarkGCCard().
      MarkGCCard(0, rl_new_value.reg, rl_object.reg);
      LockTemp(rs_r0);
    }

    RegLocation rl_offset;
    if (cu_->target64) {
      rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
    } else {
      rl_offset = LoadValue(rl_src_offset, kCoreReg);
    }
    LoadValueDirect(rl_src_expected, rs_r0);
    NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
            rl_new_value.reg.GetReg());

    // After a store we need to insert barrier to prevent reordering with either
    // earlier or later memory accesses.  Since
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
    // and it will be associated with the cmpxchg instruction, preventing both.
    GenMemBarrier(kAnyAny);

    FreeTemp(rs_r0);
  }

  // Convert ZF to boolean
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage result_reg = rl_result.reg;

  // For 32-bit, SETcc only works with EAX..EDX.
  if (!IsByteRegister(result_reg)) {
    result_reg = AllocateByteRegister();
  }
  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
  // Zero-extend the byte result into the real destination register.
  NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
  if (IsTemp(result_reg)) {
    FreeTemp(result_reg);
  }
  StoreValue(rl_dest, rl_result);
  return true;
}

// Emits code computing, in place:
//   result_reg = ((result_reg >> shift) & value) | ((result_reg & value) << shift)
// using one temporary register that is freed before returning.
void X86Mir2Lir::SwapBits(RegStorage result_reg, int shift, int32_t value) {
  RegStorage r_temp = AllocTemp();
  OpRegCopy(r_temp, result_reg);
  OpRegImm(kOpLsr, result_reg, shift);
  OpRegImm(kOpAnd, r_temp, value);
  OpRegImm(kOpAnd, result_reg, value);
  OpRegImm(kOpLsl, r_temp, shift);
  OpRegReg(kOpOr, result_reg, r_temp);
  FreeTemp(r_temp);
}

// Wide variant of SwapBits(). The mask is materialized in a second wide temporary with
// LoadConstantWide() and applied with register-register ANDs.
void X86Mir2Lir::SwapBits64(RegStorage result_reg, int shift, int64_t value) {
  RegStorage r_temp = AllocTempWide();
  OpRegCopy(r_temp, result_reg);
  OpRegImm(kOpLsr, result_reg, shift);
  RegStorage r_value = AllocTempWide();
  LoadConstantWide(r_value, value);
  OpRegReg(kOpAnd, r_temp, r_value);
  OpRegReg(kOpAnd, result_reg, r_value);
  OpRegImm(kOpLsl, r_temp, shift);
  OpRegReg(kOpOr, result_reg, r_temp);
  FreeTemp(r_temp);
  FreeTemp(r_value);
}

// Inlines a bit reversal of args[0] for a 32-bit value or (size == k64) a 64-bit value.
// A byte reversal (kOpRev) is followed by three SwapBits rounds with shifts 1, 2 and 4.
// Always returns true.
bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  RegLocation rl_src_i = info->args[0];
  RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg)
                                   : LoadValue(rl_src_i, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (size == k64) {
    if (cu_->instruction_set == kX86_64) {
      /* Use one bswap instruction to reverse byte order first and then use 3 rounds of
         swapping bits to reverse bits in a long number x. Using bswap to save instructions
         compared to generic luni implementation which has 5 rounds of swapping bits.
         x = bswap x
         x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
         x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
         x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
      */
      OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
      SwapBits64(rl_result.reg, 1, 0x5555555555555555);
      SwapBits64(rl_result.reg, 2, 0x3333333333333333);
      SwapBits64(rl_result.reg, 4, 0x0f0f0f0f0f0f0f0f);
      StoreValueWide(rl_dest, rl_result);
      return true;
    }
    // 32-bit target: the halves are byte-reversed crosswise (result.low <- rev(in.high),
    // result.high <- rev(in.low)) and then bit-swapped independently.
    RegStorage r_i_low = rl_i.reg.GetLow();
    if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
      // First REV shall clobber rl_result.reg.GetLowReg(), save the value in a temp for the second
      // REV.
      r_i_low = AllocTemp();
      OpRegCopy(r_i_low, rl_i.reg);
    }
    OpRegReg(kOpRev, rl_result.reg.GetLow(), rl_i.reg.GetHigh());
    OpRegReg(kOpRev, rl_result.reg.GetHigh(), r_i_low);
    // Free up at least one input register if it was a temp. Otherwise we may be in the bad
    // situation of not having a temp available for SwapBits. Make sure it's not overlapping
    // with the output, though.
    if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
      // There's definitely a free temp after this.
      FreeTemp(r_i_low);
    } else {
      // We opportunistically release both here. That saves duplication of the register state
      // lookup (to see if it's actually a temp).
      if (rl_i.reg.GetLowReg() != rl_result.reg.GetHighReg()) {
        FreeTemp(rl_i.reg.GetLow());
      }
      if (rl_i.reg.GetHighReg() != rl_result.reg.GetLowReg() &&
          rl_i.reg.GetHighReg() != rl_result.reg.GetHighReg()) {
        FreeTemp(rl_i.reg.GetHigh());
      }
    }
    SwapBits(rl_result.reg.GetLow(), 1, 0x55555555);
    SwapBits(rl_result.reg.GetLow(), 2, 0x33333333);
    SwapBits(rl_result.reg.GetLow(), 4, 0x0f0f0f0f);
    SwapBits(rl_result.reg.GetHigh(), 1, 0x55555555);
    SwapBits(rl_result.reg.GetHigh(), 2, 0x33333333);
    SwapBits(rl_result.reg.GetHigh(), 4, 0x0f0f0f0f);
    StoreValueWide(rl_dest, rl_result);
  } else {
    OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
    SwapBits(rl_result.reg, 1, 0x55555555);
    SwapBits(rl_result.reg, 2, 0x33333333);
    SwapBits(rl_result.reg, 4, 0x0f0f0f0f);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
  if (cu_->target64) {
    // We can do this directly using RIP addressing.
    ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
    LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, kDummy32BitOffset);
    res->target = target;
    res->flags.fixup = kFixupLoad;
    return;
  }

  // Get the PC to a register and get the anchor.
  LIR* anchor;
  RegStorage r_pc = GetPcAndAnchor(&anchor);

  // Load the proper value from the literal area.
ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), r_pc.GetReg(), kDummy32BitOffset); res->operands[4] = WrapPointer(anchor); res->target = target; res->flags.fixup = kFixupLoad; } bool X86Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { return dex_cache_arrays_layout_.Valid(); } LIR* X86Mir2Lir::OpLoadPc(RegStorage r_dest) { DCHECK(!cu_->target64); LIR* call = NewLIR1(kX86CallI, 0); call->flags.fixup = kFixupLabel; LIR* pop = NewLIR1(kX86Pop32R, r_dest.GetReg()); pop->flags.fixup = kFixupLabel; DCHECK(NEXT_LIR(call) == pop); return call; } RegStorage X86Mir2Lir::GetPcAndAnchor(LIR** anchor, RegStorage r_tmp) { if (pc_rel_base_reg_.Valid()) { DCHECK(setup_pc_rel_base_reg_ != nullptr); *anchor = NEXT_LIR(setup_pc_rel_base_reg_); DCHECK(*anchor != nullptr); DCHECK_EQ((*anchor)->opcode, kX86Pop32R); pc_rel_base_reg_used_ = true; return pc_rel_base_reg_; } else { RegStorage r_pc = r_tmp.Valid() ? r_tmp : AllocTempRef(); LIR* load_pc = OpLoadPc(r_pc); *anchor = NEXT_LIR(load_pc); DCHECK(*anchor != nullptr); DCHECK_EQ((*anchor)->opcode, kX86Pop32R); return r_pc; } } void X86Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest, bool wide) { if (cu_->target64) { LIR* mov = NewLIR3(wide ? kX86Mov64RM : kX86Mov32RM, r_dest.GetReg(), kRIPReg, kDummy32BitOffset); mov->flags.fixup = kFixupLabel; mov->operands[3] = WrapPointer(dex_file); mov->operands[4] = offset; mov->target = mov; // Used for pc_insn_offset (not used by x86-64 relative patcher). dex_cache_access_insns_.push_back(mov); } else { CHECK(!wide) << "Unsupported"; // Get the PC to a register and get the anchor. Use r_dest for the temp if needed. 
LIR* anchor; RegStorage r_pc = GetPcAndAnchor(&anchor, r_dest); LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), r_pc.GetReg(), kDummy32BitOffset); mov->flags.fixup = kFixupLabel; mov->operands[3] = WrapPointer(dex_file); mov->operands[4] = offset; mov->target = anchor; // Used for pc_insn_offset. dex_cache_access_insns_.push_back(mov); } } LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) { UNUSED(r_base, count); LOG(FATAL) << "Unexpected use of OpVldm for x86"; UNREACHABLE(); } LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) { UNUSED(r_base, count); LOG(FATAL) << "Unexpected use of OpVstm for x86"; UNREACHABLE(); } void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, int first_bit, int second_bit) { UNUSED(lit); RegStorage t_reg = AllocTemp(); OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit); OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg); FreeTemp(t_reg); if (first_bit != 0) { OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit); } } void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) { if (cu_->target64) { DCHECK(reg.Is64Bit()); NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0); } else { DCHECK(reg.IsPair()); // We are not supposed to clobber the incoming storage, so allocate a temporary. RegStorage t_reg = AllocTemp(); // Doing an OR is a quick way to check if both registers are zero. This will set the flags. OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh()); // The temp is no longer needed so free it at this time. FreeTemp(t_reg); } // In case of zero, throw ArithmeticException. 
GenDivZeroCheck(kCondEq); } void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index, RegStorage array_base, int len_offset) { class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath { public: ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in, RegStorage index_in, RegStorage array_base_in, int32_t len_offset_in) : LIRSlowPath(m2l, branch_in), index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) { } void Compile() OVERRIDE { m2l_->ResetRegPool(); m2l_->ResetDefTracking(); GenerateTargetLabel(kPseudoThrowTarget); RegStorage new_index = index_; // Move index out of kArg1, either directly to kArg0, or to kArg2. // TODO: clean-up to check not a number but with type if (index_ == m2l_->TargetReg(kArg1, kNotWide)) { if (array_base_ == m2l_->TargetReg(kArg0, kRef)) { m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_); new_index = m2l_->TargetReg(kArg2, kNotWide); } else { m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_); new_index = m2l_->TargetReg(kArg0, kNotWide); } } // Load array length to kArg1. 
X86Mir2Lir* x86_m2l = static_cast(m2l_); x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_); x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, new_index, m2l_->TargetReg(kArg1, kNotWide), true); } private: const RegStorage index_; const RegStorage array_base_; const int32_t len_offset_; }; OpRegMem(kOpCmp, index, array_base, len_offset); MarkPossibleNullPointerException(0); LIR* branch = OpCondBranch(kCondUge, nullptr); AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch, index, array_base, len_offset)); } void X86Mir2Lir::GenArrayBoundsCheck(int32_t index, RegStorage array_base, int32_t len_offset) { class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath { public: ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in, int32_t index_in, RegStorage array_base_in, int32_t len_offset_in) : LIRSlowPath(m2l, branch_in), index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) { } void Compile() OVERRIDE { m2l_->ResetRegPool(); m2l_->ResetDefTracking(); GenerateTargetLabel(kPseudoThrowTarget); // Load array length to kArg1. X86Mir2Lir* x86_m2l = static_cast(m2l_); x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_); x86_m2l->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_); x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, m2l_->TargetReg(kArg0, kNotWide), m2l_->TargetReg(kArg1, kNotWide), true); } private: const int32_t index_; const RegStorage array_base_; const int32_t len_offset_; }; NewLIR3(IS_SIMM8(index) ? 
kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index); MarkPossibleNullPointerException(0); LIR* branch = OpCondBranch(kCondLs, nullptr); AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch, index, array_base, len_offset)); } // Test suspend flag, return target of taken suspend branch LIR* X86Mir2Lir::OpTestSuspend(LIR* target) { if (cu_->target64) { OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0); } else { OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0); } return OpCondBranch((target == nullptr) ? kCondNe : kCondEq, target); } // Decrement register and branch on condition LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) { OpRegImm(kOpSub, reg, 1); return OpCondBranch(c_code, target); } bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src, RegLocation rl_dest, int lit) { UNUSED(dalvik_opcode, is_div, rl_src, rl_dest, lit); LOG(FATAL) << "Unexpected use of smallLiteralDive in x86"; UNREACHABLE(); } bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) { UNUSED(rl_src, rl_dest, lit); LOG(FATAL) << "Unexpected use of easyMultiply in x86"; UNREACHABLE(); } LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) { UNUSED(cond, guide); LOG(FATAL) << "Unexpected use of OpIT in x86"; UNREACHABLE(); } void X86Mir2Lir::OpEndIT(LIR* it) { UNUSED(it); LOG(FATAL) << "Unexpected use of OpEndIT in x86"; UNREACHABLE(); } void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) { switch (val) { case 0: NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg()); break; case 1: OpRegCopy(dest, src); break; default: OpRegRegImm(kOpMul, dest, src, val); break; } } void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) { UNUSED(sreg); // All memory accesses below reference dalvik regs. 
ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  LIR *m;
  switch (val) {
    case 0:
      // Multiplying by zero: just clear the destination.
      NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
      break;
    case 1: {
      // Multiplying by one: a plain 32-bit load from the stack slot.
      const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
      LoadBaseDisp(rs_rSP, displacement, dest, k32, kNotVolatile);
      break;
    }
    default:
      // NOTE(review): this path always uses rs_rX86_SP_32 as base, unlike case 1 above.
      m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
                  rs_rX86_SP_32.GetReg(), displacement, val);
      AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
      break;
  }
}

// Dispatches a long arithmetic dex opcode to the matching generator. On 32-bit targets
// NOT/DIV/REM are delegated to the generic Mir2Lir implementation instead.
// Unknown opcodes are a fatal error.
void X86Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                RegLocation rl_src1, RegLocation rl_src2, int flags) {
  if (!cu_->target64) {
    // Some x86 32b ops are fallback.
    switch (opcode) {
      case Instruction::NOT_LONG:
      case Instruction::DIV_LONG:
      case Instruction::DIV_LONG_2ADDR:
      case Instruction::REM_LONG:
      case Instruction::REM_LONG_2ADDR:
        Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
        return;

      default:
        // Everything else we can handle.
        break;
    }
  }

  // The last argument of GenLongArith() is true for ADD/AND/OR/XOR and false for SUB.
  switch (opcode) {
    case Instruction::NOT_LONG:
      GenNotLong(rl_dest, rl_src2);
      return;

    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
      return;

    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
      return;

    case Instruction::MUL_LONG:
    case Instruction::MUL_LONG_2ADDR:
      GenMulLong(opcode, rl_dest, rl_src1, rl_src2, flags);
      return;

    case Instruction::DIV_LONG:
    case Instruction::DIV_LONG_2ADDR:
      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags);
      return;

    case Instruction::REM_LONG:
    case Instruction::REM_LONG_2ADDR:
      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags);
      return;

    case Instruction::AND_LONG_2ADDR:
    case Instruction::AND_LONG:
      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
      return;

    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
      return;

    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
      return;

    case Instruction::NEG_LONG:
      GenNegLong(rl_dest, rl_src2);
      return;

    default:
      LOG(FATAL) << "Invalid long arith op";
      return;
  }
}

bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val,
                                 int flags) {
  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  if (val == 0) {
    // x * 0: zero the result (both halves on 32-bit targets).
    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    if (cu_->target64) {
      OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
    } else {
      OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow());
      OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());
    }
    StoreValueWide(rl_dest, rl_result);
    return true;
  } else if (val == 1) {
    // x * 1: plain copy.
    StoreValueWide(rl_dest, rl_src1);
    return true;
  } else if (val == 2) {
    // x * 2: emitted as x + x.
    GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1, flags);
    return true;
  } else if (IsPowerOfTwo(val)) {
    // x * 2^k: emitted as a left shift, unless source and destination partially overlap,
    // in which case control falls through to the code below.
    int shift_amount = CTZ(val);
    if (!PartiallyIntersects(rl_src1, rl_dest)) {
      rl_src1 = LoadValueWide(rl_src1, kCoreReg);
      RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1,
                                                shift_amount, flags);
      StoreValueWide(rl_dest, rl_result);
      return true;
    }
  }

  // Okay, on 32b just bite the bullet and do it, still better than the general case.
  if (!cu_->target64) {
    int32_t val_lo = Low32Bits(val);
    int32_t val_hi = High32Bits(val);
    // Prepare for explicit register usage.
ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
    rl_src1 = UpdateLocWideTyped(rl_src1);
    bool src1_in_reg = rl_src1.location == kLocPhysReg;
    int displacement = SRegOffset(rl_src1.s_reg_low);

    // ECX <- 1H * 2L
    // EAX <- 1L * 2H
    if (src1_in_reg) {
      GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo);
      GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi);
    } else {
      GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
      GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
    }

    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());

    // EAX <- 2L
    LoadConstantNoClobber(rs_r0, val_lo);

    // EDX:EAX <- 2L * 1L (double precision)
    if (src1_in_reg) {
      NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
    } else {
      LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET);
      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is_64bit */);
    }

    // EDX <- EDX + ECX (add high words)
    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());

    // Result is EDX:EAX
    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
                             RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
    StoreValueWide(rl_dest, rl_result);
    return true;
  }
  return false;
}

// Generates a 64-bit multiply. A constant operand is first tried via GenMulLongConst().
// On 64-bit targets a single kX86Imul64RR is emitted. On 32-bit targets the product is
// built in the fixed registers named in the comments below (EAX, ECX, EDX) as
//   high = 1H*2L + 2H*1L + high32(1L*2L),  low = low32(1L*2L)
// where 1H/1L and 2H/2L are the high/low words of rl_src1 and rl_src2.
void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2, int flags) {
  // Canonicalize so that a constant operand, if any, ends up in rl_src2.
  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
  }

  if (rl_src2.is_const) {
    if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2), flags)) {
      return;
    }
  }

  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  if (cu_->target64) {
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    // The destination of kX86Imul64RR is also its first source, so pick the operand
    // order based on which input (if any) already shares the result register.
    if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
        rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
    } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() &&
               rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg());
    } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
               rl_result.reg.GetReg() != rl_src2.reg.GetReg()) {
      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    } else {
      OpRegCopy(rl_result.reg, rl_src1.reg);
      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    }
    StoreValueWide(rl_dest, rl_result);
    return;
  }

  // Not multiplying by a constant. Do it the hard way
  // Check for V*V.  We can eliminate a multiply in that case, as 2L*1H == 2H*1L.
  bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
                   mir_graph_->SRegToVReg(rl_src2.s_reg_low);

  // Prepare for explicit register usage.
  ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
  rl_src1 = UpdateLocWideTyped(rl_src1);
  rl_src2 = UpdateLocWideTyped(rl_src2);

  // At this point, the VRs are in their home locations.
  bool src1_in_reg = rl_src1.location == kLocPhysReg;
  bool src2_in_reg = rl_src2.location == kLocPhysReg;
  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;

  // ECX <- 1H
  if (src1_in_reg) {
    NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
  } else {
    LoadBaseDisp(rs_rSP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32,
                 kNotVolatile);
  }

  if (is_square) {
    // Take advantage of the fact that the values are the same.
    // ECX <- ECX * 2L  (1H * 2L)
    if (src2_in_reg) {
      NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
    } else {
      int displacement = SRegOffset(rl_src2.s_reg_low);
      LIR* m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP_32.GetReg(),
                       displacement + LOWORD_OFFSET);
      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is_64bit */);
    }

    // ECX <- 2*ECX (2H * 1L) + (1H * 2L)
    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg());
  } else {
    // EAX <- 2H
    if (src2_in_reg) {
      NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
    } else {
      LoadBaseDisp(rs_rSP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32,
                   kNotVolatile);
    }

    // EAX <- EAX * 1L  (2H * 1L)
    if (src1_in_reg) {
      NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg());
    } else {
      int displacement = SRegOffset(rl_src1.s_reg_low);
      LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP_32.GetReg(),
                       displacement + LOWORD_OFFSET);
      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is_64bit */);
    }

    // ECX <- ECX * 2L  (1H * 2L)
    if (src2_in_reg) {
      NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
    } else {
      int displacement = SRegOffset(rl_src2.s_reg_low);
      LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP_32.GetReg(),
                       displacement + LOWORD_OFFSET);
      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is_64bit */);
    }

    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
  }

  // EAX <- 2L
  if (src2_in_reg) {
    NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
  } else {
    LoadBaseDisp(rs_rSP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32,
                 kNotVolatile);
  }

  // EDX:EAX <- 2L * 1L (double precision)
  if (src1_in_reg) {
    NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
  } else {
    int displacement = SRegOffset(rl_src1.s_reg_low);
    LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET);
    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                            true /* is_load */, true /* is_64bit */);
  }

  // EDX <- EDX + ECX (add high words)
  NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());

  // Result is EDX:EAX
  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
                           RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
  StoreValueWide(rl_dest, rl_result);
}

void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
                                   Instruction::Code op) {
  DCHECK_EQ(rl_dest.location, kLocPhysReg);
  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
  if (rl_src.location == kLocPhysReg) {
    // Both operands are in registers.
    // But we must ensure that rl_src is in pair
    if (cu_->target64) {
      NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
    } else {
      rl_src = LoadValueWide(rl_src, kCoreReg);
      if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
        // The registers are the same, so we would clobber it before the use.
        RegStorage temp_reg = AllocTemp();
        OpRegCopy(temp_reg, rl_dest.reg);
        rl_src.reg.SetHighReg(temp_reg.GetReg());
      }
      NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());

      x86op = GetOpcode(op, rl_dest, rl_src, true);
      NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
    }
    return;
  }

  // RHS is in memory.
  DCHECK((rl_src.location == kLocDalvikFrame) ||
         (rl_src.location == kLocCompilerTemp));
  int r_base = rs_rX86_SP_32.GetReg();
  int displacement = SRegOffset(rl_src.s_reg_low);

  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
  LIR *lir = NewLIR3(x86op, cu_->target64 ?
rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); if (!cu_->target64) { x86op = GetOpcode(op, rl_dest, rl_src, true); lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET); AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); } } void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { rl_dest = UpdateLocWideTyped(rl_dest); if (rl_dest.location == kLocPhysReg) { // Ensure we are in a register pair RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); rl_src = UpdateLocWideTyped(rl_src); GenLongRegOrMemOp(rl_result, rl_src, op); StoreFinalValueWide(rl_dest, rl_result); return; } else if (!cu_->target64 && Intersects(rl_src, rl_dest)) { // Handle the case when src and dest are intersect. rl_src = LoadValueWide(rl_src, kCoreReg); RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); rl_src = UpdateLocWideTyped(rl_src); GenLongRegOrMemOp(rl_result, rl_src, op); StoreFinalValueWide(rl_dest, rl_result); return; } // It wasn't in registers, so it better be in memory. DCHECK((rl_dest.location == kLocDalvikFrame) || (rl_dest.location == kLocCompilerTemp)); rl_src = LoadValueWide(rl_src, kCoreReg); // Operate directly into memory. X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false); int r_base = rs_rX86_SP_32.GetReg(); int displacement = SRegOffset(rl_dest.s_reg_low); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, cu_->target64 ? 
rl_src.reg.GetReg() : rl_src.reg.GetLowReg()); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, false /* is_load */, true /* is64bit */); if (!cu_->target64) { x86op = GetOpcode(op, rl_dest, rl_src, true); lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg()); AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, false /* is_load */, true /* is64bit */); } int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low); int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low); // If the left operand is in memory and the right operand is in a register // and both belong to the same dalvik register then we should clobber the // right one because it doesn't hold valid data anymore. if (v_src_reg == v_dst_reg) { Clobber(rl_src.reg); } } void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, Instruction::Code op, bool is_commutative) { // Is this really a 2 operand operation? switch (op) { case Instruction::ADD_LONG_2ADDR: case Instruction::SUB_LONG_2ADDR: case Instruction::AND_LONG_2ADDR: case Instruction::OR_LONG_2ADDR: case Instruction::XOR_LONG_2ADDR: if (GenerateTwoOperandInstructions()) { GenLongArith(rl_dest, rl_src2, op); return; } break; default: break; } if (rl_dest.location == kLocPhysReg) { RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg); // We are about to clobber the LHS, so it needs to be a temp. rl_result = ForceTempWide(rl_result); // Perform the operation using the RHS. rl_src2 = UpdateLocWideTyped(rl_src2); GenLongRegOrMemOp(rl_result, rl_src2, op); // And now record that the result is in the temp. StoreFinalValueWide(rl_dest, rl_result); return; } // It wasn't in registers, so it better be in memory. 
DCHECK((rl_dest.location == kLocDalvikFrame) || (rl_dest.location == kLocCompilerTemp)); rl_src1 = UpdateLocWideTyped(rl_src1); rl_src2 = UpdateLocWideTyped(rl_src2); // Get one of the source operands into temporary register. rl_src1 = LoadValueWide(rl_src1, kCoreReg); if (cu_->target64) { if (IsTemp(rl_src1.reg)) { GenLongRegOrMemOp(rl_src1, rl_src2, op); } else if (is_commutative) { rl_src2 = LoadValueWide(rl_src2, kCoreReg); // We need at least one of them to be a temporary. if (!IsTemp(rl_src2.reg)) { rl_src1 = ForceTempWide(rl_src1); GenLongRegOrMemOp(rl_src1, rl_src2, op); } else { GenLongRegOrMemOp(rl_src2, rl_src1, op); StoreFinalValueWide(rl_dest, rl_src2); return; } } else { // Need LHS to be the temp. rl_src1 = ForceTempWide(rl_src1); GenLongRegOrMemOp(rl_src1, rl_src2, op); } } else { if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) { GenLongRegOrMemOp(rl_src1, rl_src2, op); } else if (is_commutative) { rl_src2 = LoadValueWide(rl_src2, kCoreReg); // We need at least one of them to be a temporary. if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) { rl_src1 = ForceTempWide(rl_src1); GenLongRegOrMemOp(rl_src1, rl_src2, op); } else { GenLongRegOrMemOp(rl_src2, rl_src1, op); StoreFinalValueWide(rl_dest, rl_src2); return; } } else { // Need LHS to be the temp. 
rl_src1 = ForceTempWide(rl_src1); GenLongRegOrMemOp(rl_src1, rl_src2, op); } } StoreFinalValueWide(rl_dest, rl_src1); } void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) { if (cu_->target64) { rl_src = LoadValueWide(rl_src, kCoreReg); RegLocation rl_result; rl_result = EvalLocWide(rl_dest, kCoreReg, true); OpRegCopy(rl_result.reg, rl_src.reg); OpReg(kOpNot, rl_result.reg); StoreValueWide(rl_dest, rl_result); } else { LOG(FATAL) << "Unexpected use GenNotLong()"; } } void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src, int64_t imm, bool is_div) { if (imm == 0) { GenDivZeroException(); } else if (imm == 1) { if (is_div) { // x / 1 == x. StoreValueWide(rl_dest, rl_src); } else { // x % 1 == 0. RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); LoadConstantWide(rl_result.reg, 0); StoreValueWide(rl_dest, rl_result); } } else if (imm == -1) { // handle 0x8000000000000000 / -1 special case. if (is_div) { rl_src = LoadValueWide(rl_src, kCoreReg); RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); RegStorage rs_temp = AllocTempWide(); OpRegCopy(rl_result.reg, rl_src.reg); LoadConstantWide(rs_temp, 0x8000000000000000); // If x == MIN_LONG, return MIN_LONG. OpRegReg(kOpCmp, rl_src.reg, rs_temp); LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); // For x != MIN_LONG, x / -1 == -x. OpReg(kOpNeg, rl_result.reg); minint_branch->target = NewLIR0(kPseudoTargetLabel); FreeTemp(rs_temp); StoreValueWide(rl_dest, rl_result); } else { // x % -1 == 0. RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); LoadConstantWide(rl_result.reg, 0); StoreValueWide(rl_dest, rl_result); } } else if (is_div && IsPowerOfTwo(std::abs(imm))) { // Division using shifting. 
rl_src = LoadValueWide(rl_src, kCoreReg); RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); if (IsSameReg(rl_result.reg, rl_src.reg)) { RegStorage rs_temp = AllocTypedTempWide(false, kCoreReg); rl_result.reg.SetReg(rs_temp.GetReg()); } LoadConstantWide(rl_result.reg, std::abs(imm) - 1); OpRegReg(kOpAdd, rl_result.reg, rl_src.reg); NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg()); OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg); int shift_amount = CTZ(imm); OpRegImm(kOpAsr, rl_result.reg, shift_amount); if (imm < 0) { OpReg(kOpNeg, rl_result.reg); } StoreValueWide(rl_dest, rl_result); } else { CHECK(imm <= -2 || imm >= 2); FlushReg(rs_r0q); Clobber(rs_r0q); LockTemp(rs_r0q); FlushReg(rs_r2q); Clobber(rs_r2q); LockTemp(rs_r2q); RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, is_div ? rs_r2q : rs_r0q, INVALID_SREG, INVALID_SREG}; // Use H.S.Warren's Hacker's Delight Chapter 10 and // T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication. int64_t magic; int shift; CalculateMagicAndShift(imm, magic, shift, true /* is_long */); /* * For imm >= 2, * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0. * For imm <= -2, * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0. * We implement this algorithm in the following way: * 1. multiply magic number m and numerator n, get the higher 64bit result in RDX * 2. if imm > 0 and magic < 0, add numerator to RDX * if imm < 0 and magic > 0, sub numerator from RDX * 3. if S !=0, SAR S bits for RDX * 4. add 1 to RDX if RDX < 0 * 5. Thus, RDX is the quotient */ // RAX = magic. LoadConstantWide(rs_r0q, magic); // Multiply by numerator. RegStorage numerator_reg; if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) { // We will need the value later. 
rl_src = LoadValueWide(rl_src, kCoreReg); numerator_reg = rl_src.reg; // RDX:RAX = magic * numerator. NewLIR1(kX86Imul64DaR, numerator_reg.GetReg()); } else { // Only need this once. Multiply directly from the value. rl_src = UpdateLocWideTyped(rl_src); if (rl_src.location != kLocPhysReg) { // Okay, we can do this from memory. ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int displacement = SRegOffset(rl_src.s_reg_low); // RDX:RAX = magic * numerator. LIR *m = NewLIR2(kX86Imul64DaM, rs_rX86_SP_32.GetReg(), displacement); AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */); } else { // RDX:RAX = magic * numerator. NewLIR1(kX86Imul64DaR, rl_src.reg.GetReg()); } } if (imm > 0 && magic < 0) { // Add numerator to RDX. DCHECK(numerator_reg.Valid()); OpRegReg(kOpAdd, rs_r2q, numerator_reg); } else if (imm < 0 && magic > 0) { DCHECK(numerator_reg.Valid()); OpRegReg(kOpSub, rs_r2q, numerator_reg); } // Do we need the shift? if (shift != 0) { // Shift RDX by 'shift' bits. OpRegImm(kOpAsr, rs_r2q, shift); } // Move RDX to RAX. OpRegCopyWide(rs_r0q, rs_r2q); // Move sign bit to bit 0, zeroing the rest. OpRegImm(kOpLsr, rs_r2q, 63); // RDX = RDX + RAX. OpRegReg(kOpAdd, rs_r2q, rs_r0q); // Quotient is in RDX. if (!is_div) { // We need to compute the remainder. // Remainder is divisor - (quotient * imm). DCHECK(numerator_reg.Valid()); OpRegCopyWide(rs_r0q, numerator_reg); // Imul doesn't support 64-bit imms. if (imm > std::numeric_limits::max() || imm < std::numeric_limits::min()) { RegStorage rs_temp = AllocTempWide(); LoadConstantWide(rs_temp, imm); // RAX = numerator * imm. NewLIR2(kX86Imul64RR, rs_r2q.GetReg(), rs_temp.GetReg()); FreeTemp(rs_temp); } else { // RAX = numerator * imm. int short_imm = static_cast(imm); NewLIR3(kX86Imul64RRI, rs_r2q.GetReg(), rs_r2q.GetReg(), short_imm); } // RAX -= RDX. OpRegReg(kOpSub, rs_r0q, rs_r2q); // Result in RAX. } else { // Result in RDX. 
} StoreValueWide(rl_dest, rl_result); FreeTemp(rs_r0q); FreeTemp(rs_r2q); } } void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div, int flags) { if (!cu_->target64) { LOG(FATAL) << "Unexpected use GenDivRemLong()"; return; } if (rl_src2.is_const) { DCHECK(rl_src2.wide); int64_t imm = mir_graph_->ConstantValueWide(rl_src2); GenDivRemLongLit(rl_dest, rl_src1, imm, is_div); return; } // We have to use fixed registers, so flush all the temps. // Prepare for explicit register usage. ExplicitTempRegisterLock(this, 4, &rs_r0q, &rs_r1q, &rs_r2q, &rs_r6q); // Load LHS into RAX. LoadValueDirectWideFixed(rl_src1, rs_r0q); // Load RHS into RCX. LoadValueDirectWideFixed(rl_src2, rs_r1q); // Copy LHS sign bit into RDX. NewLIR0(kx86Cqo64Da); // Handle division by zero case. if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) { GenDivZeroCheckWide(rs_r1q); } // Have to catch 0x8000000000000000/-1 case, or we will get an exception! NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1); LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); // RHS is -1. LoadConstantWide(rs_r6q, 0x8000000000000000); NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg()); LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); // In 0x8000000000000000/-1 case. if (!is_div) { // For DIV, RAX is already right. For REM, we need RDX 0. NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg()); } LIR* done = NewLIR1(kX86Jmp8, 0); // Expected case. minus_one_branch->target = NewLIR0(kPseudoTargetLabel); minint_branch->target = minus_one_branch->target; NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg()); done->target = NewLIR0(kPseudoTargetLabel); // Result is in RAX for div and RDX for rem. 
RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG}; if (!is_div) { rl_result.reg.SetReg(r2q); } StoreValueWide(rl_dest, rl_result); } void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { rl_src = LoadValueWide(rl_src, kCoreReg); RegLocation rl_result; if (cu_->target64) { rl_result = EvalLocWide(rl_dest, kCoreReg, true); OpRegReg(kOpNeg, rl_result.reg, rl_src.reg); } else { rl_result = ForceTempWide(rl_src); OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow()); // rLow = -rLow OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0); // rHigh = rHigh + CF OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); // rHigh = -rHigh } StoreValueWide(rl_dest, rl_result); } void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) { DCHECK_EQ(kX86, cu_->instruction_set); X86OpCode opcode = kX86Bkpt; switch (op) { case kOpCmp: opcode = kX86Cmp32RT; break; case kOpMov: opcode = kX86Mov32RT; break; default: LOG(FATAL) << "Bad opcode: " << op; break; } NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value()); } void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) { DCHECK_EQ(kX86_64, cu_->instruction_set); X86OpCode opcode = kX86Bkpt; if (cu_->target64 && r_dest.Is64BitSolo()) { switch (op) { case kOpCmp: opcode = kX86Cmp64RT; break; case kOpMov: opcode = kX86Mov64RT; break; default: LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op; break; } } else { switch (op) { case kOpCmp: opcode = kX86Cmp32RT; break; case kOpMov: opcode = kX86Mov32RT; break; default: LOG(FATAL) << "Bad opcode: " << op; break; } } NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value()); } /* * Generate array load */ void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale) { RegisterClass reg_class = RegClassForFieldLoadStore(size, false); int len_offset = 
mirror::Array::LengthOffset().Int32Value(); RegLocation rl_result; rl_array = LoadValue(rl_array, kRefReg); int data_offset; if (size == k64 || size == kDouble) { data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); } else { data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); } bool constant_index = rl_index.is_const; int32_t constant_index_value = 0; if (!constant_index) { rl_index = LoadValue(rl_index, kCoreReg); } else { constant_index_value = mir_graph_->ConstantValue(rl_index); // If index is constant, just fold it into the data offset data_offset += constant_index_value << scale; // treat as non array below rl_index.reg = RegStorage::InvalidReg(); } /* null object? */ GenNullCheck(rl_array.reg, opt_flags); if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) { if (constant_index) { GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset); } else { GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset); } } rl_result = EvalLoc(rl_dest, reg_class, true); LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size); if ((size == k64) || (size == kDouble)) { StoreValueWide(rl_dest, rl_result); } else { StoreValue(rl_dest, rl_result); } } /* * Generate array store * */ void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { RegisterClass reg_class = RegClassForFieldLoadStore(size, false); int len_offset = mirror::Array::LengthOffset().Int32Value(); int data_offset; if (size == k64 || size == kDouble) { data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); } else { data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); } rl_array = LoadValue(rl_array, kRefReg); bool constant_index = rl_index.is_const; int32_t constant_index_value = 0; if (!constant_index) { rl_index = LoadValue(rl_index, kCoreReg); } else { // If index is constant, just fold it into the data offset 
constant_index_value = mir_graph_->ConstantValue(rl_index); data_offset += constant_index_value << scale; // treat as non array below rl_index.reg = RegStorage::InvalidReg(); } /* null object? */ GenNullCheck(rl_array.reg, opt_flags); if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) { if (constant_index) { GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset); } else { GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset); } } if ((size == k64) || (size == kDouble)) { rl_src = LoadValueWide(rl_src, reg_class); } else { rl_src = LoadValue(rl_src, reg_class); } // If the src reg can't be byte accessed, move it to a temp first. if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) { RegStorage temp = AllocTemp(); OpRegCopy(temp, rl_src.reg); StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size, opt_flags); } else { StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size, opt_flags); } if (card_mark) { // Free rl_index if its a temp. Ensures there are 2 free regs for card mark. 
if (!constant_index) { FreeTemp(rl_index.reg); } MarkGCCard(opt_flags, rl_src.reg, rl_array.reg); } } RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src, int shift_amount, int flags) { UNUSED(flags); RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); if (cu_->target64) { OpKind op = static_cast(0); /* Make gcc happy */ switch (opcode) { case Instruction::SHL_LONG: case Instruction::SHL_LONG_2ADDR: op = kOpLsl; break; case Instruction::SHR_LONG: case Instruction::SHR_LONG_2ADDR: op = kOpAsr; break; case Instruction::USHR_LONG: case Instruction::USHR_LONG_2ADDR: op = kOpLsr; break; default: LOG(FATAL) << "Unexpected case"; } OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount); } else { switch (opcode) { case Instruction::SHL_LONG: case Instruction::SHL_LONG_2ADDR: DCHECK_NE(shift_amount, 1); // Prevent a double store from happening. if (shift_amount == 32) { OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); LoadConstant(rl_result.reg.GetLow(), 0); } else if (shift_amount > 31) { OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32); LoadConstant(rl_result.reg.GetLow(), 0); } else { OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(), shift_amount); NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount); } break; case Instruction::SHR_LONG: case Instruction::SHR_LONG_2ADDR: if (shift_amount == 32) { OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); } else if (shift_amount > 31) { OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32); 
NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); } else { OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount); NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount); } break; case Instruction::USHR_LONG: case Instruction::USHR_LONG_2ADDR: if (shift_amount == 32) { OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); LoadConstant(rl_result.reg.GetHigh(), 0); } else if (shift_amount > 31) { OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32); LoadConstant(rl_result.reg.GetHigh(), 0); } else { OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount); NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount); } break; default: LOG(FATAL) << "Unexpected case"; } } return rl_result; } void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift, int flags) { // Per spec, we only care about low 6 bits of shift amount. int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f; if (shift_amount == 0) { rl_src = LoadValueWide(rl_src, kCoreReg); StoreValueWide(rl_dest, rl_src); return; } else if (shift_amount == 1 && (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) { // Need to handle this here to avoid calling StoreValueWide twice. 
GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src, flags); return; } if (PartiallyIntersects(rl_src, rl_dest)) { GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift); return; } rl_src = LoadValueWide(rl_src, kCoreReg); RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount, flags); StoreValueWide(rl_dest, rl_result); } void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, int flags) { bool isConstSuccess = false; switch (opcode) { case Instruction::ADD_LONG: case Instruction::AND_LONG: case Instruction::OR_LONG: case Instruction::XOR_LONG: if (rl_src2.is_const) { isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); } else { DCHECK(rl_src1.is_const); isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); } break; case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: if (rl_src2.is_const) { isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); } else { GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags); isConstSuccess = true; } break; case Instruction::ADD_LONG_2ADDR: case Instruction::OR_LONG_2ADDR: case Instruction::XOR_LONG_2ADDR: case Instruction::AND_LONG_2ADDR: if (rl_src2.is_const) { if (GenerateTwoOperandInstructions()) { isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode); } else { isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); } } else { DCHECK(rl_src1.is_const); isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); } break; default: isConstSuccess = false; break; } if (!isConstSuccess) { // Default - bail to non-const handler. 
GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags); } } bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) { switch (op) { case Instruction::AND_LONG_2ADDR: case Instruction::AND_LONG: return value == -1; case Instruction::OR_LONG: case Instruction::OR_LONG_2ADDR: case Instruction::XOR_LONG: case Instruction::XOR_LONG_2ADDR: return value == 0; default: return false; } } X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs, bool is_high_op) { bool rhs_in_mem = rhs.location != kLocPhysReg; bool dest_in_mem = dest.location != kLocPhysReg; bool is64Bit = cu_->target64; DCHECK(!rhs_in_mem || !dest_in_mem); switch (op) { case Instruction::ADD_LONG: case Instruction::ADD_LONG_2ADDR: if (dest_in_mem) { return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR; } else if (rhs_in_mem) { return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM; } return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR; case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: if (dest_in_mem) { return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR; } else if (rhs_in_mem) { return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM; } return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR; case Instruction::AND_LONG_2ADDR: case Instruction::AND_LONG: if (dest_in_mem) { return is64Bit ? kX86And64MR : kX86And32MR; } if (is64Bit) { return rhs_in_mem ? kX86And64RM : kX86And64RR; } return rhs_in_mem ? kX86And32RM : kX86And32RR; case Instruction::OR_LONG: case Instruction::OR_LONG_2ADDR: if (dest_in_mem) { return is64Bit ? kX86Or64MR : kX86Or32MR; } if (is64Bit) { return rhs_in_mem ? kX86Or64RM : kX86Or64RR; } return rhs_in_mem ? kX86Or32RM : kX86Or32RR; case Instruction::XOR_LONG: case Instruction::XOR_LONG_2ADDR: if (dest_in_mem) { return is64Bit ? kX86Xor64MR : kX86Xor32MR; } if (is64Bit) { return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR; } return rhs_in_mem ? 
kX86Xor32RM : kX86Xor32RR; default: LOG(FATAL) << "Unexpected opcode: " << op; return kX86Add32RR; } } X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value) { bool in_mem = loc.location != kLocPhysReg; bool is64Bit = cu_->target64; bool byte_imm = IS_SIMM8(value); DCHECK(in_mem || !loc.reg.IsFloat()); switch (op) { case Instruction::ADD_LONG: case Instruction::ADD_LONG_2ADDR: if (byte_imm) { if (in_mem) { return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8; } return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8; } if (in_mem) { return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI; } return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI; case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: if (byte_imm) { if (in_mem) { return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8; } return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8; } if (in_mem) { return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI; } return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI; case Instruction::AND_LONG_2ADDR: case Instruction::AND_LONG: if (byte_imm) { if (is64Bit) { return in_mem ? kX86And64MI8 : kX86And64RI8; } return in_mem ? kX86And32MI8 : kX86And32RI8; } if (is64Bit) { return in_mem ? kX86And64MI : kX86And64RI; } return in_mem ? kX86And32MI : kX86And32RI; case Instruction::OR_LONG: case Instruction::OR_LONG_2ADDR: if (byte_imm) { if (is64Bit) { return in_mem ? kX86Or64MI8 : kX86Or64RI8; } return in_mem ? kX86Or32MI8 : kX86Or32RI8; } if (is64Bit) { return in_mem ? kX86Or64MI : kX86Or64RI; } return in_mem ? kX86Or32MI : kX86Or32RI; case Instruction::XOR_LONG: case Instruction::XOR_LONG_2ADDR: if (byte_imm) { if (is64Bit) { return in_mem ? kX86Xor64MI8 : kX86Xor64RI8; } return in_mem ? kX86Xor32MI8 : kX86Xor32RI8; } if (is64Bit) { return in_mem ? 
kX86Xor64MI : kX86Xor64RI; } return in_mem ? kX86Xor32MI : kX86Xor32RI; default: LOG(FATAL) << "Unexpected opcode: " << op; UNREACHABLE(); } } bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { DCHECK(rl_src.is_const); int64_t val = mir_graph_->ConstantValueWide(rl_src); if (cu_->target64) { // We can do with imm only if it fits 32 bit if (val != (static_cast(static_cast(val)))) { return false; } rl_dest = UpdateLocWideTyped(rl_dest); if ((rl_dest.location == kLocDalvikFrame) || (rl_dest.location == kLocCompilerTemp)) { int r_base = rs_rX86_SP_32.GetReg(); int displacement = SRegOffset(rl_dest.s_reg_low); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); X86OpCode x86op = GetOpcode(op, rl_dest, false, val); LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, false /* is_load */, true /* is64bit */); return true; } RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); DCHECK_EQ(rl_result.location, kLocPhysReg); DCHECK(!rl_result.reg.IsFloat()); X86OpCode x86op = GetOpcode(op, rl_result, false, val); NewLIR2(x86op, rl_result.reg.GetReg(), val); StoreValueWide(rl_dest, rl_result); return true; } int32_t val_lo = Low32Bits(val); int32_t val_hi = High32Bits(val); rl_dest = UpdateLocWideTyped(rl_dest); // Can we just do this into memory? 
if ((rl_dest.location == kLocDalvikFrame) || (rl_dest.location == kLocCompilerTemp)) { int r_base = rs_rX86_SP_32.GetReg(); int displacement = SRegOffset(rl_dest.s_reg_low); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); if (!IsNoOp(op, val_lo)) { X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo); LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, false /* is_load */, true /* is64bit */); } if (!IsNoOp(op, val_hi)) { X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi); LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi); AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, false /* is_load */, true /* is64bit */); } return true; } RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); DCHECK_EQ(rl_result.location, kLocPhysReg); DCHECK(!rl_result.reg.IsFloat()); if (!IsNoOp(op, val_lo)) { X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo); NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo); } if (!IsNoOp(op, val_hi)) { X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi); NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi); } StoreValueWide(rl_dest, rl_result); return true; } bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, Instruction::Code op) { DCHECK(rl_src2.is_const); int64_t val = mir_graph_->ConstantValueWide(rl_src2); if (cu_->target64) { // We can do with imm only if it fits 32 bit if (val != (static_cast(static_cast(val)))) { return false; } if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) { X86OpCode x86op = GetOpcode(op, rl_dest, false, val); OpRegCopy(rl_dest.reg, rl_src1.reg); 
NewLIR2(x86op, rl_dest.reg.GetReg(), val); StoreFinalValueWide(rl_dest, rl_dest); return true; } rl_src1 = LoadValueWide(rl_src1, kCoreReg); // We need the values to be in a temporary RegLocation rl_result = ForceTempWide(rl_src1); X86OpCode x86op = GetOpcode(op, rl_result, false, val); NewLIR2(x86op, rl_result.reg.GetReg(), val); StoreFinalValueWide(rl_dest, rl_result); return true; } int32_t val_lo = Low32Bits(val); int32_t val_hi = High32Bits(val); rl_dest = UpdateLocWideTyped(rl_dest); rl_src1 = UpdateLocWideTyped(rl_src1); // Can we do this directly into the destination registers? if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg && rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() && rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) { if (!IsNoOp(op, val_lo)) { X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo); NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo); } if (!IsNoOp(op, val_hi)) { X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi); NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi); } StoreFinalValueWide(rl_dest, rl_dest); return true; } rl_src1 = LoadValueWide(rl_src1, kCoreReg); DCHECK_EQ(rl_src1.location, kLocPhysReg); // We need the values to be in a temporary RegLocation rl_result = ForceTempWide(rl_src1); if (!IsNoOp(op, val_lo)) { X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo); NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo); } if (!IsNoOp(op, val_hi)) { X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi); NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi); } StoreFinalValueWide(rl_dest, rl_result); return true; } // For final classes there are no sub-classes to check and so we can answer the instance-of // question with simple comparisons. Use compares to memory and SETEQ to optimize for x86. 
void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src) { RegLocation object = LoadValue(rl_src, kRefReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage result_reg = rl_result.reg; // For 32-bit, SETcc only works with EAX..EDX. RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg; if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) { result_reg = AllocateByteRegister(); } // Assume that there is no match. LoadConstant(result_reg, 0); LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, nullptr); // We will use this register to compare to memory below. // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode). // For this reason, force allocation of a 32 bit register to use, so that the // compare to memory will be done using a 32 bit comparision. // The LoadRefDisp(s) below will work normally, even in 64 bit mode. RegStorage check_class = AllocTemp(); // If Method* is already in a register, we can save a copy. 
RegLocation rl_method = mir_graph_->GetMethodLoc(); int32_t offset_of_type = mirror::Array::DataOffset( sizeof(mirror::HeapReference)).Int32Value() + (sizeof(mirror::HeapReference) * type_idx); if (rl_method.location == kLocPhysReg) { if (use_declaring_class) { LoadRefDisp(rl_method.reg, ArtMethod::DeclaringClassOffset().Int32Value(), check_class, kNotVolatile); } else { LoadRefDisp(rl_method.reg, ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), check_class, kNotVolatile); LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile); } } else { LoadCurrMethodDirect(check_class); if (use_declaring_class) { LoadRefDisp(check_class, ArtMethod::DeclaringClassOffset().Int32Value(), check_class, kNotVolatile); } else { LoadRefDisp(check_class, ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), check_class, kNotVolatile); LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile); } } // Compare the computed class to the class in the object. DCHECK_EQ(object.location, kLocPhysReg); OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value()); // Set the low byte of the result to 0 or 1 from the compare condition code. 
NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq); LIR* target = NewLIR0(kPseudoTargetLabel); null_branchover->target = target; FreeTemp(check_class); if (IsTemp(result_reg)) { OpRegCopy(rl_result.reg, result_reg); FreeTemp(result_reg); } StoreValue(rl_dest, rl_result); } void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_lhs, RegLocation rl_rhs, int flags) { OpKind op = kOpBkpt; bool is_div_rem = false; bool unary = false; bool shift_op = false; bool is_two_addr = false; RegLocation rl_result; switch (opcode) { case Instruction::NEG_INT: op = kOpNeg; unary = true; break; case Instruction::NOT_INT: op = kOpMvn; unary = true; break; case Instruction::ADD_INT_2ADDR: is_two_addr = true; FALLTHROUGH_INTENDED; case Instruction::ADD_INT: op = kOpAdd; break; case Instruction::SUB_INT_2ADDR: is_two_addr = true; FALLTHROUGH_INTENDED; case Instruction::SUB_INT: op = kOpSub; break; case Instruction::MUL_INT_2ADDR: is_two_addr = true; FALLTHROUGH_INTENDED; case Instruction::MUL_INT: op = kOpMul; break; case Instruction::DIV_INT_2ADDR: is_two_addr = true; FALLTHROUGH_INTENDED; case Instruction::DIV_INT: op = kOpDiv; is_div_rem = true; break; /* NOTE: returns in kArg1 */ case Instruction::REM_INT_2ADDR: is_two_addr = true; FALLTHROUGH_INTENDED; case Instruction::REM_INT: op = kOpRem; is_div_rem = true; break; case Instruction::AND_INT_2ADDR: is_two_addr = true; FALLTHROUGH_INTENDED; case Instruction::AND_INT: op = kOpAnd; break; case Instruction::OR_INT_2ADDR: is_two_addr = true; FALLTHROUGH_INTENDED; case Instruction::OR_INT: op = kOpOr; break; case Instruction::XOR_INT_2ADDR: is_two_addr = true; FALLTHROUGH_INTENDED; case Instruction::XOR_INT: op = kOpXor; break; case Instruction::SHL_INT_2ADDR: is_two_addr = true; FALLTHROUGH_INTENDED; case Instruction::SHL_INT: shift_op = true; op = kOpLsl; break; case Instruction::SHR_INT_2ADDR: is_two_addr = true; FALLTHROUGH_INTENDED; case Instruction::SHR_INT: shift_op = true; op = kOpAsr; 
break; case Instruction::USHR_INT_2ADDR: is_two_addr = true; FALLTHROUGH_INTENDED; case Instruction::USHR_INT: shift_op = true; op = kOpLsr; break; default: LOG(FATAL) << "Invalid word arith op: " << opcode; } // Can we convert to a two address instruction? if (!is_two_addr && (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) { is_two_addr = true; } if (!GenerateTwoOperandInstructions()) { is_two_addr = false; } // Get the div/rem stuff out of the way. if (is_div_rem) { rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, flags); StoreValue(rl_dest, rl_result); return; } // If we generate any memory access below, it will reference a dalvik reg. ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); if (unary) { rl_lhs = LoadValue(rl_lhs, kCoreReg); rl_result = UpdateLocTyped(rl_dest); rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegReg(op, rl_result.reg, rl_lhs.reg); } else { if (shift_op) { // X86 doesn't require masking and must use ECX. RegStorage t_reg = TargetReg(kCount, kNotWide); // rCX LoadValueDirectFixed(rl_rhs, t_reg); if (is_two_addr) { // Can we do this directly into memory? rl_result = UpdateLocTyped(rl_dest); if (rl_result.location != kLocPhysReg) { // Okay, we can do this into memory OpMemReg(op, rl_result, t_reg.GetReg()); FreeTemp(t_reg); return; } else if (!rl_result.reg.IsFloat()) { // Can do this directly into the result register OpRegReg(op, rl_result.reg, t_reg); FreeTemp(t_reg); StoreFinalValue(rl_dest, rl_result); return; } } // Three address form, or we can't do directly. rl_lhs = LoadValue(rl_lhs, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg); FreeTemp(t_reg); } else { // Multiply is 3 operand only (sort of). if (is_two_addr && op != kOpMul) { // Can we do this directly into memory? 
rl_result = UpdateLocTyped(rl_dest); if (rl_result.location == kLocPhysReg) { // Ensure res is in a core reg rl_result = EvalLoc(rl_dest, kCoreReg, true); // Can we do this from memory directly? rl_rhs = UpdateLocTyped(rl_rhs); if (rl_rhs.location != kLocPhysReg) { OpRegMem(op, rl_result.reg, rl_rhs); StoreFinalValue(rl_dest, rl_result); return; } else if (!rl_rhs.reg.IsFloat()) { OpRegReg(op, rl_result.reg, rl_rhs.reg); StoreFinalValue(rl_dest, rl_result); return; } } rl_rhs = LoadValue(rl_rhs, kCoreReg); // It might happen rl_rhs and rl_dest are the same VR // in this case rl_dest is in reg after LoadValue while // rl_result is not updated yet, so do this rl_result = UpdateLocTyped(rl_dest); if (rl_result.location != kLocPhysReg) { // Okay, we can do this into memory. OpMemReg(op, rl_result, rl_rhs.reg.GetReg()); return; } else if (!rl_result.reg.IsFloat()) { // Can do this directly into the result register. OpRegReg(op, rl_result.reg, rl_rhs.reg); StoreFinalValue(rl_dest, rl_result); return; } else { rl_lhs = LoadValue(rl_lhs, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg); } } else { // Try to use reg/memory instructions. rl_lhs = UpdateLocTyped(rl_lhs); rl_rhs = UpdateLocTyped(rl_rhs); // We can't optimize with FP registers. if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) { // Something is difficult, so fall back to the standard case. rl_lhs = LoadValue(rl_lhs, kCoreReg); rl_rhs = LoadValue(rl_rhs, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg); } else { // We can optimize by moving to result and using memory operands. if (rl_rhs.location != kLocPhysReg) { // Force LHS into result. 
// We should be careful with order here // If rl_dest and rl_lhs points to the same VR we should load first // If the are different we should find a register first for dest if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) { rl_lhs = LoadValue(rl_lhs, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); // No-op if these are the same. OpRegCopy(rl_result.reg, rl_lhs.reg); } else { rl_result = EvalLoc(rl_dest, kCoreReg, true); LoadValueDirect(rl_lhs, rl_result.reg); } OpRegMem(op, rl_result.reg, rl_rhs); } else if (rl_lhs.location != kLocPhysReg) { // RHS is in a register; LHS is in memory. if (op != kOpSub) { // Force RHS into result and operate on memory. rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegCopy(rl_result.reg, rl_rhs.reg); OpRegMem(op, rl_result.reg, rl_lhs); } else { // Subtraction isn't commutative. rl_lhs = LoadValue(rl_lhs, kCoreReg); rl_rhs = LoadValue(rl_rhs, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg); } } else { // Both are in registers. rl_lhs = LoadValue(rl_lhs, kCoreReg); rl_rhs = LoadValue(rl_rhs, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg); } } } } } StoreValue(rl_dest, rl_result); } bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) { // If we have non-core registers, then we can't do good things. if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) { return false; } if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) { return false; } // Everything will be fine :-). 
return true; } void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) { if (!cu_->target64) { Mir2Lir::GenIntToLong(rl_dest, rl_src); return; } rl_src = UpdateLocTyped(rl_src); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); if (rl_src.location == kLocPhysReg) { NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); } else { int displacement = SRegOffset(rl_src.s_reg_low); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET); AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, true /* is_load */, true /* is_64bit */); } StoreValueWide(rl_dest, rl_result); } void X86Mir2Lir::GenLongToInt(RegLocation rl_dest, RegLocation rl_src) { rl_src = UpdateLocWide(rl_src); rl_src = NarrowRegLoc(rl_src); StoreValue(rl_dest, rl_src); if (cu_->target64) { // if src and dest are in the same phys reg then StoreValue generates // no operation but we need explicit 32-bit mov R, R to clear // the higher 32-bits rl_dest = UpdateLoc(rl_dest); if (rl_src.location == kLocPhysReg && rl_dest.location == kLocPhysReg && IsSameReg(rl_src.reg, rl_dest.reg)) { LIR* copy_lir = OpRegCopyNoInsert(rl_dest.reg, rl_dest.reg); // remove nop flag set by OpRegCopyNoInsert if src == dest copy_lir->flags.is_nop = false; AppendLIR(copy_lir); } } } void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift) { if (!cu_->target64) { // Long shift operations in 32-bit. Use shld or shrd to create a 32-bit register filled from // the other half, shift the other half, if the shift amount is less than 32 we're done, // otherwise move one register to the other and place zero or sign bits in the other. 
LIR* branch; FlushAllRegs(); LockCallTemps(); LoadValueDirectFixed(rl_shift, rs_rCX); RegStorage r_tmp = RegStorage::MakeRegPair(rs_rAX, rs_rDX); LoadValueDirectWideFixed(rl_src1, r_tmp); switch (opcode) { case Instruction::SHL_LONG: case Instruction::SHL_LONG_2ADDR: NewLIR3(kX86Shld32RRC, r_tmp.GetHighReg(), r_tmp.GetLowReg(), rs_rCX.GetReg()); NewLIR2(kX86Sal32RC, r_tmp.GetLowReg(), rs_rCX.GetReg()); NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32); branch = NewLIR2(kX86Jcc8, 0, kX86CondZ); OpRegCopy(r_tmp.GetHigh(), r_tmp.GetLow()); LoadConstant(r_tmp.GetLow(), 0); branch->target = NewLIR0(kPseudoTargetLabel); break; case Instruction::SHR_LONG: case Instruction::SHR_LONG_2ADDR: NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(), rs_rCX.GetReg()); NewLIR2(kX86Sar32RC, r_tmp.GetHighReg(), rs_rCX.GetReg()); NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32); branch = NewLIR2(kX86Jcc8, 0, kX86CondZ); OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh()); NewLIR2(kX86Sar32RI, r_tmp.GetHighReg(), 31); branch->target = NewLIR0(kPseudoTargetLabel); break; case Instruction::USHR_LONG: case Instruction::USHR_LONG_2ADDR: NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(), rs_rCX.GetReg()); NewLIR2(kX86Shr32RC, r_tmp.GetHighReg(), rs_rCX.GetReg()); NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32); branch = NewLIR2(kX86Jcc8, 0, kX86CondZ); OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh()); LoadConstant(r_tmp.GetHigh(), 0); branch->target = NewLIR0(kPseudoTargetLabel); break; default: LOG(FATAL) << "Unexpected case: " << opcode; return; } RegLocation rl_result = LocCReturnWide(); StoreValueWide(rl_dest, rl_result); return; } bool is_two_addr = false; OpKind op = kOpBkpt; RegLocation rl_result; switch (opcode) { case Instruction::SHL_LONG_2ADDR: is_two_addr = true; FALLTHROUGH_INTENDED; case Instruction::SHL_LONG: op = kOpLsl; break; case Instruction::SHR_LONG_2ADDR: is_two_addr = true; FALLTHROUGH_INTENDED; case Instruction::SHR_LONG: op = kOpAsr; break; case Instruction::USHR_LONG_2ADDR: 
is_two_addr = true; FALLTHROUGH_INTENDED; case Instruction::USHR_LONG: op = kOpLsr; break; default: op = kOpBkpt; } // X86 doesn't require masking and must use ECX. RegStorage t_reg = TargetReg(kCount, kNotWide); // rCX LoadValueDirectFixed(rl_shift, t_reg); if (is_two_addr) { // Can we do this directly into memory? rl_result = UpdateLocWideTyped(rl_dest); if (rl_result.location != kLocPhysReg) { // Okay, we can do this into memory ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); OpMemReg(op, rl_result, t_reg.GetReg()); } else if (!rl_result.reg.IsFloat()) { // Can do this directly into the result register OpRegReg(op, rl_result.reg, t_reg); StoreFinalValueWide(rl_dest, rl_result); } } else { // Three address form, or we can't do directly. rl_src1 = LoadValueWide(rl_src1, kCoreReg); rl_result = EvalLocWide(rl_dest, kCoreReg, true); OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg); StoreFinalValueWide(rl_dest, rl_result); } FreeTemp(t_reg); } } // namespace art