1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /* This file contains codegen for the X86 ISA */
18 
19 #include "codegen_x86.h"
20 
21 #include "art_method.h"
22 #include "base/bit_utils.h"
23 #include "base/logging.h"
24 #include "dex/quick/mir_to_lir-inl.h"
25 #include "dex/reg_storage_eq.h"
26 #include "mirror/array-inl.h"
27 #include "x86_lir.h"
28 
29 namespace art {
30 
31 /*
32  * Compare two 64-bit values
33  *    x = y     return  0
34  *    x < y     return -1
35  *    x > y     return  1
36  */
GenCmpLong(RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_src2)37 void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
38                             RegLocation rl_src2) {
39   if (cu_->target64) {
40     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
41     rl_src2 = LoadValueWide(rl_src2, kCoreReg);
42     RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
43     RegStorage temp_reg = AllocTemp();
44     OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
45     NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG);   // result = (src1 > src2) ? 1 : 0
46     NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL);  // temp = (src1 >= src2) ? 0 : 1
47     NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg());
48     NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
49 
50     StoreValue(rl_dest, rl_result);
51     FreeTemp(temp_reg);
52     return;
53   }
54 
55   // Prepare for explicit register usage
56   ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
57   RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
58   RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
59   LoadValueDirectWideFixed(rl_src1, r_tmp1);
60   LoadValueDirectWideFixed(rl_src2, r_tmp2);
61   // Compute (r1:r0) = (r1:r0) - (r3:r2)
62   OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
63   OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
64   NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL);  // r2 = (r1:r0) < (r3:r2) ? 1 : 0
65   NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg());
66   OpReg(kOpNeg, rs_r2);         // r2 = -r2
67   OpRegReg(kOpOr, rs_r0, rs_r1);   // r0 = high | low - sets ZF
68   NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz);  // r0 = (r1:r0) != (r3:r2) ? 1 : 0
69   NewLIR2(kX86Movzx8RR, r0, r0);
70   OpRegReg(kOpOr, rs_r0, rs_r2);   // r0 = r0 | r2
71   RegLocation rl_result = LocCReturn();
72   StoreValue(rl_dest, rl_result);
73 }
74 
X86ConditionEncoding(ConditionCode cond)75 X86ConditionCode X86ConditionEncoding(ConditionCode cond) {
76   switch (cond) {
77     case kCondEq: return kX86CondEq;
78     case kCondNe: return kX86CondNe;
79     case kCondCs: return kX86CondC;
80     case kCondCc: return kX86CondNc;
81     case kCondUlt: return kX86CondC;
82     case kCondUge: return kX86CondNc;
83     case kCondMi: return kX86CondS;
84     case kCondPl: return kX86CondNs;
85     case kCondVs: return kX86CondO;
86     case kCondVc: return kX86CondNo;
87     case kCondHi: return kX86CondA;
88     case kCondLs: return kX86CondBe;
89     case kCondGe: return kX86CondGe;
90     case kCondLt: return kX86CondL;
91     case kCondGt: return kX86CondG;
92     case kCondLe: return kX86CondLe;
93     case kCondAl:
94     case kCondNv: LOG(FATAL) << "Should not reach here";
95   }
96   return kX86CondO;
97 }
98 
OpCmpBranch(ConditionCode cond,RegStorage src1,RegStorage src2,LIR * target)99 LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
100   NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg());
101   X86ConditionCode cc = X86ConditionEncoding(cond);
102   LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ ,
103                         cc);
104   branch->target = target;
105   return branch;
106 }
107 
OpCmpImmBranch(ConditionCode cond,RegStorage reg,int check_value,LIR * target)108 LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg,
109                                 int check_value, LIR* target) {
110   if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) {
111     // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode
112     NewLIR2(reg.Is64Bit() ? kX86Test64RR: kX86Test32RR, reg.GetReg(), reg.GetReg());
113   } else {
114     if (reg.Is64Bit()) {
115       NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value);
116     } else {
117       NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
118     }
119   }
120   X86ConditionCode cc = X86ConditionEncoding(cond);
121   LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc);
122   branch->target = target;
123   return branch;
124 }
125 
OpRegCopyNoInsert(RegStorage r_dest,RegStorage r_src)126 LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
127   // If src or dest is a pair, we'll be using low reg.
128   if (r_dest.IsPair()) {
129     r_dest = r_dest.GetLow();
130   }
131   if (r_src.IsPair()) {
132     r_src = r_src.GetLow();
133   }
134   if (r_dest.IsFloat() || r_src.IsFloat())
135     return OpFpRegCopy(r_dest, r_src);
136   LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR,
137                     r_dest.GetReg(), r_src.GetReg());
138   if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
139     res->flags.is_nop = true;
140   }
141   return res;
142 }
143 
OpRegCopy(RegStorage r_dest,RegStorage r_src)144 void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
145   if (r_dest != r_src) {
146     LIR *res = OpRegCopyNoInsert(r_dest, r_src);
147     AppendLIR(res);
148   }
149 }
150 
OpRegCopyWide(RegStorage r_dest,RegStorage r_src)151 void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
152   if (r_dest != r_src) {
153     bool dest_fp = r_dest.IsFloat();
154     bool src_fp = r_src.IsFloat();
155     if (dest_fp) {
156       if (src_fp) {
157         OpRegCopy(r_dest, r_src);
158       } else {
159         // TODO: Prevent this from happening in the code. The result is often
160         // unused or could have been loaded more easily from memory.
161         if (!r_src.IsPair()) {
162           DCHECK(!r_dest.IsPair());
163           NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg());
164         } else {
165           NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
166           RegStorage r_tmp = AllocTempDouble();
167           NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
168           NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
169           FreeTemp(r_tmp);
170         }
171       }
172     } else {
173       if (src_fp) {
174         if (!r_dest.IsPair()) {
175           DCHECK(!r_src.IsPair());
176           NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg());
177         } else {
178           NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
179           RegStorage temp_reg = AllocTempDouble();
180           NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
181           NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
182           NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
183         }
184       } else {
185         DCHECK_EQ(r_dest.IsPair(), r_src.IsPair());
186         if (!r_src.IsPair()) {
187           // Just copy the register directly.
188           OpRegCopy(r_dest, r_src);
189         } else {
190           // Handle overlap
191           if (r_src.GetHighReg() == r_dest.GetLowReg() &&
192               r_src.GetLowReg() == r_dest.GetHighReg()) {
193             // Deal with cycles.
194             RegStorage temp_reg = AllocTemp();
195             OpRegCopy(temp_reg, r_dest.GetHigh());
196             OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
197             OpRegCopy(r_dest.GetLow(), temp_reg);
198             FreeTemp(temp_reg);
199           } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
200             OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
201             OpRegCopy(r_dest.GetLow(), r_src.GetLow());
202           } else {
203             OpRegCopy(r_dest.GetLow(), r_src.GetLow());
204             OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
205           }
206         }
207       }
208     }
209   }
210 }
211 
GenSelectConst32(RegStorage left_op,RegStorage right_op,ConditionCode code,int32_t true_val,int32_t false_val,RegStorage rs_dest,RegisterClass dest_reg_class)212 void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
213                                   int32_t true_val, int32_t false_val, RegStorage rs_dest,
214                                   RegisterClass dest_reg_class) {
215   DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair());
216   DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat());
217 
218   // We really need this check for correctness, otherwise we will need to do more checks in
219   // non zero/one case
220   if (true_val == false_val) {
221     LoadConstantNoClobber(rs_dest, true_val);
222     return;
223   }
224 
225   const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op);
226 
227   const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0);
228   if (zero_one_case && IsByteRegister(rs_dest)) {
229     if (!dest_intersect) {
230       LoadConstantNoClobber(rs_dest, 0);
231     }
232     OpRegReg(kOpCmp, left_op, right_op);
233     // Set the low byte of the result to 0 or 1 from the compare condition code.
234     NewLIR2(kX86Set8R, rs_dest.GetReg(),
235             X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code)));
236     if (dest_intersect) {
237       NewLIR2(rs_dest.Is64Bit() ? kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg());
238     }
239   } else {
240     // Be careful rs_dest can be changed only after cmp because it can be the same as one of ops
241     // and it cannot use xor because it makes cc flags to be dirty
242     RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false);
243     if (temp_reg.Valid()) {
244       if (false_val == 0 && dest_intersect) {
245         code = FlipComparisonOrder(code);
246         std::swap(true_val, false_val);
247       }
248       if (!dest_intersect) {
249         LoadConstantNoClobber(rs_dest, false_val);
250       }
251       LoadConstantNoClobber(temp_reg, true_val);
252       OpRegReg(kOpCmp, left_op, right_op);
253       if (dest_intersect) {
254         LoadConstantNoClobber(rs_dest, false_val);
255         DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
256       }
257       OpCondRegReg(kOpCmov, code, rs_dest, temp_reg);
258       FreeTemp(temp_reg);
259     } else {
260       // slow path
261       LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr);
262       LoadConstantNoClobber(rs_dest, false_val);
263       LIR* that_is_it = NewLIR1(kX86Jmp8, 0);
264       LIR* true_case = NewLIR0(kPseudoTargetLabel);
265       cmp_branch->target = true_case;
266       LoadConstantNoClobber(rs_dest, true_val);
267       LIR* end = NewLIR0(kPseudoTargetLabel);
268       that_is_it->target = end;
269     }
270   }
271 }
272 
GenSelect(BasicBlock * bb,MIR * mir)273 void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
274   UNUSED(bb);
275   RegLocation rl_result;
276   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
277   RegLocation rl_dest = mir_graph_->GetDest(mir);
278   // Avoid using float regs here.
279   RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
280   RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
281   ConditionCode ccode = mir->meta.ccode;
282 
283   // The kMirOpSelect has two variants, one for constants and one for moves.
284   const bool is_constant_case = (mir->ssa_rep->num_uses == 1);
285 
286   if (is_constant_case) {
287     int true_val = mir->dalvikInsn.vB;
288     int false_val = mir->dalvikInsn.vC;
289 
290     // simplest strange case
291     if (true_val == false_val) {
292       rl_result = EvalLoc(rl_dest, result_reg_class, true);
293       LoadConstantNoClobber(rl_result.reg, true_val);
294     } else {
295       // TODO: use GenSelectConst32 and handle additional opcode patterns such as
296       // "cmp; setcc; movzx" or "cmp; sbb r0,r0; and r0,$mask; add r0,$literal".
297       rl_src = LoadValue(rl_src, src_reg_class);
298       rl_result = EvalLoc(rl_dest, result_reg_class, true);
299       /*
300        * For ccode == kCondEq:
301        *
302        * 1) When the true case is zero and result_reg is not same as src_reg:
303        *     xor result_reg, result_reg
304        *     cmp $0, src_reg
305        *     mov t1, $false_case
306        *     cmovnz result_reg, t1
307        * 2) When the false case is zero and result_reg is not same as src_reg:
308        *     xor result_reg, result_reg
309        *     cmp $0, src_reg
310        *     mov t1, $true_case
311        *     cmovz result_reg, t1
312        * 3) All other cases (we do compare first to set eflags):
313        *     cmp $0, src_reg
314        *     mov result_reg, $false_case
315        *     mov t1, $true_case
316        *     cmovz result_reg, t1
317        */
318       // FIXME: depending on how you use registers you could get a false != mismatch when dealing
319       // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
320       const bool result_reg_same_as_src =
321           (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum());
322       const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
323       const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
324       const bool catch_all_case = !(true_zero_case || false_zero_case);
325 
326       if (true_zero_case || false_zero_case) {
327         OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
328       }
329 
330       if (true_zero_case || false_zero_case || catch_all_case) {
331         OpRegImm(kOpCmp, rl_src.reg, 0);
332       }
333 
334       if (catch_all_case) {
335         OpRegImm(kOpMov, rl_result.reg, false_val);
336       }
337 
338       if (true_zero_case || false_zero_case || catch_all_case) {
339         ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
340         int immediateForTemp = true_zero_case ? false_val : true_val;
341         RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
342         OpRegImm(kOpMov, temp1_reg, immediateForTemp);
343 
344         OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);
345 
346         FreeTemp(temp1_reg);
347       }
348     }
349   } else {
350     rl_src = LoadValue(rl_src, src_reg_class);
351     RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
352     RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
353     rl_true = LoadValue(rl_true, result_reg_class);
354     rl_false = LoadValue(rl_false, result_reg_class);
355     rl_result = EvalLoc(rl_dest, result_reg_class, true);
356 
357     /*
358      * For ccode == kCondEq:
359      *
360      * 1) When true case is already in place:
361      *     cmp $0, src_reg
362      *     cmovnz result_reg, false_reg
363      * 2) When false case is already in place:
364      *     cmp $0, src_reg
365      *     cmovz result_reg, true_reg
366      * 3) When neither cases are in place:
367      *     cmp $0, src_reg
368      *     mov result_reg, false_reg
369      *     cmovz result_reg, true_reg
370      */
371 
372     // kMirOpSelect is generated just for conditional cases when comparison is done with zero.
373     OpRegImm(kOpCmp, rl_src.reg, 0);
374 
375     if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {
376       OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg);
377     } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {
378       OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
379     } else {
380       OpRegCopy(rl_result.reg, rl_false.reg);
381       OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
382     }
383   }
384 
385   StoreValue(rl_dest, rl_result);
386 }
387 
GenFusedLongCmpBranch(BasicBlock * bb,MIR * mir)388 void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
389   LIR* taken = &block_label_list_[bb->taken];
390   RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
391   RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
392   ConditionCode ccode = mir->meta.ccode;
393 
394   if (rl_src1.is_const) {
395     std::swap(rl_src1, rl_src2);
396     ccode = FlipComparisonOrder(ccode);
397   }
398   if (rl_src2.is_const) {
399     // Do special compare/branch against simple const operand
400     int64_t val = mir_graph_->ConstantValueWide(rl_src2);
401     GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
402     return;
403   }
404 
405   if (cu_->target64) {
406     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
407     rl_src2 = LoadValueWide(rl_src2, kCoreReg);
408 
409     OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
410     OpCondBranch(ccode, taken);
411     return;
412   }
413 
414   // Prepare for explicit register usage
415   ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
416   RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
417   RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
418   LoadValueDirectWideFixed(rl_src1, r_tmp1);
419   LoadValueDirectWideFixed(rl_src2, r_tmp2);
420 
421   // Swap operands and condition code to prevent use of zero flag.
422   if (ccode == kCondLe || ccode == kCondGt) {
423     // Compute (r3:r2) = (r3:r2) - (r1:r0)
424     OpRegReg(kOpSub, rs_r2, rs_r0);  // r2 = r2 - r0
425     OpRegReg(kOpSbc, rs_r3, rs_r1);  // r3 = r3 - r1 - CF
426   } else {
427     // Compute (r1:r0) = (r1:r0) - (r3:r2)
428     OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
429     OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
430   }
431   switch (ccode) {
432     case kCondEq:
433     case kCondNe:
434       OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = r0 | r1
435       break;
436     case kCondLe:
437       ccode = kCondGe;
438       break;
439     case kCondGt:
440       ccode = kCondLt;
441       break;
442     case kCondLt:
443     case kCondGe:
444       break;
445     default:
446       LOG(FATAL) << "Unexpected ccode: " << ccode;
447   }
448   OpCondBranch(ccode, taken);
449 }
450 
GenFusedLongCmpImmBranch(BasicBlock * bb,RegLocation rl_src1,int64_t val,ConditionCode ccode)451 void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
452                                           int64_t val, ConditionCode ccode) {
453   int32_t val_lo = Low32Bits(val);
454   int32_t val_hi = High32Bits(val);
455   LIR* taken = &block_label_list_[bb->taken];
456   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
457   bool is_equality_test = ccode == kCondEq || ccode == kCondNe;
458 
459   if (cu_->target64) {
460     if (is_equality_test && val == 0) {
461       // We can simplify of comparing for ==, != to 0.
462       NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
463     } else if (is_equality_test && val_hi == 0 && val_lo > 0) {
464       OpRegImm(kOpCmp, rl_src1.reg, val_lo);
465     } else {
466       RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
467       LoadConstantWide(tmp, val);
468       OpRegReg(kOpCmp, rl_src1.reg, tmp);
469       FreeTemp(tmp);
470     }
471     OpCondBranch(ccode, taken);
472     return;
473   }
474 
475   if (is_equality_test && val != 0) {
476     rl_src1 = ForceTempWide(rl_src1);
477   }
478   RegStorage low_reg = rl_src1.reg.GetLow();
479   RegStorage high_reg = rl_src1.reg.GetHigh();
480 
481   if (is_equality_test) {
482     // We can simplify of comparing for ==, != to 0.
483     if (val == 0) {
484       if (IsTemp(low_reg)) {
485         OpRegReg(kOpOr, low_reg, high_reg);
486         // We have now changed it; ignore the old values.
487         Clobber(rl_src1.reg);
488       } else {
489         RegStorage t_reg = AllocTemp();
490         OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
491         FreeTemp(t_reg);
492       }
493       OpCondBranch(ccode, taken);
494       return;
495     }
496 
497     // Need to compute the actual value for ==, !=.
498     OpRegImm(kOpSub, low_reg, val_lo);
499     NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
500     OpRegReg(kOpOr, high_reg, low_reg);
501     Clobber(rl_src1.reg);
502   } else if (ccode == kCondLe || ccode == kCondGt) {
503     // Swap operands and condition code to prevent use of zero flag.
504     RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
505     LoadConstantWide(tmp, val);
506     OpRegReg(kOpSub, tmp.GetLow(), low_reg);
507     OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
508     ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
509     FreeTemp(tmp);
510   } else {
511     // We can use a compare for the low word to set CF.
512     OpRegImm(kOpCmp, low_reg, val_lo);
513     if (IsTemp(high_reg)) {
514       NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
515       // We have now changed it; ignore the old values.
516       Clobber(rl_src1.reg);
517     } else {
518       // mov temp_reg, high_reg; sbb temp_reg, high_constant
519       RegStorage t_reg = AllocTemp();
520       OpRegCopy(t_reg, high_reg);
521       NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
522       FreeTemp(t_reg);
523     }
524   }
525 
526   OpCondBranch(ccode, taken);
527 }
528 
CalculateMagicAndShift(int64_t divisor,int64_t & magic,int & shift,bool is_long)529 void X86Mir2Lir::CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long) {
530   // It does not make sense to calculate magic and shift for zero divisor.
531   DCHECK_NE(divisor, 0);
532 
533   /* According to H.S.Warren's Hacker's Delight Chapter 10 and
534    * T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication.
535    * The magic number M and shift S can be calculated in the following way:
536    * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
537    * where divisor(d) >=2.
538    * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
539    * where divisor(d) <= -2.
540    * Thus nc can be calculated like:
541    * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long
542    * nc = -exp + (exp + 1) % d, where d >= 2 and exp = 2^31 for int or 2^63 for long
543    *
544    * So the shift p is the smallest p satisfying
545    * 2^p > nc * (d - 2^p % d), where d >= 2
546    * 2^p > nc * (d + 2^p % d), where d <= -2.
547    *
548    * the magic number M is calcuated by
549    * M = (2^p + d - 2^p % d) / d, where d >= 2
550    * M = (2^p - d - 2^p % d) / d, where d <= -2.
551    *
552    * Notice that p is always bigger than or equal to 32/64, so we just return 32-p/64-p as
553    * the shift number S.
554    */
555 
556   int64_t p = (is_long) ? 63 : 31;
557   const uint64_t exp = (is_long) ? 0x8000000000000000ULL : 0x80000000U;
558 
559   // Initialize the computations.
560   uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
561   uint64_t tmp = exp + ((is_long) ? static_cast<uint64_t>(divisor) >> 63 :
562                                     static_cast<uint32_t>(divisor) >> 31);
563   uint64_t abs_nc = tmp - 1 - tmp % abs_d;
564   uint64_t quotient1 = exp / abs_nc;
565   uint64_t remainder1 = exp % abs_nc;
566   uint64_t quotient2 = exp / abs_d;
567   uint64_t remainder2 = exp % abs_d;
568 
569   /*
570    * To avoid handling both positive and negative divisor, Hacker's Delight
571    * introduces a method to handle these 2 cases together to avoid duplication.
572    */
573   uint64_t delta;
574   do {
575     p++;
576     quotient1 = 2 * quotient1;
577     remainder1 = 2 * remainder1;
578     if (remainder1 >= abs_nc) {
579       quotient1++;
580       remainder1 = remainder1 - abs_nc;
581     }
582     quotient2 = 2 * quotient2;
583     remainder2 = 2 * remainder2;
584     if (remainder2 >= abs_d) {
585       quotient2++;
586       remainder2 = remainder2 - abs_d;
587     }
588     delta = abs_d - remainder2;
589   } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));
590 
591   magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
592 
593   if (!is_long) {
594     magic = static_cast<int>(magic);
595   }
596 
597   shift = (is_long) ? p - 64 : p - 32;
598 }
599 
GenDivRemLit(RegLocation rl_dest,RegStorage reg_lo,int lit,bool is_div)600 RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) {
601   UNUSED(rl_dest, reg_lo, lit, is_div);
602   LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
603   UNREACHABLE();
604 }
605 
GenDivRemLit(RegLocation rl_dest,RegLocation rl_src,int imm,bool is_div)606 RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
607                                      int imm, bool is_div) {
608   // Use a multiply (and fixup) to perform an int div/rem by a constant.
609   RegLocation rl_result;
610 
611   if (imm == 1) {
612     rl_result = EvalLoc(rl_dest, kCoreReg, true);
613     if (is_div) {
614       // x / 1 == x.
615       LoadValueDirectFixed(rl_src, rl_result.reg);
616     } else {
617       // x % 1 == 0.
618       LoadConstantNoClobber(rl_result.reg, 0);
619     }
620   } else if (imm == -1) {  // handle 0x80000000 / -1 special case.
621     rl_result = EvalLoc(rl_dest, kCoreReg, true);
622     if (is_div) {
623       LoadValueDirectFixed(rl_src, rl_result.reg);
624 
625       // Check if numerator is 0
626       OpRegImm(kOpCmp, rl_result.reg, 0);
627       LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
628 
629       // handle 0x80000000 / -1
630       OpRegImm(kOpCmp, rl_result.reg, 0x80000000);
631       LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
632 
633       // for x != MIN_INT, x / -1 == -x.
634       NewLIR1(kX86Neg32R, rl_result.reg.GetReg());
635 
636       // EAX already contains the right value (0x80000000),
637       minint_branch->target = NewLIR0(kPseudoTargetLabel);
638       branch->target = NewLIR0(kPseudoTargetLabel);
639     } else {
640       // x % -1 == 0.
641       LoadConstantNoClobber(rl_result.reg, 0);
642     }
643   } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
644     // Division using shifting.
645     rl_src = LoadValue(rl_src, kCoreReg);
646     rl_result = EvalLoc(rl_dest, kCoreReg, true);
647     if (IsSameReg(rl_result.reg, rl_src.reg)) {
648       RegStorage rs_temp = AllocTypedTemp(false, kCoreReg);
649       rl_result.reg.SetReg(rs_temp.GetReg());
650     }
651 
652     // Check if numerator is 0
653     OpRegImm(kOpCmp, rl_src.reg, 0);
654     LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
655     LoadConstantNoClobber(rl_result.reg, 0);
656     LIR* done = NewLIR1(kX86Jmp8, 0);
657     branch->target = NewLIR0(kPseudoTargetLabel);
658 
659     NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1);
660     NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
661     OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
662     int shift_amount = CTZ(imm);
663     OpRegImm(kOpAsr, rl_result.reg, shift_amount);
664     if (imm < 0) {
665       OpReg(kOpNeg, rl_result.reg);
666     }
667     done->target = NewLIR0(kPseudoTargetLabel);
668   } else {
669     CHECK(imm <= -2 || imm >= 2);
670 
671     // Use H.S.Warren's Hacker's Delight Chapter 10 and
672     // T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication.
673     int64_t magic;
674     int shift;
675     CalculateMagicAndShift((int64_t)imm, magic, shift, false /* is_long */);
676 
677     /*
678      * For imm >= 2,
679      *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
680      *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0.
681      * For imm <= -2,
682      *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0
683      *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
684      * We implement this algorithm in the following way:
685      * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX
686      * 2. if imm > 0 and magic < 0, add numerator to EDX
687      *    if imm < 0 and magic > 0, sub numerator from EDX
688      * 3. if S !=0, SAR S bits for EDX
689      * 4. add 1 to EDX if EDX < 0
690      * 5. Thus, EDX is the quotient
691      */
692 
693     FlushReg(rs_r0);
694     Clobber(rs_r0);
695     LockTemp(rs_r0);
696     FlushReg(rs_r2);
697     Clobber(rs_r2);
698     LockTemp(rs_r2);
699 
700     // Assume that the result will be in EDX for divide, and EAX for remainder.
701     rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, is_div ? rs_r2 : rs_r0,
702                  INVALID_SREG, INVALID_SREG};
703 
704     // We need the value at least twice.  Load into a temp.
705     rl_src = LoadValue(rl_src, kCoreReg);
706     RegStorage numerator_reg = rl_src.reg;
707 
708     // Check if numerator is 0.
709     OpRegImm(kOpCmp, numerator_reg, 0);
710     LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
711     // Return result 0 if numerator was 0.
712     LoadConstantNoClobber(rl_result.reg, 0);
713     LIR* done = NewLIR1(kX86Jmp8, 0);
714     branch->target = NewLIR0(kPseudoTargetLabel);
715 
716     // EAX = magic.
717     LoadConstant(rs_r0, magic);
718 
719     // EDX:EAX = magic * numerator.
720     NewLIR1(kX86Imul32DaR, numerator_reg.GetReg());
721 
722     if (imm > 0 && magic < 0) {
723       // Add numerator to EDX.
724       DCHECK(numerator_reg.Valid());
725       NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg());
726     } else if (imm < 0 && magic > 0) {
727       DCHECK(numerator_reg.Valid());
728       NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg());
729     }
730 
731     // Do we need the shift?
732     if (shift != 0) {
733       // Shift EDX by 'shift' bits.
734       NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift);
735     }
736 
737     // Add 1 to EDX if EDX < 0.
738 
739     // Move EDX to EAX.
740     OpRegCopy(rs_r0, rs_r2);
741 
742     // Move sign bit to bit 0, zeroing the rest.
743     NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31);
744 
745     // EDX = EDX + EAX.
746     NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg());
747 
748     // Quotient is in EDX.
749     if (!is_div) {
750       // We need to compute the remainder.
751       // Remainder is divisor - (quotient * imm).
752       DCHECK(numerator_reg.Valid());
753       OpRegCopy(rs_r0, numerator_reg);
754 
755       // EAX = numerator * imm.
756       OpRegRegImm(kOpMul, rs_r2, rs_r2, imm);
757 
758       // EAX -= EDX.
759       NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg());
760 
761       // For this case, return the result in EAX.
762     }
763     done->target = NewLIR0(kPseudoTargetLabel);
764   }
765 
766   return rl_result;
767 }
768 
GenDivRem(RegLocation rl_dest,RegStorage reg_lo,RegStorage reg_hi,bool is_div)769 RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
770                                   bool is_div) {
771   UNUSED(rl_dest, reg_lo, reg_hi, is_div);
772   LOG(FATAL) << "Unexpected use of GenDivRem for x86";
773   UNREACHABLE();
774 }
775 
GenDivRem(RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_src2,bool is_div,int flags)776 RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
777                                   RegLocation rl_src2, bool is_div, int flags) {
778   UNUSED(rl_dest);
779   // We have to use fixed registers, so flush all the temps.
780 
781   // Prepare for explicit register usage.
782   ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
783 
784   // Load LHS into EAX.
785   LoadValueDirectFixed(rl_src1, rs_r0);
786 
787   // Load RHS into EBX.
788   LoadValueDirectFixed(rl_src2, rs_r1);
789 
790   // Copy LHS sign bit into EDX.
791   NewLIR0(kx86Cdq32Da);
792 
793   if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
794     // Handle division by zero case.
795     GenDivZeroCheck(rs_r1);
796   }
797 
798   // Check if numerator is 0
799   OpRegImm(kOpCmp, rs_r0, 0);
800   LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
801 
802   // Have to catch 0x80000000/-1 case, or we will get an exception!
803   OpRegImm(kOpCmp, rs_r1, -1);
804   LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
805 
806   // RHS is -1.
807   OpRegImm(kOpCmp, rs_r0, 0x80000000);
808   LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
809 
810   branch->target = NewLIR0(kPseudoTargetLabel);
811 
812   // In 0x80000000/-1 case.
813   if (!is_div) {
814     // For DIV, EAX is already right. For REM, we need EDX 0.
815     LoadConstantNoClobber(rs_r2, 0);
816   }
817   LIR* done = NewLIR1(kX86Jmp8, 0);
818 
819   // Expected case.
820   minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
821   minint_branch->target = minus_one_branch->target;
822   NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg());
823   done->target = NewLIR0(kPseudoTargetLabel);
824 
825   // Result is in EAX for div and EDX for rem.
826   RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
827   if (!is_div) {
828     rl_result.reg.SetReg(r2);
829   }
830   return rl_result;
831 }
832 
DwarfCoreReg(bool is_x86_64,int num)833 static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) {
834   return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num);
835 }
836 
GenInlinedMinMax(CallInfo * info,bool is_min,bool is_long)837 bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
838   DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
839 
840   if (is_long && !cu_->target64) {
841    /*
842     * We want to implement the following algorithm
843     * mov eax, low part of arg1
844     * mov edx, high part of arg1
845     * mov ebx, low part of arg2
846     * mov ecx, high part of arg2
847     * mov edi, eax
848     * sub edi, ebx
849     * mov edi, edx
850     * sbb edi, ecx
851     * is_min ? "cmovgel eax, ebx" : "cmovll eax, ebx"
852     * is_min ? "cmovgel edx, ecx" : "cmovll edx, ecx"
853     *
854     * The algorithm above needs 5 registers: a pair for the first operand
855     * (which later will be used as result), a pair for the second operand
856     * and a temp register (e.g. 'edi') for intermediate calculations.
857     * Ideally we have 6 GP caller-save registers in 32-bit mode. They are:
858     * 'eax', 'ebx', 'ecx', 'edx', 'esi' and 'edi'. So there should be
859     * always enough registers to operate on. Practically, there is a pair
860     * of registers 'edi' and 'esi' which holds promoted values and
861     * sometimes should be treated as 'callee save'. If one of the operands
862     * is in the promoted registers then we have enough register to
863     * operate on. Otherwise there is lack of resources and we have to
864     * save 'edi' before calculations and restore after.
865     */
866 
867     RegLocation rl_src1 = info->args[0];
868     RegLocation rl_src2 = info->args[2];
869     RegLocation rl_dest = InlineTargetWide(info);
870 
871     if (rl_dest.s_reg_low == INVALID_SREG) {
872       // Result is unused, the code is dead. Inlining successful, no code generated.
873       return true;
874     }
875 
876     if (PartiallyIntersects(rl_src1, rl_dest) &&
877         PartiallyIntersects(rl_src2, rl_dest)) {
878       // A special case which we don't want to handle.
879       // This is when src1 is mapped on v0 and v1,
880       // src2 is mapped on v2, v3,
881       // result is mapped on v1, v2
882       return false;
883     }
884 
885 
886     /*
887      * If the result register is the same as the second element, then we
888      * need to be careful. The reason is that the first copy will
889      * inadvertently clobber the second element with the first one thus
890      * yielding the wrong result. Thus we do a swap in that case.
891      */
892     if (Intersects(rl_src2, rl_dest)) {
893       std::swap(rl_src1, rl_src2);
894     }
895 
896     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
897     RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
898 
899     // Pick the first integer as min/max.
900     OpRegCopyWide(rl_result.reg, rl_src1.reg);
901 
902     /*
903      * If the integers are both in the same register, then there is
904      * nothing else to do because they are equal and we have already
905      * moved one into the result.
906      */
907     if (mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
908         mir_graph_->SRegToVReg(rl_src2.s_reg_low)) {
909       StoreValueWide(rl_dest, rl_result);
910       return true;
911     }
912 
913     // Free registers to make some room for the second operand.
914     // But don't try to free part of a source which intersects
915     // part of result or promoted registers.
916 
917     if (IsTemp(rl_src1.reg.GetLow()) &&
918        (rl_src1.reg.GetLowReg() != rl_result.reg.GetHighReg()) &&
919        (rl_src1.reg.GetLowReg() != rl_result.reg.GetLowReg())) {
920       // Is low part temporary and doesn't intersect any parts of result?
921       FreeTemp(rl_src1.reg.GetLow());
922     }
923 
924     if (IsTemp(rl_src1.reg.GetHigh()) &&
925        (rl_src1.reg.GetHighReg() != rl_result.reg.GetLowReg()) &&
926        (rl_src1.reg.GetHighReg() != rl_result.reg.GetHighReg())) {
927       // Is high part temporary and doesn't intersect any parts of result?
928       FreeTemp(rl_src1.reg.GetHigh());
929     }
930 
931     rl_src2 = LoadValueWide(rl_src2, kCoreReg);
932 
933     // Do we have a free register for intermediate calculations?
934     RegStorage tmp = AllocTemp(false);
935     const int kRegSize = cu_->target64 ? 8 : 4;
936     if (tmp == RegStorage::InvalidReg()) {
937        /*
938         * No, will use 'edi'.
939         *
940         * As mentioned above we have 4 temporary and 2 promotable
941         * caller-save registers. Therefore, we assume that a free
942         * register can be allocated only if 'esi' and 'edi' are
943         * already used as operands. If number of promotable registers
944         * increases from 2 to 4 then our assumption fails and operand
945         * data is corrupted.
946         * Let's DCHECK it.
947         */
948        DCHECK(IsTemp(rl_src2.reg.GetLow()) &&
949               IsTemp(rl_src2.reg.GetHigh()) &&
950               IsTemp(rl_result.reg.GetLow()) &&
951               IsTemp(rl_result.reg.GetHigh()));
952        tmp = rs_rDI;
953        NewLIR1(kX86Push32R, tmp.GetReg());
954        cfi_.AdjustCFAOffset(kRegSize);
955        // Record cfi only if it is not already spilled.
956        if (!CoreSpillMaskContains(tmp.GetReg())) {
957          cfi_.RelOffset(DwarfCoreReg(cu_->target64, tmp.GetReg()), 0);
958        }
959     }
960 
961     // Now we are ready to do calculations.
962     OpRegReg(kOpMov, tmp, rl_result.reg.GetLow());
963     OpRegReg(kOpSub, tmp, rl_src2.reg.GetLow());
964     OpRegReg(kOpMov, tmp, rl_result.reg.GetHigh());
965     OpRegReg(kOpSbc, tmp, rl_src2.reg.GetHigh());
966 
967     // Let's put pop 'edi' here to break a bit the dependency chain.
968     if (tmp == rs_rDI) {
969       NewLIR1(kX86Pop32R, tmp.GetReg());
970       cfi_.AdjustCFAOffset(-kRegSize);
971       if (!CoreSpillMaskContains(tmp.GetReg())) {
972         cfi_.Restore(DwarfCoreReg(cu_->target64, tmp.GetReg()));
973       }
974     } else {
975       FreeTemp(tmp);
976     }
977 
978     // Conditionally move the other integer into the destination register.
979     ConditionCode cc = is_min ? kCondGe : kCondLt;
980     OpCondRegReg(kOpCmov, cc, rl_result.reg.GetLow(), rl_src2.reg.GetLow());
981     OpCondRegReg(kOpCmov, cc, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh());
982     FreeTemp(rl_src2.reg);
983     StoreValueWide(rl_dest, rl_result);
984     return true;
985   }
986 
987   // Get the two arguments to the invoke and place them in GP registers.
988   RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
989   if (rl_dest.s_reg_low == INVALID_SREG) {
990     // Result is unused, the code is dead. Inlining successful, no code generated.
991     return true;
992   }
993   RegLocation rl_src1 = info->args[0];
994   RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
995   rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
996   rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
997 
998   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
999 
1000   /*
1001    * If the result register is the same as the second element, then we need to be careful.
1002    * The reason is that the first copy will inadvertently clobber the second element with
1003    * the first one thus yielding the wrong result. Thus we do a swap in that case.
1004    */
1005   if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
1006     std::swap(rl_src1, rl_src2);
1007   }
1008 
1009   // Pick the first integer as min/max.
1010   OpRegCopy(rl_result.reg, rl_src1.reg);
1011 
1012   // If the integers are both in the same register, then there is nothing else to do
1013   // because they are equal and we have already moved one into the result.
1014   if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) {
1015     // It is possible we didn't pick correctly so do the actual comparison now.
1016     OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
1017 
1018     // Conditionally move the other integer into the destination register.
1019     ConditionCode condition_code = is_min ? kCondGt : kCondLt;
1020     OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg);
1021   }
1022 
1023   if (is_long) {
1024     StoreValueWide(rl_dest, rl_result);
1025   } else {
1026     StoreValue(rl_dest, rl_result);
1027   }
1028   return true;
1029 }
1030 
GenInlinedPeek(CallInfo * info,OpSize size)1031 bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
1032   RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
1033   if (rl_dest.s_reg_low == INVALID_SREG) {
1034     // Result is unused, the code is dead. Inlining successful, no code generated.
1035     return true;
1036   }
1037   RegLocation rl_src_address = info->args[0];  // long address
1038   RegLocation rl_address;
1039   if (!cu_->target64) {
1040     rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
1041     rl_address = LoadValue(rl_src_address, kCoreReg);
1042   } else {
1043     rl_address = LoadValueWide(rl_src_address, kCoreReg);
1044   }
1045   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1046   // Unaligned access is allowed on x86.
1047   LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
1048   if (size == k64) {
1049     StoreValueWide(rl_dest, rl_result);
1050   } else {
1051     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
1052     StoreValue(rl_dest, rl_result);
1053   }
1054   return true;
1055 }
1056 
GenInlinedPoke(CallInfo * info,OpSize size)1057 bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
1058   RegLocation rl_src_address = info->args[0];  // long address
1059   RegLocation rl_address;
1060   if (!cu_->target64) {
1061     rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
1062     rl_address = LoadValue(rl_src_address, kCoreReg);
1063   } else {
1064     rl_address = LoadValueWide(rl_src_address, kCoreReg);
1065   }
1066   RegLocation rl_src_value = info->args[2];  // [size] value
1067   RegLocation rl_value;
1068   if (size == k64) {
1069     // Unaligned access is allowed on x86.
1070     rl_value = LoadValueWide(rl_src_value, kCoreReg);
1071   } else {
1072     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
1073     // In 32-bit mode the only EAX..EDX registers can be used with Mov8MR.
1074     if (!cu_->target64 && size == kSignedByte) {
1075       rl_src_value = UpdateLocTyped(rl_src_value);
1076       if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
1077         RegStorage temp = AllocateByteRegister();
1078         OpRegCopy(temp, rl_src_value.reg);
1079         rl_value.reg = temp;
1080       } else {
1081         rl_value = LoadValue(rl_src_value, kCoreReg);
1082       }
1083     } else {
1084       rl_value = LoadValue(rl_src_value, kCoreReg);
1085     }
1086   }
1087   StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
1088   return true;
1089 }
1090 
OpLea(RegStorage r_base,RegStorage reg1,RegStorage reg2,int scale,int offset)1091 void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
1092   NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset);
1093 }
1094 
OpTlsCmp(ThreadOffset<4> offset,int val)1095 void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
1096   DCHECK_EQ(kX86, cu_->instruction_set);
1097   NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
1098 }
1099 
OpTlsCmp(ThreadOffset<8> offset,int val)1100 void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
1101   DCHECK_EQ(kX86_64, cu_->instruction_set);
1102   NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
1103 }
1104 
IsInReg(X86Mir2Lir * pMir2Lir,const RegLocation & rl,RegStorage reg)1105 static bool IsInReg(X86Mir2Lir *pMir2Lir, const RegLocation &rl, RegStorage reg) {
1106   return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home);
1107 }
1108 
GenInlinedCas(CallInfo * info,bool is_long,bool is_object)1109 bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
1110   DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
1111   // Unused - RegLocation rl_src_unsafe = info->args[0];
1112   RegLocation rl_src_obj = info->args[1];  // Object - known non-null
1113   RegLocation rl_src_offset = info->args[2];  // long low
1114   if (!cu_->target64) {
1115     rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
1116   }
1117   RegLocation rl_src_expected = info->args[4];  // int, long or Object
1118   // If is_long, high half is in info->args[5]
1119   RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
1120   // If is_long, high half is in info->args[7]
1121   const int kRegSize = cu_->target64 ? 8 : 4;
1122 
1123   if (is_long && cu_->target64) {
1124     // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX.
1125     FlushReg(rs_r0q);
1126     Clobber(rs_r0q);
1127     LockTemp(rs_r0q);
1128 
1129     RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
1130     RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
1131     RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
1132     LoadValueDirectWide(rl_src_expected, rs_r0q);
1133     NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
1134             rl_new_value.reg.GetReg());
1135 
1136     // After a store we need to insert barrier in case of potential load. Since the
1137     // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
1138     GenMemBarrier(kAnyAny);
1139 
1140     FreeTemp(rs_r0q);
1141   } else if (is_long) {
1142     // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
1143     FlushAllRegs();
1144     LockCallTemps();
1145     RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
1146     RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
1147     LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
1148     LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
1149     // FIXME: needs 64-bit update.
1150     const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
1151     const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
1152     DCHECK(!obj_in_si || !obj_in_di);
1153     const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
1154     const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
1155     DCHECK(!off_in_si || !off_in_di);
1156     // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
1157     RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
1158     RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
1159     bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
1160     bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
1161     if (push_di) {
1162       NewLIR1(kX86Push32R, rs_rDI.GetReg());
1163       MarkTemp(rs_rDI);
1164       LockTemp(rs_rDI);
1165       cfi_.AdjustCFAOffset(kRegSize);
1166       // Record cfi only if it is not already spilled.
1167       if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
1168         cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0);
1169       }
1170     }
1171     if (push_si) {
1172       NewLIR1(kX86Push32R, rs_rSI.GetReg());
1173       MarkTemp(rs_rSI);
1174       LockTemp(rs_rSI);
1175       cfi_.AdjustCFAOffset(kRegSize);
1176       // Record cfi only if it is not already spilled.
1177       if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
1178         cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetReg()), 0);
1179       }
1180     }
1181     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1182     const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
1183     const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
1184     if (!obj_in_si && !obj_in_di) {
1185       LoadWordDisp(rs_rSP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
1186       // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
1187       DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
1188       int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
1189       AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
1190     }
1191     if (!off_in_si && !off_in_di) {
1192       LoadWordDisp(rs_rSP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
1193       // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
1194       DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
1195       int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
1196       AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
1197     }
1198     NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);
1199 
1200     // After a store we need to insert barrier to prevent reordering with either
1201     // earlier or later memory accesses.  Since
1202     // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
1203     // and it will be associated with the cmpxchg instruction, preventing both.
1204     GenMemBarrier(kAnyAny);
1205 
1206     if (push_si) {
1207       FreeTemp(rs_rSI);
1208       UnmarkTemp(rs_rSI);
1209       NewLIR1(kX86Pop32R, rs_rSI.GetReg());
1210       cfi_.AdjustCFAOffset(-kRegSize);
1211       if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
1212         cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()));
1213       }
1214     }
1215     if (push_di) {
1216       FreeTemp(rs_rDI);
1217       UnmarkTemp(rs_rDI);
1218       NewLIR1(kX86Pop32R, rs_rDI.GetReg());
1219       cfi_.AdjustCFAOffset(-kRegSize);
1220       if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
1221         cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()));
1222       }
1223     }
1224     FreeCallTemps();
1225   } else {
1226     // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX.
1227     FlushReg(rs_r0);
1228     Clobber(rs_r0);
1229     LockTemp(rs_r0);
1230 
1231     RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
1232     RegLocation rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
1233 
1234     if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
1235       // Mark card for object assuming new value is stored.
1236       FreeTemp(rs_r0);  // Temporarily release EAX for MarkGCCard().
1237       MarkGCCard(0, rl_new_value.reg, rl_object.reg);
1238       LockTemp(rs_r0);
1239     }
1240 
1241     RegLocation rl_offset;
1242     if (cu_->target64) {
1243       rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
1244     } else {
1245       rl_offset = LoadValue(rl_src_offset, kCoreReg);
1246     }
1247     LoadValueDirect(rl_src_expected, rs_r0);
1248     NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
1249             rl_new_value.reg.GetReg());
1250 
1251     // After a store we need to insert barrier to prevent reordering with either
1252     // earlier or later memory accesses.  Since
1253     // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
1254     // and it will be associated with the cmpxchg instruction, preventing both.
1255     GenMemBarrier(kAnyAny);
1256 
1257     FreeTemp(rs_r0);
1258   }
1259 
1260   // Convert ZF to boolean
1261   RegLocation rl_dest = InlineTarget(info);  // boolean place for result
1262   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1263   RegStorage result_reg = rl_result.reg;
1264 
1265   // For 32-bit, SETcc only works with EAX..EDX.
1266   if (!IsByteRegister(result_reg)) {
1267     result_reg = AllocateByteRegister();
1268   }
1269   NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
1270   NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
1271   if (IsTemp(result_reg)) {
1272     FreeTemp(result_reg);
1273   }
1274   StoreValue(rl_dest, rl_result);
1275   return true;
1276 }
1277 
SwapBits(RegStorage result_reg,int shift,int32_t value)1278 void X86Mir2Lir::SwapBits(RegStorage result_reg, int shift, int32_t value) {
1279   RegStorage r_temp = AllocTemp();
1280   OpRegCopy(r_temp, result_reg);
1281   OpRegImm(kOpLsr, result_reg, shift);
1282   OpRegImm(kOpAnd, r_temp, value);
1283   OpRegImm(kOpAnd, result_reg, value);
1284   OpRegImm(kOpLsl, r_temp, shift);
1285   OpRegReg(kOpOr, result_reg, r_temp);
1286   FreeTemp(r_temp);
1287 }
1288 
SwapBits64(RegStorage result_reg,int shift,int64_t value)1289 void X86Mir2Lir::SwapBits64(RegStorage result_reg, int shift, int64_t value) {
1290   RegStorage r_temp = AllocTempWide();
1291   OpRegCopy(r_temp, result_reg);
1292   OpRegImm(kOpLsr, result_reg, shift);
1293   RegStorage r_value = AllocTempWide();
1294   LoadConstantWide(r_value, value);
1295   OpRegReg(kOpAnd, r_temp, r_value);
1296   OpRegReg(kOpAnd, result_reg, r_value);
1297   OpRegImm(kOpLsl, r_temp, shift);
1298   OpRegReg(kOpOr, result_reg, r_temp);
1299   FreeTemp(r_temp);
1300   FreeTemp(r_value);
1301 }
1302 
GenInlinedReverseBits(CallInfo * info,OpSize size)1303 bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
1304   RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
1305   if (rl_dest.s_reg_low == INVALID_SREG) {
1306     // Result is unused, the code is dead. Inlining successful, no code generated.
1307     return true;
1308   }
1309   RegLocation rl_src_i = info->args[0];
1310   RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg)
1311                                    : LoadValue(rl_src_i, kCoreReg);
1312   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1313   if (size == k64) {
1314     if (cu_->instruction_set == kX86_64) {
1315       /* Use one bswap instruction to reverse byte order first and then use 3 rounds of
1316          swapping bits to reverse bits in a long number x. Using bswap to save instructions
1317          compared to generic luni implementation which has 5 rounds of swapping bits.
1318          x = bswap x
1319          x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
1320          x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
1321          x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
1322       */
1323       OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
1324       SwapBits64(rl_result.reg, 1, 0x5555555555555555);
1325       SwapBits64(rl_result.reg, 2, 0x3333333333333333);
1326       SwapBits64(rl_result.reg, 4, 0x0f0f0f0f0f0f0f0f);
1327       StoreValueWide(rl_dest, rl_result);
1328       return true;
1329     }
1330     RegStorage r_i_low = rl_i.reg.GetLow();
1331     if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
1332       // First REV shall clobber rl_result.reg.GetLowReg(), save the value in a temp for the second
1333       // REV.
1334       r_i_low = AllocTemp();
1335       OpRegCopy(r_i_low, rl_i.reg);
1336     }
1337     OpRegReg(kOpRev, rl_result.reg.GetLow(), rl_i.reg.GetHigh());
1338     OpRegReg(kOpRev, rl_result.reg.GetHigh(), r_i_low);
1339     // Free up at least one input register if it was a temp. Otherwise we may be in the bad
1340     // situation of not having a temp available for SwapBits. Make sure it's not overlapping
1341     // with the output, though.
1342     if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
1343       // There's definitely a free temp after this.
1344       FreeTemp(r_i_low);
1345     } else {
1346       // We opportunistically release both here. That saves duplication of the register state
1347       // lookup (to see if it's actually a temp).
1348       if (rl_i.reg.GetLowReg() != rl_result.reg.GetHighReg()) {
1349         FreeTemp(rl_i.reg.GetLow());
1350       }
1351       if (rl_i.reg.GetHighReg() != rl_result.reg.GetLowReg() &&
1352           rl_i.reg.GetHighReg() != rl_result.reg.GetHighReg()) {
1353         FreeTemp(rl_i.reg.GetHigh());
1354       }
1355     }
1356 
1357     SwapBits(rl_result.reg.GetLow(), 1, 0x55555555);
1358     SwapBits(rl_result.reg.GetLow(), 2, 0x33333333);
1359     SwapBits(rl_result.reg.GetLow(), 4, 0x0f0f0f0f);
1360     SwapBits(rl_result.reg.GetHigh(), 1, 0x55555555);
1361     SwapBits(rl_result.reg.GetHigh(), 2, 0x33333333);
1362     SwapBits(rl_result.reg.GetHigh(), 4, 0x0f0f0f0f);
1363     StoreValueWide(rl_dest, rl_result);
1364   } else {
1365     OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
1366     SwapBits(rl_result.reg, 1, 0x55555555);
1367     SwapBits(rl_result.reg, 2, 0x33333333);
1368     SwapBits(rl_result.reg, 4, 0x0f0f0f0f);
1369     StoreValue(rl_dest, rl_result);
1370   }
1371   return true;
1372 }
1373 
OpPcRelLoad(RegStorage reg,LIR * target)1374 void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
1375   if (cu_->target64) {
1376     // We can do this directly using RIP addressing.
1377     ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
1378     LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, kDummy32BitOffset);
1379     res->target = target;
1380     res->flags.fixup = kFixupLoad;
1381     return;
1382   }
1383 
1384   // Get the PC to a register and get the anchor.
1385   LIR* anchor;
1386   RegStorage r_pc = GetPcAndAnchor(&anchor);
1387 
1388   // Load the proper value from the literal area.
1389   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
1390   LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
1391   res->operands[4] = WrapPointer(anchor);
1392   res->target = target;
1393   res->flags.fixup = kFixupLoad;
1394 }
1395 
CanUseOpPcRelDexCacheArrayLoad() const1396 bool X86Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
1397   return dex_cache_arrays_layout_.Valid();
1398 }
1399 
OpLoadPc(RegStorage r_dest)1400 LIR* X86Mir2Lir::OpLoadPc(RegStorage r_dest) {
1401   DCHECK(!cu_->target64);
1402   LIR* call = NewLIR1(kX86CallI, 0);
1403   call->flags.fixup = kFixupLabel;
1404   LIR* pop = NewLIR1(kX86Pop32R, r_dest.GetReg());
1405   pop->flags.fixup = kFixupLabel;
1406   DCHECK(NEXT_LIR(call) == pop);
1407   return call;
1408 }
1409 
GetPcAndAnchor(LIR ** anchor,RegStorage r_tmp)1410 RegStorage X86Mir2Lir::GetPcAndAnchor(LIR** anchor, RegStorage r_tmp) {
1411   if (pc_rel_base_reg_.Valid()) {
1412     DCHECK(setup_pc_rel_base_reg_ != nullptr);
1413     *anchor = NEXT_LIR(setup_pc_rel_base_reg_);
1414     DCHECK(*anchor != nullptr);
1415     DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
1416     pc_rel_base_reg_used_ = true;
1417     return pc_rel_base_reg_;
1418   } else {
1419     RegStorage r_pc = r_tmp.Valid() ? r_tmp : AllocTempRef();
1420     LIR* load_pc = OpLoadPc(r_pc);
1421     *anchor = NEXT_LIR(load_pc);
1422     DCHECK(*anchor != nullptr);
1423     DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
1424     return r_pc;
1425   }
1426 }
1427 
OpPcRelDexCacheArrayLoad(const DexFile * dex_file,int offset,RegStorage r_dest,bool wide)1428 void X86Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest,
1429                                           bool wide) {
1430   if (cu_->target64) {
1431     LIR* mov = NewLIR3(wide ? kX86Mov64RM : kX86Mov32RM, r_dest.GetReg(), kRIPReg,
1432         kDummy32BitOffset);
1433     mov->flags.fixup = kFixupLabel;
1434     mov->operands[3] = WrapPointer(dex_file);
1435     mov->operands[4] = offset;
1436     mov->target = mov;  // Used for pc_insn_offset (not used by x86-64 relative patcher).
1437     dex_cache_access_insns_.push_back(mov);
1438   } else {
1439     CHECK(!wide) << "Unsupported";
1440     // Get the PC to a register and get the anchor. Use r_dest for the temp if needed.
1441     LIR* anchor;
1442     RegStorage r_pc = GetPcAndAnchor(&anchor, r_dest);
1443     LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
1444     mov->flags.fixup = kFixupLabel;
1445     mov->operands[3] = WrapPointer(dex_file);
1446     mov->operands[4] = offset;
1447     mov->target = anchor;  // Used for pc_insn_offset.
1448     dex_cache_access_insns_.push_back(mov);
1449   }
1450 }
1451 
OpVldm(RegStorage r_base,int count)1452 LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
1453   UNUSED(r_base, count);
1454   LOG(FATAL) << "Unexpected use of OpVldm for x86";
1455   UNREACHABLE();
1456 }
1457 
OpVstm(RegStorage r_base,int count)1458 LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) {
1459   UNUSED(r_base, count);
1460   LOG(FATAL) << "Unexpected use of OpVstm for x86";
1461   UNREACHABLE();
1462 }
1463 
GenMultiplyByTwoBitMultiplier(RegLocation rl_src,RegLocation rl_result,int lit,int first_bit,int second_bit)1464 void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
1465                                                RegLocation rl_result, int lit,
1466                                                int first_bit, int second_bit) {
1467   UNUSED(lit);
1468   RegStorage t_reg = AllocTemp();
1469   OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
1470   OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
1471   FreeTemp(t_reg);
1472   if (first_bit != 0) {
1473     OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
1474   }
1475 }
1476 
GenDivZeroCheckWide(RegStorage reg)1477 void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
1478   if (cu_->target64) {
1479     DCHECK(reg.Is64Bit());
1480 
1481     NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
1482   } else {
1483     DCHECK(reg.IsPair());
1484 
1485     // We are not supposed to clobber the incoming storage, so allocate a temporary.
1486     RegStorage t_reg = AllocTemp();
1487     // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
1488     OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
1489     // The temp is no longer needed so free it at this time.
1490     FreeTemp(t_reg);
1491   }
1492 
1493   // In case of zero, throw ArithmeticException.
1494   GenDivZeroCheck(kCondEq);
1495 }
1496 
GenArrayBoundsCheck(RegStorage index,RegStorage array_base,int len_offset)1497 void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index,
1498                                      RegStorage array_base,
1499                                      int len_offset) {
1500   class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
1501    public:
1502     ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
1503                              RegStorage index_in, RegStorage array_base_in, int32_t len_offset_in)
1504         : LIRSlowPath(m2l, branch_in),
1505           index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
1506     }
1507 
1508     void Compile() OVERRIDE {
1509       m2l_->ResetRegPool();
1510       m2l_->ResetDefTracking();
1511       GenerateTargetLabel(kPseudoThrowTarget);
1512 
1513       RegStorage new_index = index_;
1514       // Move index out of kArg1, either directly to kArg0, or to kArg2.
1515       // TODO: clean-up to check not a number but with type
1516       if (index_ == m2l_->TargetReg(kArg1, kNotWide)) {
1517         if (array_base_ == m2l_->TargetReg(kArg0, kRef)) {
1518           m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_);
1519           new_index = m2l_->TargetReg(kArg2, kNotWide);
1520         } else {
1521           m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_);
1522           new_index = m2l_->TargetReg(kArg0, kNotWide);
1523         }
1524       }
1525       // Load array length to kArg1.
1526       X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
1527       x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
1528       x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, new_index,
1529                                        m2l_->TargetReg(kArg1, kNotWide), true);
1530     }
1531 
1532    private:
1533     const RegStorage index_;
1534     const RegStorage array_base_;
1535     const int32_t len_offset_;
1536   };
1537 
1538   OpRegMem(kOpCmp, index, array_base, len_offset);
1539   MarkPossibleNullPointerException(0);
1540   LIR* branch = OpCondBranch(kCondUge, nullptr);
1541   AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
1542                                                     index, array_base, len_offset));
1543 }
1544 
GenArrayBoundsCheck(int32_t index,RegStorage array_base,int32_t len_offset)1545 void X86Mir2Lir::GenArrayBoundsCheck(int32_t index,
1546                                      RegStorage array_base,
1547                                      int32_t len_offset) {
1548   class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
1549    public:
1550     ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
1551                              int32_t index_in, RegStorage array_base_in, int32_t len_offset_in)
1552         : LIRSlowPath(m2l, branch_in),
1553           index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
1554     }
1555 
1556     void Compile() OVERRIDE {
1557       m2l_->ResetRegPool();
1558       m2l_->ResetDefTracking();
1559       GenerateTargetLabel(kPseudoThrowTarget);
1560 
1561       // Load array length to kArg1.
1562       X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
1563       x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
1564       x86_m2l->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_);
1565       x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, m2l_->TargetReg(kArg0, kNotWide),
1566                                        m2l_->TargetReg(kArg1, kNotWide), true);
1567     }
1568 
1569    private:
1570     const int32_t index_;
1571     const RegStorage array_base_;
1572     const int32_t len_offset_;
1573   };
1574 
1575   NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
1576   MarkPossibleNullPointerException(0);
1577   LIR* branch = OpCondBranch(kCondLs, nullptr);
1578   AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
1579                                                     index, array_base, len_offset));
1580 }
1581 
1582 // Test suspend flag, return target of taken suspend branch
OpTestSuspend(LIR * target)1583 LIR* X86Mir2Lir::OpTestSuspend(LIR* target) {
1584   if (cu_->target64) {
1585     OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0);
1586   } else {
1587     OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0);
1588   }
1589   return OpCondBranch((target == nullptr) ? kCondNe : kCondEq, target);
1590 }
1591 
1592 // Decrement register and branch on condition
OpDecAndBranch(ConditionCode c_code,RegStorage reg,LIR * target)1593 LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
1594   OpRegImm(kOpSub, reg, 1);
1595   return OpCondBranch(c_code, target);
1596 }
1597 
SmallLiteralDivRem(Instruction::Code dalvik_opcode,bool is_div,RegLocation rl_src,RegLocation rl_dest,int lit)1598 bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
1599                                     RegLocation rl_src, RegLocation rl_dest, int lit) {
1600   UNUSED(dalvik_opcode, is_div, rl_src, rl_dest, lit);
1601   LOG(FATAL) << "Unexpected use of smallLiteralDive in x86";
1602   UNREACHABLE();
1603 }
1604 
EasyMultiply(RegLocation rl_src,RegLocation rl_dest,int lit)1605 bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
1606   UNUSED(rl_src, rl_dest, lit);
1607   LOG(FATAL) << "Unexpected use of easyMultiply in x86";
1608   UNREACHABLE();
1609 }
1610 
OpIT(ConditionCode cond,const char * guide)1611 LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
1612   UNUSED(cond, guide);
1613   LOG(FATAL) << "Unexpected use of OpIT in x86";
1614   UNREACHABLE();
1615 }
1616 
OpEndIT(LIR * it)1617 void X86Mir2Lir::OpEndIT(LIR* it) {
1618   UNUSED(it);
1619   LOG(FATAL) << "Unexpected use of OpEndIT in x86";
1620   UNREACHABLE();
1621 }
1622 
GenImulRegImm(RegStorage dest,RegStorage src,int val)1623 void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) {
1624   switch (val) {
1625     case 0:
1626       NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
1627       break;
1628     case 1:
1629       OpRegCopy(dest, src);
1630       break;
1631     default:
1632       OpRegRegImm(kOpMul, dest, src, val);
1633       break;
1634   }
1635 }
1636 
GenImulMemImm(RegStorage dest,int sreg,int displacement,int val)1637 void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
1638   UNUSED(sreg);
1639   // All memory accesses below reference dalvik regs.
1640   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1641 
1642   LIR *m;
1643   switch (val) {
1644     case 0:
1645       NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
1646       break;
1647     case 1: {
1648       const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
1649       LoadBaseDisp(rs_rSP, displacement, dest, k32, kNotVolatile);
1650       break;
1651     }
1652     default:
1653       m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
1654                   rs_rX86_SP_32.GetReg(), displacement, val);
1655       AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
1656       break;
1657   }
1658 }
1659 
GenArithOpLong(Instruction::Code opcode,RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_src2,int flags)1660 void X86Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
1661                                 RegLocation rl_src2, int flags) {
1662   if (!cu_->target64) {
1663     // Some x86 32b ops are fallback.
1664     switch (opcode) {
1665       case Instruction::NOT_LONG:
1666       case Instruction::DIV_LONG:
1667       case Instruction::DIV_LONG_2ADDR:
1668       case Instruction::REM_LONG:
1669       case Instruction::REM_LONG_2ADDR:
1670         Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
1671         return;
1672 
1673       default:
1674         // Everything else we can handle.
1675         break;
1676     }
1677   }
1678 
1679   switch (opcode) {
1680     case Instruction::NOT_LONG:
1681       GenNotLong(rl_dest, rl_src2);
1682       return;
1683 
1684     case Instruction::ADD_LONG:
1685     case Instruction::ADD_LONG_2ADDR:
1686       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1687       return;
1688 
1689     case Instruction::SUB_LONG:
1690     case Instruction::SUB_LONG_2ADDR:
1691       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
1692       return;
1693 
1694     case Instruction::MUL_LONG:
1695     case Instruction::MUL_LONG_2ADDR:
1696       GenMulLong(opcode, rl_dest, rl_src1, rl_src2, flags);
1697       return;
1698 
1699     case Instruction::DIV_LONG:
1700     case Instruction::DIV_LONG_2ADDR:
1701       GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags);
1702       return;
1703 
1704     case Instruction::REM_LONG:
1705     case Instruction::REM_LONG_2ADDR:
1706       GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags);
1707       return;
1708 
1709     case Instruction::AND_LONG_2ADDR:
1710     case Instruction::AND_LONG:
1711       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1712       return;
1713 
1714     case Instruction::OR_LONG:
1715     case Instruction::OR_LONG_2ADDR:
1716       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1717       return;
1718 
1719     case Instruction::XOR_LONG:
1720     case Instruction::XOR_LONG_2ADDR:
1721       GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1722       return;
1723 
1724     case Instruction::NEG_LONG:
1725       GenNegLong(rl_dest, rl_src2);
1726       return;
1727 
1728     default:
1729       LOG(FATAL) << "Invalid long arith op";
1730       return;
1731   }
1732 }
1733 
GenMulLongConst(RegLocation rl_dest,RegLocation rl_src1,int64_t val,int flags)1734 bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val, int flags) {
1735   // All memory accesses below reference dalvik regs.
1736   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1737 
1738   if (val == 0) {
1739     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1740     if (cu_->target64) {
1741       OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
1742     } else {
1743       OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow());
1744       OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());
1745     }
1746     StoreValueWide(rl_dest, rl_result);
1747     return true;
1748   } else if (val == 1) {
1749     StoreValueWide(rl_dest, rl_src1);
1750     return true;
1751   } else if (val == 2) {
1752     GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1, flags);
1753     return true;
1754   } else if (IsPowerOfTwo(val)) {
1755     int shift_amount = CTZ(val);
1756     if (!PartiallyIntersects(rl_src1, rl_dest)) {
1757       rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1758       RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1,
1759                                                 shift_amount, flags);
1760       StoreValueWide(rl_dest, rl_result);
1761       return true;
1762     }
1763   }
1764 
1765   // Okay, on 32b just bite the bullet and do it, still better than the general case.
1766   if (!cu_->target64) {
1767     int32_t val_lo = Low32Bits(val);
1768     int32_t val_hi = High32Bits(val);
1769     // Prepare for explicit register usage.
1770     ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
1771     rl_src1 = UpdateLocWideTyped(rl_src1);
1772     bool src1_in_reg = rl_src1.location == kLocPhysReg;
1773     int displacement = SRegOffset(rl_src1.s_reg_low);
1774 
1775     // ECX <- 1H * 2L
1776     // EAX <- 1L * 2H
1777     if (src1_in_reg) {
1778       GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo);
1779       GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi);
1780     } else {
1781       GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
1782       GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
1783     }
1784 
1785     // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
1786     NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
1787 
1788     // EAX <- 2L
1789     LoadConstantNoClobber(rs_r0, val_lo);
1790 
1791     // EDX:EAX <- 2L * 1L (double precision)
1792     if (src1_in_reg) {
1793       NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
1794     } else {
1795       LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET);
1796       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1797                               true /* is_load */, true /* is_64bit */);
1798     }
1799 
1800     // EDX <- EDX + ECX (add high words)
1801     NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
1802 
1803     // Result is EDX:EAX
1804     RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
1805                              RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
1806     StoreValueWide(rl_dest, rl_result);
1807     return true;
1808   }
1809   return false;
1810 }
1811 
GenMulLong(Instruction::Code,RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_src2,int flags)1812 void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
1813                             RegLocation rl_src2, int flags) {
1814   if (rl_src1.is_const) {
1815     std::swap(rl_src1, rl_src2);
1816   }
1817 
1818   if (rl_src2.is_const) {
1819     if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2), flags)) {
1820       return;
1821     }
1822   }
1823 
1824   // All memory accesses below reference dalvik regs.
1825   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1826 
1827   if (cu_->target64) {
1828     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1829     rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1830     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1831     if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
1832         rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
1833       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
1834     } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() &&
1835                rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
1836       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg());
1837     } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
1838                rl_result.reg.GetReg() != rl_src2.reg.GetReg()) {
1839       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
1840     } else {
1841       OpRegCopy(rl_result.reg, rl_src1.reg);
1842       NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
1843     }
1844     StoreValueWide(rl_dest, rl_result);
1845     return;
1846   }
1847 
1848   // Not multiplying by a constant. Do it the hard way
1849   // Check for V*V.  We can eliminate a multiply in that case, as 2L*1H == 2H*1L.
1850   bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
1851                    mir_graph_->SRegToVReg(rl_src2.s_reg_low);
1852 
1853   // Prepare for explicit register usage.
1854   ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
1855   rl_src1 = UpdateLocWideTyped(rl_src1);
1856   rl_src2 = UpdateLocWideTyped(rl_src2);
1857 
1858   // At this point, the VRs are in their home locations.
1859   bool src1_in_reg = rl_src1.location == kLocPhysReg;
1860   bool src2_in_reg = rl_src2.location == kLocPhysReg;
1861   const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
1862 
1863   // ECX <- 1H
1864   if (src1_in_reg) {
1865     NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
1866   } else {
1867     LoadBaseDisp(rs_rSP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32,
1868                  kNotVolatile);
1869   }
1870 
1871   if (is_square) {
1872     // Take advantage of the fact that the values are the same.
1873     // ECX <- ECX * 2L  (1H * 2L)
1874     if (src2_in_reg) {
1875       NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
1876     } else {
1877       int displacement = SRegOffset(rl_src2.s_reg_low);
1878       LIR* m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP_32.GetReg(),
1879                        displacement + LOWORD_OFFSET);
1880       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1881                               true /* is_load */, true /* is_64bit */);
1882     }
1883 
1884     // ECX <- 2*ECX (2H * 1L) + (1H * 2L)
1885     NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg());
1886   } else {
1887     // EAX <- 2H
1888     if (src2_in_reg) {
1889       NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
1890     } else {
1891       LoadBaseDisp(rs_rSP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32,
1892                    kNotVolatile);
1893     }
1894 
1895     // EAX <- EAX * 1L  (2H * 1L)
1896     if (src1_in_reg) {
1897       NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg());
1898     } else {
1899       int displacement = SRegOffset(rl_src1.s_reg_low);
1900       LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP_32.GetReg(),
1901                        displacement + LOWORD_OFFSET);
1902       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1903                               true /* is_load */, true /* is_64bit */);
1904     }
1905 
1906     // ECX <- ECX * 2L  (1H * 2L)
1907     if (src2_in_reg) {
1908       NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
1909     } else {
1910       int displacement = SRegOffset(rl_src2.s_reg_low);
1911       LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP_32.GetReg(),
1912                        displacement + LOWORD_OFFSET);
1913       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1914                               true /* is_load */, true /* is_64bit */);
1915     }
1916 
1917     // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
1918     NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
1919   }
1920 
1921   // EAX <- 2L
1922   if (src2_in_reg) {
1923     NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
1924   } else {
1925     LoadBaseDisp(rs_rSP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32,
1926                  kNotVolatile);
1927   }
1928 
1929   // EDX:EAX <- 2L * 1L (double precision)
1930   if (src1_in_reg) {
1931     NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
1932   } else {
1933     int displacement = SRegOffset(rl_src1.s_reg_low);
1934     LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET);
1935     AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1936                             true /* is_load */, true /* is_64bit */);
1937   }
1938 
1939   // EDX <- EDX + ECX (add high words)
1940   NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
1941 
1942   // Result is EDX:EAX
1943   RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
1944                            RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
1945   StoreValueWide(rl_dest, rl_result);
1946 }
1947 
GenLongRegOrMemOp(RegLocation rl_dest,RegLocation rl_src,Instruction::Code op)1948 void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
1949                                    Instruction::Code op) {
1950   DCHECK_EQ(rl_dest.location, kLocPhysReg);
1951   X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
1952   if (rl_src.location == kLocPhysReg) {
1953     // Both operands are in registers.
1954     // But we must ensure that rl_src is in pair
1955     if (cu_->target64) {
1956       NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
1957     } else {
1958       rl_src = LoadValueWide(rl_src, kCoreReg);
1959       if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
1960         // The registers are the same, so we would clobber it before the use.
1961         RegStorage temp_reg = AllocTemp();
1962         OpRegCopy(temp_reg, rl_dest.reg);
1963         rl_src.reg.SetHighReg(temp_reg.GetReg());
1964       }
1965       NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
1966 
1967       x86op = GetOpcode(op, rl_dest, rl_src, true);
1968       NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
1969     }
1970     return;
1971   }
1972 
1973   // RHS is in memory.
1974   DCHECK((rl_src.location == kLocDalvikFrame) ||
1975          (rl_src.location == kLocCompilerTemp));
1976   int r_base = rs_rX86_SP_32.GetReg();
1977   int displacement = SRegOffset(rl_src.s_reg_low);
1978 
1979   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1980   LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
1981                      r_base, displacement + LOWORD_OFFSET);
1982   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1983                           true /* is_load */, true /* is64bit */);
1984   if (!cu_->target64) {
1985     x86op = GetOpcode(op, rl_dest, rl_src, true);
1986     lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
1987     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1988                             true /* is_load */, true /* is64bit */);
1989   }
1990 }
1991 
GenLongArith(RegLocation rl_dest,RegLocation rl_src,Instruction::Code op)1992 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
1993   rl_dest = UpdateLocWideTyped(rl_dest);
1994   if (rl_dest.location == kLocPhysReg) {
1995     // Ensure we are in a register pair
1996     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1997 
1998     rl_src = UpdateLocWideTyped(rl_src);
1999     GenLongRegOrMemOp(rl_result, rl_src, op);
2000     StoreFinalValueWide(rl_dest, rl_result);
2001     return;
2002   } else if (!cu_->target64 && Intersects(rl_src, rl_dest)) {
2003     // Handle the case when src and dest are intersect.
2004     rl_src = LoadValueWide(rl_src, kCoreReg);
2005     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2006     rl_src = UpdateLocWideTyped(rl_src);
2007     GenLongRegOrMemOp(rl_result, rl_src, op);
2008     StoreFinalValueWide(rl_dest, rl_result);
2009     return;
2010   }
2011 
2012   // It wasn't in registers, so it better be in memory.
2013   DCHECK((rl_dest.location == kLocDalvikFrame) ||
2014          (rl_dest.location == kLocCompilerTemp));
2015   rl_src = LoadValueWide(rl_src, kCoreReg);
2016 
2017   // Operate directly into memory.
2018   X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
2019   int r_base = rs_rX86_SP_32.GetReg();
2020   int displacement = SRegOffset(rl_dest.s_reg_low);
2021 
2022   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2023   LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
2024                      cu_->target64 ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
2025   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2026                           true /* is_load */, true /* is64bit */);
2027   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2028                           false /* is_load */, true /* is64bit */);
2029   if (!cu_->target64) {
2030     x86op = GetOpcode(op, rl_dest, rl_src, true);
2031     lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
2032     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2033                             true /* is_load */, true /* is64bit */);
2034     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2035                             false /* is_load */, true /* is64bit */);
2036   }
2037 
2038   int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
2039   int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
2040 
2041   // If the left operand is in memory and the right operand is in a register
2042   // and both belong to the same dalvik register then we should clobber the
2043   // right one because it doesn't hold valid data anymore.
2044   if (v_src_reg == v_dst_reg) {
2045     Clobber(rl_src.reg);
2046   }
2047 }
2048 
GenLongArith(RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_src2,Instruction::Code op,bool is_commutative)2049 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
2050                               RegLocation rl_src2, Instruction::Code op,
2051                               bool is_commutative) {
2052   // Is this really a 2 operand operation?
2053   switch (op) {
2054     case Instruction::ADD_LONG_2ADDR:
2055     case Instruction::SUB_LONG_2ADDR:
2056     case Instruction::AND_LONG_2ADDR:
2057     case Instruction::OR_LONG_2ADDR:
2058     case Instruction::XOR_LONG_2ADDR:
2059       if (GenerateTwoOperandInstructions()) {
2060         GenLongArith(rl_dest, rl_src2, op);
2061         return;
2062       }
2063       break;
2064 
2065     default:
2066       break;
2067   }
2068 
2069   if (rl_dest.location == kLocPhysReg) {
2070     RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
2071 
2072     // We are about to clobber the LHS, so it needs to be a temp.
2073     rl_result = ForceTempWide(rl_result);
2074 
2075     // Perform the operation using the RHS.
2076     rl_src2 = UpdateLocWideTyped(rl_src2);
2077     GenLongRegOrMemOp(rl_result, rl_src2, op);
2078 
2079     // And now record that the result is in the temp.
2080     StoreFinalValueWide(rl_dest, rl_result);
2081     return;
2082   }
2083 
2084   // It wasn't in registers, so it better be in memory.
2085   DCHECK((rl_dest.location == kLocDalvikFrame) || (rl_dest.location == kLocCompilerTemp));
2086   rl_src1 = UpdateLocWideTyped(rl_src1);
2087   rl_src2 = UpdateLocWideTyped(rl_src2);
2088 
2089   // Get one of the source operands into temporary register.
2090   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2091   if (cu_->target64) {
2092     if (IsTemp(rl_src1.reg)) {
2093       GenLongRegOrMemOp(rl_src1, rl_src2, op);
2094     } else if (is_commutative) {
2095       rl_src2 = LoadValueWide(rl_src2, kCoreReg);
2096       // We need at least one of them to be a temporary.
2097       if (!IsTemp(rl_src2.reg)) {
2098         rl_src1 = ForceTempWide(rl_src1);
2099         GenLongRegOrMemOp(rl_src1, rl_src2, op);
2100       } else {
2101         GenLongRegOrMemOp(rl_src2, rl_src1, op);
2102         StoreFinalValueWide(rl_dest, rl_src2);
2103         return;
2104       }
2105     } else {
2106       // Need LHS to be the temp.
2107       rl_src1 = ForceTempWide(rl_src1);
2108       GenLongRegOrMemOp(rl_src1, rl_src2, op);
2109     }
2110   } else {
2111     if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
2112       GenLongRegOrMemOp(rl_src1, rl_src2, op);
2113     } else if (is_commutative) {
2114       rl_src2 = LoadValueWide(rl_src2, kCoreReg);
2115       // We need at least one of them to be a temporary.
2116       if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
2117         rl_src1 = ForceTempWide(rl_src1);
2118         GenLongRegOrMemOp(rl_src1, rl_src2, op);
2119       } else {
2120         GenLongRegOrMemOp(rl_src2, rl_src1, op);
2121         StoreFinalValueWide(rl_dest, rl_src2);
2122         return;
2123       }
2124     } else {
2125       // Need LHS to be the temp.
2126       rl_src1 = ForceTempWide(rl_src1);
2127       GenLongRegOrMemOp(rl_src1, rl_src2, op);
2128     }
2129   }
2130 
2131   StoreFinalValueWide(rl_dest, rl_src1);
2132 }
2133 
GenNotLong(RegLocation rl_dest,RegLocation rl_src)2134 void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
2135   if (cu_->target64) {
2136     rl_src = LoadValueWide(rl_src, kCoreReg);
2137     RegLocation rl_result;
2138     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2139     OpRegCopy(rl_result.reg, rl_src.reg);
2140     OpReg(kOpNot, rl_result.reg);
2141     StoreValueWide(rl_dest, rl_result);
2142   } else {
2143     LOG(FATAL) << "Unexpected use GenNotLong()";
2144   }
2145 }
2146 
GenDivRemLongLit(RegLocation rl_dest,RegLocation rl_src,int64_t imm,bool is_div)2147 void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src,
2148                                   int64_t imm, bool is_div) {
2149   if (imm == 0) {
2150     GenDivZeroException();
2151   } else if (imm == 1) {
2152     if (is_div) {
2153       // x / 1 == x.
2154       StoreValueWide(rl_dest, rl_src);
2155     } else {
2156       // x % 1 == 0.
2157       RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2158       LoadConstantWide(rl_result.reg, 0);
2159       StoreValueWide(rl_dest, rl_result);
2160     }
2161   } else if (imm == -1) {  // handle 0x8000000000000000 / -1 special case.
2162     if (is_div) {
2163       rl_src = LoadValueWide(rl_src, kCoreReg);
2164       RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2165       RegStorage rs_temp = AllocTempWide();
2166 
2167       OpRegCopy(rl_result.reg, rl_src.reg);
2168       LoadConstantWide(rs_temp, 0x8000000000000000);
2169 
2170       // If x == MIN_LONG, return MIN_LONG.
2171       OpRegReg(kOpCmp, rl_src.reg, rs_temp);
2172       LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
2173 
2174       // For x != MIN_LONG, x / -1 == -x.
2175       OpReg(kOpNeg, rl_result.reg);
2176 
2177       minint_branch->target = NewLIR0(kPseudoTargetLabel);
2178       FreeTemp(rs_temp);
2179       StoreValueWide(rl_dest, rl_result);
2180     } else {
2181       // x % -1 == 0.
2182       RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2183       LoadConstantWide(rl_result.reg, 0);
2184       StoreValueWide(rl_dest, rl_result);
2185     }
2186   } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
2187     // Division using shifting.
2188     rl_src = LoadValueWide(rl_src, kCoreReg);
2189     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2190     if (IsSameReg(rl_result.reg, rl_src.reg)) {
2191       RegStorage rs_temp = AllocTypedTempWide(false, kCoreReg);
2192       rl_result.reg.SetReg(rs_temp.GetReg());
2193     }
2194     LoadConstantWide(rl_result.reg, std::abs(imm) - 1);
2195     OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
2196     NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
2197     OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
2198     int shift_amount = CTZ(imm);
2199     OpRegImm(kOpAsr, rl_result.reg, shift_amount);
2200     if (imm < 0) {
2201       OpReg(kOpNeg, rl_result.reg);
2202     }
2203     StoreValueWide(rl_dest, rl_result);
2204   } else {
2205     CHECK(imm <= -2 || imm >= 2);
2206 
2207     FlushReg(rs_r0q);
2208     Clobber(rs_r0q);
2209     LockTemp(rs_r0q);
2210     FlushReg(rs_r2q);
2211     Clobber(rs_r2q);
2212     LockTemp(rs_r2q);
2213 
2214     RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
2215                              is_div ? rs_r2q : rs_r0q, INVALID_SREG, INVALID_SREG};
2216 
2217     // Use H.S.Warren's Hacker's Delight Chapter 10 and
2218     // T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication.
2219     int64_t magic;
2220     int shift;
2221     CalculateMagicAndShift(imm, magic, shift, true /* is_long */);
2222 
2223     /*
2224      * For imm >= 2,
2225      *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
2226      *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0.
2227      * For imm <= -2,
2228      *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0
2229      *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
2230      * We implement this algorithm in the following way:
2231      * 1. multiply magic number m and numerator n, get the higher 64bit result in RDX
2232      * 2. if imm > 0 and magic < 0, add numerator to RDX
2233      *    if imm < 0 and magic > 0, sub numerator from RDX
2234      * 3. if S !=0, SAR S bits for RDX
2235      * 4. add 1 to RDX if RDX < 0
2236      * 5. Thus, RDX is the quotient
2237      */
2238 
2239     // RAX = magic.
2240     LoadConstantWide(rs_r0q, magic);
2241 
2242     // Multiply by numerator.
2243     RegStorage numerator_reg;
2244     if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
2245       // We will need the value later.
2246       rl_src = LoadValueWide(rl_src, kCoreReg);
2247       numerator_reg = rl_src.reg;
2248 
2249       // RDX:RAX = magic * numerator.
2250       NewLIR1(kX86Imul64DaR, numerator_reg.GetReg());
2251     } else {
2252       // Only need this once.  Multiply directly from the value.
2253       rl_src = UpdateLocWideTyped(rl_src);
2254       if (rl_src.location != kLocPhysReg) {
2255         // Okay, we can do this from memory.
2256         ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2257         int displacement = SRegOffset(rl_src.s_reg_low);
2258         // RDX:RAX = magic * numerator.
2259         LIR *m = NewLIR2(kX86Imul64DaM, rs_rX86_SP_32.GetReg(), displacement);
2260         AnnotateDalvikRegAccess(m, displacement >> 2,
2261                                 true /* is_load */, true /* is_64bit */);
2262       } else {
2263         // RDX:RAX = magic * numerator.
2264         NewLIR1(kX86Imul64DaR, rl_src.reg.GetReg());
2265       }
2266     }
2267 
2268     if (imm > 0 && magic < 0) {
2269       // Add numerator to RDX.
2270       DCHECK(numerator_reg.Valid());
2271       OpRegReg(kOpAdd, rs_r2q, numerator_reg);
2272     } else if (imm < 0 && magic > 0) {
2273       DCHECK(numerator_reg.Valid());
2274       OpRegReg(kOpSub, rs_r2q, numerator_reg);
2275     }
2276 
2277     // Do we need the shift?
2278     if (shift != 0) {
2279       // Shift RDX by 'shift' bits.
2280       OpRegImm(kOpAsr, rs_r2q, shift);
2281     }
2282 
2283     // Move RDX to RAX.
2284     OpRegCopyWide(rs_r0q, rs_r2q);
2285 
2286     // Move sign bit to bit 0, zeroing the rest.
2287     OpRegImm(kOpLsr, rs_r2q, 63);
2288 
2289     // RDX = RDX + RAX.
2290     OpRegReg(kOpAdd, rs_r2q, rs_r0q);
2291 
2292     // Quotient is in RDX.
2293     if (!is_div) {
2294       // We need to compute the remainder.
2295       // Remainder is divisor - (quotient * imm).
2296       DCHECK(numerator_reg.Valid());
2297       OpRegCopyWide(rs_r0q, numerator_reg);
2298 
2299       // Imul doesn't support 64-bit imms.
2300       if (imm > std::numeric_limits<int32_t>::max() ||
2301           imm < std::numeric_limits<int32_t>::min()) {
2302         RegStorage rs_temp = AllocTempWide();
2303         LoadConstantWide(rs_temp, imm);
2304 
2305         // RAX = numerator * imm.
2306         NewLIR2(kX86Imul64RR, rs_r2q.GetReg(), rs_temp.GetReg());
2307 
2308         FreeTemp(rs_temp);
2309       } else {
2310         // RAX = numerator * imm.
2311         int short_imm = static_cast<int>(imm);
2312         NewLIR3(kX86Imul64RRI, rs_r2q.GetReg(), rs_r2q.GetReg(), short_imm);
2313       }
2314 
2315       // RAX -= RDX.
2316       OpRegReg(kOpSub, rs_r0q, rs_r2q);
2317 
2318       // Result in RAX.
2319     } else {
2320       // Result in RDX.
2321     }
2322     StoreValueWide(rl_dest, rl_result);
2323     FreeTemp(rs_r0q);
2324     FreeTemp(rs_r2q);
2325   }
2326 }
2327 
GenDivRemLong(Instruction::Code,RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_src2,bool is_div,int flags)2328 void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
2329                                RegLocation rl_src2, bool is_div, int flags) {
2330   if (!cu_->target64) {
2331     LOG(FATAL) << "Unexpected use GenDivRemLong()";
2332     return;
2333   }
2334 
2335   if (rl_src2.is_const) {
2336     DCHECK(rl_src2.wide);
2337     int64_t imm = mir_graph_->ConstantValueWide(rl_src2);
2338     GenDivRemLongLit(rl_dest, rl_src1, imm, is_div);
2339     return;
2340   }
2341 
2342   // We have to use fixed registers, so flush all the temps.
2343   // Prepare for explicit register usage.
2344   ExplicitTempRegisterLock(this, 4, &rs_r0q, &rs_r1q, &rs_r2q, &rs_r6q);
2345 
2346   // Load LHS into RAX.
2347   LoadValueDirectWideFixed(rl_src1, rs_r0q);
2348 
2349   // Load RHS into RCX.
2350   LoadValueDirectWideFixed(rl_src2, rs_r1q);
2351 
2352   // Copy LHS sign bit into RDX.
2353   NewLIR0(kx86Cqo64Da);
2354 
2355   // Handle division by zero case.
2356   if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
2357     GenDivZeroCheckWide(rs_r1q);
2358   }
2359 
2360   // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
2361   NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
2362   LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
2363 
2364   // RHS is -1.
2365   LoadConstantWide(rs_r6q, 0x8000000000000000);
2366   NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
2367   LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
2368 
2369   // In 0x8000000000000000/-1 case.
2370   if (!is_div) {
2371     // For DIV, RAX is already right. For REM, we need RDX 0.
2372     NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
2373   }
2374   LIR* done = NewLIR1(kX86Jmp8, 0);
2375 
2376   // Expected case.
2377   minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
2378   minint_branch->target = minus_one_branch->target;
2379   NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
2380   done->target = NewLIR0(kPseudoTargetLabel);
2381 
2382   // Result is in RAX for div and RDX for rem.
2383   RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
2384   if (!is_div) {
2385     rl_result.reg.SetReg(r2q);
2386   }
2387 
2388   StoreValueWide(rl_dest, rl_result);
2389 }
2390 
GenNegLong(RegLocation rl_dest,RegLocation rl_src)2391 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
2392   rl_src = LoadValueWide(rl_src, kCoreReg);
2393   RegLocation rl_result;
2394   if (cu_->target64) {
2395     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2396     OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
2397   } else {
2398     rl_result = ForceTempWide(rl_src);
2399     OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow());    // rLow = -rLow
2400     OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0);                   // rHigh = rHigh + CF
2401     OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());  // rHigh = -rHigh
2402   }
2403   StoreValueWide(rl_dest, rl_result);
2404 }
2405 
OpRegThreadMem(OpKind op,RegStorage r_dest,ThreadOffset<4> thread_offset)2406 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) {
2407   DCHECK_EQ(kX86, cu_->instruction_set);
2408   X86OpCode opcode = kX86Bkpt;
2409   switch (op) {
2410   case kOpCmp: opcode = kX86Cmp32RT;  break;
2411   case kOpMov: opcode = kX86Mov32RT;  break;
2412   default:
2413     LOG(FATAL) << "Bad opcode: " << op;
2414     break;
2415   }
2416   NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
2417 }
2418 
OpRegThreadMem(OpKind op,RegStorage r_dest,ThreadOffset<8> thread_offset)2419 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
2420   DCHECK_EQ(kX86_64, cu_->instruction_set);
2421   X86OpCode opcode = kX86Bkpt;
2422   if (cu_->target64 && r_dest.Is64BitSolo()) {
2423     switch (op) {
2424     case kOpCmp: opcode = kX86Cmp64RT;  break;
2425     case kOpMov: opcode = kX86Mov64RT;  break;
2426     default:
2427       LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
2428       break;
2429     }
2430   } else {
2431     switch (op) {
2432     case kOpCmp: opcode = kX86Cmp32RT;  break;
2433     case kOpMov: opcode = kX86Mov32RT;  break;
2434     default:
2435       LOG(FATAL) << "Bad opcode: " << op;
2436       break;
2437     }
2438   }
2439   NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
2440 }
2441 
2442 /*
2443  * Generate array load
2444  */
GenArrayGet(int opt_flags,OpSize size,RegLocation rl_array,RegLocation rl_index,RegLocation rl_dest,int scale)2445 void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
2446                              RegLocation rl_index, RegLocation rl_dest, int scale) {
2447   RegisterClass reg_class = RegClassForFieldLoadStore(size, false);
2448   int len_offset = mirror::Array::LengthOffset().Int32Value();
2449   RegLocation rl_result;
2450   rl_array = LoadValue(rl_array, kRefReg);
2451 
2452   int data_offset;
2453   if (size == k64 || size == kDouble) {
2454     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
2455   } else {
2456     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
2457   }
2458 
2459   bool constant_index = rl_index.is_const;
2460   int32_t constant_index_value = 0;
2461   if (!constant_index) {
2462     rl_index = LoadValue(rl_index, kCoreReg);
2463   } else {
2464     constant_index_value = mir_graph_->ConstantValue(rl_index);
2465     // If index is constant, just fold it into the data offset
2466     data_offset += constant_index_value << scale;
2467     // treat as non array below
2468     rl_index.reg = RegStorage::InvalidReg();
2469   }
2470 
2471   /* null object? */
2472   GenNullCheck(rl_array.reg, opt_flags);
2473 
2474   if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
2475     if (constant_index) {
2476       GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
2477     } else {
2478       GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
2479     }
2480   }
2481   rl_result = EvalLoc(rl_dest, reg_class, true);
2482   LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
2483   if ((size == k64) || (size == kDouble)) {
2484     StoreValueWide(rl_dest, rl_result);
2485   } else {
2486     StoreValue(rl_dest, rl_result);
2487   }
2488 }
2489 
2490 /*
2491  * Generate array store
2492  *
2493  */
GenArrayPut(int opt_flags,OpSize size,RegLocation rl_array,RegLocation rl_index,RegLocation rl_src,int scale,bool card_mark)2494 void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
2495                              RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
2496   RegisterClass reg_class = RegClassForFieldLoadStore(size, false);
2497   int len_offset = mirror::Array::LengthOffset().Int32Value();
2498   int data_offset;
2499 
2500   if (size == k64 || size == kDouble) {
2501     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
2502   } else {
2503     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
2504   }
2505 
2506   rl_array = LoadValue(rl_array, kRefReg);
2507   bool constant_index = rl_index.is_const;
2508   int32_t constant_index_value = 0;
2509   if (!constant_index) {
2510     rl_index = LoadValue(rl_index, kCoreReg);
2511   } else {
2512     // If index is constant, just fold it into the data offset
2513     constant_index_value = mir_graph_->ConstantValue(rl_index);
2514     data_offset += constant_index_value << scale;
2515     // treat as non array below
2516     rl_index.reg = RegStorage::InvalidReg();
2517   }
2518 
2519   /* null object? */
2520   GenNullCheck(rl_array.reg, opt_flags);
2521 
2522   if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
2523     if (constant_index) {
2524       GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
2525     } else {
2526       GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
2527     }
2528   }
2529   if ((size == k64) || (size == kDouble)) {
2530     rl_src = LoadValueWide(rl_src, reg_class);
2531   } else {
2532     rl_src = LoadValue(rl_src, reg_class);
2533   }
2534   // If the src reg can't be byte accessed, move it to a temp first.
2535   if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
2536     RegStorage temp = AllocTemp();
2537     OpRegCopy(temp, rl_src.reg);
2538     StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size, opt_flags);
2539   } else {
2540     StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size, opt_flags);
2541   }
2542   if (card_mark) {
2543     // Free rl_index if its a temp. Ensures there are 2 free regs for card mark.
2544     if (!constant_index) {
2545       FreeTemp(rl_index.reg);
2546     }
2547     MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
2548   }
2549 }
2550 
GenShiftImmOpLong(Instruction::Code opcode,RegLocation rl_dest,RegLocation rl_src,int shift_amount,int flags)2551 RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
2552                                           RegLocation rl_src, int shift_amount, int flags) {
2553   UNUSED(flags);
2554   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2555   if (cu_->target64) {
2556     OpKind op = static_cast<OpKind>(0);    /* Make gcc happy */
2557     switch (opcode) {
2558       case Instruction::SHL_LONG:
2559       case Instruction::SHL_LONG_2ADDR:
2560         op = kOpLsl;
2561         break;
2562       case Instruction::SHR_LONG:
2563       case Instruction::SHR_LONG_2ADDR:
2564         op = kOpAsr;
2565         break;
2566       case Instruction::USHR_LONG:
2567       case Instruction::USHR_LONG_2ADDR:
2568         op = kOpLsr;
2569         break;
2570       default:
2571         LOG(FATAL) << "Unexpected case";
2572     }
2573     OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
2574   } else {
2575     switch (opcode) {
2576       case Instruction::SHL_LONG:
2577       case Instruction::SHL_LONG_2ADDR:
2578         DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
2579         if (shift_amount == 32) {
2580           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
2581           LoadConstant(rl_result.reg.GetLow(), 0);
2582         } else if (shift_amount > 31) {
2583           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
2584           NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
2585           LoadConstant(rl_result.reg.GetLow(), 0);
2586         } else {
2587           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2588           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2589           NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
2590                   shift_amount);
2591           NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
2592         }
2593         break;
2594       case Instruction::SHR_LONG:
2595       case Instruction::SHR_LONG_2ADDR:
2596         if (shift_amount == 32) {
2597           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2598           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2599           NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
2600         } else if (shift_amount > 31) {
2601           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2602           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2603           NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
2604           NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
2605         } else {
2606           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2607           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2608           NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
2609                   shift_amount);
2610           NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
2611         }
2612         break;
2613       case Instruction::USHR_LONG:
2614       case Instruction::USHR_LONG_2ADDR:
2615         if (shift_amount == 32) {
2616           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2617           LoadConstant(rl_result.reg.GetHigh(), 0);
2618         } else if (shift_amount > 31) {
2619           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2620           NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
2621           LoadConstant(rl_result.reg.GetHigh(), 0);
2622         } else {
2623           OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2624           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2625           NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
2626                   shift_amount);
2627           NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
2628         }
2629         break;
2630       default:
2631         LOG(FATAL) << "Unexpected case";
2632     }
2633   }
2634   return rl_result;
2635 }
2636 
GenShiftImmOpLong(Instruction::Code opcode,RegLocation rl_dest,RegLocation rl_src,RegLocation rl_shift,int flags)2637 void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
2638                                    RegLocation rl_src, RegLocation rl_shift, int flags) {
2639   // Per spec, we only care about low 6 bits of shift amount.
2640   int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
2641   if (shift_amount == 0) {
2642     rl_src = LoadValueWide(rl_src, kCoreReg);
2643     StoreValueWide(rl_dest, rl_src);
2644     return;
2645   } else if (shift_amount == 1 &&
2646             (opcode ==  Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
2647     // Need to handle this here to avoid calling StoreValueWide twice.
2648     GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src, flags);
2649     return;
2650   }
2651   if (PartiallyIntersects(rl_src, rl_dest)) {
2652     GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
2653     return;
2654   }
2655   rl_src = LoadValueWide(rl_src, kCoreReg);
2656   RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount, flags);
2657   StoreValueWide(rl_dest, rl_result);
2658 }
2659 
GenArithImmOpLong(Instruction::Code opcode,RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_src2,int flags)2660 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
2661                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
2662                                    int flags) {
2663   bool isConstSuccess = false;
2664   switch (opcode) {
2665     case Instruction::ADD_LONG:
2666     case Instruction::AND_LONG:
2667     case Instruction::OR_LONG:
2668     case Instruction::XOR_LONG:
2669       if (rl_src2.is_const) {
2670         isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2671       } else {
2672         DCHECK(rl_src1.is_const);
2673         isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
2674       }
2675       break;
2676     case Instruction::SUB_LONG:
2677     case Instruction::SUB_LONG_2ADDR:
2678       if (rl_src2.is_const) {
2679         isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2680       } else {
2681         GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
2682         isConstSuccess = true;
2683       }
2684       break;
2685     case Instruction::ADD_LONG_2ADDR:
2686     case Instruction::OR_LONG_2ADDR:
2687     case Instruction::XOR_LONG_2ADDR:
2688     case Instruction::AND_LONG_2ADDR:
2689       if (rl_src2.is_const) {
2690         if (GenerateTwoOperandInstructions()) {
2691           isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode);
2692         } else {
2693           isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2694         }
2695       } else {
2696         DCHECK(rl_src1.is_const);
2697         isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
2698       }
2699       break;
2700     default:
2701       isConstSuccess = false;
2702       break;
2703   }
2704 
2705   if (!isConstSuccess) {
2706     // Default - bail to non-const handler.
2707     GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
2708   }
2709 }
2710 
IsNoOp(Instruction::Code op,int32_t value)2711 bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
2712   switch (op) {
2713     case Instruction::AND_LONG_2ADDR:
2714     case Instruction::AND_LONG:
2715       return value == -1;
2716     case Instruction::OR_LONG:
2717     case Instruction::OR_LONG_2ADDR:
2718     case Instruction::XOR_LONG:
2719     case Instruction::XOR_LONG_2ADDR:
2720       return value == 0;
2721     default:
2722       return false;
2723   }
2724 }
2725 
GetOpcode(Instruction::Code op,RegLocation dest,RegLocation rhs,bool is_high_op)2726 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
2727                                 bool is_high_op) {
2728   bool rhs_in_mem = rhs.location != kLocPhysReg;
2729   bool dest_in_mem = dest.location != kLocPhysReg;
2730   bool is64Bit = cu_->target64;
2731   DCHECK(!rhs_in_mem || !dest_in_mem);
2732   switch (op) {
2733     case Instruction::ADD_LONG:
2734     case Instruction::ADD_LONG_2ADDR:
2735       if (dest_in_mem) {
2736         return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR;
2737       } else if (rhs_in_mem) {
2738         return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM;
2739       }
2740       return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR;
2741     case Instruction::SUB_LONG:
2742     case Instruction::SUB_LONG_2ADDR:
2743       if (dest_in_mem) {
2744         return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR;
2745       } else if (rhs_in_mem) {
2746         return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM;
2747       }
2748       return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR;
2749     case Instruction::AND_LONG_2ADDR:
2750     case Instruction::AND_LONG:
2751       if (dest_in_mem) {
2752         return is64Bit ? kX86And64MR : kX86And32MR;
2753       }
2754       if (is64Bit) {
2755         return rhs_in_mem ? kX86And64RM : kX86And64RR;
2756       }
2757       return rhs_in_mem ? kX86And32RM : kX86And32RR;
2758     case Instruction::OR_LONG:
2759     case Instruction::OR_LONG_2ADDR:
2760       if (dest_in_mem) {
2761         return is64Bit ? kX86Or64MR : kX86Or32MR;
2762       }
2763       if (is64Bit) {
2764         return rhs_in_mem ? kX86Or64RM : kX86Or64RR;
2765       }
2766       return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
2767     case Instruction::XOR_LONG:
2768     case Instruction::XOR_LONG_2ADDR:
2769       if (dest_in_mem) {
2770         return is64Bit ? kX86Xor64MR : kX86Xor32MR;
2771       }
2772       if (is64Bit) {
2773         return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR;
2774       }
2775       return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
2776     default:
2777       LOG(FATAL) << "Unexpected opcode: " << op;
2778       return kX86Add32RR;
2779   }
2780 }
2781 
GetOpcode(Instruction::Code op,RegLocation loc,bool is_high_op,int32_t value)2782 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
2783                                 int32_t value) {
2784   bool in_mem = loc.location != kLocPhysReg;
2785   bool is64Bit = cu_->target64;
2786   bool byte_imm = IS_SIMM8(value);
2787   DCHECK(in_mem || !loc.reg.IsFloat());
2788   switch (op) {
2789     case Instruction::ADD_LONG:
2790     case Instruction::ADD_LONG_2ADDR:
2791       if (byte_imm) {
2792         if (in_mem) {
2793           return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
2794         }
2795         return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
2796       }
2797       if (in_mem) {
2798         return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI;
2799       }
2800       return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI;
2801     case Instruction::SUB_LONG:
2802     case Instruction::SUB_LONG_2ADDR:
2803       if (byte_imm) {
2804         if (in_mem) {
2805           return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
2806         }
2807         return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
2808       }
2809       if (in_mem) {
2810         return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI;
2811       }
2812       return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI;
2813     case Instruction::AND_LONG_2ADDR:
2814     case Instruction::AND_LONG:
2815       if (byte_imm) {
2816         if (is64Bit) {
2817           return in_mem ? kX86And64MI8 : kX86And64RI8;
2818         }
2819         return in_mem ? kX86And32MI8 : kX86And32RI8;
2820       }
2821       if (is64Bit) {
2822         return in_mem ? kX86And64MI : kX86And64RI;
2823       }
2824       return in_mem ? kX86And32MI : kX86And32RI;
2825     case Instruction::OR_LONG:
2826     case Instruction::OR_LONG_2ADDR:
2827       if (byte_imm) {
2828         if (is64Bit) {
2829           return in_mem ? kX86Or64MI8 : kX86Or64RI8;
2830         }
2831         return in_mem ? kX86Or32MI8 : kX86Or32RI8;
2832       }
2833       if (is64Bit) {
2834         return in_mem ? kX86Or64MI : kX86Or64RI;
2835       }
2836       return in_mem ? kX86Or32MI : kX86Or32RI;
2837     case Instruction::XOR_LONG:
2838     case Instruction::XOR_LONG_2ADDR:
2839       if (byte_imm) {
2840         if (is64Bit) {
2841           return in_mem ? kX86Xor64MI8 : kX86Xor64RI8;
2842         }
2843         return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
2844       }
2845       if (is64Bit) {
2846         return in_mem ? kX86Xor64MI : kX86Xor64RI;
2847       }
2848       return in_mem ? kX86Xor32MI : kX86Xor32RI;
2849     default:
2850       LOG(FATAL) << "Unexpected opcode: " << op;
2851       UNREACHABLE();
2852   }
2853 }
2854 
GenLongImm(RegLocation rl_dest,RegLocation rl_src,Instruction::Code op)2855 bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
2856   DCHECK(rl_src.is_const);
2857   int64_t val = mir_graph_->ConstantValueWide(rl_src);
2858 
2859   if (cu_->target64) {
2860     // We can do with imm only if it fits 32 bit
2861     if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
2862       return false;
2863     }
2864 
2865     rl_dest = UpdateLocWideTyped(rl_dest);
2866 
2867     if ((rl_dest.location == kLocDalvikFrame) ||
2868         (rl_dest.location == kLocCompilerTemp)) {
2869       int r_base = rs_rX86_SP_32.GetReg();
2870       int displacement = SRegOffset(rl_dest.s_reg_low);
2871 
2872       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2873       X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
2874       LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
2875       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2876                               true /* is_load */, true /* is64bit */);
2877       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2878                               false /* is_load */, true /* is64bit */);
2879       return true;
2880     }
2881 
2882     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2883     DCHECK_EQ(rl_result.location, kLocPhysReg);
2884     DCHECK(!rl_result.reg.IsFloat());
2885 
2886     X86OpCode x86op = GetOpcode(op, rl_result, false, val);
2887     NewLIR2(x86op, rl_result.reg.GetReg(), val);
2888 
2889     StoreValueWide(rl_dest, rl_result);
2890     return true;
2891   }
2892 
2893   int32_t val_lo = Low32Bits(val);
2894   int32_t val_hi = High32Bits(val);
2895   rl_dest = UpdateLocWideTyped(rl_dest);
2896 
2897   // Can we just do this into memory?
2898   if ((rl_dest.location == kLocDalvikFrame) ||
2899       (rl_dest.location == kLocCompilerTemp)) {
2900     int r_base = rs_rX86_SP_32.GetReg();
2901     int displacement = SRegOffset(rl_dest.s_reg_low);
2902 
2903     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2904     if (!IsNoOp(op, val_lo)) {
2905       X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
2906       LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
2907       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2908                               true /* is_load */, true /* is64bit */);
2909       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2910                               false /* is_load */, true /* is64bit */);
2911     }
2912     if (!IsNoOp(op, val_hi)) {
2913       X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
2914       LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi);
2915       AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2916                                 true /* is_load */, true /* is64bit */);
2917       AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2918                                 false /* is_load */, true /* is64bit */);
2919     }
2920     return true;
2921   }
2922 
2923   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2924   DCHECK_EQ(rl_result.location, kLocPhysReg);
2925   DCHECK(!rl_result.reg.IsFloat());
2926 
2927   if (!IsNoOp(op, val_lo)) {
2928     X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
2929     NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
2930   }
2931   if (!IsNoOp(op, val_hi)) {
2932     X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
2933     NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
2934   }
2935   StoreValueWide(rl_dest, rl_result);
2936   return true;
2937 }
2938 
GenLongLongImm(RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_src2,Instruction::Code op)2939 bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
2940                                 RegLocation rl_src2, Instruction::Code op) {
2941   DCHECK(rl_src2.is_const);
2942   int64_t val = mir_graph_->ConstantValueWide(rl_src2);
2943 
2944   if (cu_->target64) {
2945     // We can do with imm only if it fits 32 bit
2946     if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
2947       return false;
2948     }
2949     if (rl_dest.location == kLocPhysReg &&
2950         rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
2951       X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
2952       OpRegCopy(rl_dest.reg, rl_src1.reg);
2953       NewLIR2(x86op, rl_dest.reg.GetReg(), val);
2954       StoreFinalValueWide(rl_dest, rl_dest);
2955       return true;
2956     }
2957 
2958     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2959     // We need the values to be in a temporary
2960     RegLocation rl_result = ForceTempWide(rl_src1);
2961 
2962     X86OpCode x86op = GetOpcode(op, rl_result, false, val);
2963     NewLIR2(x86op, rl_result.reg.GetReg(), val);
2964 
2965     StoreFinalValueWide(rl_dest, rl_result);
2966     return true;
2967   }
2968 
2969   int32_t val_lo = Low32Bits(val);
2970   int32_t val_hi = High32Bits(val);
2971   rl_dest = UpdateLocWideTyped(rl_dest);
2972   rl_src1 = UpdateLocWideTyped(rl_src1);
2973 
2974   // Can we do this directly into the destination registers?
2975   if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
2976       rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() &&
2977       rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) {
2978     if (!IsNoOp(op, val_lo)) {
2979       X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
2980       NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo);
2981     }
2982     if (!IsNoOp(op, val_hi)) {
2983       X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
2984       NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi);
2985     }
2986 
2987     StoreFinalValueWide(rl_dest, rl_dest);
2988     return true;
2989   }
2990 
2991   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2992   DCHECK_EQ(rl_src1.location, kLocPhysReg);
2993 
2994   // We need the values to be in a temporary
2995   RegLocation rl_result = ForceTempWide(rl_src1);
2996   if (!IsNoOp(op, val_lo)) {
2997     X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
2998     NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
2999   }
3000   if (!IsNoOp(op, val_hi)) {
3001     X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
3002     NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
3003   }
3004 
3005   StoreFinalValueWide(rl_dest, rl_result);
3006   return true;
3007 }
3008 
3009 // For final classes there are no sub-classes to check and so we can answer the instance-of
3010 // question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
GenInstanceofFinal(bool use_declaring_class,uint32_t type_idx,RegLocation rl_dest,RegLocation rl_src)3011 void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
3012                                     RegLocation rl_dest, RegLocation rl_src) {
3013   RegLocation object = LoadValue(rl_src, kRefReg);
3014   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
3015   RegStorage result_reg = rl_result.reg;
3016 
3017   // For 32-bit, SETcc only works with EAX..EDX.
3018   RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg;
3019   if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) {
3020     result_reg = AllocateByteRegister();
3021   }
3022 
3023   // Assume that there is no match.
3024   LoadConstant(result_reg, 0);
3025   LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, nullptr);
3026 
3027   // We will use this register to compare to memory below.
3028   // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode).
3029   // For this reason, force allocation of a 32 bit register to use, so that the
3030   // compare to memory will be done using a 32 bit comparision.
3031   // The LoadRefDisp(s) below will work normally, even in 64 bit mode.
3032   RegStorage check_class = AllocTemp();
3033 
3034   // If Method* is already in a register, we can save a copy.
3035   RegLocation rl_method = mir_graph_->GetMethodLoc();
3036   int32_t offset_of_type = mirror::Array::DataOffset(
3037       sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
3038       (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
3039 
3040   if (rl_method.location == kLocPhysReg) {
3041     if (use_declaring_class) {
3042       LoadRefDisp(rl_method.reg, ArtMethod::DeclaringClassOffset().Int32Value(),
3043                   check_class, kNotVolatile);
3044     } else {
3045       LoadRefDisp(rl_method.reg, ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
3046                   check_class, kNotVolatile);
3047       LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
3048     }
3049   } else {
3050     LoadCurrMethodDirect(check_class);
3051     if (use_declaring_class) {
3052       LoadRefDisp(check_class, ArtMethod::DeclaringClassOffset().Int32Value(),
3053                   check_class, kNotVolatile);
3054     } else {
3055       LoadRefDisp(check_class, ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
3056                   check_class, kNotVolatile);
3057       LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
3058     }
3059   }
3060 
3061   // Compare the computed class to the class in the object.
3062   DCHECK_EQ(object.location, kLocPhysReg);
3063   OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value());
3064 
3065   // Set the low byte of the result to 0 or 1 from the compare condition code.
3066   NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq);
3067 
3068   LIR* target = NewLIR0(kPseudoTargetLabel);
3069   null_branchover->target = target;
3070   FreeTemp(check_class);
3071   if (IsTemp(result_reg)) {
3072     OpRegCopy(rl_result.reg, result_reg);
3073     FreeTemp(result_reg);
3074   }
3075   StoreValue(rl_dest, rl_result);
3076 }
3077 
GenArithOpInt(Instruction::Code opcode,RegLocation rl_dest,RegLocation rl_lhs,RegLocation rl_rhs,int flags)3078 void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
3079                                RegLocation rl_lhs, RegLocation rl_rhs, int flags) {
3080   OpKind op = kOpBkpt;
3081   bool is_div_rem = false;
3082   bool unary = false;
3083   bool shift_op = false;
3084   bool is_two_addr = false;
3085   RegLocation rl_result;
3086   switch (opcode) {
3087     case Instruction::NEG_INT:
3088       op = kOpNeg;
3089       unary = true;
3090       break;
3091     case Instruction::NOT_INT:
3092       op = kOpMvn;
3093       unary = true;
3094       break;
3095     case Instruction::ADD_INT_2ADDR:
3096       is_two_addr = true;
3097       FALLTHROUGH_INTENDED;
3098     case Instruction::ADD_INT:
3099       op = kOpAdd;
3100       break;
3101     case Instruction::SUB_INT_2ADDR:
3102       is_two_addr = true;
3103       FALLTHROUGH_INTENDED;
3104     case Instruction::SUB_INT:
3105       op = kOpSub;
3106       break;
3107     case Instruction::MUL_INT_2ADDR:
3108       is_two_addr = true;
3109       FALLTHROUGH_INTENDED;
3110     case Instruction::MUL_INT:
3111       op = kOpMul;
3112       break;
3113     case Instruction::DIV_INT_2ADDR:
3114       is_two_addr = true;
3115       FALLTHROUGH_INTENDED;
3116     case Instruction::DIV_INT:
3117       op = kOpDiv;
3118       is_div_rem = true;
3119       break;
3120     /* NOTE: returns in kArg1 */
3121     case Instruction::REM_INT_2ADDR:
3122       is_two_addr = true;
3123       FALLTHROUGH_INTENDED;
3124     case Instruction::REM_INT:
3125       op = kOpRem;
3126       is_div_rem = true;
3127       break;
3128     case Instruction::AND_INT_2ADDR:
3129       is_two_addr = true;
3130       FALLTHROUGH_INTENDED;
3131     case Instruction::AND_INT:
3132       op = kOpAnd;
3133       break;
3134     case Instruction::OR_INT_2ADDR:
3135       is_two_addr = true;
3136       FALLTHROUGH_INTENDED;
3137     case Instruction::OR_INT:
3138       op = kOpOr;
3139       break;
3140     case Instruction::XOR_INT_2ADDR:
3141       is_two_addr = true;
3142       FALLTHROUGH_INTENDED;
3143     case Instruction::XOR_INT:
3144       op = kOpXor;
3145       break;
3146     case Instruction::SHL_INT_2ADDR:
3147       is_two_addr = true;
3148       FALLTHROUGH_INTENDED;
3149     case Instruction::SHL_INT:
3150       shift_op = true;
3151       op = kOpLsl;
3152       break;
3153     case Instruction::SHR_INT_2ADDR:
3154       is_two_addr = true;
3155       FALLTHROUGH_INTENDED;
3156     case Instruction::SHR_INT:
3157       shift_op = true;
3158       op = kOpAsr;
3159       break;
3160     case Instruction::USHR_INT_2ADDR:
3161       is_two_addr = true;
3162       FALLTHROUGH_INTENDED;
3163     case Instruction::USHR_INT:
3164       shift_op = true;
3165       op = kOpLsr;
3166       break;
3167     default:
3168       LOG(FATAL) << "Invalid word arith op: " << opcode;
3169   }
3170 
3171   // Can we convert to a two address instruction?
3172   if (!is_two_addr &&
3173         (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
3174          mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
3175     is_two_addr = true;
3176   }
3177 
3178   if (!GenerateTwoOperandInstructions()) {
3179     is_two_addr = false;
3180   }
3181 
3182   // Get the div/rem stuff out of the way.
3183   if (is_div_rem) {
3184     rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, flags);
3185     StoreValue(rl_dest, rl_result);
3186     return;
3187   }
3188 
3189   // If we generate any memory access below, it will reference a dalvik reg.
3190   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
3191 
3192   if (unary) {
3193     rl_lhs = LoadValue(rl_lhs, kCoreReg);
3194     rl_result = UpdateLocTyped(rl_dest);
3195     rl_result = EvalLoc(rl_dest, kCoreReg, true);
3196     OpRegReg(op, rl_result.reg, rl_lhs.reg);
3197   } else {
3198     if (shift_op) {
3199       // X86 doesn't require masking and must use ECX.
3200       RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
3201       LoadValueDirectFixed(rl_rhs, t_reg);
3202       if (is_two_addr) {
3203         // Can we do this directly into memory?
3204         rl_result = UpdateLocTyped(rl_dest);
3205         if (rl_result.location != kLocPhysReg) {
3206           // Okay, we can do this into memory
3207           OpMemReg(op, rl_result, t_reg.GetReg());
3208           FreeTemp(t_reg);
3209           return;
3210         } else if (!rl_result.reg.IsFloat()) {
3211           // Can do this directly into the result register
3212           OpRegReg(op, rl_result.reg, t_reg);
3213           FreeTemp(t_reg);
3214           StoreFinalValue(rl_dest, rl_result);
3215           return;
3216         }
3217       }
3218       // Three address form, or we can't do directly.
3219       rl_lhs = LoadValue(rl_lhs, kCoreReg);
3220       rl_result = EvalLoc(rl_dest, kCoreReg, true);
3221       OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg);
3222       FreeTemp(t_reg);
3223     } else {
3224       // Multiply is 3 operand only (sort of).
3225       if (is_two_addr && op != kOpMul) {
3226         // Can we do this directly into memory?
3227         rl_result = UpdateLocTyped(rl_dest);
3228         if (rl_result.location == kLocPhysReg) {
3229           // Ensure res is in a core reg
3230           rl_result = EvalLoc(rl_dest, kCoreReg, true);
3231           // Can we do this from memory directly?
3232           rl_rhs = UpdateLocTyped(rl_rhs);
3233           if (rl_rhs.location != kLocPhysReg) {
3234             OpRegMem(op, rl_result.reg, rl_rhs);
3235             StoreFinalValue(rl_dest, rl_result);
3236             return;
3237           } else if (!rl_rhs.reg.IsFloat()) {
3238             OpRegReg(op, rl_result.reg, rl_rhs.reg);
3239             StoreFinalValue(rl_dest, rl_result);
3240             return;
3241           }
3242         }
3243         rl_rhs = LoadValue(rl_rhs, kCoreReg);
3244         // It might happen rl_rhs and rl_dest are the same VR
3245         // in this case rl_dest is in reg after LoadValue while
3246         // rl_result is not updated yet, so do this
3247         rl_result = UpdateLocTyped(rl_dest);
3248         if (rl_result.location != kLocPhysReg) {
3249           // Okay, we can do this into memory.
3250           OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
3251           return;
3252         } else if (!rl_result.reg.IsFloat()) {
3253           // Can do this directly into the result register.
3254           OpRegReg(op, rl_result.reg, rl_rhs.reg);
3255           StoreFinalValue(rl_dest, rl_result);
3256           return;
3257         } else {
3258           rl_lhs = LoadValue(rl_lhs, kCoreReg);
3259           rl_result = EvalLoc(rl_dest, kCoreReg, true);
3260           OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
3261         }
3262       } else {
3263         // Try to use reg/memory instructions.
3264         rl_lhs = UpdateLocTyped(rl_lhs);
3265         rl_rhs = UpdateLocTyped(rl_rhs);
3266         // We can't optimize with FP registers.
3267         if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
3268           // Something is difficult, so fall back to the standard case.
3269           rl_lhs = LoadValue(rl_lhs, kCoreReg);
3270           rl_rhs = LoadValue(rl_rhs, kCoreReg);
3271           rl_result = EvalLoc(rl_dest, kCoreReg, true);
3272           OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
3273         } else {
3274           // We can optimize by moving to result and using memory operands.
3275           if (rl_rhs.location != kLocPhysReg) {
3276             // Force LHS into result.
3277             // We should be careful with order here
3278             // If rl_dest and rl_lhs points to the same VR we should load first
3279             // If the are different we should find a register first for dest
3280             if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
3281                 mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
3282               rl_lhs = LoadValue(rl_lhs, kCoreReg);
3283               rl_result = EvalLoc(rl_dest, kCoreReg, true);
3284               // No-op if these are the same.
3285               OpRegCopy(rl_result.reg, rl_lhs.reg);
3286             } else {
3287               rl_result = EvalLoc(rl_dest, kCoreReg, true);
3288               LoadValueDirect(rl_lhs, rl_result.reg);
3289             }
3290             OpRegMem(op, rl_result.reg, rl_rhs);
3291           } else if (rl_lhs.location != kLocPhysReg) {
3292             // RHS is in a register; LHS is in memory.
3293             if (op != kOpSub) {
3294               // Force RHS into result and operate on memory.
3295               rl_result = EvalLoc(rl_dest, kCoreReg, true);
3296               OpRegCopy(rl_result.reg, rl_rhs.reg);
3297               OpRegMem(op, rl_result.reg, rl_lhs);
3298             } else {
3299               // Subtraction isn't commutative.
3300               rl_lhs = LoadValue(rl_lhs, kCoreReg);
3301               rl_rhs = LoadValue(rl_rhs, kCoreReg);
3302               rl_result = EvalLoc(rl_dest, kCoreReg, true);
3303               OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
3304             }
3305           } else {
3306             // Both are in registers.
3307             rl_lhs = LoadValue(rl_lhs, kCoreReg);
3308             rl_rhs = LoadValue(rl_rhs, kCoreReg);
3309             rl_result = EvalLoc(rl_dest, kCoreReg, true);
3310             OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
3311           }
3312         }
3313       }
3314     }
3315   }
3316   StoreValue(rl_dest, rl_result);
3317 }
3318 
IsOperationSafeWithoutTemps(RegLocation rl_lhs,RegLocation rl_rhs)3319 bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
3320   // If we have non-core registers, then we can't do good things.
3321   if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) {
3322     return false;
3323   }
3324   if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) {
3325     return false;
3326   }
3327 
3328   // Everything will be fine :-).
3329   return true;
3330 }
3331 
GenIntToLong(RegLocation rl_dest,RegLocation rl_src)3332 void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
3333   if (!cu_->target64) {
3334     Mir2Lir::GenIntToLong(rl_dest, rl_src);
3335     return;
3336   }
3337   rl_src = UpdateLocTyped(rl_src);
3338   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
3339   if (rl_src.location == kLocPhysReg) {
3340     NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
3341   } else {
3342     int displacement = SRegOffset(rl_src.s_reg_low);
3343     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
3344     LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP_32.GetReg(),
3345                      displacement + LOWORD_OFFSET);
3346     AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
3347                             true /* is_load */, true /* is_64bit */);
3348   }
3349   StoreValueWide(rl_dest, rl_result);
3350 }
3351 
GenLongToInt(RegLocation rl_dest,RegLocation rl_src)3352 void X86Mir2Lir::GenLongToInt(RegLocation rl_dest, RegLocation rl_src) {
3353   rl_src = UpdateLocWide(rl_src);
3354   rl_src = NarrowRegLoc(rl_src);
3355   StoreValue(rl_dest, rl_src);
3356 
3357   if (cu_->target64) {
3358     // if src and dest are in the same phys reg then StoreValue generates
3359     // no operation but we need explicit 32-bit mov R, R to clear
3360     // the higher 32-bits
3361     rl_dest = UpdateLoc(rl_dest);
3362     if (rl_src.location == kLocPhysReg && rl_dest.location == kLocPhysReg
3363            && IsSameReg(rl_src.reg, rl_dest.reg)) {
3364         LIR* copy_lir = OpRegCopyNoInsert(rl_dest.reg, rl_dest.reg);
3365         // remove nop flag set by OpRegCopyNoInsert if src == dest
3366         copy_lir->flags.is_nop = false;
3367         AppendLIR(copy_lir);
3368     }
3369   }
3370 }
3371 
GenShiftOpLong(Instruction::Code opcode,RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_shift)3372 void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
3373                         RegLocation rl_src1, RegLocation rl_shift) {
3374   if (!cu_->target64) {
3375     // Long shift operations in 32-bit. Use shld or shrd to create a 32-bit register filled from
3376     // the other half, shift the other half, if the shift amount is less than 32 we're done,
3377     // otherwise move one register to the other and place zero or sign bits in the other.
3378     LIR* branch;
3379     FlushAllRegs();
3380     LockCallTemps();
3381     LoadValueDirectFixed(rl_shift, rs_rCX);
3382     RegStorage r_tmp = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
3383     LoadValueDirectWideFixed(rl_src1, r_tmp);
3384     switch (opcode) {
3385       case Instruction::SHL_LONG:
3386       case Instruction::SHL_LONG_2ADDR:
3387         NewLIR3(kX86Shld32RRC, r_tmp.GetHighReg(), r_tmp.GetLowReg(), rs_rCX.GetReg());
3388         NewLIR2(kX86Sal32RC, r_tmp.GetLowReg(), rs_rCX.GetReg());
3389         NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
3390         branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
3391         OpRegCopy(r_tmp.GetHigh(), r_tmp.GetLow());
3392         LoadConstant(r_tmp.GetLow(), 0);
3393         branch->target = NewLIR0(kPseudoTargetLabel);
3394         break;
3395       case Instruction::SHR_LONG:
3396       case Instruction::SHR_LONG_2ADDR:
3397         NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(), rs_rCX.GetReg());
3398         NewLIR2(kX86Sar32RC, r_tmp.GetHighReg(), rs_rCX.GetReg());
3399         NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
3400         branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
3401         OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh());
3402         NewLIR2(kX86Sar32RI, r_tmp.GetHighReg(), 31);
3403         branch->target = NewLIR0(kPseudoTargetLabel);
3404         break;
3405       case Instruction::USHR_LONG:
3406       case Instruction::USHR_LONG_2ADDR:
3407         NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(),
3408                rs_rCX.GetReg());
3409         NewLIR2(kX86Shr32RC, r_tmp.GetHighReg(), rs_rCX.GetReg());
3410         NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
3411         branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
3412         OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh());
3413         LoadConstant(r_tmp.GetHigh(), 0);
3414         branch->target = NewLIR0(kPseudoTargetLabel);
3415         break;
3416       default:
3417         LOG(FATAL) << "Unexpected case: " << opcode;
3418         return;
3419     }
3420     RegLocation rl_result = LocCReturnWide();
3421     StoreValueWide(rl_dest, rl_result);
3422     return;
3423   }
3424 
3425   bool is_two_addr = false;
3426   OpKind op = kOpBkpt;
3427   RegLocation rl_result;
3428 
3429   switch (opcode) {
3430     case Instruction::SHL_LONG_2ADDR:
3431       is_two_addr = true;
3432       FALLTHROUGH_INTENDED;
3433     case Instruction::SHL_LONG:
3434       op = kOpLsl;
3435       break;
3436     case Instruction::SHR_LONG_2ADDR:
3437       is_two_addr = true;
3438       FALLTHROUGH_INTENDED;
3439     case Instruction::SHR_LONG:
3440       op = kOpAsr;
3441       break;
3442     case Instruction::USHR_LONG_2ADDR:
3443       is_two_addr = true;
3444       FALLTHROUGH_INTENDED;
3445     case Instruction::USHR_LONG:
3446       op = kOpLsr;
3447       break;
3448     default:
3449       op = kOpBkpt;
3450   }
3451 
3452   // X86 doesn't require masking and must use ECX.
3453   RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
3454   LoadValueDirectFixed(rl_shift, t_reg);
3455   if (is_two_addr) {
3456     // Can we do this directly into memory?
3457     rl_result = UpdateLocWideTyped(rl_dest);
3458     if (rl_result.location != kLocPhysReg) {
3459       // Okay, we can do this into memory
3460       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
3461       OpMemReg(op, rl_result, t_reg.GetReg());
3462     } else if (!rl_result.reg.IsFloat()) {
3463       // Can do this directly into the result register
3464       OpRegReg(op, rl_result.reg, t_reg);
3465       StoreFinalValueWide(rl_dest, rl_result);
3466     }
3467   } else {
3468     // Three address form, or we can't do directly.
3469     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
3470     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
3471     OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
3472     StoreFinalValueWide(rl_dest, rl_result);
3473   }
3474 
3475   FreeTemp(t_reg);
3476 }
3477 
3478 }  // namespace art
3479