/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "codegen_x86.h"

#include "base/logging.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "x86_lir.h"

namespace art {

void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode,
                                 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  X86OpCode op = kX86Nop;
  RegLocation rl_result;

  /*
   * Don't attempt to optimize register usage since these opcodes call out to
   * the handlers.
   */
  switch (opcode) {
    case Instruction::ADD_FLOAT_2ADDR:
    case Instruction::ADD_FLOAT:
      op = kX86AddssRR;
      break;
    case Instruction::SUB_FLOAT_2ADDR:
    case Instruction::SUB_FLOAT:
      op = kX86SubssRR;
      break;
    case Instruction::DIV_FLOAT_2ADDR:
    case Instruction::DIV_FLOAT:
      op = kX86DivssRR;
      break;
    case Instruction::MUL_FLOAT_2ADDR:
    case Instruction::MUL_FLOAT:
      op = kX86MulssRR;
      break;
    case Instruction::REM_FLOAT_2ADDR:
    case Instruction::REM_FLOAT:
      GenRemFP(rl_dest, rl_src1, rl_src2, false /* is_double */);
      return;
    case Instruction::NEG_FLOAT:
      GenNegFloat(rl_dest, rl_src1);
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  rl_src1 = LoadValue(rl_src1, kFPReg);
  rl_src2 = LoadValue(rl_src2, kFPReg);
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  RegStorage r_dest = rl_result.reg;
  RegStorage r_src1 = rl_src1.reg;
  RegStorage r_src2 = rl_src2.reg;
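  // SSE scalar arithmetic is two-operand and overwrites its destination, so
  // if the destination register aliases src2, move src2 to a temp before
  // copying src1 into the destination.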
  if (r_dest == r_src2) {
    r_src2 = AllocTempSingle();
    OpRegCopy(r_src2, r_dest);
  }
  OpRegCopy(r_dest, r_src1);
  NewLIR2(op, r_dest.GetReg(), r_src2.GetReg());
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  DCHECK(rl_dest.wide);
  DCHECK(rl_dest.fp);
  DCHECK(rl_src1.wide);
  DCHECK(rl_src1.fp);
  DCHECK(rl_src2.wide);
  DCHECK(rl_src2.fp);
  X86OpCode op = kX86Nop;
  RegLocation rl_result;

  switch (opcode) {
    case Instruction::ADD_DOUBLE_2ADDR:
    case Instruction::ADD_DOUBLE:
      op = kX86AddsdRR;
      break;
    case Instruction::SUB_DOUBLE_2ADDR:
    case Instruction::SUB_DOUBLE:
      op = kX86SubsdRR;
      break;
    case Instruction::DIV_DOUBLE_2ADDR:
    case Instruction::DIV_DOUBLE:
      op = kX86DivsdRR;
      break;
    case Instruction::MUL_DOUBLE_2ADDR:
    case Instruction::MUL_DOUBLE:
      op = kX86MulsdRR;
      break;
    case Instruction::REM_DOUBLE_2ADDR:
    case Instruction::REM_DOUBLE:
      GenRemFP(rl_dest, rl_src1, rl_src2, true /* is_double */);
      return;
    case Instruction::NEG_DOUBLE:
      GenNegDouble(rl_dest, rl_src1);
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  rl_src1 = LoadValueWide(rl_src1, kFPReg);
  rl_src2 = LoadValueWide(rl_src2, kFPReg);
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  if (rl_result.reg == rl_src2.reg) {
    rl_src2.reg = AllocTempDouble();
    OpRegCopy(rl_src2.reg, rl_result.reg);
  }
  OpRegCopy(rl_result.reg, rl_src1.reg);
  NewLIR2(op, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
  StoreValueWide(rl_dest, rl_result);
}

void X86Mir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
                                            int32_t constant) {
  // TODO: need x86 implementation.
  UNUSED(rl_dest, rl_src1, constant);
  LOG(FATAL) << "Unimplemented GenMultiplyByConstantFloat in x86";
}

void X86Mir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
                                             int64_t constant) {
  // TODO: need x86 implementation.
  UNUSED(rl_dest, rl_src1, constant);
  LOG(FATAL) << "Unimplemented GenMultiplyByConstantDouble in x86";
}

void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) {
  // Compute offsets to the source and destination VRs on the stack.
  int src_v_reg_offset = SRegOffset(rl_src.s_reg_low);
  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);

  // Update the in-register state of the source.
  rl_src = UpdateLocWide(rl_src);

  // All memory accesses below reference Dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  // If the source is in a physical register, write it back to its stack location.
  if (rl_src.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src.reg);

    if (reg_info != nullptr && reg_info->IsTemp()) {
      // Call FlushSpecificReg because it only writes back the VR if it is dirty.
      FlushSpecificReg(reg_info);
      // ResetDef to prevent NullifyRange from removing stores.
      ResetDef(rl_src.reg);
    } else {
      // It must have been register promoted if it is not a temp but is still in a
      // physical register. Since we need it to be in memory to convert, we place
      // it there now.
      const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
      StoreBaseDisp(rs_rSP, src_v_reg_offset, rl_src.reg, k64, kNotVolatile);
    }
  }

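  // 32-bit x86 SSE has no 64-bit-integer-to-FP conversion, but the x87 fild
  // instruction loads a signed 64-bit integer directly, so the conversion goes
  // through memory and the x87 stack.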
  // Push the source virtual register onto the x87 stack.
  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, rs_rX86_SP_32.GetReg(),
                              src_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, true /* is64bit */);

  // Now pop it off the x87 stack and store it in the destination VR's stack location.
  int opcode = is_double ? kX86Fstp64M : kX86Fstp32M;
  int displacement = is_double ? dest_v_reg_offset + LOWORD_OFFSET : dest_v_reg_offset;
  LIR *fstp = NewLIR2NoDest(opcode, rs_rX86_SP_32.GetReg(), displacement);
  AnnotateDalvikRegAccess(fstp, displacement >> 2, false /* is_load */, is_double);

  /*
   * The result is in a physical register if it was in a temp or was register
   * promoted. For that reason it is enough to check whether it is in a
   * physical register. If it is, then we must do all of the bookkeeping
   * necessary to invalidate the temp (if needed) and load it into the
   * promoted register (if needed). If the result's location is in memory,
   * then we do not need to do anything more since the fstp has already
   * placed the correct value in memory.
   */
  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest) : UpdateLocTyped(rl_dest);
  if (rl_result.location == kLocPhysReg) {
    /*
     * We already know that the result is in a physical register but do not
     * know if it is the right class. So we call EvalLoc(Wide) first, which
     * will ensure that it gets moved to the correct register class.
     */
    rl_result = EvalLoc(rl_dest, kFPReg, true);
    const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
    if (is_double) {
      LoadBaseDisp(rs_rSP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);

      StoreFinalValueWide(rl_dest, rl_result);
    } else {
      Load32Disp(rs_rSP, dest_v_reg_offset, rl_result.reg);

      StoreFinalValue(rl_dest, rl_result);
    }
  }
}

void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
                               RegLocation rl_src) {
  RegisterClass rcSrc = kFPReg;
  X86OpCode op = kX86Nop;
  RegLocation rl_result;
  switch (opcode) {
    case Instruction::INT_TO_FLOAT:
      rcSrc = kCoreReg;
      op = kX86Cvtsi2ssRR;
      break;
    case Instruction::DOUBLE_TO_FLOAT:
      rcSrc = kFPReg;
      op = kX86Cvtsd2ssRR;
      break;
    case Instruction::FLOAT_TO_DOUBLE:
      rcSrc = kFPReg;
      op = kX86Cvtss2sdRR;
      break;
    case Instruction::INT_TO_DOUBLE:
      rcSrc = kCoreReg;
      op = kX86Cvtsi2sdRR;
      break;
    case Instruction::FLOAT_TO_INT: {
      rl_src = LoadValue(rl_src, kFPReg);
      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
      ClobberSReg(rl_dest.s_reg_low);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      RegStorage temp_reg = AllocTempSingle();

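      // cvttss2si yields the "integer indefinite" value 0x80000000 for NaN
      // and out-of-range inputs, but Java's f2i must produce INT_MAX on
      // positive overflow and 0 for NaN. So: convert INT_MAX to float in a
      // temp (it rounds up to 2^31) and compare the source against it. The
      // jae branch (src >= 2^31) keeps the preloaded INT_MAX, the jp branch
      // (unordered, i.e. NaN) zeroes the result, and only in-range values
      // reach the truncating conversion. Negative overflow needs no special
      // case: 0x80000000 is already INT_MIN.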
      LoadConstant(rl_result.reg, 0x7fffffff);
      NewLIR2(kX86Cvtsi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
      NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
      NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
      branch_normal->target = NewLIR0(kPseudoTargetLabel);
      StoreValue(rl_dest, rl_result);
      return;
    }
    case Instruction::DOUBLE_TO_INT: {
      rl_src = LoadValueWide(rl_src, kFPReg);
      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
      ClobberSReg(rl_dest.s_reg_low);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      RegStorage temp_reg = AllocTempDouble();

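      // Same clamping idiom as FLOAT_TO_INT above, with double-precision
      // compare and conversion. Here (double)INT_MAX is exact, and jae also
      // catches fractional values that would truncate to INT_MAX anyway.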
      LoadConstant(rl_result.reg, 0x7fffffff);
      NewLIR2(kX86Cvtsi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
      NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
      NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
      branch_normal->target = NewLIR0(kPseudoTargetLabel);
      StoreValue(rl_dest, rl_result);
      return;
    }
    case Instruction::LONG_TO_DOUBLE:
      if (cu_->target64) {
        rcSrc = kCoreReg;
        op = kX86Cvtsqi2sdRR;
        break;
      }
      GenLongToFP(rl_dest, rl_src, true /* is_double */);
      return;
    case Instruction::LONG_TO_FLOAT:
      if (cu_->target64) {
        rcSrc = kCoreReg;
        op = kX86Cvtsqi2ssRR;
        break;
      }
      GenLongToFP(rl_dest, rl_src, false /* is_double */);
      return;
    case Instruction::FLOAT_TO_LONG:
      if (cu_->target64) {
        rl_src = LoadValue(rl_src, kFPReg);
        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
        ClobberSReg(rl_dest.s_reg_low);
        rl_result = EvalLoc(rl_dest, kCoreReg, true);
        RegStorage temp_reg = AllocTempSingle();

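        // Same clamping idiom as FLOAT_TO_INT, with a 64-bit wrinkle: 2^63-1
        // is not exactly representable as a float, so the loaded constant
        // rounds up to 2^63 on conversion, and jae fires exactly for the
        // inputs whose truncation would be invalid.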
        // Load 0x7fffffffffffffff into rl_result.
        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
        NewLIR2(kX86Cvtsqi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
        NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
        NewLIR2(kX86Cvttss2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
        branch_normal->target = NewLIR0(kPseudoTargetLabel);
        StoreValueWide(rl_dest, rl_result);
      } else {
        CheckEntrypointTypes<kQuickF2l, int64_t, float>();  // int64_t -> kCoreReg
        GenConversionCall(kQuickF2l, rl_dest, rl_src, kCoreReg);
      }
      return;
    case Instruction::DOUBLE_TO_LONG:
      if (cu_->target64) {
        rl_src = LoadValueWide(rl_src, kFPReg);
        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
        ClobberSReg(rl_dest.s_reg_low);
        rl_result = EvalLoc(rl_dest, kCoreReg, true);
        RegStorage temp_reg = AllocTempDouble();

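        // Same idiom as FLOAT_TO_LONG; 2^63-1 also rounds up to 2^63 as a
        // double.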
        // Load 0x7fffffffffffffff into rl_result.
        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
        NewLIR2(kX86Cvtsqi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
        NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
        NewLIR2(kX86Cvttsd2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
        branch_normal->target = NewLIR0(kPseudoTargetLabel);
        StoreValueWide(rl_dest, rl_result);
      } else {
        CheckEntrypointTypes<kQuickD2l, int64_t, double>();  // int64_t -> kCoreReg
        GenConversionCall(kQuickD2l, rl_dest, rl_src, kCoreReg);
      }
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  // At this point, the target will be either float or double.
  DCHECK(rl_dest.fp);
  if (rl_src.wide) {
    rl_src = LoadValueWide(rl_src, rcSrc);
  } else {
    rl_src = LoadValue(rl_src, rcSrc);
  }
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
  if (rl_dest.wide) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
}

void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                          bool is_double) {
  // Compute offsets to the source and destination VRs on the stack.
  int src1_v_reg_offset = SRegOffset(rl_src1.s_reg_low);
  int src2_v_reg_offset = SRegOffset(rl_src2.s_reg_low);
  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);

  // Update the in-register state of the sources.
  rl_src1 = is_double ? UpdateLocWide(rl_src1) : UpdateLoc(rl_src1);
  rl_src2 = is_double ? UpdateLocWide(rl_src2) : UpdateLoc(rl_src2);

  // All memory accesses below reference Dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  // If a source is in a physical register, write it back to its stack location.
  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
  if (rl_src1.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src1.reg);

    if (reg_info != nullptr && reg_info->IsTemp()) {
      // Call FlushSpecificReg because it only writes back the VR if it is dirty.
      FlushSpecificReg(reg_info);
      // ResetDef to prevent NullifyRange from removing stores.
      ResetDef(rl_src1.reg);
    } else {
      // It must have been register promoted if it is not a temp but is still in a
      // physical register. Since we need it to be in memory to convert, we place
      // it there now.
      StoreBaseDisp(rs_rSP, src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32,
                    kNotVolatile);
    }
  }

  if (rl_src2.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src2.reg);
    if (reg_info != nullptr && reg_info->IsTemp()) {
      FlushSpecificReg(reg_info);
      ResetDef(rl_src2.reg);
    } else {
      StoreBaseDisp(rs_rSP, src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32,
                    kNotVolatile);
    }
  }

  int fld_opcode = is_double ? kX86Fld64M : kX86Fld32M;

  // Push the source virtual registers onto the x87 stack.
  LIR *fld_2 = NewLIR2NoDest(fld_opcode, rs_rSP.GetReg(),
                             src2_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fld_2, (src2_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, is_double /* is64bit */);

  LIR *fld_1 = NewLIR2NoDest(fld_opcode, rs_rSP.GetReg(),
                             src1_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fld_1, (src1_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, is_double /* is64bit */);

  FlushReg(rs_rAX);
  Clobber(rs_rAX);
  LockTemp(rs_rAX);

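  // x87 fprem computes only a partial remainder: each execution reduces the
  // exponent difference by at most 63 bits and sets the C2 bit (0x400) of the
  // FPU status word while the reduction is incomplete, so loop until C2
  // clears.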
  LIR* retry = NewLIR0(kPseudoTargetLabel);

  // Compute the partial remainder of ST(0) / ST(1) and leave it in ST(0).
  NewLIR0(kX86Fprem);

  // Move the FPU status word to AX.
  NewLIR0(kX86Fstsw16R);

  // Check whether the reduction is complete.
  OpRegImm(kOpAnd, rs_rAX, 0x400);

  // If not, continue computing the remainder.
  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
  branch->target = retry;

  FreeTemp(rs_rAX);

  // Now store the result in the destination VR's stack location.
  int displacement = dest_v_reg_offset + LOWORD_OFFSET;
  int opcode = is_double ? kX86Fst64M : kX86Fst32M;
  LIR *fst = NewLIR2NoDest(opcode, rs_rSP.GetReg(), displacement);
  AnnotateDalvikRegAccess(fst, displacement >> 2, false /* is_load */, is_double /* is64bit */);

  // Pop both operands, ST(0) and ST(1), off the x87 stack.
  NewLIR0(kX86Fucompp);

  /*
   * The result is in a physical register if it was in a temp or was register
   * promoted. For that reason it is enough to check whether it is in a
   * physical register. If it is, then we must do all of the bookkeeping
   * necessary to invalidate the temp (if needed) and load it into the
   * promoted register (if needed). If the result's location is in memory,
   * then we do not need to do anything more since the fst has already
   * placed the correct value in memory.
   */
  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest) : UpdateLocTyped(rl_dest);
  if (rl_result.location == kLocPhysReg) {
    rl_result = EvalLoc(rl_dest, kFPReg, true);
    if (is_double) {
      LoadBaseDisp(rs_rSP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);
      StoreFinalValueWide(rl_dest, rl_result);
    } else {
      Load32Disp(rs_rSP, dest_v_reg_offset, rl_result.reg);
      StoreFinalValue(rl_dest, rl_result);
    }
  }
}

void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest,
                          RegLocation rl_src1, RegLocation rl_src2) {
  bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
  bool unordered_gt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT);
  if (single) {
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
  } else {
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
  }
  // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
  ClobberSReg(rl_dest.s_reg_low);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  LoadConstantNoClobber(rl_result.reg, unordered_gt ? 1 : 0);
  if (single) {
    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  } else {
    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
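  // Materialize {-1, 0, 1} from the flags: ucomis* sets CF when src1 < src2
  // and CF/ZF/PF when unordered. After the result holds 1 for "above" and 0
  // otherwise (via set or the jump/mov pair below), sbb result, 0 subtracts
  // CF, turning "below" (and lt-biased NaN) into -1.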
  LIR* branch = nullptr;
  if (unordered_gt) {
    branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
  }
  // If the result reg can't be byte accessed, use a jump and move instead of a set.
  if (!IsByteRegister(rl_result.reg)) {
    LIR* branch2 = nullptr;
    if (unordered_gt) {
      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x0);
    } else {
      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondBe);
      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x1);
    }
    branch2->target = NewLIR0(kPseudoTargetLabel);
  } else {
    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondA /* above - unsigned > */);
  }
  NewLIR2(kX86Sbb32RI, rl_result.reg.GetReg(), 0);
  if (unordered_gt) {
    branch->target = NewLIR0(kPseudoTargetLabel);
  }
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
                                     bool is_double) {
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  LIR* branch = nullptr;
  RegLocation rl_src1;
  RegLocation rl_src2;
  if (is_double) {
    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  } else {
    rl_src1 = mir_graph_->GetSrc(mir, 0);
    rl_src2 = mir_graph_->GetSrc(mir, 1);
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
  ConditionCode ccode = mir->meta.ccode;
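  // ucomis* reports "less" via CF, "equal" via ZF and "unordered" via PF, so
  // the signed Dalvik conditions are remapped to their unsigned x86
  // counterparts below. The gt_bias of the original cmpg/cmpl decides whether
  // a NaN operand sends the branch to taken or not_taken, handled by the
  // explicit jp branches.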
  switch (ccode) {
    case kCondEq:
      if (!gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      break;
    case kCondNe:
      if (!gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      break;
    case kCondLt:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      ccode = kCondUlt;
      break;
    case kCondLe:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      ccode = kCondLs;
      break;
    case kCondGt:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      ccode = kCondHi;
      break;
    case kCondGe:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}

void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;
  rl_src = LoadValue(rl_src, kCoreReg);
  rl_result = EvalLoc(rl_dest, kCoreReg, true);
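  // Adding 0x80000000 toggles only bit 31 (no lower bits are set, so nothing
  // carries into it): this flips the IEEE sign bit, negating the float in a
  // core register.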
  OpRegRegImm(kOpAdd, rl_result.reg, rl_src.reg, 0x80000000);
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;
  rl_src = LoadValueWide(rl_src, kCoreReg);
  if (cu_->target64) {
    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    OpRegCopy(rl_result.reg, rl_src.reg);
    // Flip the sign bit: rotate it into bit 0, XOR it, rotate it back. This
    // avoids materializing a 64-bit 0x8000000000000000 immediate.
    NewLIR2(kX86Rol64RI, rl_result.reg.GetReg(), 1);
    NewLIR2(kX86Xor64RI, rl_result.reg.GetReg(), 1);
    NewLIR2(kX86Ror64RI, rl_result.reg.GetReg(), 1);
  } else {
    rl_result = ForceTempWide(rl_src);
    OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), 0x80000000);
  }
  StoreValueWide(rl_dest, rl_result);
}

bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) {
  RegLocation rl_dest = InlineTargetWide(info);  // Double-width location for the result.
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  RegLocation rl_src = info->args[0];
  rl_src = LoadValueWide(rl_src, kFPReg);
  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
  NewLIR2(kX86SqrtsdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
  StoreValueWide(rl_dest, rl_result);
  return true;
}

bool X86Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
  // Get the argument.
  RegLocation rl_src = info->args[0];

  // Get the inlined intrinsic target virtual register.
  RegLocation rl_dest = InlineTarget(info);

  // Get the virtual register numbers.
  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);

  // If the argument is the same as the inlined intrinsic target.
  if (v_src_reg == v_dst_reg) {
    rl_src = UpdateLoc(rl_src);

    // If the argument is in a physical register.
    if (rl_src.location == kLocPhysReg) {
      rl_src = LoadValue(rl_src, kCoreReg);
      OpRegImm(kOpAnd, rl_src.reg, 0x7fffffff);
      StoreValue(rl_dest, rl_src);
      return true;
    }
    // The argument is in memory.
    DCHECK((rl_src.location == kLocDalvikFrame) ||
           (rl_src.location == kLocCompilerTemp));

    // Operate directly on memory: clear the sign bit in place.
    int displacement = SRegOffset(rl_dest.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP_32.GetReg(), displacement, 0x7fffffff);
    AnnotateDalvikRegAccess(lir, displacement >> 2, false /* is_load */, false /* is_64bit */);
    AnnotateDalvikRegAccess(lir, displacement >> 2, true /* is_load */, false /* is_64bit */);
    return true;
  } else {
    rl_src = LoadValue(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
    StoreValue(rl_dest, rl_result);
    return true;
  }
}

bool X86Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
  RegLocation rl_src = info->args[0];
  RegLocation rl_dest = InlineTargetWide(info);
  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  if (cu_->target64) {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegCopyWide(rl_result.reg, rl_src.reg);
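    // Shift the sign bit out and back in: lsl 1 / lsr 1 clears bit 63 without
    // materializing a 0x7fffffffffffffff mask.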
    OpRegImm(kOpLsl, rl_result.reg, 1);
    OpRegImm(kOpLsr, rl_result.reg, 1);
    StoreValueWide(rl_dest, rl_result);
    return true;
  }
  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
  rl_src = UpdateLocWide(rl_src);

  // If the argument is in a physical XMM register.
  if (rl_src.location == kLocPhysReg && rl_src.reg.IsFloat()) {
    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
    if (rl_result.reg != rl_src.reg) {
      LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
    } else {
      RegStorage sign_mask = AllocTempDouble();
      LoadConstantWide(sign_mask, 0x7fffffffffffffff);
      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), sign_mask.GetReg());
      FreeTemp(sign_mask);
    }
    StoreValueWide(rl_dest, rl_result);
    return true;
  } else if (v_src_reg == v_dst_reg) {
    // The argument is the same as the inlined intrinsic target.
    // If the argument is in a physical register.
    if (rl_src.location == kLocPhysReg) {
      rl_src = LoadValueWide(rl_src, kCoreReg);
      OpRegImm(kOpAnd, rl_src.reg.GetHigh(), 0x7fffffff);
      StoreValueWide(rl_dest, rl_src);
      return true;
    }
    // The argument is in memory.
    DCHECK((rl_src.location == kLocDalvikFrame) ||
           (rl_src.location == kLocCompilerTemp));

    // Operate directly on memory; only the high word holds the sign bit.
    int displacement = SRegOffset(rl_dest.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP_32.GetReg(), displacement + HIWORD_OFFSET,
                       0x7fffffff);
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */,
                            true /* is_64bit */);
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, false /* is_load */,
                            true /* is_64bit */);
    return true;
  } else {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegCopyWide(rl_result.reg, rl_src.reg);
    OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
    StoreValueWide(rl_dest, rl_result);
    return true;
  }
}

bool X86Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
  if (is_double) {
    RegLocation rl_dest = InlineTargetWide(info);
    if (rl_dest.s_reg_low == INVALID_SREG) {
      // Result is unused, the code is dead. Inlining successful, no code generated.
      return true;
    }
    RegLocation rl_src1 = LoadValueWide(info->args[0], kFPReg);
    RegLocation rl_src2 = LoadValueWide(info->args[2], kFPReg);
    RegLocation rl_result = EvalLocWide(rl_dest, kFPReg, true);

    // Avoid src2 corruption by OpRegCopyWide.
    if (rl_result.reg == rl_src2.reg) {
      std::swap(rl_src2.reg, rl_src1.reg);
    }

    OpRegCopyWide(rl_result.reg, rl_src1.reg);
    NewLIR2(kX86UcomisdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    // If either arg is NaN, return NaN.
    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
    // Min/Max branches.
    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0.
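    // OR propagates a set sign bit from either operand (min(0.0, -0.0) ==
    // -0.0); AND keeps it only if both are negative (max(0.0, -0.0) == 0.0).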
    NewLIR2((is_min) ? kX86OrpdRR : kX86AndpdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
    // Handle NaN.
    branch_nan->target = NewLIR0(kPseudoTargetLabel);
    LoadConstantWide(rl_result.reg, INT64_C(0x7ff8000000000000));

    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
    // Handle Min/Max. Copy greater/lesser value from src2.
    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
    OpRegCopyWide(rl_result.reg, rl_src2.reg);
    // Right operand is already in the result reg.
    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
    // Exit.
    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
    StoreValueWide(rl_dest, rl_result);
  } else {
    RegLocation rl_dest = InlineTarget(info);
    if (rl_dest.s_reg_low == INVALID_SREG) {
      // Result is unused, the code is dead. Inlining successful, no code generated.
      return true;
    }
    RegLocation rl_src1 = LoadValue(info->args[0], kFPReg);
    RegLocation rl_src2 = LoadValue(info->args[1], kFPReg);
    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);

    // Avoid src2 corruption by OpRegCopy.
    if (rl_result.reg == rl_src2.reg) {
      std::swap(rl_src2.reg, rl_src1.reg);
    }

    OpRegCopy(rl_result.reg, rl_src1.reg);
    NewLIR2(kX86UcomissRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    // If either arg is NaN, return NaN.
    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
    // Min/Max branches.
    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0
    // (same sign-bit OR/AND trick as the double case above).
    NewLIR2((is_min) ? kX86OrpsRR : kX86AndpsRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
    // Handle NaN.
    branch_nan->target = NewLIR0(kPseudoTargetLabel);
    LoadConstantNoClobber(rl_result.reg, 0x7fc00000);
    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
    // Handle Min/Max. Copy greater/lesser value from src2.
    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
    OpRegCopy(rl_result.reg, rl_src2.reg);
    // Right operand is already in the result reg.
    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
    // Exit.
    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

}  // namespace art