/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "codegen_x86.h"

#include "base/logging.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "x86_lir.h"

namespace art {

void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode,
                                 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  X86OpCode op = kX86Nop;
  RegLocation rl_result;

  /*
   * Don't attempt to optimize register usage since these opcodes call out to
   * the handlers.
   */
  switch (opcode) {
    case Instruction::ADD_FLOAT_2ADDR:
    case Instruction::ADD_FLOAT:
      op = kX86AddssRR;
      break;
    case Instruction::SUB_FLOAT_2ADDR:
    case Instruction::SUB_FLOAT:
      op = kX86SubssRR;
      break;
    case Instruction::DIV_FLOAT_2ADDR:
    case Instruction::DIV_FLOAT:
      op = kX86DivssRR;
      break;
    case Instruction::MUL_FLOAT_2ADDR:
    case Instruction::MUL_FLOAT:
      op = kX86MulssRR;
      break;
    case Instruction::REM_FLOAT_2ADDR:
    case Instruction::REM_FLOAT:
      GenRemFP(rl_dest, rl_src1, rl_src2, false /* is_double */);
      return;
    case Instruction::NEG_FLOAT:
      GenNegFloat(rl_dest, rl_src1);
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  rl_src1 = LoadValue(rl_src1, kFPReg);
  rl_src2 = LoadValue(rl_src2, kFPReg);
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  RegStorage r_dest = rl_result.reg;
  RegStorage r_src1 = rl_src1.reg;
  RegStorage r_src2 = rl_src2.reg;
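  /*
   * SSE scalar arithmetic is two-operand (the destination is also the left operand), so if
   * the destination register already holds src2, copy src2 aside first; otherwise the copy
   * of src1 into the destination below would clobber it.
   */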
  if (r_dest == r_src2) {
    r_src2 = AllocTempSingle();
    OpRegCopy(r_src2, r_dest);
  }
  OpRegCopy(r_dest, r_src1);
  NewLIR2(op, r_dest.GetReg(), r_src2.GetReg());
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  DCHECK(rl_dest.wide);
  DCHECK(rl_dest.fp);
  DCHECK(rl_src1.wide);
  DCHECK(rl_src1.fp);
  DCHECK(rl_src2.wide);
  DCHECK(rl_src2.fp);
  X86OpCode op = kX86Nop;
  RegLocation rl_result;

  switch (opcode) {
    case Instruction::ADD_DOUBLE_2ADDR:
    case Instruction::ADD_DOUBLE:
      op = kX86AddsdRR;
      break;
    case Instruction::SUB_DOUBLE_2ADDR:
    case Instruction::SUB_DOUBLE:
      op = kX86SubsdRR;
      break;
    case Instruction::DIV_DOUBLE_2ADDR:
    case Instruction::DIV_DOUBLE:
      op = kX86DivsdRR;
      break;
    case Instruction::MUL_DOUBLE_2ADDR:
    case Instruction::MUL_DOUBLE:
      op = kX86MulsdRR;
      break;
    case Instruction::REM_DOUBLE_2ADDR:
    case Instruction::REM_DOUBLE:
      GenRemFP(rl_dest, rl_src1, rl_src2, true /* is_double */);
      return;
    case Instruction::NEG_DOUBLE:
      GenNegDouble(rl_dest, rl_src1);
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  rl_src1 = LoadValueWide(rl_src1, kFPReg);
  rl_src2 = LoadValueWide(rl_src2, kFPReg);
  rl_result = EvalLoc(rl_dest, kFPReg, true);
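  // As in the float case above: the two-operand SSE form overwrites the destination, so keep
  // src2 intact if it already lives in the result register.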
  if (rl_result.reg == rl_src2.reg) {
    rl_src2.reg = AllocTempDouble();
    OpRegCopy(rl_src2.reg, rl_result.reg);
  }
  OpRegCopy(rl_result.reg, rl_src1.reg);
  NewLIR2(op, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
  StoreValueWide(rl_dest, rl_result);
}

void X86Mir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
                                            int32_t constant) {
  // TODO: need x86 implementation.
  UNUSED(rl_dest, rl_src1, constant);
  LOG(FATAL) << "Unimplemented GenMultiplyByConstantFloat in x86";
}

void X86Mir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
                                             int64_t constant) {
  // TODO: need x86 implementation.
  UNUSED(rl_dest, rl_src1, constant);
  LOG(FATAL) << "Unimplemented GenMultiplyByConstantDouble in x86";
}

void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) {
  // Compute offsets to the source and destination VRs on stack
  int src_v_reg_offset = SRegOffset(rl_src.s_reg_low);
  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);

  // Update the in-register state of source.
  rl_src = UpdateLocWide(rl_src);

  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  // If the source is in physical register, then put it in its location on stack.
  if (rl_src.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src.reg);

    if (reg_info != nullptr && reg_info->IsTemp()) {
      // Calling FlushSpecificReg because it will only write back VR if it is dirty.
      FlushSpecificReg(reg_info);
      // ResetDef to prevent NullifyRange from removing stores.
      ResetDef(rl_src.reg);
    } else {
      // It must have been register promoted if it is not a temp but is still in physical
      // register. Since we need it to be in memory to convert, we place it there now.
      const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
      StoreBaseDisp(rs_rSP, src_v_reg_offset, rl_src.reg, k64, kNotVolatile);
    }
  }

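  /*
   * On 32-bit x86 there is no SSE conversion from a 64-bit integer, so the long is run
   * through the x87 unit: fild loads it from the VR slot on the stack and the fstp below
   * writes the converted float/double back to the destination VR slot.
   */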
  // Push the source virtual register onto the x87 stack.
  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, rs_rX86_SP_32.GetReg(),
                              src_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, true /* is64bit */);

  // Now pop off x87 stack and store it in the destination VR's stack location.
  int opcode = is_double ? kX86Fstp64M : kX86Fstp32M;
  int displacement = is_double ? dest_v_reg_offset + LOWORD_OFFSET : dest_v_reg_offset;
  LIR *fstp = NewLIR2NoDest(opcode, rs_rX86_SP_32.GetReg(), displacement);
  AnnotateDalvikRegAccess(fstp, displacement >> 2, false /* is_load */, is_double);

  /*
   * The result is in a physical register if it was in a temp or was register
   * promoted. For that reason it is enough to check if it is in physical
   * register. If it is, then we must do all of the bookkeeping necessary to
   * invalidate temp (if needed) and load in promoted register (if needed).
   * If the result's location is in memory, then we do not need to do anything
   * more since the fstp has already placed the correct value in memory.
   */
  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest) : UpdateLocTyped(rl_dest);
  if (rl_result.location == kLocPhysReg) {
    /*
     * We already know that the result is in a physical register but do not know if it is the
     * right class. So we call EvalLoc(Wide) first which will ensure that it will get moved to the
     * correct register class.
     */
    rl_result = EvalLoc(rl_dest, kFPReg, true);
    const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
    if (is_double) {
      LoadBaseDisp(rs_rSP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);

      StoreFinalValueWide(rl_dest, rl_result);
    } else {
      Load32Disp(rs_rSP, dest_v_reg_offset, rl_result.reg);

      StoreFinalValue(rl_dest, rl_result);
    }
  }
}

void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
                               RegLocation rl_src) {
  RegisterClass rcSrc = kFPReg;
  X86OpCode op = kX86Nop;
  RegLocation rl_result;
  switch (opcode) {
    case Instruction::INT_TO_FLOAT:
      rcSrc = kCoreReg;
      op = kX86Cvtsi2ssRR;
      break;
    case Instruction::DOUBLE_TO_FLOAT:
      rcSrc = kFPReg;
      op = kX86Cvtsd2ssRR;
      break;
    case Instruction::FLOAT_TO_DOUBLE:
      rcSrc = kFPReg;
      op = kX86Cvtss2sdRR;
      break;
    case Instruction::INT_TO_DOUBLE:
      rcSrc = kCoreReg;
      op = kX86Cvtsi2sdRR;
      break;
    case Instruction::FLOAT_TO_INT: {
      rl_src = LoadValue(rl_src, kFPReg);
      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
      ClobberSReg(rl_dest.s_reg_low);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      RegStorage temp_reg = AllocTempSingle();

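      /*
       * Java float-to-int semantics: NaN converts to 0, values at or above (float)INT_MAX
       * saturate to INT_MAX, and cvttss2si already yields 0x80000000 (INT_MIN) on negative
       * overflow. Compare against (float)INT_MAX to route the special cases.
       */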
      LoadConstant(rl_result.reg, 0x7fffffff);
      NewLIR2(kX86Cvtsi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
      NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
      NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
      branch_normal->target = NewLIR0(kPseudoTargetLabel);
      StoreValue(rl_dest, rl_result);
      return;
    }
    case Instruction::DOUBLE_TO_INT: {
      rl_src = LoadValueWide(rl_src, kFPReg);
      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
      ClobberSReg(rl_dest.s_reg_low);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      RegStorage temp_reg = AllocTempDouble();

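      // Same saturation pattern as FLOAT_TO_INT, using the double-precision compare and
      // cvttsd2si.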
      LoadConstant(rl_result.reg, 0x7fffffff);
      NewLIR2(kX86Cvtsi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
      NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
      NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
      branch_normal->target = NewLIR0(kPseudoTargetLabel);
      StoreValue(rl_dest, rl_result);
      return;
    }
    case Instruction::LONG_TO_DOUBLE:
      if (cu_->target64) {
        rcSrc = kCoreReg;
        op = kX86Cvtsqi2sdRR;
        break;
      }
      GenLongToFP(rl_dest, rl_src, true /* is_double */);
      return;
    case Instruction::LONG_TO_FLOAT:
      if (cu_->target64) {
        rcSrc = kCoreReg;
        op = kX86Cvtsqi2ssRR;
        break;
      }
      GenLongToFP(rl_dest, rl_src, false /* is_double */);
      return;
    case Instruction::FLOAT_TO_LONG:
      if (cu_->target64) {
        rl_src = LoadValue(rl_src, kFPReg);
        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
        ClobberSReg(rl_dest.s_reg_low);
        rl_result = EvalLoc(rl_dest, kCoreReg, true);
        RegStorage temp_reg = AllocTempSingle();

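        // 64-bit variant of the FLOAT_TO_INT pattern: NaN -> 0, values at or above
        // (float)LONG_MAX saturate to LONG_MAX, and cvttss2sqi yields LONG_MIN on negative
        // overflow.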
        // Set 0x7fffffffffffffff to rl_result
        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
        NewLIR2(kX86Cvtsqi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
        NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
        NewLIR2(kX86Cvttss2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
        branch_normal->target = NewLIR0(kPseudoTargetLabel);
        StoreValueWide(rl_dest, rl_result);
      } else {
        CheckEntrypointTypes<kQuickF2l, int64_t, float>();  // int64_t -> kCoreReg
        GenConversionCall(kQuickF2l, rl_dest, rl_src, kCoreReg);
      }
      return;
    case Instruction::DOUBLE_TO_LONG:
      if (cu_->target64) {
        rl_src = LoadValueWide(rl_src, kFPReg);
        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
        ClobberSReg(rl_dest.s_reg_low);
        rl_result = EvalLoc(rl_dest, kCoreReg, true);
        RegStorage temp_reg = AllocTempDouble();

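        // Same saturating pattern as FLOAT_TO_LONG, with the double-precision compare and
        // cvttsd2sqi.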
        // Set 0x7fffffffffffffff to rl_result
        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
        NewLIR2(kX86Cvtsqi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
        NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
        NewLIR2(kX86Cvttsd2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
        branch_normal->target = NewLIR0(kPseudoTargetLabel);
        StoreValueWide(rl_dest, rl_result);
      } else {
        CheckEntrypointTypes<kQuickD2l, int64_t, double>();  // int64_t -> kCoreReg
        GenConversionCall(kQuickD2l, rl_dest, rl_src, kCoreReg);
      }
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  // At this point, target will be either float or double.
  DCHECK(rl_dest.fp);
  if (rl_src.wide) {
    rl_src = LoadValueWide(rl_src, rcSrc);
  } else {
    rl_src = LoadValue(rl_src, rcSrc);
  }
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
  if (rl_dest.wide) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
}

void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                          bool is_double) {
  // Compute offsets to the source and destination VRs on stack.
  int src1_v_reg_offset = SRegOffset(rl_src1.s_reg_low);
  int src2_v_reg_offset = SRegOffset(rl_src2.s_reg_low);
  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);

  // Update the in-register state of sources.
  rl_src1 = is_double ? UpdateLocWide(rl_src1) : UpdateLoc(rl_src1);
  rl_src2 = is_double ? UpdateLocWide(rl_src2) : UpdateLoc(rl_src2);

  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  // If the source is in physical register, then put it in its location on stack.
  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
  if (rl_src1.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src1.reg);

    if (reg_info != nullptr && reg_info->IsTemp()) {
      // Calling FlushSpecificReg because it will only write back VR if it is dirty.
      FlushSpecificReg(reg_info);
      // ResetDef to prevent NullifyRange from removing stores.
      ResetDef(rl_src1.reg);
    } else {
      // It must have been register promoted if it is not a temp but is still in physical
      // register. Since we need it to be in memory to convert, we place it there now.
      StoreBaseDisp(rs_rSP, src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32,
                    kNotVolatile);
    }
  }

  if (rl_src2.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src2.reg);
    if (reg_info != nullptr && reg_info->IsTemp()) {
      FlushSpecificReg(reg_info);
      ResetDef(rl_src2.reg);
    } else {
      StoreBaseDisp(rs_rSP, src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32,
                    kNotVolatile);
    }
  }

  int fld_opcode = is_double ? kX86Fld64M : kX86Fld32M;

  // Push the source virtual registers onto the x87 stack.
  LIR *fld_2 = NewLIR2NoDest(fld_opcode, rs_rSP.GetReg(),
                             src2_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fld_2, (src2_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, is_double /* is64bit */);

  LIR *fld_1 = NewLIR2NoDest(fld_opcode, rs_rSP.GetReg(),
                             src1_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fld_1, (src1_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, is_double /* is64bit */);

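  // fnstsw can only store the FPU status word into AX, so claim rAX for the duration of the
  // fprem loop below.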
  FlushReg(rs_rAX);
  Clobber(rs_rAX);
  LockTemp(rs_rAX);

  LIR* retry = NewLIR0(kPseudoTargetLabel);

  // Compute the partial remainder of ST(0) divided by ST(1); the result replaces ST(0).
  NewLIR0(kX86Fprem);

  // Move FPU status word to AX.
  NewLIR0(kX86Fstsw16R);

  // Check if reduction is complete.
  OpRegImm(kOpAnd, rs_rAX, 0x400);

  // If not, continue reducing the remainder.
  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
  branch->target = retry;

  FreeTemp(rs_rAX);

  // Now store result in the destination VR's stack location.
  int displacement = dest_v_reg_offset + LOWORD_OFFSET;
  int opcode = is_double ? kX86Fst64M : kX86Fst32M;
  LIR *fst = NewLIR2NoDest(opcode, rs_rSP.GetReg(), displacement);
  AnnotateDalvikRegAccess(fst, displacement >> 2, false /* is_load */, is_double /* is64bit */);

  // Pop ST(1) and ST(0).
  NewLIR0(kX86Fucompp);

  /*
   * The result is in a physical register if it was in a temp or was register
   * promoted. For that reason it is enough to check if it is in physical
   * register. If it is, then we must do all of the bookkeeping necessary to
   * invalidate temp (if needed) and load in promoted register (if needed).
   * If the result's location is in memory, then we do not need to do anything
   * more since the fst above has already placed the correct value in memory.
   */
  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest) : UpdateLocTyped(rl_dest);
  if (rl_result.location == kLocPhysReg) {
    rl_result = EvalLoc(rl_dest, kFPReg, true);
    if (is_double) {
      LoadBaseDisp(rs_rSP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);
      StoreFinalValueWide(rl_dest, rl_result);
    } else {
      Load32Disp(rs_rSP, dest_v_reg_offset, rl_result.reg);
      StoreFinalValue(rl_dest, rl_result);
    }
  }
}

void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest,
                          RegLocation rl_src1, RegLocation rl_src2) {
  bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
  bool unordered_gt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT);
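  // Dalvik cmpg returns 1 when either operand is NaN, cmpl returns -1; ucomiss/ucomisd flag
  // the unordered case via the parity flag.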
  if (single) {
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
  } else {
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
  }
  // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
  ClobberSReg(rl_dest.s_reg_low);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  LoadConstantNoClobber(rl_result.reg, unordered_gt ? 1 : 0);
  if (single) {
    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  } else {
    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
  LIR* branch = nullptr;
  if (unordered_gt) {
    branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
  }
  // If the result reg can't be byte accessed, use a jump and move instead of a set.
  if (!IsByteRegister(rl_result.reg)) {
    LIR* branch2 = nullptr;
    if (unordered_gt) {
      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x0);
    } else {
      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondBe);
      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x1);
    }
    branch2->target = NewLIR0(kPseudoTargetLabel);
  } else {
    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondA /* above - unsigned > */);
  }
  NewLIR2(kX86Sbb32RI, rl_result.reg.GetReg(), 0);
  if (unordered_gt) {
    branch->target = NewLIR0(kPseudoTargetLabel);
  }
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
                                     bool is_double) {
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  LIR* branch = nullptr;
  RegLocation rl_src1;
  RegLocation rl_src2;
  if (is_double) {
    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  } else {
    rl_src1 = mir_graph_->GetSrc(mir, 0);
    rl_src2 = mir_graph_->GetSrc(mir, 1);
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
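  /*
   * ucomiss/ucomisd set the parity flag when either operand is NaN. Depending on the bias,
   * an unordered comparison must either take or skip the branch, so a JP is emitted first
   * where needed; the signed condition codes are then mapped to the unsigned forms that
   * match the flags produced by ucomis*.
   */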
  ConditionCode ccode = mir->meta.ccode;
  switch (ccode) {
    case kCondEq:
      if (!gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      break;
    case kCondNe:
      if (!gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      break;
    case kCondLt:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      ccode = kCondUlt;
      break;
    case kCondLe:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      ccode = kCondLs;
      break;
    case kCondGt:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      ccode = kCondHi;
      break;
    case kCondGe:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}

void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;
  rl_src = LoadValue(rl_src, kCoreReg);
  rl_result = EvalLoc(rl_dest, kCoreReg, true);
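  // Negate by toggling the sign bit in a core register: adding 0x80000000 flips bit 31 and
  // leaves the low 31 bits untouched.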
  OpRegRegImm(kOpAdd, rl_result.reg, rl_src.reg, 0x80000000);
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;
  rl_src = LoadValueWide(rl_src, kCoreReg);
  if (cu_->target64) {
    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    OpRegCopy(rl_result.reg, rl_src.reg);
    // Flip sign bit.
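    // Rotate the sign bit down to bit 0, toggle it with a small immediate, then rotate it
    // back; this avoids materializing a 64-bit 0x8000000000000000 constant.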
    NewLIR2(kX86Rol64RI, rl_result.reg.GetReg(), 1);
    NewLIR2(kX86Xor64RI, rl_result.reg.GetReg(), 1);
    NewLIR2(kX86Ror64RI, rl_result.reg.GetReg(), 1);
  } else {
    rl_result = ForceTempWide(rl_src);
    OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), 0x80000000);
  }
  StoreValueWide(rl_dest, rl_result);
}

bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) {
  RegLocation rl_dest = InlineTargetWide(info);  // double place for result
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  RegLocation rl_src = info->args[0];
  rl_src = LoadValueWide(rl_src, kFPReg);
  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
  NewLIR2(kX86SqrtsdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
  StoreValueWide(rl_dest, rl_result);
  return true;
}

bool X86Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
  // Get the argument
  RegLocation rl_src = info->args[0];

  // Get the inlined intrinsic target virtual register
  RegLocation rl_dest = InlineTarget(info);

  // Get the virtual register number
  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
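  // abs(float) just clears the sign bit, so it can be done on the raw bits in a core
  // register or directly on the VR's memory slot.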
  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);

  // if argument is the same as inlined intrinsic target
  if (v_src_reg == v_dst_reg) {
    rl_src = UpdateLoc(rl_src);

    // if argument is in the physical register
    if (rl_src.location == kLocPhysReg) {
      rl_src = LoadValue(rl_src, kCoreReg);
      OpRegImm(kOpAnd, rl_src.reg, 0x7fffffff);
      StoreValue(rl_dest, rl_src);
      return true;
    }
    // the argument is in memory
    DCHECK((rl_src.location == kLocDalvikFrame) ||
           (rl_src.location == kLocCompilerTemp));

    // Operate directly into memory.
    int displacement = SRegOffset(rl_dest.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP_32.GetReg(), displacement, 0x7fffffff);
    AnnotateDalvikRegAccess(lir, displacement >> 2, false /* is_load */, false /* is_64bit */);
    AnnotateDalvikRegAccess(lir, displacement >> 2, true /* is_load */, false /* is_64bit */);
    return true;
  } else {
    rl_src = LoadValue(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
    StoreValue(rl_dest, rl_result);
    return true;
  }
}

bool X86Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
  RegLocation rl_src = info->args[0];
  RegLocation rl_dest = InlineTargetWide(info);
  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
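  // abs(double) clears the sign bit: on x86-64 via a shift-left/shift-right pair on the
  // 64-bit core register, on x86-32 via a packed-and mask or by masking the high word.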
  if (cu_->target64) {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegCopyWide(rl_result.reg, rl_src.reg);
    OpRegImm(kOpLsl, rl_result.reg, 1);
    OpRegImm(kOpLsr, rl_result.reg, 1);
    StoreValueWide(rl_dest, rl_result);
    return true;
  }
  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
  rl_src = UpdateLocWide(rl_src);

  // if argument is in the physical XMM register
  if (rl_src.location == kLocPhysReg && rl_src.reg.IsFloat()) {
    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
    if (rl_result.reg != rl_src.reg) {
      LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
    } else {
      RegStorage sign_mask = AllocTempDouble();
      LoadConstantWide(sign_mask, 0x7fffffffffffffff);
      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), sign_mask.GetReg());
      FreeTemp(sign_mask);
    }
    StoreValueWide(rl_dest, rl_result);
    return true;
  } else if (v_src_reg == v_dst_reg) {
    // if argument is the same as inlined intrinsic target
    // if argument is in the physical register
    if (rl_src.location == kLocPhysReg) {
      rl_src = LoadValueWide(rl_src, kCoreReg);
      OpRegImm(kOpAnd, rl_src.reg.GetHigh(), 0x7fffffff);
      StoreValueWide(rl_dest, rl_src);
      return true;
    }
    // the argument is in memory
    DCHECK((rl_src.location == kLocDalvikFrame) ||
           (rl_src.location == kLocCompilerTemp));

    // Operate directly into memory.
    int displacement = SRegOffset(rl_dest.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP_32.GetReg(), displacement + HIWORD_OFFSET,
                       0x7fffffff);
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                            true /* is_load */, true /* is_64bit */);
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is_64bit */);
    return true;
  } else {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegCopyWide(rl_result.reg, rl_src.reg);
    OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
    StoreValueWide(rl_dest, rl_result);
    return true;
  }
}

bool X86Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
  if (is_double) {
    RegLocation rl_dest = InlineTargetWide(info);
    if (rl_dest.s_reg_low == INVALID_SREG) {
      // Result is unused, the code is dead. Inlining successful, no code generated.
      return true;
    }
    RegLocation rl_src1 = LoadValueWide(info->args[0], kFPReg);
    RegLocation rl_src2 = LoadValueWide(info->args[2], kFPReg);
    RegLocation rl_result = EvalLocWide(rl_dest, kFPReg, true);

    // Avoid src2 corruption by OpRegCopyWide.
    if (rl_result.reg == rl_src2.reg) {
      std::swap(rl_src2.reg, rl_src1.reg);
    }

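    /*
     * The compare below distinguishes NaN (return the canonical NaN), strict ordering (keep
     * src1, already in the result, or copy src2), and equality. For equal values the sign
     * bits are merged with OR (min) or AND (max) so that min(0.0, -0.0) is -0.0 and
     * max(0.0, -0.0) is 0.0.
     */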
    OpRegCopyWide(rl_result.reg, rl_src1.reg);
    NewLIR2(kX86UcomisdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    // If either arg is NaN, return NaN.
    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
    // Min/Max branches.
    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0.
    NewLIR2((is_min) ? kX86OrpdRR : kX86AndpdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
    // Handle NaN.
    branch_nan->target = NewLIR0(kPseudoTargetLabel);
    LoadConstantWide(rl_result.reg, INT64_C(0x7ff8000000000000));

    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
    // Handle Min/Max. Copy greater/lesser value from src2.
    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
    OpRegCopyWide(rl_result.reg, rl_src2.reg);
    // Right operand is already in result reg.
    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
    // Exit.
    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
    StoreValueWide(rl_dest, rl_result);
  } else {
    RegLocation rl_dest = InlineTarget(info);
    if (rl_dest.s_reg_low == INVALID_SREG) {
      // Result is unused, the code is dead. Inlining successful, no code generated.
      return true;
    }
    RegLocation rl_src1 = LoadValue(info->args[0], kFPReg);
    RegLocation rl_src2 = LoadValue(info->args[1], kFPReg);
    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);

    // Avoid src2 corruption by OpRegCopy.
    if (rl_result.reg == rl_src2.reg) {
      std::swap(rl_src2.reg, rl_src1.reg);
    }

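    // Same scheme as the double path above, using the single-precision compare, the
    // packed-single OR/AND forms, and the canonical float NaN 0x7fc00000.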
    OpRegCopy(rl_result.reg, rl_src1.reg);
    NewLIR2(kX86UcomissRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    // If either arg is NaN, return NaN.
    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
    // Min/Max branches.
    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0.
    NewLIR2((is_min) ? kX86OrpsRR : kX86AndpsRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
    // Handle NaN.
    branch_nan->target = NewLIR0(kPseudoTargetLabel);
    LoadConstantNoClobber(rl_result.reg, 0x7fc00000);
    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
    // Handle Min/Max. Copy greater/lesser value from src2.
    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
    OpRegCopy(rl_result.reg, rl_src2.reg);
    // Right operand is already in result reg.
    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
    // Exit.
    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

}  // namespace art