1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "codegen_arm.h"
18
19 #include "arm_lir.h"
20 #include "base/logging.h"
21 #include "dex/mir_graph.h"
22 #include "dex/quick/mir_to_lir-inl.h"
23
24 namespace art {
25
GenArithOpFloat(Instruction::Code opcode,RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_src2)26 void ArmMir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
27 RegLocation rl_src1, RegLocation rl_src2) {
28 int op = kThumbBkpt;
29 RegLocation rl_result;
30
31 /*
32 * Don't attempt to optimize register usage since these opcodes call out to
33 * the handlers.
34 */
35 switch (opcode) {
36 case Instruction::ADD_FLOAT_2ADDR:
37 case Instruction::ADD_FLOAT:
38 op = kThumb2Vadds;
39 break;
40 case Instruction::SUB_FLOAT_2ADDR:
41 case Instruction::SUB_FLOAT:
42 op = kThumb2Vsubs;
43 break;
44 case Instruction::DIV_FLOAT_2ADDR:
45 case Instruction::DIV_FLOAT:
46 op = kThumb2Vdivs;
47 break;
48 case Instruction::MUL_FLOAT_2ADDR:
49 case Instruction::MUL_FLOAT:
50 op = kThumb2Vmuls;
51 break;
52 case Instruction::REM_FLOAT_2ADDR:
53 case Instruction::REM_FLOAT:
54 FlushAllRegs(); // Send everything to home location
55 CallRuntimeHelperRegLocationRegLocation(kQuickFmodf, rl_src1, rl_src2, false);
56 rl_result = GetReturn(kFPReg);
57 StoreValue(rl_dest, rl_result);
58 return;
59 case Instruction::NEG_FLOAT:
60 GenNegFloat(rl_dest, rl_src1);
61 return;
62 default:
63 LOG(FATAL) << "Unexpected opcode: " << opcode;
64 }
65 rl_src1 = LoadValue(rl_src1, kFPReg);
66 rl_src2 = LoadValue(rl_src2, kFPReg);
67 rl_result = EvalLoc(rl_dest, kFPReg, true);
68 NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
69 StoreValue(rl_dest, rl_result);
70 }
71
GenArithOpDouble(Instruction::Code opcode,RegLocation rl_dest,RegLocation rl_src1,RegLocation rl_src2)72 void ArmMir2Lir::GenArithOpDouble(Instruction::Code opcode,
73 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
74 int op = kThumbBkpt;
75 RegLocation rl_result;
76
77 switch (opcode) {
78 case Instruction::ADD_DOUBLE_2ADDR:
79 case Instruction::ADD_DOUBLE:
80 op = kThumb2Vaddd;
81 break;
82 case Instruction::SUB_DOUBLE_2ADDR:
83 case Instruction::SUB_DOUBLE:
84 op = kThumb2Vsubd;
85 break;
86 case Instruction::DIV_DOUBLE_2ADDR:
87 case Instruction::DIV_DOUBLE:
88 op = kThumb2Vdivd;
89 break;
90 case Instruction::MUL_DOUBLE_2ADDR:
91 case Instruction::MUL_DOUBLE:
92 op = kThumb2Vmuld;
93 break;
94 case Instruction::REM_DOUBLE_2ADDR:
95 case Instruction::REM_DOUBLE:
96 FlushAllRegs(); // Send everything to home location
97 CallRuntimeHelperRegLocationRegLocation(kQuickFmod, rl_src1, rl_src2, false);
98 rl_result = GetReturnWide(kFPReg);
99 StoreValueWide(rl_dest, rl_result);
100 return;
101 case Instruction::NEG_DOUBLE:
102 GenNegDouble(rl_dest, rl_src1);
103 return;
104 default:
105 LOG(FATAL) << "Unexpected opcode: " << opcode;
106 }
107
108 rl_src1 = LoadValueWide(rl_src1, kFPReg);
109 DCHECK(rl_src1.wide);
110 rl_src2 = LoadValueWide(rl_src2, kFPReg);
111 DCHECK(rl_src2.wide);
112 rl_result = EvalLoc(rl_dest, kFPReg, true);
113 DCHECK(rl_dest.wide);
114 DCHECK(rl_result.wide);
115 NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
116 StoreValueWide(rl_dest, rl_result);
117 }
118
GenMultiplyByConstantFloat(RegLocation rl_dest,RegLocation rl_src1,int32_t constant)119 void ArmMir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
120 int32_t constant) {
121 RegLocation rl_result;
122 RegStorage r_tmp = AllocTempSingle();
123 LoadConstantNoClobber(r_tmp, constant);
124 rl_src1 = LoadValue(rl_src1, kFPReg);
125 rl_result = EvalLoc(rl_dest, kFPReg, true);
126 NewLIR3(kThumb2Vmuls, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
127 StoreValue(rl_dest, rl_result);
128 }
129
GenMultiplyByConstantDouble(RegLocation rl_dest,RegLocation rl_src1,int64_t constant)130 void ArmMir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
131 int64_t constant) {
132 RegLocation rl_result;
133 RegStorage r_tmp = AllocTempDouble();
134 DCHECK(r_tmp.IsDouble());
135 LoadConstantWide(r_tmp, constant);
136 rl_src1 = LoadValueWide(rl_src1, kFPReg);
137 DCHECK(rl_src1.wide);
138 rl_result = EvalLocWide(rl_dest, kFPReg, true);
139 DCHECK(rl_dest.wide);
140 DCHECK(rl_result.wide);
141 NewLIR3(kThumb2Vmuld, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
142 StoreValueWide(rl_dest, rl_result);
143 }
144
// Generate code for the dex numeric conversion opcodes.
//
// Simple conversions map to a single VFP vcvt instruction (chosen in the
// switch and emitted on the common path at the bottom).  LONG_TO_DOUBLE and
// LONG_TO_FLOAT are open-coded as
//     result = (double)(int32_t)high * 2^32 + (double)(uint32_t)low
// using two vcvt's plus a vmla against the constant 2^32.  FLOAT_TO_LONG and
// DOUBLE_TO_LONG call out to runtime helpers.
void ArmMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) {
  int op = kThumbBkpt;  // Filled in with the vcvt variant for the simple cases.
  int src_reg;
  RegLocation rl_result;

  switch (opcode) {
    case Instruction::INT_TO_FLOAT:
      op = kThumb2VcvtIF;
      break;
    case Instruction::FLOAT_TO_INT:
      op = kThumb2VcvtFI;
      break;
    case Instruction::DOUBLE_TO_FLOAT:
      op = kThumb2VcvtDF;
      break;
    case Instruction::FLOAT_TO_DOUBLE:
      op = kThumb2VcvtFd;
      break;
    case Instruction::INT_TO_DOUBLE:
      op = kThumb2VcvtF64S32;
      break;
    case Instruction::DOUBLE_TO_INT:
      op = kThumb2VcvtDI;
      break;
    case Instruction::LONG_TO_DOUBLE: {
      rl_src = LoadValueWide(rl_src, kFPReg);
      // View the 64-bit fp pair as its two 32-bit single halves so each half
      // can be converted independently.
      RegisterInfo* info = GetRegInfo(rl_src.reg);
      RegStorage src_low = info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg();
      DCHECK(src_low.Valid());
      RegStorage src_high = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg();
      DCHECK(src_high.Valid());
      rl_result = EvalLoc(rl_dest, kFPReg, true);
      RegStorage tmp1 = AllocTempDouble();
      RegStorage tmp2 = AllocTempDouble();

      // tmp1 = (double)(int32_t)high; result = (double)(uint32_t)low.
      NewLIR2(kThumb2VcvtF64S32, tmp1.GetReg(), src_high.GetReg());
      NewLIR2(kThumb2VcvtF64U32, rl_result.reg.GetReg(), src_low.GetReg());
      // 0x41f0000000000000 is the IEEE-754 double encoding of 2^32.
      LoadConstantWide(tmp2, 0x41f0000000000000LL);
      // result += tmp1 * 2^32  (fused via vmla).
      NewLIR3(kThumb2VmlaF64, rl_result.reg.GetReg(), tmp1.GetReg(), tmp2.GetReg());
      FreeTemp(tmp1);
      FreeTemp(tmp2);
      StoreValueWide(rl_dest, rl_result);
      return;
    }
    case Instruction::FLOAT_TO_LONG:
      CheckEntrypointTypes<kQuickF2l, int64_t, float>();  // int64_t -> kCoreReg
      GenConversionCall(kQuickF2l, rl_dest, rl_src, kCoreReg);
      return;
    case Instruction::LONG_TO_FLOAT: {
      // Same long->double trick as above, followed by a double->float vcvt.
      rl_src = LoadValueWide(rl_src, kFPReg);
      RegisterInfo* info = GetRegInfo(rl_src.reg);
      RegStorage src_low = info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg();
      DCHECK(src_low.Valid());
      RegStorage src_high = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg();
      DCHECK(src_high.Valid());
      rl_result = EvalLoc(rl_dest, kFPReg, true);
      // Allocate temp registers.
      RegStorage high_val = AllocTempDouble();
      RegStorage low_val = AllocTempDouble();
      RegStorage const_val = AllocTempDouble();
      // Long to double.
      NewLIR2(kThumb2VcvtF64S32, high_val.GetReg(), src_high.GetReg());
      NewLIR2(kThumb2VcvtF64U32, low_val.GetReg(), src_low.GetReg());
      // 2^32 as an IEEE-754 double.
      LoadConstantWide(const_val, INT64_C(0x41f0000000000000));
      NewLIR3(kThumb2VmlaF64, low_val.GetReg(), high_val.GetReg(), const_val.GetReg());
      // Double to float.
      NewLIR2(kThumb2VcvtDF, rl_result.reg.GetReg(), low_val.GetReg());
      // Free temp registers.
      FreeTemp(high_val);
      FreeTemp(low_val);
      FreeTemp(const_val);
      // Store result.
      StoreValue(rl_dest, rl_result);
      return;
    }
    case Instruction::DOUBLE_TO_LONG:
      CheckEntrypointTypes<kQuickD2l, int64_t, double>();  // int64_t -> kCoreReg
      GenConversionCall(kQuickD2l, rl_dest, rl_src, kCoreReg);
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  // Common single-instruction path: load source (wide or narrow), emit the
  // selected vcvt, and store to the (wide or narrow) destination.
  if (rl_src.wide) {
    rl_src = LoadValueWide(rl_src, kFPReg);
    src_reg = rl_src.reg.GetReg();
  } else {
    rl_src = LoadValue(rl_src, kFPReg);
    src_reg = rl_src.reg.GetReg();
  }
  if (rl_dest.wide) {
    rl_result = EvalLoc(rl_dest, kFPReg, true);
    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
    StoreValueWide(rl_dest, rl_result);
  } else {
    rl_result = EvalLoc(rl_dest, kFPReg, true);
    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
    StoreValue(rl_dest, rl_result);
  }
}
244
// Generate a fused fp-compare-and-branch: vcmp the two sources, copy the VFP
// status flags to the core flags (fmstat), then conditionally branch to the
// block's taken target.
//
// |gt_bias| is true for the cmpg flavor (NaN compares as "greater").  For
// each signed condition we substitute an ARM condition that evaluates the
// intended way when the vcmp result is unordered (NaN) — see the per-case
// notes below.
void ArmMir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
                                     bool is_double) {
  LIR* target = &block_label_list_[bb->taken];
  RegLocation rl_src1;
  RegLocation rl_src2;
  if (is_double) {
    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
    NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  } else {
    rl_src1 = mir_graph_->GetSrc(mir, 0);
    rl_src2 = mir_graph_->GetSrc(mir, 1);
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
    NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
  // Transfer FPSCR N/Z/C/V into the APSR so the branch can test them.
  NewLIR0(kThumb2Fmstat);
  ConditionCode ccode = mir->meta.ccode;
  switch (ccode) {
    case kCondEq:
    case kCondNe:
      // EQ/NE behave correctly for NaN as-is (unordered leaves Z clear).
      break;
    case kCondLt:
      if (gt_bias) {
        // MI is false on unordered, so NaN is not "less than" (gt bias).
        ccode = kCondMi;
      }
      break;
    case kCondLe:
      if (gt_bias) {
        // LS is false on unordered, so NaN is not "less or equal" (gt bias).
        ccode = kCondLs;
      }
      break;
    case kCondGt:
      if (gt_bias) {
        // HI is true on unordered, so NaN counts as "greater" (gt bias).
        ccode = kCondHi;
      }
      break;
    case kCondGe:
      if (gt_bias) {
        // Unsigned-ge (CS) is true on unordered, so NaN counts as "greater".
        ccode = kCondUge;
      }
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, target);
}
294
295
// Generate code for the dex cmpl/cmpg float/double opcodes: result is -1 if
// src1 < src2, 0 if equal, +1 if src1 > src2.  For NaN, cmpl yields -1 and
// cmpg yields +1 — that NaN outcome is |default_result|, loaded first and
// then conditionally overwritten via IT blocks below.
void ArmMir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest,
                          RegLocation rl_src1, RegLocation rl_src2) {
  bool is_double = false;
  int default_result = -1;  // The value produced when the compare is unordered.
  RegLocation rl_result;

  switch (opcode) {
    case Instruction::CMPL_FLOAT:
      is_double = false;
      default_result = -1;
      break;
    case Instruction::CMPG_FLOAT:
      is_double = false;
      default_result = 1;
      break;
    case Instruction::CMPL_DOUBLE:
      is_double = true;
      default_result = -1;
      break;
    case Instruction::CMPG_DOUBLE:
      is_double = true;
      default_result = 1;
      break;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  if (is_double) {
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
    // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
    ClobberSReg(rl_dest.s_reg_low);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    // Pre-load the NaN/default outcome before the compare sets the flags.
    LoadConstant(rl_result.reg, default_result);
    NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  } else {
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
    // In case result vreg is also a srcvreg, break association to avoid useless copy by EvalLoc()
    ClobberSReg(rl_dest.s_reg_low);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    // Pre-load the NaN/default outcome before the compare sets the flags.
    LoadConstant(rl_result.reg, default_result);
    NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
  DCHECK(!rl_result.reg.IsFloat());
  // Copy FPSCR flags to the APSR so the IT blocks below can test them.
  NewLIR0(kThumb2Fmstat);

  // If the compare went the opposite way of the default, flip the result to
  // -default_result.  GT/MI are both false on unordered, so NaN keeps the
  // preloaded default.  The mov must not alter the condition codes, since
  // the EQ test below still needs them.
  LIR* it = OpIT((default_result == -1) ? kCondGt : kCondMi, "");
  NewLIR2(kThumb2MovI8M, rl_result.reg.GetReg(),
          ModifiedImmediate(-default_result));  // Must not alter ccodes
  OpEndIT(it);

  // Equal operands: result is 0 (EQ is false on unordered, so NaN is safe).
  it = OpIT(kCondEq, "");
  LoadConstant(rl_result.reg, 0);
  OpEndIT(it);

  StoreValue(rl_dest, rl_result);
}
353
GenNegFloat(RegLocation rl_dest,RegLocation rl_src)354 void ArmMir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
355 RegLocation rl_result;
356 rl_src = LoadValue(rl_src, kFPReg);
357 rl_result = EvalLoc(rl_dest, kFPReg, true);
358 NewLIR2(kThumb2Vnegs, rl_result.reg.GetReg(), rl_src.reg.GetReg());
359 StoreValue(rl_dest, rl_result);
360 }
361
GenNegDouble(RegLocation rl_dest,RegLocation rl_src)362 void ArmMir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
363 RegLocation rl_result;
364 rl_src = LoadValueWide(rl_src, kFPReg);
365 rl_result = EvalLoc(rl_dest, kFPReg, true);
366 NewLIR2(kThumb2Vnegd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
367 StoreValueWide(rl_dest, rl_result);
368 }
369
RegClassForAbsFP(RegLocation rl_src,RegLocation rl_dest)370 static RegisterClass RegClassForAbsFP(RegLocation rl_src, RegLocation rl_dest) {
371 // If src is in a core reg or, unlikely, dest has been promoted to a core reg, use core reg.
372 if ((rl_src.location == kLocPhysReg && !rl_src.reg.IsFloat()) ||
373 (rl_dest.location == kLocPhysReg && !rl_dest.reg.IsFloat())) {
374 return kCoreReg;
375 }
376 // If src is in an fp reg or dest has been promoted to an fp reg, use fp reg.
377 if (rl_src.location == kLocPhysReg || rl_dest.location == kLocPhysReg) {
378 return kFPReg;
379 }
380 // With both src and dest in the stack frame we have to perform load+abs+store. Whether this
381 // is faster using a core reg or fp reg depends on the particular CPU. Without further
382 // investigation and testing we prefer core register. (If the result is subsequently used in
383 // another fp operation, the dalvik reg will probably get promoted and that should be handled
384 // by the cases above.)
385 return kCoreReg;
386 }
387
GenInlinedAbsFloat(CallInfo * info)388 bool ArmMir2Lir::GenInlinedAbsFloat(CallInfo* info) {
389 if (info->result.location == kLocInvalid) {
390 return true; // Result is unused: inlining successful, no code generated.
391 }
392 RegLocation rl_dest = info->result;
393 RegLocation rl_src = UpdateLoc(info->args[0]);
394 RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
395 rl_src = LoadValue(rl_src, reg_class);
396 RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
397 if (reg_class == kFPReg) {
398 NewLIR2(kThumb2Vabss, rl_result.reg.GetReg(), rl_src.reg.GetReg());
399 } else {
400 OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
401 }
402 StoreValue(rl_dest, rl_result);
403 return true;
404 }
405
GenInlinedAbsDouble(CallInfo * info)406 bool ArmMir2Lir::GenInlinedAbsDouble(CallInfo* info) {
407 if (info->result.location == kLocInvalid) {
408 return true; // Result is unused: inlining successful, no code generated.
409 }
410 RegLocation rl_dest = info->result;
411 RegLocation rl_src = UpdateLocWide(info->args[0]);
412 RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
413 rl_src = LoadValueWide(rl_src, reg_class);
414 RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
415 if (reg_class == kFPReg) {
416 NewLIR2(kThumb2Vabsd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
417 } else if (rl_result.reg.GetLow().GetReg() != rl_src.reg.GetHigh().GetReg()) {
418 // No inconvenient overlap.
419 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
420 OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x7fffffff);
421 } else {
422 // Inconvenient overlap, use a temp register to preserve the high word of the source.
423 RegStorage rs_tmp = AllocTemp();
424 OpRegCopy(rs_tmp, rl_src.reg.GetHigh());
425 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
426 OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rs_tmp, 0x7fffffff);
427 FreeTemp(rs_tmp);
428 }
429 StoreValueWide(rl_dest, rl_result);
430 return true;
431 }
432
GenInlinedSqrt(CallInfo * info)433 bool ArmMir2Lir::GenInlinedSqrt(CallInfo* info) {
434 DCHECK_EQ(cu_->instruction_set, kThumb2);
435 RegLocation rl_src = info->args[0];
436 RegLocation rl_dest = InlineTargetWide(info); // double place for result
437 rl_src = LoadValueWide(rl_src, kFPReg);
438 RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
439 NewLIR2(kThumb2Vsqrtd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
440 StoreValueWide(rl_dest, rl_result);
441 return true;
442 }
443
444
445 } // namespace art
446