1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "scheduler_arm.h"
18
19 #include "arch/arm/instruction_set_features_arm.h"
20 #include "code_generator_utils.h"
21 #include "common_arm.h"
22 #include "heap_poisoning.h"
23 #include "mirror/array-inl.h"
24 #include "mirror/string.h"
25
26 namespace art {
27 namespace arm {
28
29 using helpers::Int32ConstantFrom;
30 using helpers::Uint64ConstantFrom;
31
HandleBinaryOperationLantencies(HBinaryOperation * instr)32 void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) {
33 switch (instr->GetResultType()) {
34 case DataType::Type::kInt64:
35 // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs,
36 // so a bubble (kArmNopLatency) is added to represent the internal carry flag
37 // dependency inside these pairs.
38 last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency;
39 last_visited_latency_ = kArmIntegerOpLatency;
40 break;
41 case DataType::Type::kFloat32:
42 case DataType::Type::kFloat64:
43 last_visited_latency_ = kArmFloatingPointOpLatency;
44 break;
45 default:
46 last_visited_latency_ = kArmIntegerOpLatency;
47 break;
48 }
49 }
50
VisitAdd(HAdd * instr)51 void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) {
52 HandleBinaryOperationLantencies(instr);
53 }
54
VisitSub(HSub * instr)55 void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) {
56 HandleBinaryOperationLantencies(instr);
57 }
58
VisitMul(HMul * instr)59 void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) {
60 switch (instr->GetResultType()) {
61 case DataType::Type::kInt64:
62 last_visited_internal_latency_ = 3 * kArmMulIntegerLatency;
63 last_visited_latency_ = kArmIntegerOpLatency;
64 break;
65 case DataType::Type::kFloat32:
66 case DataType::Type::kFloat64:
67 last_visited_latency_ = kArmMulFloatingPointLatency;
68 break;
69 default:
70 last_visited_latency_ = kArmMulIntegerLatency;
71 break;
72 }
73 }
74
HandleBitwiseOperationLantencies(HBinaryOperation * instr)75 void SchedulingLatencyVisitorARM::HandleBitwiseOperationLantencies(HBinaryOperation* instr) {
76 switch (instr->GetResultType()) {
77 case DataType::Type::kInt64:
78 last_visited_internal_latency_ = kArmIntegerOpLatency;
79 last_visited_latency_ = kArmIntegerOpLatency;
80 break;
81 case DataType::Type::kFloat32:
82 case DataType::Type::kFloat64:
83 last_visited_latency_ = kArmFloatingPointOpLatency;
84 break;
85 default:
86 last_visited_latency_ = kArmIntegerOpLatency;
87 break;
88 }
89 }
90
VisitAnd(HAnd * instr)91 void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) {
92 HandleBitwiseOperationLantencies(instr);
93 }
94
VisitOr(HOr * instr)95 void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) {
96 HandleBitwiseOperationLantencies(instr);
97 }
98
VisitXor(HXor * instr)99 void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) {
100 HandleBitwiseOperationLantencies(instr);
101 }
102
VisitRor(HRor * instr)103 void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) {
104 switch (instr->GetResultType()) {
105 case DataType::Type::kInt32:
106 last_visited_latency_ = kArmIntegerOpLatency;
107 break;
108 case DataType::Type::kInt64: {
109 // HandleLongRotate
110 HInstruction* rhs = instr->GetRight();
111 if (rhs->IsConstant()) {
112 uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
113 if (rot != 0u) {
114 last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
115 last_visited_latency_ = kArmIntegerOpLatency;
116 } else {
117 last_visited_internal_latency_ = kArmIntegerOpLatency;
118 last_visited_latency_ = kArmIntegerOpLatency;
119 }
120 } else {
121 last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency;
122 last_visited_latency_ = kArmBranchLatency;
123 }
124 break;
125 }
126 default:
127 LOG(FATAL) << "Unexpected operation type " << instr->GetResultType();
128 UNREACHABLE();
129 }
130 }
131
HandleShiftLatencies(HBinaryOperation * instr)132 void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) {
133 DataType::Type type = instr->GetResultType();
134 HInstruction* rhs = instr->GetRight();
135 switch (type) {
136 case DataType::Type::kInt32:
137 if (!rhs->IsConstant()) {
138 last_visited_internal_latency_ = kArmIntegerOpLatency;
139 }
140 last_visited_latency_ = kArmIntegerOpLatency;
141 break;
142 case DataType::Type::kInt64:
143 if (!rhs->IsConstant()) {
144 last_visited_internal_latency_ = 8 * kArmIntegerOpLatency;
145 } else {
146 uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
147 if (shift_value == 1 || shift_value >= 32) {
148 last_visited_internal_latency_ = kArmIntegerOpLatency;
149 } else {
150 last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
151 }
152 }
153 last_visited_latency_ = kArmIntegerOpLatency;
154 break;
155 default:
156 LOG(FATAL) << "Unexpected operation type " << type;
157 UNREACHABLE();
158 }
159 }
160
VisitShl(HShl * instr)161 void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) {
162 HandleShiftLatencies(instr);
163 }
164
VisitShr(HShr * instr)165 void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) {
166 HandleShiftLatencies(instr);
167 }
168
VisitUShr(HUShr * instr)169 void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) {
170 HandleShiftLatencies(instr);
171 }
172
HandleGenerateConditionWithZero(IfCondition condition)173 void SchedulingLatencyVisitorARM::HandleGenerateConditionWithZero(IfCondition condition) {
174 switch (condition) {
175 case kCondEQ:
176 case kCondBE:
177 case kCondNE:
178 case kCondA:
179 last_visited_internal_latency_ += kArmIntegerOpLatency;
180 last_visited_latency_ = kArmIntegerOpLatency;
181 break;
182 case kCondGE:
183 // Mvn
184 last_visited_internal_latency_ += kArmIntegerOpLatency;
185 FALLTHROUGH_INTENDED;
186 case kCondLT:
187 // Lsr
188 last_visited_latency_ = kArmIntegerOpLatency;
189 break;
190 case kCondAE:
191 // Trivially true.
192 // Mov
193 last_visited_latency_ = kArmIntegerOpLatency;
194 break;
195 case kCondB:
196 // Trivially false.
197 // Mov
198 last_visited_latency_ = kArmIntegerOpLatency;
199 break;
200 default:
201 LOG(FATAL) << "Unexpected condition " << condition;
202 UNREACHABLE();
203 }
204 }
205
HandleGenerateLongTestConstant(HCondition * condition)206 void SchedulingLatencyVisitorARM::HandleGenerateLongTestConstant(HCondition* condition) {
207 DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
208
209 IfCondition cond = condition->GetCondition();
210
211 HInstruction* right = condition->InputAt(1);
212
213 int64_t value = Uint64ConstantFrom(right);
214
215 // Comparisons against 0 are common enough, so codegen has special handling for them.
216 if (value == 0) {
217 switch (cond) {
218 case kCondNE:
219 case kCondA:
220 case kCondEQ:
221 case kCondBE:
222 // Orrs
223 last_visited_internal_latency_ += kArmIntegerOpLatency;
224 return;
225 case kCondLT:
226 case kCondGE:
227 // Cmp
228 last_visited_internal_latency_ += kArmIntegerOpLatency;
229 return;
230 case kCondB:
231 case kCondAE:
232 // Cmp
233 last_visited_internal_latency_ += kArmIntegerOpLatency;
234 return;
235 default:
236 break;
237 }
238 }
239
240 switch (cond) {
241 case kCondEQ:
242 case kCondNE:
243 case kCondB:
244 case kCondBE:
245 case kCondA:
246 case kCondAE: {
247 // Cmp, IT, Cmp
248 last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
249 break;
250 }
251 case kCondLE:
252 case kCondGT:
253 // Trivially true or false.
254 if (value == std::numeric_limits<int64_t>::max()) {
255 // Cmp
256 last_visited_internal_latency_ += kArmIntegerOpLatency;
257 break;
258 }
259 FALLTHROUGH_INTENDED;
260 case kCondGE:
261 case kCondLT: {
262 // Cmp, Sbcs
263 last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
264 break;
265 }
266 default:
267 LOG(FATAL) << "Unreachable";
268 UNREACHABLE();
269 }
270 }
271
HandleGenerateLongTest(HCondition * condition)272 void SchedulingLatencyVisitorARM::HandleGenerateLongTest(HCondition* condition) {
273 DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
274
275 IfCondition cond = condition->GetCondition();
276
277 switch (cond) {
278 case kCondEQ:
279 case kCondNE:
280 case kCondB:
281 case kCondBE:
282 case kCondA:
283 case kCondAE: {
284 // Cmp, IT, Cmp
285 last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
286 break;
287 }
288 case kCondLE:
289 case kCondGT:
290 case kCondGE:
291 case kCondLT: {
292 // Cmp, Sbcs
293 last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
294 break;
295 }
296 default:
297 LOG(FATAL) << "Unreachable";
298 UNREACHABLE();
299 }
300 }
301
302 // The GenerateTest series of function all counted as internal latency.
HandleGenerateTest(HCondition * condition)303 void SchedulingLatencyVisitorARM::HandleGenerateTest(HCondition* condition) {
304 const DataType::Type type = condition->GetLeft()->GetType();
305
306 if (type == DataType::Type::kInt64) {
307 condition->InputAt(1)->IsConstant()
308 ? HandleGenerateLongTestConstant(condition)
309 : HandleGenerateLongTest(condition);
310 } else if (DataType::IsFloatingPointType(type)) {
311 // GenerateVcmp + Vmrs
312 last_visited_internal_latency_ += 2 * kArmFloatingPointOpLatency;
313 } else {
314 // Cmp
315 last_visited_internal_latency_ += kArmIntegerOpLatency;
316 }
317 }
318
CanGenerateTest(HCondition * condition)319 bool SchedulingLatencyVisitorARM::CanGenerateTest(HCondition* condition) {
320 if (condition->GetLeft()->GetType() == DataType::Type::kInt64) {
321 HInstruction* right = condition->InputAt(1);
322
323 if (right->IsConstant()) {
324 IfCondition c = condition->GetCondition();
325 const uint64_t value = Uint64ConstantFrom(right);
326
327 if (c < kCondLT || c > kCondGE) {
328 if (value != 0) {
329 return false;
330 }
331 } else if (c == kCondLE || c == kCondGT) {
332 if (value < std::numeric_limits<int64_t>::max() &&
333 !codegen_->GetAssembler()->ShifterOperandCanHold(
334 SBC, High32Bits(value + 1), vixl32::FlagsUpdate::SetFlags)) {
335 return false;
336 }
337 } else if (!codegen_->GetAssembler()->ShifterOperandCanHold(
338 SBC, High32Bits(value), vixl32::FlagsUpdate::SetFlags)) {
339 return false;
340 }
341 }
342 }
343
344 return true;
345 }
346
HandleGenerateConditionGeneric(HCondition * cond)347 void SchedulingLatencyVisitorARM::HandleGenerateConditionGeneric(HCondition* cond) {
348 HandleGenerateTest(cond);
349
350 // Unlike codegen pass, we cannot check 'out' register IsLow() here,
351 // because scheduling is before liveness(location builder) and register allocator,
352 // so we can only choose to follow one path of codegen by assuming otu.IsLow() is true.
353 last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
354 last_visited_latency_ = kArmIntegerOpLatency;
355 }
356
HandleGenerateEqualLong(HCondition * cond)357 void SchedulingLatencyVisitorARM::HandleGenerateEqualLong(HCondition* cond) {
358 DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
359
360 IfCondition condition = cond->GetCondition();
361
362 last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
363
364 if (condition == kCondNE) {
365 // Orrs, IT, Mov
366 last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
367 } else {
368 last_visited_internal_latency_ += kArmIntegerOpLatency;
369 HandleGenerateConditionWithZero(condition);
370 }
371 }
372
HandleGenerateLongComparesAndJumps()373 void SchedulingLatencyVisitorARM::HandleGenerateLongComparesAndJumps() {
374 last_visited_internal_latency_ += 4 * kArmIntegerOpLatency;
375 last_visited_internal_latency_ += kArmBranchLatency;
376 }
377
HandleGenerateConditionLong(HCondition * cond)378 void SchedulingLatencyVisitorARM::HandleGenerateConditionLong(HCondition* cond) {
379 DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
380
381 IfCondition condition = cond->GetCondition();
382 HInstruction* right = cond->InputAt(1);
383
384 if (right->IsConstant()) {
385 // Comparisons against 0 are common enough, so codegen has special handling for them.
386 if (Uint64ConstantFrom(right) == 0) {
387 switch (condition) {
388 case kCondNE:
389 case kCondA:
390 case kCondEQ:
391 case kCondBE:
392 // Orr
393 last_visited_internal_latency_ += kArmIntegerOpLatency;
394 HandleGenerateConditionWithZero(condition);
395 return;
396 case kCondLT:
397 case kCondGE:
398 FALLTHROUGH_INTENDED;
399 case kCondAE:
400 case kCondB:
401 HandleGenerateConditionWithZero(condition);
402 return;
403 case kCondLE:
404 case kCondGT:
405 default:
406 break;
407 }
408 }
409 }
410
411 if ((condition == kCondEQ || condition == kCondNE) &&
412 !CanGenerateTest(cond)) {
413 HandleGenerateEqualLong(cond);
414 return;
415 }
416
417 if (CanGenerateTest(cond)) {
418 HandleGenerateConditionGeneric(cond);
419 return;
420 }
421
422 HandleGenerateLongComparesAndJumps();
423
424 last_visited_internal_latency_ += kArmIntegerOpLatency;
425 last_visited_latency_ = kArmBranchLatency;;
426 }
427
HandleGenerateConditionIntegralOrNonPrimitive(HCondition * cond)428 void SchedulingLatencyVisitorARM::HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond) {
429 const DataType::Type type = cond->GetLeft()->GetType();
430
431 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
432
433 if (type == DataType::Type::kInt64) {
434 HandleGenerateConditionLong(cond);
435 return;
436 }
437
438 IfCondition condition = cond->GetCondition();
439 HInstruction* right = cond->InputAt(1);
440 int64_t value;
441
442 if (right->IsConstant()) {
443 value = Uint64ConstantFrom(right);
444
445 // Comparisons against 0 are common enough, so codegen has special handling for them.
446 if (value == 0) {
447 switch (condition) {
448 case kCondNE:
449 case kCondA:
450 case kCondEQ:
451 case kCondBE:
452 case kCondLT:
453 case kCondGE:
454 case kCondAE:
455 case kCondB:
456 HandleGenerateConditionWithZero(condition);
457 return;
458 case kCondLE:
459 case kCondGT:
460 default:
461 break;
462 }
463 }
464 }
465
466 if (condition == kCondEQ || condition == kCondNE) {
467 if (condition == kCondNE) {
468 // CMP, IT, MOV.ne
469 last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
470 last_visited_latency_ = kArmIntegerOpLatency;
471 } else {
472 last_visited_internal_latency_ += kArmIntegerOpLatency;
473 HandleGenerateConditionWithZero(condition);
474 }
475 return;
476 }
477
478 HandleGenerateConditionGeneric(cond);
479 }
480
HandleCondition(HCondition * cond)481 void SchedulingLatencyVisitorARM::HandleCondition(HCondition* cond) {
482 if (cond->IsEmittedAtUseSite()) {
483 last_visited_latency_ = 0;
484 return;
485 }
486
487 const DataType::Type type = cond->GetLeft()->GetType();
488
489 if (DataType::IsFloatingPointType(type)) {
490 HandleGenerateConditionGeneric(cond);
491 return;
492 }
493
494 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
495
496 const IfCondition condition = cond->GetCondition();
497
498 if (type == DataType::Type::kBool &&
499 cond->GetRight()->GetType() == DataType::Type::kBool &&
500 (condition == kCondEQ || condition == kCondNE)) {
501 if (condition == kCondEQ) {
502 last_visited_internal_latency_ = kArmIntegerOpLatency;
503 }
504 last_visited_latency_ = kArmIntegerOpLatency;
505 return;
506 }
507
508 HandleGenerateConditionIntegralOrNonPrimitive(cond);
509 }
510
VisitCondition(HCondition * instr)511 void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
512 HandleCondition(instr);
513 }
514
VisitCompare(HCompare * instr)515 void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
516 DataType::Type type = instr->InputAt(0)->GetType();
517 switch (type) {
518 case DataType::Type::kBool:
519 case DataType::Type::kUint8:
520 case DataType::Type::kInt8:
521 case DataType::Type::kUint16:
522 case DataType::Type::kInt16:
523 case DataType::Type::kInt32:
524 last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
525 break;
526 case DataType::Type::kInt64:
527 last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency;
528 break;
529 case DataType::Type::kFloat32:
530 case DataType::Type::kFloat64:
531 last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency;
532 break;
533 default:
534 last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
535 break;
536 }
537 last_visited_latency_ = kArmIntegerOpLatency;
538 }
539
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instruction)540 void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
541 if (instruction->GetResultType() == DataType::Type::kInt32) {
542 last_visited_latency_ = kArmIntegerOpLatency;
543 } else {
544 last_visited_internal_latency_ = kArmIntegerOpLatency;
545 last_visited_latency_ = kArmIntegerOpLatency;
546 }
547 }
548
HandleGenerateDataProcInstruction(bool internal_latency)549 void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) {
550 if (internal_latency) {
551 last_visited_internal_latency_ += kArmIntegerOpLatency;
552 } else {
553 last_visited_latency_ = kArmDataProcWithShifterOpLatency;
554 }
555 }
556
HandleGenerateDataProc(HDataProcWithShifterOp * instruction)557 void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) {
558 const HInstruction::InstructionKind kind = instruction->GetInstrKind();
559 if (kind == HInstruction::kAdd) {
560 last_visited_internal_latency_ = kArmIntegerOpLatency;
561 last_visited_latency_ = kArmIntegerOpLatency;
562 } else if (kind == HInstruction::kSub) {
563 last_visited_internal_latency_ = kArmIntegerOpLatency;
564 last_visited_latency_ = kArmIntegerOpLatency;
565 } else {
566 HandleGenerateDataProcInstruction(/* internal_latency */ true);
567 HandleGenerateDataProcInstruction();
568 }
569 }
570
HandleGenerateLongDataProc(HDataProcWithShifterOp * instruction)571 void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) {
572 DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
573 DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
574
575 const uint32_t shift_value = instruction->GetShiftAmount();
576 const HInstruction::InstructionKind kind = instruction->GetInstrKind();
577
578 if (shift_value >= 32) {
579 // Different shift types actually generate similar code here,
580 // no need to differentiate shift types like the codegen pass does,
581 // which also avoids handling shift types from different ARM backends.
582 HandleGenerateDataProc(instruction);
583 } else {
584 DCHECK_GT(shift_value, 1U);
585 DCHECK_LT(shift_value, 32U);
586
587 if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
588 HandleGenerateDataProcInstruction(/* internal_latency */ true);
589 HandleGenerateDataProcInstruction(/* internal_latency */ true);
590 HandleGenerateDataProcInstruction();
591 } else {
592 last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
593 HandleGenerateDataProc(instruction);
594 }
595 }
596 }
597
VisitDataProcWithShifterOp(HDataProcWithShifterOp * instruction)598 void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) {
599 const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
600
601 if (instruction->GetType() == DataType::Type::kInt32) {
602 HandleGenerateDataProcInstruction();
603 } else {
604 DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
605 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
606 HandleGenerateDataProc(instruction);
607 } else {
608 HandleGenerateLongDataProc(instruction);
609 }
610 }
611 }
612
VisitIntermediateAddress(HIntermediateAddress * ATTRIBUTE_UNUSED)613 void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) {
614 // Although the code generated is a simple `add` instruction, we found through empirical results
615 // that spacing it from its use in memory accesses was beneficial.
616 last_visited_internal_latency_ = kArmNopLatency;
617 last_visited_latency_ = kArmIntegerOpLatency;
618 }
619
VisitIntermediateAddressIndex(HIntermediateAddressIndex * ATTRIBUTE_UNUSED)620 void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex(
621 HIntermediateAddressIndex* ATTRIBUTE_UNUSED) {
622 UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM";
623 }
624
VisitMultiplyAccumulate(HMultiplyAccumulate * ATTRIBUTE_UNUSED)625 void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
626 last_visited_latency_ = kArmMulIntegerLatency;
627 }
628
VisitArrayGet(HArrayGet * instruction)629 void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
630 DataType::Type type = instruction->GetType();
631 const bool maybe_compressed_char_at =
632 mirror::kUseStringCompression && instruction->IsStringCharAt();
633 HInstruction* array_instr = instruction->GetArray();
634 bool has_intermediate_address = array_instr->IsIntermediateAddress();
635 HInstruction* index = instruction->InputAt(1);
636
637 switch (type) {
638 case DataType::Type::kBool:
639 case DataType::Type::kUint8:
640 case DataType::Type::kInt8:
641 case DataType::Type::kUint16:
642 case DataType::Type::kInt16:
643 case DataType::Type::kInt32: {
644 if (maybe_compressed_char_at) {
645 last_visited_internal_latency_ += kArmMemoryLoadLatency;
646 }
647 if (index->IsConstant()) {
648 if (maybe_compressed_char_at) {
649 last_visited_internal_latency_ +=
650 kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
651 last_visited_latency_ = kArmBranchLatency;
652 } else {
653 last_visited_latency_ += kArmMemoryLoadLatency;
654 }
655 } else {
656 if (has_intermediate_address) {
657 } else {
658 last_visited_internal_latency_ += kArmIntegerOpLatency;
659 }
660 if (maybe_compressed_char_at) {
661 last_visited_internal_latency_ +=
662 kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
663 last_visited_latency_ = kArmBranchLatency;
664 } else {
665 last_visited_latency_ += kArmMemoryLoadLatency;
666 }
667 }
668 break;
669 }
670
671 case DataType::Type::kReference: {
672 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
673 last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
674 } else {
675 if (index->IsConstant()) {
676 last_visited_latency_ = kArmMemoryLoadLatency;
677 } else {
678 if (has_intermediate_address) {
679 } else {
680 last_visited_internal_latency_ += kArmIntegerOpLatency;
681 }
682 last_visited_internal_latency_ = kArmMemoryLoadLatency;
683 }
684 }
685 break;
686 }
687
688 case DataType::Type::kInt64: {
689 if (index->IsConstant()) {
690 last_visited_latency_ = kArmMemoryLoadLatency;
691 } else {
692 last_visited_internal_latency_ += kArmIntegerOpLatency;
693 last_visited_latency_ = kArmMemoryLoadLatency;
694 }
695 break;
696 }
697
698 case DataType::Type::kFloat32: {
699 if (index->IsConstant()) {
700 last_visited_latency_ = kArmMemoryLoadLatency;
701 } else {
702 last_visited_internal_latency_ += kArmIntegerOpLatency;
703 last_visited_latency_ = kArmMemoryLoadLatency;
704 }
705 break;
706 }
707
708 case DataType::Type::kFloat64: {
709 if (index->IsConstant()) {
710 last_visited_latency_ = kArmMemoryLoadLatency;
711 } else {
712 last_visited_internal_latency_ += kArmIntegerOpLatency;
713 last_visited_latency_ = kArmMemoryLoadLatency;
714 }
715 break;
716 }
717
718 default:
719 LOG(FATAL) << "Unreachable type " << type;
720 UNREACHABLE();
721 }
722 }
723
VisitArrayLength(HArrayLength * instruction)724 void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) {
725 last_visited_latency_ = kArmMemoryLoadLatency;
726 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
727 last_visited_internal_latency_ = kArmMemoryLoadLatency;
728 last_visited_latency_ = kArmIntegerOpLatency;
729 }
730 }
731
VisitArraySet(HArraySet * instruction)732 void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
733 HInstruction* index = instruction->InputAt(1);
734 DataType::Type value_type = instruction->GetComponentType();
735 HInstruction* array_instr = instruction->GetArray();
736 bool has_intermediate_address = array_instr->IsIntermediateAddress();
737
738 switch (value_type) {
739 case DataType::Type::kBool:
740 case DataType::Type::kUint8:
741 case DataType::Type::kInt8:
742 case DataType::Type::kUint16:
743 case DataType::Type::kInt16:
744 case DataType::Type::kInt32: {
745 if (index->IsConstant()) {
746 last_visited_latency_ = kArmMemoryStoreLatency;
747 } else {
748 if (has_intermediate_address) {
749 } else {
750 last_visited_internal_latency_ = kArmIntegerOpLatency;
751 }
752 last_visited_latency_ = kArmMemoryStoreLatency;
753 }
754 break;
755 }
756
757 case DataType::Type::kReference: {
758 if (instruction->InputAt(2)->IsNullConstant()) {
759 if (index->IsConstant()) {
760 last_visited_latency_ = kArmMemoryStoreLatency;
761 } else {
762 last_visited_internal_latency_ = kArmIntegerOpLatency;
763 last_visited_latency_ = kArmMemoryStoreLatency;
764 }
765 } else {
766 // Following the exact instructions of runtime type checks is too complicated,
767 // just giving it a simple slow latency.
768 last_visited_latency_ = kArmRuntimeTypeCheckLatency;
769 }
770 break;
771 }
772
773 case DataType::Type::kInt64: {
774 if (index->IsConstant()) {
775 last_visited_latency_ = kArmMemoryLoadLatency;
776 } else {
777 last_visited_internal_latency_ = kArmIntegerOpLatency;
778 last_visited_latency_ = kArmMemoryLoadLatency;
779 }
780 break;
781 }
782
783 case DataType::Type::kFloat32: {
784 if (index->IsConstant()) {
785 last_visited_latency_ = kArmMemoryLoadLatency;
786 } else {
787 last_visited_internal_latency_ = kArmIntegerOpLatency;
788 last_visited_latency_ = kArmMemoryLoadLatency;
789 }
790 break;
791 }
792
793 case DataType::Type::kFloat64: {
794 if (index->IsConstant()) {
795 last_visited_latency_ = kArmMemoryLoadLatency;
796 } else {
797 last_visited_internal_latency_ = kArmIntegerOpLatency;
798 last_visited_latency_ = kArmMemoryLoadLatency;
799 }
800 break;
801 }
802
803 default:
804 LOG(FATAL) << "Unreachable type " << value_type;
805 UNREACHABLE();
806 }
807 }
808
VisitBoundsCheck(HBoundsCheck * ATTRIBUTE_UNUSED)809 void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
810 last_visited_internal_latency_ = kArmIntegerOpLatency;
811 // Users do not use any data results.
812 last_visited_latency_ = 0;
813 }
814
HandleDivRemConstantIntegralLatencies(int32_t imm)815 void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) {
816 if (imm == 0) {
817 last_visited_internal_latency_ = 0;
818 last_visited_latency_ = 0;
819 } else if (imm == 1 || imm == -1) {
820 last_visited_latency_ = kArmIntegerOpLatency;
821 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
822 last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
823 last_visited_latency_ = kArmIntegerOpLatency;
824 } else {
825 last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency;
826 last_visited_latency_ = kArmIntegerOpLatency;
827 }
828 }
829
VisitDiv(HDiv * instruction)830 void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) {
831 DataType::Type type = instruction->GetResultType();
832 switch (type) {
833 case DataType::Type::kInt32: {
834 HInstruction* rhs = instruction->GetRight();
835 if (rhs->IsConstant()) {
836 int32_t imm = Int32ConstantFrom(rhs->AsConstant());
837 HandleDivRemConstantIntegralLatencies(imm);
838 } else {
839 last_visited_latency_ = kArmDivIntegerLatency;
840 }
841 break;
842 }
843 case DataType::Type::kFloat32:
844 last_visited_latency_ = kArmDivFloatLatency;
845 break;
846 case DataType::Type::kFloat64:
847 last_visited_latency_ = kArmDivDoubleLatency;
848 break;
849 default:
850 last_visited_internal_latency_ = kArmCallInternalLatency;
851 last_visited_latency_ = kArmCallLatency;
852 break;
853 }
854 }
855
VisitInstanceFieldGet(HInstanceFieldGet * instruction)856 void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
857 HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
858 }
859
VisitInstanceFieldSet(HInstanceFieldSet * instruction)860 void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
861 HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
862 }
863
VisitInstanceOf(HInstanceOf * ATTRIBUTE_UNUSED)864 void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
865 last_visited_internal_latency_ = kArmCallInternalLatency;
866 last_visited_latency_ = kArmIntegerOpLatency;
867 }
868
VisitInvoke(HInvoke * ATTRIBUTE_UNUSED)869 void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
870 last_visited_internal_latency_ = kArmCallInternalLatency;
871 last_visited_latency_ = kArmCallLatency;
872 }
873
VisitLoadString(HLoadString * ATTRIBUTE_UNUSED)874 void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
875 last_visited_internal_latency_ = kArmLoadStringInternalLatency;
876 last_visited_latency_ = kArmMemoryLoadLatency;
877 }
878
VisitNewArray(HNewArray * ATTRIBUTE_UNUSED)879 void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
880 last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
881 last_visited_latency_ = kArmCallLatency;
882 }
883
VisitNewInstance(HNewInstance * instruction)884 void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) {
885 if (instruction->IsStringAlloc()) {
886 last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency;
887 } else {
888 last_visited_internal_latency_ = kArmCallInternalLatency;
889 }
890 last_visited_latency_ = kArmCallLatency;
891 }
892
VisitRem(HRem * instruction)893 void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) {
894 DataType::Type type = instruction->GetResultType();
895 switch (type) {
896 case DataType::Type::kInt32: {
897 HInstruction* rhs = instruction->GetRight();
898 if (rhs->IsConstant()) {
899 int32_t imm = Int32ConstantFrom(rhs->AsConstant());
900 HandleDivRemConstantIntegralLatencies(imm);
901 } else {
902 last_visited_internal_latency_ = kArmDivIntegerLatency;
903 last_visited_latency_ = kArmMulIntegerLatency;
904 }
905 break;
906 }
907 default:
908 last_visited_internal_latency_ = kArmCallInternalLatency;
909 last_visited_latency_ = kArmCallLatency;
910 break;
911 }
912 }
913
HandleFieldGetLatencies(HInstruction * instruction,const FieldInfo & field_info)914 void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction,
915 const FieldInfo& field_info) {
916 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
917 DCHECK(codegen_ != nullptr);
918 bool is_volatile = field_info.IsVolatile();
919 DataType::Type field_type = field_info.GetFieldType();
920 bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
921
922 switch (field_type) {
923 case DataType::Type::kBool:
924 case DataType::Type::kUint8:
925 case DataType::Type::kInt8:
926 case DataType::Type::kUint16:
927 case DataType::Type::kInt16:
928 case DataType::Type::kInt32:
929 last_visited_latency_ = kArmMemoryLoadLatency;
930 break;
931
932 case DataType::Type::kReference:
933 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
934 last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
935 last_visited_latency_ = kArmMemoryLoadLatency;
936 } else {
937 last_visited_latency_ = kArmMemoryLoadLatency;
938 }
939 break;
940
941 case DataType::Type::kInt64:
942 if (is_volatile && !atomic_ldrd_strd) {
943 last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
944 last_visited_latency_ = kArmMemoryLoadLatency;
945 } else {
946 last_visited_latency_ = kArmMemoryLoadLatency;
947 }
948 break;
949
950 case DataType::Type::kFloat32:
951 last_visited_latency_ = kArmMemoryLoadLatency;
952 break;
953
954 case DataType::Type::kFloat64:
955 if (is_volatile && !atomic_ldrd_strd) {
956 last_visited_internal_latency_ =
957 kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency;
958 last_visited_latency_ = kArmIntegerOpLatency;
959 } else {
960 last_visited_latency_ = kArmMemoryLoadLatency;
961 }
962 break;
963
964 default:
965 last_visited_latency_ = kArmMemoryLoadLatency;
966 break;
967 }
968
969 if (is_volatile) {
970 last_visited_internal_latency_ += kArmMemoryBarrierLatency;
971 }
972 }
973
HandleFieldSetLatencies(HInstruction * instruction,const FieldInfo & field_info)974 void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction,
975 const FieldInfo& field_info) {
976 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
977 DCHECK(codegen_ != nullptr);
978 bool is_volatile = field_info.IsVolatile();
979 DataType::Type field_type = field_info.GetFieldType();
980 bool needs_write_barrier =
981 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
982 bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
983
984 switch (field_type) {
985 case DataType::Type::kBool:
986 case DataType::Type::kUint8:
987 case DataType::Type::kInt8:
988 case DataType::Type::kUint16:
989 case DataType::Type::kInt16:
990 if (is_volatile) {
991 last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency;
992 last_visited_latency_ = kArmMemoryBarrierLatency;
993 } else {
994 last_visited_latency_ = kArmMemoryStoreLatency;
995 }
996 break;
997
998 case DataType::Type::kInt32:
999 case DataType::Type::kReference:
1000 if (kPoisonHeapReferences && needs_write_barrier) {
1001 last_visited_internal_latency_ += kArmIntegerOpLatency * 2;
1002 }
1003 last_visited_latency_ = kArmMemoryStoreLatency;
1004 break;
1005
1006 case DataType::Type::kInt64:
1007 if (is_volatile && !atomic_ldrd_strd) {
1008 last_visited_internal_latency_ =
1009 kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
1010 last_visited_latency_ = kArmIntegerOpLatency;
1011 } else {
1012 last_visited_latency_ = kArmMemoryStoreLatency;
1013 }
1014 break;
1015
1016 case DataType::Type::kFloat32:
1017 last_visited_latency_ = kArmMemoryStoreLatency;
1018 break;
1019
1020 case DataType::Type::kFloat64:
1021 if (is_volatile && !atomic_ldrd_strd) {
1022 last_visited_internal_latency_ = kArmIntegerOpLatency +
1023 kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
1024 last_visited_latency_ = kArmIntegerOpLatency;
1025 } else {
1026 last_visited_latency_ = kArmMemoryStoreLatency;
1027 }
1028 break;
1029
1030 default:
1031 last_visited_latency_ = kArmMemoryStoreLatency;
1032 break;
1033 }
1034 }
1035
VisitStaticFieldGet(HStaticFieldGet * instruction)1036 void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
1037 HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
1038 }
1039
VisitStaticFieldSet(HStaticFieldSet * instruction)1040 void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
1041 HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
1042 }
1043
VisitSuspendCheck(HSuspendCheck * instruction)1044 void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
1045 HBasicBlock* block = instruction->GetBlock();
1046 DCHECK((block->GetLoopInformation() != nullptr) ||
1047 (block->IsEntryBlock() && instruction->GetNext()->IsGoto()));
1048 // Users do not use any data results.
1049 last_visited_latency_ = 0;
1050 }
1051
VisitTypeConversion(HTypeConversion * instr)1052 void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
1053 DataType::Type result_type = instr->GetResultType();
1054 DataType::Type input_type = instr->GetInputType();
1055
1056 switch (result_type) {
1057 case DataType::Type::kUint8:
1058 case DataType::Type::kInt8:
1059 case DataType::Type::kUint16:
1060 case DataType::Type::kInt16:
1061 last_visited_latency_ = kArmIntegerOpLatency; // SBFX or UBFX
1062 break;
1063
1064 case DataType::Type::kInt32:
1065 switch (input_type) {
1066 case DataType::Type::kInt64:
1067 last_visited_latency_ = kArmIntegerOpLatency; // MOV
1068 break;
1069 case DataType::Type::kFloat32:
1070 case DataType::Type::kFloat64:
1071 last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
1072 last_visited_latency_ = kArmFloatingPointOpLatency;
1073 break;
1074 default:
1075 last_visited_latency_ = kArmIntegerOpLatency;
1076 break;
1077 }
1078 break;
1079
1080 case DataType::Type::kInt64:
1081 switch (input_type) {
1082 case DataType::Type::kBool:
1083 case DataType::Type::kUint8:
1084 case DataType::Type::kInt8:
1085 case DataType::Type::kUint16:
1086 case DataType::Type::kInt16:
1087 case DataType::Type::kInt32:
1088 // MOV and extension
1089 last_visited_internal_latency_ = kArmIntegerOpLatency;
1090 last_visited_latency_ = kArmIntegerOpLatency;
1091 break;
1092 case DataType::Type::kFloat32:
1093 case DataType::Type::kFloat64:
1094 // invokes runtime
1095 last_visited_internal_latency_ = kArmCallInternalLatency;
1096 break;
1097 default:
1098 last_visited_internal_latency_ = kArmIntegerOpLatency;
1099 last_visited_latency_ = kArmIntegerOpLatency;
1100 break;
1101 }
1102 break;
1103
1104 case DataType::Type::kFloat32:
1105 switch (input_type) {
1106 case DataType::Type::kBool:
1107 case DataType::Type::kUint8:
1108 case DataType::Type::kInt8:
1109 case DataType::Type::kUint16:
1110 case DataType::Type::kInt16:
1111 case DataType::Type::kInt32:
1112 last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
1113 last_visited_latency_ = kArmFloatingPointOpLatency;
1114 break;
1115 case DataType::Type::kInt64:
1116 // invokes runtime
1117 last_visited_internal_latency_ = kArmCallInternalLatency;
1118 break;
1119 case DataType::Type::kFloat64:
1120 last_visited_latency_ = kArmFloatingPointOpLatency;
1121 break;
1122 default:
1123 last_visited_latency_ = kArmFloatingPointOpLatency;
1124 break;
1125 }
1126 break;
1127
1128 case DataType::Type::kFloat64:
1129 switch (input_type) {
1130 case DataType::Type::kBool:
1131 case DataType::Type::kUint8:
1132 case DataType::Type::kInt8:
1133 case DataType::Type::kUint16:
1134 case DataType::Type::kInt16:
1135 case DataType::Type::kInt32:
1136 last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
1137 last_visited_latency_ = kArmFloatingPointOpLatency;
1138 break;
1139 case DataType::Type::kInt64:
1140 last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency;
1141 last_visited_latency_ = kArmFloatingPointOpLatency;
1142 break;
1143 case DataType::Type::kFloat32:
1144 last_visited_latency_ = kArmFloatingPointOpLatency;
1145 break;
1146 default:
1147 last_visited_latency_ = kArmFloatingPointOpLatency;
1148 break;
1149 }
1150 break;
1151
1152 default:
1153 last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
1154 break;
1155 }
1156 }
1157
1158 } // namespace arm
1159 } // namespace art
1160