1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "scheduler_arm.h"
18
19 #include "arch/arm/instruction_set_features_arm.h"
20 #include "code_generator_utils.h"
21 #include "common_arm.h"
22 #include "heap_poisoning.h"
23 #include "mirror/array-inl.h"
24 #include "mirror/string.h"
25
26 namespace art {
27 namespace arm {
28
29 using helpers::Int32ConstantFrom;
30 using helpers::Uint64ConstantFrom;
31
HandleBinaryOperationLantencies(HBinaryOperation * instr)32 void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) {
33 switch (instr->GetResultType()) {
34 case DataType::Type::kInt64:
35 // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs,
36 // so a bubble (kArmNopLatency) is added to represent the internal carry flag
37 // dependency inside these pairs.
38 last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency;
39 last_visited_latency_ = kArmIntegerOpLatency;
40 break;
41 case DataType::Type::kFloat32:
42 case DataType::Type::kFloat64:
43 last_visited_latency_ = kArmFloatingPointOpLatency;
44 break;
45 default:
46 last_visited_latency_ = kArmIntegerOpLatency;
47 break;
48 }
49 }
50
// HAdd uses the generic binary-arithmetic latency model.
void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) {
  HandleBinaryOperationLantencies(instr);
}
54
// HSub uses the generic binary-arithmetic latency model.
void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) {
  HandleBinaryOperationLantencies(instr);
}
58
VisitMul(HMul * instr)59 void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) {
60 switch (instr->GetResultType()) {
61 case DataType::Type::kInt64:
62 last_visited_internal_latency_ = 3 * kArmMulIntegerLatency;
63 last_visited_latency_ = kArmIntegerOpLatency;
64 break;
65 case DataType::Type::kFloat32:
66 case DataType::Type::kFloat64:
67 last_visited_latency_ = kArmMulFloatingPointLatency;
68 break;
69 default:
70 last_visited_latency_ = kArmMulIntegerLatency;
71 break;
72 }
73 }
74
HandleBitwiseOperationLantencies(HBinaryOperation * instr)75 void SchedulingLatencyVisitorARM::HandleBitwiseOperationLantencies(HBinaryOperation* instr) {
76 switch (instr->GetResultType()) {
77 case DataType::Type::kInt64:
78 last_visited_internal_latency_ = kArmIntegerOpLatency;
79 last_visited_latency_ = kArmIntegerOpLatency;
80 break;
81 case DataType::Type::kFloat32:
82 case DataType::Type::kFloat64:
83 last_visited_latency_ = kArmFloatingPointOpLatency;
84 break;
85 default:
86 last_visited_latency_ = kArmIntegerOpLatency;
87 break;
88 }
89 }
90
// HAnd uses the generic bitwise-operation latency model.
void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) {
  HandleBitwiseOperationLantencies(instr);
}
94
// HOr uses the generic bitwise-operation latency model.
void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) {
  HandleBitwiseOperationLantencies(instr);
}
98
// HXor uses the generic bitwise-operation latency model.
void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) {
  HandleBitwiseOperationLantencies(instr);
}
102
VisitRor(HRor * instr)103 void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) {
104 switch (instr->GetResultType()) {
105 case DataType::Type::kInt32:
106 last_visited_latency_ = kArmIntegerOpLatency;
107 break;
108 case DataType::Type::kInt64: {
109 // HandleLongRotate
110 HInstruction* rhs = instr->GetRight();
111 if (rhs->IsConstant()) {
112 uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
113 if (rot != 0u) {
114 last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
115 last_visited_latency_ = kArmIntegerOpLatency;
116 } else {
117 last_visited_internal_latency_ = kArmIntegerOpLatency;
118 last_visited_latency_ = kArmIntegerOpLatency;
119 }
120 } else {
121 last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency;
122 last_visited_latency_ = kArmBranchLatency;
123 }
124 break;
125 }
126 default:
127 LOG(FATAL) << "Unexpected operation type " << instr->GetResultType();
128 UNREACHABLE();
129 }
130 }
131
HandleShiftLatencies(HBinaryOperation * instr)132 void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) {
133 DataType::Type type = instr->GetResultType();
134 HInstruction* rhs = instr->GetRight();
135 switch (type) {
136 case DataType::Type::kInt32:
137 if (!rhs->IsConstant()) {
138 last_visited_internal_latency_ = kArmIntegerOpLatency;
139 }
140 last_visited_latency_ = kArmIntegerOpLatency;
141 break;
142 case DataType::Type::kInt64:
143 if (!rhs->IsConstant()) {
144 last_visited_internal_latency_ = 8 * kArmIntegerOpLatency;
145 } else {
146 uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
147 if (shift_value == 1 || shift_value >= 32) {
148 last_visited_internal_latency_ = kArmIntegerOpLatency;
149 } else {
150 last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
151 }
152 }
153 last_visited_latency_ = kArmIntegerOpLatency;
154 break;
155 default:
156 LOG(FATAL) << "Unexpected operation type " << type;
157 UNREACHABLE();
158 }
159 }
160
// HShl uses the shared shift latency model.
void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) {
  HandleShiftLatencies(instr);
}
164
// HShr uses the shared shift latency model.
void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) {
  HandleShiftLatencies(instr);
}
168
// HUShr uses the shared shift latency model.
void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) {
  HandleShiftLatencies(instr);
}
172
HandleGenerateConditionWithZero(IfCondition condition)173 void SchedulingLatencyVisitorARM::HandleGenerateConditionWithZero(IfCondition condition) {
174 switch (condition) {
175 case kCondEQ:
176 case kCondBE:
177 case kCondNE:
178 case kCondA:
179 last_visited_internal_latency_ += kArmIntegerOpLatency;
180 last_visited_latency_ = kArmIntegerOpLatency;
181 break;
182 case kCondGE:
183 // Mvn
184 last_visited_internal_latency_ += kArmIntegerOpLatency;
185 FALLTHROUGH_INTENDED;
186 case kCondLT:
187 // Lsr
188 last_visited_latency_ = kArmIntegerOpLatency;
189 break;
190 case kCondAE:
191 // Trivially true.
192 // Mov
193 last_visited_latency_ = kArmIntegerOpLatency;
194 break;
195 case kCondB:
196 // Trivially false.
197 // Mov
198 last_visited_latency_ = kArmIntegerOpLatency;
199 break;
200 default:
201 LOG(FATAL) << "Unexpected condition " << condition;
202 UNREACHABLE();
203 }
204 }
205
// Internal latency of testing a 64-bit value against a constant (mirrors
// codegen's GenerateLongTestConstant). Only internal latency is accumulated;
// the GenerateTest helpers never set last_visited_latency_.
void SchedulingLatencyVisitorARM::HandleGenerateLongTestConstant(HCondition* condition) {
  DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);

  IfCondition cond = condition->GetCondition();

  HInstruction* right = condition->InputAt(1);

  // Read unsigned, stored signed: the max() comparison below relies on the
  // signed interpretation of the constant.
  int64_t value = Uint64ConstantFrom(right);

  // Comparisons against 0 are common enough, so codegen has special handling for them.
  if (value == 0) {
    switch (cond) {
      case kCondNE:
      case kCondA:
      case kCondEQ:
      case kCondBE:
        // Orrs
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        return;
      case kCondLT:
      case kCondGE:
        // Cmp
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        return;
      case kCondB:
      case kCondAE:
        // Cmp
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        return;
      default:
        // kCondLE / kCondGT are handled by the general cases below.
        break;
    }
  }

  switch (cond) {
    case kCondEQ:
    case kCondNE:
    case kCondB:
    case kCondBE:
    case kCondA:
    case kCondAE: {
      // Cmp, IT, Cmp
      last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
      break;
    }
    case kCondLE:
    case kCondGT:
      // Trivially true or false.
      if (value == std::numeric_limits<int64_t>::max()) {
        // Cmp
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        break;
      }
      // Otherwise LE/GT are handled like GE/LT (against value + 1 in codegen).
      FALLTHROUGH_INTENDED;
    case kCondGE:
    case kCondLT: {
      // Cmp, Sbcs
      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
      break;
    }
    default:
      LOG(FATAL) << "Unreachable";
      UNREACHABLE();
  }
}
271
HandleGenerateLongTest(HCondition * condition)272 void SchedulingLatencyVisitorARM::HandleGenerateLongTest(HCondition* condition) {
273 DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
274
275 IfCondition cond = condition->GetCondition();
276
277 switch (cond) {
278 case kCondEQ:
279 case kCondNE:
280 case kCondB:
281 case kCondBE:
282 case kCondA:
283 case kCondAE: {
284 // Cmp, IT, Cmp
285 last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
286 break;
287 }
288 case kCondLE:
289 case kCondGT:
290 case kCondGE:
291 case kCondLT: {
292 // Cmp, Sbcs
293 last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
294 break;
295 }
296 default:
297 LOG(FATAL) << "Unreachable";
298 UNREACHABLE();
299 }
300 }
301
302 // The GenerateTest series of function all counted as internal latency.
HandleGenerateTest(HCondition * condition)303 void SchedulingLatencyVisitorARM::HandleGenerateTest(HCondition* condition) {
304 const DataType::Type type = condition->GetLeft()->GetType();
305
306 if (type == DataType::Type::kInt64) {
307 condition->InputAt(1)->IsConstant()
308 ? HandleGenerateLongTestConstant(condition)
309 : HandleGenerateLongTest(condition);
310 } else if (DataType::IsFloatingPointType(type)) {
311 // GenerateVcmp + Vmrs
312 last_visited_internal_latency_ += 2 * kArmFloatingPointOpLatency;
313 } else {
314 // Cmp
315 last_visited_internal_latency_ += kArmIntegerOpLatency;
316 }
317 }
318
// Returns whether codegen can emit a compact flag-setting test for this
// condition (mirrors the equivalent check in the ARM code generator). Only
// 64-bit comparisons with a constant RHS can fail the check.
bool SchedulingLatencyVisitorARM::CanGenerateTest(HCondition* condition) {
  if (condition->GetLeft()->GetType() == DataType::Type::kInt64) {
    HInstruction* right = condition->InputAt(1);

    if (right->IsConstant()) {
      IfCondition c = condition->GetCondition();
      const uint64_t value = Uint64ConstantFrom(right);

      // Conditions outside the [kCondLT, kCondGE] enum range can only be
      // tested cheaply against a constant of zero.
      if (c < kCondLT || c > kCondGE) {
        if (value != 0) {
          return false;
        }
      } else if (c == kCondLE || c == kCondGT) {
        // LE/GT against INT64_MAX are trivially true/false; otherwise codegen
        // rewrites them using (value + 1), whose high word must be encodable
        // as an SBC shifter operand.
        if (value < std::numeric_limits<int64_t>::max() &&
            !codegen_->GetAssembler()->ShifterOperandCanHold(
                SBC, High32Bits(value + 1), vixl32::FlagsUpdate::SetFlags)) {
          return false;
        }
      } else if (!codegen_->GetAssembler()->ShifterOperandCanHold(
                      SBC, High32Bits(value), vixl32::FlagsUpdate::SetFlags)) {
        // LT/GE: the constant's high word itself must fit an SBC operand.
        return false;
      }
    }
  }

  return true;
}
346
// Generic condition materialization: run the test, then model the moves that
// write the boolean result.
void SchedulingLatencyVisitorARM::HandleGenerateConditionGeneric(HCondition* cond) {
  HandleGenerateTest(cond);

  // Unlike the codegen pass, we cannot check whether the 'out' register
  // IsLow() here, because scheduling runs before liveness analysis (the
  // location builder) and the register allocator, so we can only follow one
  // codegen path by assuming out.IsLow() is true.
  last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
  last_visited_latency_ = kArmIntegerOpLatency;
}
356
HandleGenerateEqualLong(HCondition * cond)357 void SchedulingLatencyVisitorARM::HandleGenerateEqualLong(HCondition* cond) {
358 DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
359
360 IfCondition condition = cond->GetCondition();
361
362 last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
363
364 if (condition == kCondNE) {
365 // Orrs, IT, Mov
366 last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
367 } else {
368 last_visited_internal_latency_ += kArmIntegerOpLatency;
369 HandleGenerateConditionWithZero(condition);
370 }
371 }
372
HandleGenerateLongComparesAndJumps()373 void SchedulingLatencyVisitorARM::HandleGenerateLongComparesAndJumps() {
374 last_visited_internal_latency_ += 4 * kArmIntegerOpLatency;
375 last_visited_internal_latency_ += kArmBranchLatency;
376 }
377
HandleGenerateConditionLong(HCondition * cond)378 void SchedulingLatencyVisitorARM::HandleGenerateConditionLong(HCondition* cond) {
379 DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
380
381 IfCondition condition = cond->GetCondition();
382 HInstruction* right = cond->InputAt(1);
383
384 if (right->IsConstant()) {
385 // Comparisons against 0 are common enough, so codegen has special handling for them.
386 if (Uint64ConstantFrom(right) == 0) {
387 switch (condition) {
388 case kCondNE:
389 case kCondA:
390 case kCondEQ:
391 case kCondBE:
392 // Orr
393 last_visited_internal_latency_ += kArmIntegerOpLatency;
394 HandleGenerateConditionWithZero(condition);
395 return;
396 case kCondLT:
397 case kCondGE:
398 FALLTHROUGH_INTENDED;
399 case kCondAE:
400 case kCondB:
401 HandleGenerateConditionWithZero(condition);
402 return;
403 case kCondLE:
404 case kCondGT:
405 default:
406 break;
407 }
408 }
409 }
410
411 if ((condition == kCondEQ || condition == kCondNE) &&
412 !CanGenerateTest(cond)) {
413 HandleGenerateEqualLong(cond);
414 return;
415 }
416
417 if (CanGenerateTest(cond)) {
418 HandleGenerateConditionGeneric(cond);
419 return;
420 }
421
422 HandleGenerateLongComparesAndJumps();
423
424 last_visited_internal_latency_ += kArmIntegerOpLatency;
425 last_visited_latency_ = kArmBranchLatency;;
426 }
427
// Latency of materializing an integral or reference condition (mirrors
// codegen's GenerateConditionIntegralOrNonPrimitive). Long conditions are
// delegated; 32-bit conditions get special handling for zero constants and
// for EQ/NE.
void SchedulingLatencyVisitorARM::HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond) {
  const DataType::Type type = cond->GetLeft()->GetType();

  DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;

  if (type == DataType::Type::kInt64) {
    HandleGenerateConditionLong(cond);
    return;
  }

  IfCondition condition = cond->GetCondition();
  HInstruction* right = cond->InputAt(1);
  int64_t value;

  if (right->IsConstant()) {
    value = Uint64ConstantFrom(right);

    // Comparisons against 0 are common enough, so codegen has special handling for them.
    if (value == 0) {
      switch (condition) {
        case kCondNE:
        case kCondA:
        case kCondEQ:
        case kCondBE:
        case kCondLT:
        case kCondGE:
        case kCondAE:
        case kCondB:
          HandleGenerateConditionWithZero(condition);
          return;
        case kCondLE:
        case kCondGT:
        default:
          // LE/GT against zero use the generic path below.
          break;
      }
    }
  }

  if (condition == kCondEQ || condition == kCondNE) {
    if (condition == kCondNE) {
      // CMP, IT, MOV.ne
      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
      last_visited_latency_ = kArmIntegerOpLatency;
    } else {
      last_visited_internal_latency_ += kArmIntegerOpLatency;
      HandleGenerateConditionWithZero(condition);
    }
    return;
  }

  HandleGenerateConditionGeneric(cond);
}
480
// Top-level latency model for HCondition. Conditions emitted at their use
// site produce no code of their own, so they cost nothing here.
void SchedulingLatencyVisitorARM::HandleCondition(HCondition* cond) {
  if (cond->IsEmittedAtUseSite()) {
    last_visited_latency_ = 0;
    return;
  }

  const DataType::Type type = cond->GetLeft()->GetType();

  if (DataType::IsFloatingPointType(type)) {
    HandleGenerateConditionGeneric(cond);
    return;
  }

  DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;

  const IfCondition condition = cond->GetCondition();

  // Boolean EQ/NE against another boolean lowers to at most two integer ops
  // (EQ needs one extra op compared to NE).
  if (type == DataType::Type::kBool &&
      cond->GetRight()->GetType() == DataType::Type::kBool &&
      (condition == kCondEQ || condition == kCondNE)) {
    if (condition == kCondEQ) {
      last_visited_internal_latency_ = kArmIntegerOpLatency;
    }
    last_visited_latency_ = kArmIntegerOpLatency;
    return;
  }

  HandleGenerateConditionIntegralOrNonPrimitive(cond);
}
510
// All HCondition kinds share the HandleCondition latency model.
void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
  HandleCondition(instr);
}
514
VisitCompare(HCompare * instr)515 void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
516 DataType::Type type = instr->InputAt(0)->GetType();
517 switch (type) {
518 case DataType::Type::kBool:
519 case DataType::Type::kUint8:
520 case DataType::Type::kInt8:
521 case DataType::Type::kUint16:
522 case DataType::Type::kInt16:
523 case DataType::Type::kInt32:
524 last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
525 break;
526 case DataType::Type::kInt64:
527 last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency;
528 break;
529 case DataType::Type::kFloat32:
530 case DataType::Type::kFloat64:
531 last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency;
532 break;
533 default:
534 last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
535 break;
536 }
537 last_visited_latency_ = kArmIntegerOpLatency;
538 }
539
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instruction)540 void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
541 if (instruction->GetResultType() == DataType::Type::kInt32) {
542 last_visited_latency_ = kArmIntegerOpLatency;
543 } else {
544 last_visited_internal_latency_ = kArmIntegerOpLatency;
545 last_visited_latency_ = kArmIntegerOpLatency;
546 }
547 }
548
HandleGenerateDataProcInstruction(bool internal_latency)549 void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) {
550 if (internal_latency) {
551 last_visited_internal_latency_ += kArmIntegerOpLatency;
552 } else {
553 last_visited_latency_ = kArmDataProcWithShifterOpLatency;
554 }
555 }
556
HandleGenerateDataProc(HDataProcWithShifterOp * instruction)557 void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) {
558 const HInstruction::InstructionKind kind = instruction->GetInstrKind();
559 if (kind == HInstruction::kAdd) {
560 last_visited_internal_latency_ = kArmIntegerOpLatency;
561 last_visited_latency_ = kArmIntegerOpLatency;
562 } else if (kind == HInstruction::kSub) {
563 last_visited_internal_latency_ = kArmIntegerOpLatency;
564 last_visited_latency_ = kArmIntegerOpLatency;
565 } else {
566 HandleGenerateDataProcInstruction(/* internal_latency= */ true);
567 HandleGenerateDataProcInstruction();
568 }
569 }
570
HandleGenerateLongDataProc(HDataProcWithShifterOp * instruction)571 void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) {
572 DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
573 DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
574
575 const uint32_t shift_value = instruction->GetShiftAmount();
576 const HInstruction::InstructionKind kind = instruction->GetInstrKind();
577
578 if (shift_value >= 32) {
579 // Different shift types actually generate similar code here,
580 // no need to differentiate shift types like the codegen pass does,
581 // which also avoids handling shift types from different ARM backends.
582 HandleGenerateDataProc(instruction);
583 } else {
584 DCHECK_GT(shift_value, 1U);
585 DCHECK_LT(shift_value, 32U);
586
587 if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
588 HandleGenerateDataProcInstruction(/* internal_latency= */ true);
589 HandleGenerateDataProcInstruction(/* internal_latency= */ true);
590 HandleGenerateDataProcInstruction();
591 } else {
592 last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
593 HandleGenerateDataProc(instruction);
594 }
595 }
596 }
597
VisitDataProcWithShifterOp(HDataProcWithShifterOp * instruction)598 void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) {
599 const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
600
601 if (instruction->GetType() == DataType::Type::kInt32) {
602 HandleGenerateDataProcInstruction();
603 } else {
604 DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
605 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
606 HandleGenerateDataProc(instruction);
607 } else {
608 HandleGenerateLongDataProc(instruction);
609 }
610 }
611 }
612
void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) {
  // Although the code generated is a simple `add` instruction, we found through empirical results
  // that spacing it from its use in memory accesses was beneficial.
  last_visited_internal_latency_ = kArmNopLatency;
  last_visited_latency_ = kArmIntegerOpLatency;
}
619
// HIntermediateAddressIndex is an ARM64-style optimization; the ARM32
// scheduler has no lowering (and thus no latency model) for it.
void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex(
    HIntermediateAddressIndex* ATTRIBUTE_UNUSED) {
  UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM";
}
624
// Multiply-accumulate is modeled as a single integer multiply.
void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
  last_visited_latency_ = kArmMulIntegerLatency;
}
628
VisitArrayGet(HArrayGet * instruction)629 void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
630 DataType::Type type = instruction->GetType();
631 const bool maybe_compressed_char_at =
632 mirror::kUseStringCompression && instruction->IsStringCharAt();
633 HInstruction* array_instr = instruction->GetArray();
634 bool has_intermediate_address = array_instr->IsIntermediateAddress();
635 HInstruction* index = instruction->InputAt(1);
636
637 switch (type) {
638 case DataType::Type::kBool:
639 case DataType::Type::kUint8:
640 case DataType::Type::kInt8:
641 case DataType::Type::kUint16:
642 case DataType::Type::kInt16:
643 case DataType::Type::kInt32: {
644 if (maybe_compressed_char_at) {
645 last_visited_internal_latency_ += kArmMemoryLoadLatency;
646 }
647 if (index->IsConstant()) {
648 if (maybe_compressed_char_at) {
649 last_visited_internal_latency_ +=
650 kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
651 last_visited_latency_ = kArmBranchLatency;
652 } else {
653 last_visited_latency_ += kArmMemoryLoadLatency;
654 }
655 } else {
656 if (has_intermediate_address) {
657 } else {
658 last_visited_internal_latency_ += kArmIntegerOpLatency;
659 }
660 if (maybe_compressed_char_at) {
661 last_visited_internal_latency_ +=
662 kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
663 last_visited_latency_ = kArmBranchLatency;
664 } else {
665 last_visited_latency_ += kArmMemoryLoadLatency;
666 }
667 }
668 break;
669 }
670
671 case DataType::Type::kReference: {
672 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
673 last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
674 } else {
675 if (index->IsConstant()) {
676 last_visited_latency_ = kArmMemoryLoadLatency;
677 } else {
678 if (has_intermediate_address) {
679 } else {
680 last_visited_internal_latency_ += kArmIntegerOpLatency;
681 }
682 last_visited_latency_ = kArmMemoryLoadLatency;
683 }
684 }
685 break;
686 }
687
688 case DataType::Type::kInt64: {
689 if (index->IsConstant()) {
690 last_visited_latency_ = kArmMemoryLoadLatency;
691 } else {
692 last_visited_internal_latency_ += kArmIntegerOpLatency;
693 last_visited_latency_ = kArmMemoryLoadLatency;
694 }
695 break;
696 }
697
698 case DataType::Type::kFloat32: {
699 if (index->IsConstant()) {
700 last_visited_latency_ = kArmMemoryLoadLatency;
701 } else {
702 last_visited_internal_latency_ += kArmIntegerOpLatency;
703 last_visited_latency_ = kArmMemoryLoadLatency;
704 }
705 break;
706 }
707
708 case DataType::Type::kFloat64: {
709 if (index->IsConstant()) {
710 last_visited_latency_ = kArmMemoryLoadLatency;
711 } else {
712 last_visited_internal_latency_ += kArmIntegerOpLatency;
713 last_visited_latency_ = kArmMemoryLoadLatency;
714 }
715 break;
716 }
717
718 default:
719 LOG(FATAL) << "Unreachable type " << type;
720 UNREACHABLE();
721 }
722 }
723
VisitArrayLength(HArrayLength * instruction)724 void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) {
725 last_visited_latency_ = kArmMemoryLoadLatency;
726 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
727 last_visited_internal_latency_ = kArmMemoryLoadLatency;
728 last_visited_latency_ = kArmIntegerOpLatency;
729 }
730 }
731
VisitArraySet(HArraySet * instruction)732 void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
733 HInstruction* index = instruction->InputAt(1);
734 DataType::Type value_type = instruction->GetComponentType();
735 HInstruction* array_instr = instruction->GetArray();
736 bool has_intermediate_address = array_instr->IsIntermediateAddress();
737
738 switch (value_type) {
739 case DataType::Type::kBool:
740 case DataType::Type::kUint8:
741 case DataType::Type::kInt8:
742 case DataType::Type::kUint16:
743 case DataType::Type::kInt16:
744 case DataType::Type::kInt32: {
745 if (index->IsConstant()) {
746 last_visited_latency_ = kArmMemoryStoreLatency;
747 } else {
748 if (has_intermediate_address) {
749 } else {
750 last_visited_internal_latency_ = kArmIntegerOpLatency;
751 }
752 last_visited_latency_ = kArmMemoryStoreLatency;
753 }
754 break;
755 }
756
757 case DataType::Type::kReference: {
758 if (instruction->InputAt(2)->IsNullConstant()) {
759 if (index->IsConstant()) {
760 last_visited_latency_ = kArmMemoryStoreLatency;
761 } else {
762 last_visited_internal_latency_ = kArmIntegerOpLatency;
763 last_visited_latency_ = kArmMemoryStoreLatency;
764 }
765 } else {
766 // Following the exact instructions of runtime type checks is too complicated,
767 // just giving it a simple slow latency.
768 last_visited_latency_ = kArmRuntimeTypeCheckLatency;
769 }
770 break;
771 }
772
773 case DataType::Type::kInt64: {
774 if (index->IsConstant()) {
775 last_visited_latency_ = kArmMemoryLoadLatency;
776 } else {
777 last_visited_internal_latency_ = kArmIntegerOpLatency;
778 last_visited_latency_ = kArmMemoryLoadLatency;
779 }
780 break;
781 }
782
783 case DataType::Type::kFloat32: {
784 if (index->IsConstant()) {
785 last_visited_latency_ = kArmMemoryLoadLatency;
786 } else {
787 last_visited_internal_latency_ = kArmIntegerOpLatency;
788 last_visited_latency_ = kArmMemoryLoadLatency;
789 }
790 break;
791 }
792
793 case DataType::Type::kFloat64: {
794 if (index->IsConstant()) {
795 last_visited_latency_ = kArmMemoryLoadLatency;
796 } else {
797 last_visited_internal_latency_ = kArmIntegerOpLatency;
798 last_visited_latency_ = kArmMemoryLoadLatency;
799 }
800 break;
801 }
802
803 default:
804 LOG(FATAL) << "Unreachable type " << value_type;
805 UNREACHABLE();
806 }
807 }
808
// A bounds check is one internal compare; it produces no value consumers
// wait on.
void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
  last_visited_internal_latency_ = kArmIntegerOpLatency;
  // Users do not use any data results.
  last_visited_latency_ = 0;
}
814
HandleDivRemConstantIntegralLatencies(int32_t imm)815 void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) {
816 if (imm == 0) {
817 last_visited_internal_latency_ = 0;
818 last_visited_latency_ = 0;
819 } else if (imm == 1 || imm == -1) {
820 last_visited_latency_ = kArmIntegerOpLatency;
821 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
822 last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
823 last_visited_latency_ = kArmIntegerOpLatency;
824 } else {
825 last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency;
826 last_visited_latency_ = kArmIntegerOpLatency;
827 }
828 }
829
VisitDiv(HDiv * instruction)830 void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) {
831 DataType::Type type = instruction->GetResultType();
832 switch (type) {
833 case DataType::Type::kInt32: {
834 HInstruction* rhs = instruction->GetRight();
835 if (rhs->IsConstant()) {
836 int32_t imm = Int32ConstantFrom(rhs->AsConstant());
837 HandleDivRemConstantIntegralLatencies(imm);
838 } else {
839 last_visited_latency_ = kArmDivIntegerLatency;
840 }
841 break;
842 }
843 case DataType::Type::kFloat32:
844 last_visited_latency_ = kArmDivFloatLatency;
845 break;
846 case DataType::Type::kFloat64:
847 last_visited_latency_ = kArmDivDoubleLatency;
848 break;
849 default:
850 last_visited_internal_latency_ = kArmCallInternalLatency;
851 last_visited_latency_ = kArmCallLatency;
852 break;
853 }
854 }
855
// Predicated field gets share the common field-load latency model.
void SchedulingLatencyVisitorARM::VisitPredicatedInstanceFieldGet(
    HPredicatedInstanceFieldGet* instruction) {
  HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
}
860
// Instance field gets share the common field-load latency model.
void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
  HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
}
864
// Instance field sets share the common field-store latency model.
void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
  HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
}
868
// instanceof is modeled as a call-like sequence producing an integer result.
void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
  last_visited_internal_latency_ = kArmCallInternalLatency;
  last_visited_latency_ = kArmIntegerOpLatency;
}
873
// All invokes use a flat call latency regardless of dispatch kind.
void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
  last_visited_internal_latency_ = kArmCallInternalLatency;
  last_visited_latency_ = kArmCallLatency;
}
878
// String loads: fixed internal cost plus the final memory load.
void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
  last_visited_internal_latency_ = kArmLoadStringInternalLatency;
  last_visited_latency_ = kArmMemoryLoadLatency;
}
883
// Array allocation: length setup op plus an allocation call.
void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
  last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
  last_visited_latency_ = kArmCallLatency;
}
888
VisitNewInstance(HNewInstance * instruction)889 void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) {
890 if (instruction->IsStringAlloc()) {
891 last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency;
892 } else {
893 last_visited_internal_latency_ = kArmCallInternalLatency;
894 }
895 last_visited_latency_ = kArmCallLatency;
896 }
897
VisitRem(HRem * instruction)898 void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) {
899 DataType::Type type = instruction->GetResultType();
900 switch (type) {
901 case DataType::Type::kInt32: {
902 HInstruction* rhs = instruction->GetRight();
903 if (rhs->IsConstant()) {
904 int32_t imm = Int32ConstantFrom(rhs->AsConstant());
905 HandleDivRemConstantIntegralLatencies(imm);
906 } else {
907 last_visited_internal_latency_ = kArmDivIntegerLatency;
908 last_visited_latency_ = kArmMulIntegerLatency;
909 }
910 break;
911 }
912 default:
913 last_visited_internal_latency_ = kArmCallInternalLatency;
914 last_visited_latency_ = kArmCallLatency;
915 break;
916 }
917 }
918
// Common latency model for field loads (instance, predicated-instance and
// static). Volatile wide loads without atomic LDRD support need extra ops,
// and every volatile load is followed by a memory barrier.
void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction,
                                                          const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldGet() ||
         instruction->IsStaticFieldGet() ||
         instruction->IsPredicatedInstanceFieldGet());
  DCHECK(codegen_ != nullptr);
  bool is_volatile = field_info.IsVolatile();
  DataType::Type field_type = field_info.GetFieldType();
  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();

  switch (field_type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      last_visited_latency_ = kArmMemoryLoadLatency;
      break;

    case DataType::Type::kReference:
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // Baker read barrier adds a load + an op before the final load.
        last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;

    case DataType::Type::kInt64:
      if (is_volatile && !atomic_ldrd_strd) {
        // No single-copy-atomic LDRD: extra load + op sequence.
        last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;

    case DataType::Type::kFloat32:
      last_visited_latency_ = kArmMemoryLoadLatency;
      break;

    case DataType::Type::kFloat64:
      if (is_volatile && !atomic_ldrd_strd) {
        // Volatile double without atomic LDRD: load via core registers,
        // then move into the FP register.
        last_visited_internal_latency_ =
            kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency;
        last_visited_latency_ = kArmIntegerOpLatency;
      } else {
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;

    default:
      last_visited_latency_ = kArmMemoryLoadLatency;
      break;
  }

  if (is_volatile) {
    // Volatile loads are followed by a memory barrier.
    last_visited_internal_latency_ += kArmMemoryBarrierLatency;
  }
}
980
// Estimates the scheduling latency of an instance/static field write. The
// cost depends on the field type, volatility, heap-reference poisoning, and
// whether the target supports atomic LDRD/STRD for 64-bit accesses.
void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction,
                                                          const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
  DCHECK(codegen_ != nullptr);
  bool is_volatile = field_info.IsVolatile();
  DataType::Type field_type = field_info.GetFieldType();
  bool needs_write_barrier =
      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();

  switch (field_type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (is_volatile) {
        // Volatile sub-word store: barrier + store, then a trailing barrier.
        last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency;
        last_visited_latency_ = kArmMemoryBarrierLatency;
      } else {
        last_visited_latency_ = kArmMemoryStoreLatency;
      }
      break;

    case DataType::Type::kInt32:
    case DataType::Type::kReference:
      if (kPoisonHeapReferences && needs_write_barrier) {
        // Heap-reference poisoning adds extra integer ops before the store.
        // Note the '+=': this accumulates on top of any latency already
        // recorded for this instruction.
        last_visited_internal_latency_ += kArmIntegerOpLatency * 2;
      }
      // NOTE(review): unlike the sub-word case above, the volatile path is not
      // modeled here — confirm whether barrier latencies should be added.
      last_visited_latency_ = kArmMemoryStoreLatency;
      break;

    case DataType::Type::kInt64:
      if (is_volatile && !atomic_ldrd_strd) {
        // Without single-copy-atomic STRD the 64-bit volatile store is
        // modeled as an exclusive load/store sequence.
        last_visited_internal_latency_ =
            kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
        last_visited_latency_ = kArmIntegerOpLatency;
      } else {
        last_visited_latency_ = kArmMemoryStoreLatency;
      }
      break;

    case DataType::Type::kFloat32:
      last_visited_latency_ = kArmMemoryStoreLatency;
      break;

    case DataType::Type::kFloat64:
      if (is_volatile && !atomic_ldrd_strd) {
        // Atomic 64-bit FP store: extra integer ops to move the value out of
        // the double register, plus the exclusive load/store sequence.
        last_visited_internal_latency_ = kArmIntegerOpLatency +
            kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
        last_visited_latency_ = kArmIntegerOpLatency;
      } else {
        last_visited_latency_ = kArmMemoryStoreLatency;
      }
      break;

    default:
      last_visited_latency_ = kArmMemoryStoreLatency;
      break;
  }
}
1042
VisitStaticFieldGet(HStaticFieldGet * instruction)1043 void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
1044 HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
1045 }
1046
VisitStaticFieldSet(HStaticFieldSet * instruction)1047 void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
1048 HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
1049 }
1050
VisitSuspendCheck(HSuspendCheck * instruction)1051 void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
1052 HBasicBlock* block = instruction->GetBlock();
1053 DCHECK((block->GetLoopInformation() != nullptr) ||
1054 (block->IsEntryBlock() && instruction->GetNext()->IsGoto()));
1055 // Users do not use any data results.
1056 last_visited_latency_ = 0;
1057 }
1058
// Estimates the scheduling latency of a type conversion, keyed on the
// (result type, input type) pair. Conversions range from a single bitfield
// extract up to a runtime call for 64-bit <-> floating-point cases.
void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
  DataType::Type result_type = instr->GetResultType();
  DataType::Type input_type = instr->GetInputType();

  switch (result_type) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      last_visited_latency_ = kArmIntegerOpLatency;  // SBFX or UBFX
      break;

    case DataType::Type::kInt32:
      switch (input_type) {
        case DataType::Type::kInt64:
          last_visited_latency_ = kArmIntegerOpLatency;  // MOV (take the low word)
          break;
        case DataType::Type::kFloat32:
        case DataType::Type::kFloat64:
          // FP-to-integer conversion plus the move out of the FP register.
          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        default:
          last_visited_latency_ = kArmIntegerOpLatency;
          break;
      }
      break;

    case DataType::Type::kInt64:
      switch (input_type) {
        case DataType::Type::kBool:
        case DataType::Type::kUint8:
        case DataType::Type::kInt8:
        case DataType::Type::kUint16:
        case DataType::Type::kInt16:
        case DataType::Type::kInt32:
          // MOV and extension
          last_visited_internal_latency_ = kArmIntegerOpLatency;
          last_visited_latency_ = kArmIntegerOpLatency;
          break;
        case DataType::Type::kFloat32:
        case DataType::Type::kFloat64:
          // invokes runtime
          // NOTE(review): last_visited_latency_ is not set here, so it keeps
          // its per-instruction default — confirm this is intentional.
          last_visited_internal_latency_ = kArmCallInternalLatency;
          break;
        default:
          last_visited_internal_latency_ = kArmIntegerOpLatency;
          last_visited_latency_ = kArmIntegerOpLatency;
          break;
      }
      break;

    case DataType::Type::kFloat32:
      switch (input_type) {
        case DataType::Type::kBool:
        case DataType::Type::kUint8:
        case DataType::Type::kInt8:
        case DataType::Type::kUint16:
        case DataType::Type::kInt16:
        case DataType::Type::kInt32:
          // Integer-to-FP conversion plus the final FP op.
          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        case DataType::Type::kInt64:
          // invokes runtime
          last_visited_internal_latency_ = kArmCallInternalLatency;
          break;
        case DataType::Type::kFloat64:
          last_visited_latency_ = kArmFloatingPointOpLatency;  // double-to-float
          break;
        default:
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
      }
      break;

    case DataType::Type::kFloat64:
      switch (input_type) {
        case DataType::Type::kBool:
        case DataType::Type::kUint8:
        case DataType::Type::kInt8:
        case DataType::Type::kUint16:
        case DataType::Type::kInt16:
        case DataType::Type::kInt32:
          // Integer-to-FP conversion plus the final FP op.
          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        case DataType::Type::kInt64:
          // long-to-double is done inline; modeled as a short FP sequence.
          last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        case DataType::Type::kFloat32:
          last_visited_latency_ = kArmFloatingPointOpLatency;  // float-to-double
          break;
        default:
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
      }
      break;

    default:
      last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
      break;
  }
}
1164
1165 } // namespace arm
1166 } // namespace art
1167