1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "scheduler_arm.h"
18 
19 #include "arch/arm/instruction_set_features_arm.h"
20 #include "code_generator_utils.h"
21 #include "common_arm.h"
22 #include "heap_poisoning.h"
23 #include "mirror/array-inl.h"
24 #include "mirror/string.h"
25 
26 namespace art {
27 namespace arm {
28 
29 using helpers::Int32ConstantFrom;
30 using helpers::Uint64ConstantFrom;
31 
HandleBinaryOperationLantencies(HBinaryOperation * instr)32 void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) {
33   switch (instr->GetResultType()) {
34     case DataType::Type::kInt64:
35       // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs,
36       // so a bubble (kArmNopLatency) is added to represent the internal carry flag
37       // dependency inside these pairs.
38       last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency;
39       last_visited_latency_ = kArmIntegerOpLatency;
40       break;
41     case DataType::Type::kFloat32:
42     case DataType::Type::kFloat64:
43       last_visited_latency_ = kArmFloatingPointOpLatency;
44       break;
45     default:
46       last_visited_latency_ = kArmIntegerOpLatency;
47       break;
48   }
49 }
50 
VisitAdd(HAdd * instr)51 void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) {
52   HandleBinaryOperationLantencies(instr);
53 }
54 
VisitSub(HSub * instr)55 void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) {
56   HandleBinaryOperationLantencies(instr);
57 }
58 
VisitMul(HMul * instr)59 void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) {
60   switch (instr->GetResultType()) {
61     case DataType::Type::kInt64:
62       last_visited_internal_latency_ = 3 * kArmMulIntegerLatency;
63       last_visited_latency_ = kArmIntegerOpLatency;
64       break;
65     case DataType::Type::kFloat32:
66     case DataType::Type::kFloat64:
67       last_visited_latency_ = kArmMulFloatingPointLatency;
68       break;
69     default:
70       last_visited_latency_ = kArmMulIntegerLatency;
71       break;
72   }
73 }
74 
HandleBitwiseOperationLantencies(HBinaryOperation * instr)75 void SchedulingLatencyVisitorARM::HandleBitwiseOperationLantencies(HBinaryOperation* instr) {
76   switch (instr->GetResultType()) {
77     case DataType::Type::kInt64:
78       last_visited_internal_latency_ = kArmIntegerOpLatency;
79       last_visited_latency_ = kArmIntegerOpLatency;
80       break;
81     case DataType::Type::kFloat32:
82     case DataType::Type::kFloat64:
83       last_visited_latency_ = kArmFloatingPointOpLatency;
84       break;
85     default:
86       last_visited_latency_ = kArmIntegerOpLatency;
87       break;
88   }
89 }
90 
VisitAnd(HAnd * instr)91 void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) {
92   HandleBitwiseOperationLantencies(instr);
93 }
94 
VisitOr(HOr * instr)95 void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) {
96   HandleBitwiseOperationLantencies(instr);
97 }
98 
VisitXor(HXor * instr)99 void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) {
100   HandleBitwiseOperationLantencies(instr);
101 }
102 
VisitRor(HRor * instr)103 void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) {
104   switch (instr->GetResultType()) {
105     case DataType::Type::kInt32:
106       last_visited_latency_ = kArmIntegerOpLatency;
107       break;
108     case DataType::Type::kInt64: {
109       // HandleLongRotate
110       HInstruction* rhs = instr->GetRight();
111       if (rhs->IsConstant()) {
112         uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
113         if (rot != 0u) {
114           last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
115           last_visited_latency_ = kArmIntegerOpLatency;
116         } else {
117           last_visited_internal_latency_ = kArmIntegerOpLatency;
118           last_visited_latency_ = kArmIntegerOpLatency;
119         }
120       } else {
121         last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency;
122         last_visited_latency_ = kArmBranchLatency;
123       }
124       break;
125     }
126     default:
127       LOG(FATAL) << "Unexpected operation type " << instr->GetResultType();
128       UNREACHABLE();
129   }
130 }
131 
HandleShiftLatencies(HBinaryOperation * instr)132 void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) {
133   DataType::Type type = instr->GetResultType();
134   HInstruction* rhs = instr->GetRight();
135   switch (type) {
136     case DataType::Type::kInt32:
137       if (!rhs->IsConstant()) {
138         last_visited_internal_latency_ = kArmIntegerOpLatency;
139       }
140       last_visited_latency_ = kArmIntegerOpLatency;
141       break;
142     case DataType::Type::kInt64:
143       if (!rhs->IsConstant()) {
144         last_visited_internal_latency_ = 8 * kArmIntegerOpLatency;
145       } else {
146         uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
147         if (shift_value == 1 || shift_value >= 32) {
148           last_visited_internal_latency_ = kArmIntegerOpLatency;
149         } else {
150           last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
151         }
152       }
153       last_visited_latency_ = kArmIntegerOpLatency;
154       break;
155     default:
156       LOG(FATAL) << "Unexpected operation type " << type;
157       UNREACHABLE();
158   }
159 }
160 
VisitShl(HShl * instr)161 void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) {
162   HandleShiftLatencies(instr);
163 }
164 
VisitShr(HShr * instr)165 void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) {
166   HandleShiftLatencies(instr);
167 }
168 
VisitUShr(HUShr * instr)169 void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) {
170   HandleShiftLatencies(instr);
171 }
172 
HandleGenerateConditionWithZero(IfCondition condition)173 void SchedulingLatencyVisitorARM::HandleGenerateConditionWithZero(IfCondition condition) {
174   switch (condition) {
175     case kCondEQ:
176     case kCondBE:
177     case kCondNE:
178     case kCondA:
179       last_visited_internal_latency_ += kArmIntegerOpLatency;
180       last_visited_latency_ = kArmIntegerOpLatency;
181       break;
182     case kCondGE:
183       // Mvn
184       last_visited_internal_latency_ += kArmIntegerOpLatency;
185       FALLTHROUGH_INTENDED;
186     case kCondLT:
187       // Lsr
188       last_visited_latency_ = kArmIntegerOpLatency;
189       break;
190     case kCondAE:
191       // Trivially true.
192       // Mov
193       last_visited_latency_ = kArmIntegerOpLatency;
194       break;
195     case kCondB:
196       // Trivially false.
197       // Mov
198       last_visited_latency_ = kArmIntegerOpLatency;
199       break;
200     default:
201       LOG(FATAL) << "Unexpected condition " << condition;
202       UNREACHABLE();
203   }
204 }
205 
HandleGenerateLongTestConstant(HCondition * condition)206 void SchedulingLatencyVisitorARM::HandleGenerateLongTestConstant(HCondition* condition) {
207   DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
208 
209   IfCondition cond = condition->GetCondition();
210 
211   HInstruction* right = condition->InputAt(1);
212 
213   int64_t value = Uint64ConstantFrom(right);
214 
215   // Comparisons against 0 are common enough, so codegen has special handling for them.
216   if (value == 0) {
217     switch (cond) {
218       case kCondNE:
219       case kCondA:
220       case kCondEQ:
221       case kCondBE:
222         // Orrs
223         last_visited_internal_latency_ += kArmIntegerOpLatency;
224         return;
225       case kCondLT:
226       case kCondGE:
227         // Cmp
228         last_visited_internal_latency_ += kArmIntegerOpLatency;
229         return;
230       case kCondB:
231       case kCondAE:
232         // Cmp
233         last_visited_internal_latency_ += kArmIntegerOpLatency;
234         return;
235       default:
236         break;
237     }
238   }
239 
240   switch (cond) {
241     case kCondEQ:
242     case kCondNE:
243     case kCondB:
244     case kCondBE:
245     case kCondA:
246     case kCondAE: {
247       // Cmp, IT, Cmp
248       last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
249       break;
250     }
251     case kCondLE:
252     case kCondGT:
253       // Trivially true or false.
254       if (value == std::numeric_limits<int64_t>::max()) {
255         // Cmp
256         last_visited_internal_latency_ += kArmIntegerOpLatency;
257         break;
258       }
259       FALLTHROUGH_INTENDED;
260     case kCondGE:
261     case kCondLT: {
262       // Cmp, Sbcs
263       last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
264       break;
265     }
266     default:
267       LOG(FATAL) << "Unreachable";
268       UNREACHABLE();
269   }
270 }
271 
HandleGenerateLongTest(HCondition * condition)272 void SchedulingLatencyVisitorARM::HandleGenerateLongTest(HCondition* condition) {
273   DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
274 
275   IfCondition cond = condition->GetCondition();
276 
277   switch (cond) {
278     case kCondEQ:
279     case kCondNE:
280     case kCondB:
281     case kCondBE:
282     case kCondA:
283     case kCondAE: {
284       // Cmp, IT, Cmp
285       last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
286       break;
287     }
288     case kCondLE:
289     case kCondGT:
290     case kCondGE:
291     case kCondLT: {
292       // Cmp, Sbcs
293       last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
294       break;
295     }
296     default:
297       LOG(FATAL) << "Unreachable";
298       UNREACHABLE();
299   }
300 }
301 
302 // The GenerateTest series of function all counted as internal latency.
HandleGenerateTest(HCondition * condition)303 void SchedulingLatencyVisitorARM::HandleGenerateTest(HCondition* condition) {
304   const DataType::Type type = condition->GetLeft()->GetType();
305 
306   if (type == DataType::Type::kInt64) {
307     condition->InputAt(1)->IsConstant()
308         ? HandleGenerateLongTestConstant(condition)
309         : HandleGenerateLongTest(condition);
310   } else if (DataType::IsFloatingPointType(type)) {
311     // GenerateVcmp + Vmrs
312     last_visited_internal_latency_ += 2 * kArmFloatingPointOpLatency;
313   } else {
314     // Cmp
315     last_visited_internal_latency_ += kArmIntegerOpLatency;
316   }
317 }
318 
CanGenerateTest(HCondition * condition)319 bool SchedulingLatencyVisitorARM::CanGenerateTest(HCondition* condition) {
320   if (condition->GetLeft()->GetType() == DataType::Type::kInt64) {
321     HInstruction* right = condition->InputAt(1);
322 
323     if (right->IsConstant()) {
324       IfCondition c = condition->GetCondition();
325       const uint64_t value = Uint64ConstantFrom(right);
326 
327       if (c < kCondLT || c > kCondGE) {
328         if (value != 0) {
329           return false;
330         }
331       } else if (c == kCondLE || c == kCondGT) {
332         if (value < std::numeric_limits<int64_t>::max() &&
333             !codegen_->GetAssembler()->ShifterOperandCanHold(
334                 SBC, High32Bits(value + 1), vixl32::FlagsUpdate::SetFlags)) {
335           return false;
336         }
337       } else if (!codegen_->GetAssembler()->ShifterOperandCanHold(
338                       SBC, High32Bits(value), vixl32::FlagsUpdate::SetFlags)) {
339         return false;
340       }
341     }
342   }
343 
344   return true;
345 }
346 
HandleGenerateConditionGeneric(HCondition * cond)347 void SchedulingLatencyVisitorARM::HandleGenerateConditionGeneric(HCondition* cond) {
348   HandleGenerateTest(cond);
349 
350   // Unlike codegen pass, we cannot check 'out' register IsLow() here,
351   // because scheduling is before liveness(location builder) and register allocator,
352   // so we can only choose to follow one path of codegen by assuming otu.IsLow() is true.
353   last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
354   last_visited_latency_ = kArmIntegerOpLatency;
355 }
356 
HandleGenerateEqualLong(HCondition * cond)357 void SchedulingLatencyVisitorARM::HandleGenerateEqualLong(HCondition* cond) {
358   DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
359 
360   IfCondition condition = cond->GetCondition();
361 
362   last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
363 
364   if (condition == kCondNE) {
365     // Orrs, IT, Mov
366     last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
367   } else {
368     last_visited_internal_latency_ += kArmIntegerOpLatency;
369     HandleGenerateConditionWithZero(condition);
370   }
371 }
372 
HandleGenerateLongComparesAndJumps()373 void SchedulingLatencyVisitorARM::HandleGenerateLongComparesAndJumps() {
374   last_visited_internal_latency_ += 4 * kArmIntegerOpLatency;
375   last_visited_internal_latency_ += kArmBranchLatency;
376 }
377 
HandleGenerateConditionLong(HCondition * cond)378 void SchedulingLatencyVisitorARM::HandleGenerateConditionLong(HCondition* cond) {
379   DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
380 
381   IfCondition condition = cond->GetCondition();
382   HInstruction* right = cond->InputAt(1);
383 
384   if (right->IsConstant()) {
385     // Comparisons against 0 are common enough, so codegen has special handling for them.
386     if (Uint64ConstantFrom(right) == 0) {
387       switch (condition) {
388         case kCondNE:
389         case kCondA:
390         case kCondEQ:
391         case kCondBE:
392           // Orr
393           last_visited_internal_latency_ += kArmIntegerOpLatency;
394           HandleGenerateConditionWithZero(condition);
395           return;
396         case kCondLT:
397         case kCondGE:
398           FALLTHROUGH_INTENDED;
399         case kCondAE:
400         case kCondB:
401           HandleGenerateConditionWithZero(condition);
402           return;
403         case kCondLE:
404         case kCondGT:
405         default:
406           break;
407       }
408     }
409   }
410 
411   if ((condition == kCondEQ || condition == kCondNE) &&
412       !CanGenerateTest(cond)) {
413     HandleGenerateEqualLong(cond);
414     return;
415   }
416 
417   if (CanGenerateTest(cond)) {
418     HandleGenerateConditionGeneric(cond);
419     return;
420   }
421 
422   HandleGenerateLongComparesAndJumps();
423 
424   last_visited_internal_latency_ += kArmIntegerOpLatency;
425   last_visited_latency_ = kArmBranchLatency;;
426 }
427 
HandleGenerateConditionIntegralOrNonPrimitive(HCondition * cond)428 void SchedulingLatencyVisitorARM::HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond) {
429   const DataType::Type type = cond->GetLeft()->GetType();
430 
431   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
432 
433   if (type == DataType::Type::kInt64) {
434     HandleGenerateConditionLong(cond);
435     return;
436   }
437 
438   IfCondition condition = cond->GetCondition();
439   HInstruction* right = cond->InputAt(1);
440   int64_t value;
441 
442   if (right->IsConstant()) {
443     value = Uint64ConstantFrom(right);
444 
445     // Comparisons against 0 are common enough, so codegen has special handling for them.
446     if (value == 0) {
447       switch (condition) {
448         case kCondNE:
449         case kCondA:
450         case kCondEQ:
451         case kCondBE:
452         case kCondLT:
453         case kCondGE:
454         case kCondAE:
455         case kCondB:
456           HandleGenerateConditionWithZero(condition);
457           return;
458         case kCondLE:
459         case kCondGT:
460         default:
461           break;
462       }
463     }
464   }
465 
466   if (condition == kCondEQ || condition == kCondNE) {
467     if (condition == kCondNE) {
468       // CMP, IT, MOV.ne
469       last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
470       last_visited_latency_ = kArmIntegerOpLatency;
471     } else {
472       last_visited_internal_latency_ += kArmIntegerOpLatency;
473       HandleGenerateConditionWithZero(condition);
474     }
475     return;
476   }
477 
478   HandleGenerateConditionGeneric(cond);
479 }
480 
HandleCondition(HCondition * cond)481 void SchedulingLatencyVisitorARM::HandleCondition(HCondition* cond) {
482   if (cond->IsEmittedAtUseSite()) {
483     last_visited_latency_ = 0;
484     return;
485   }
486 
487   const DataType::Type type = cond->GetLeft()->GetType();
488 
489   if (DataType::IsFloatingPointType(type)) {
490     HandleGenerateConditionGeneric(cond);
491     return;
492   }
493 
494   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
495 
496   const IfCondition condition = cond->GetCondition();
497 
498   if (type == DataType::Type::kBool &&
499       cond->GetRight()->GetType() == DataType::Type::kBool &&
500       (condition == kCondEQ || condition == kCondNE)) {
501     if (condition == kCondEQ) {
502       last_visited_internal_latency_ = kArmIntegerOpLatency;
503     }
504     last_visited_latency_ = kArmIntegerOpLatency;
505     return;
506   }
507 
508   HandleGenerateConditionIntegralOrNonPrimitive(cond);
509 }
510 
VisitCondition(HCondition * instr)511 void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
512   HandleCondition(instr);
513 }
514 
VisitCompare(HCompare * instr)515 void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
516   DataType::Type type = instr->InputAt(0)->GetType();
517   switch (type) {
518     case DataType::Type::kBool:
519     case DataType::Type::kUint8:
520     case DataType::Type::kInt8:
521     case DataType::Type::kUint16:
522     case DataType::Type::kInt16:
523     case DataType::Type::kInt32:
524       last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
525       break;
526     case DataType::Type::kInt64:
527       last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency;
528       break;
529     case DataType::Type::kFloat32:
530     case DataType::Type::kFloat64:
531       last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency;
532       break;
533     default:
534       last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
535       break;
536   }
537   last_visited_latency_ = kArmIntegerOpLatency;
538 }
539 
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instruction)540 void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
541   if (instruction->GetResultType() == DataType::Type::kInt32) {
542     last_visited_latency_ = kArmIntegerOpLatency;
543   } else {
544     last_visited_internal_latency_ = kArmIntegerOpLatency;
545     last_visited_latency_ = kArmIntegerOpLatency;
546   }
547 }
548 
HandleGenerateDataProcInstruction(bool internal_latency)549 void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) {
550   if (internal_latency) {
551     last_visited_internal_latency_ += kArmIntegerOpLatency;
552   } else {
553     last_visited_latency_ = kArmDataProcWithShifterOpLatency;
554   }
555 }
556 
HandleGenerateDataProc(HDataProcWithShifterOp * instruction)557 void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) {
558   const HInstruction::InstructionKind kind = instruction->GetInstrKind();
559   if (kind == HInstruction::kAdd) {
560     last_visited_internal_latency_ = kArmIntegerOpLatency;
561     last_visited_latency_ = kArmIntegerOpLatency;
562   } else if (kind == HInstruction::kSub) {
563     last_visited_internal_latency_ = kArmIntegerOpLatency;
564     last_visited_latency_ = kArmIntegerOpLatency;
565   } else {
566     HandleGenerateDataProcInstruction(/* internal_latency */ true);
567     HandleGenerateDataProcInstruction();
568   }
569 }
570 
HandleGenerateLongDataProc(HDataProcWithShifterOp * instruction)571 void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) {
572   DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
573   DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
574 
575   const uint32_t shift_value = instruction->GetShiftAmount();
576   const HInstruction::InstructionKind kind = instruction->GetInstrKind();
577 
578   if (shift_value >= 32) {
579     // Different shift types actually generate similar code here,
580     // no need to differentiate shift types like the codegen pass does,
581     // which also avoids handling shift types from different ARM backends.
582     HandleGenerateDataProc(instruction);
583   } else {
584     DCHECK_GT(shift_value, 1U);
585     DCHECK_LT(shift_value, 32U);
586 
587     if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
588       HandleGenerateDataProcInstruction(/* internal_latency */ true);
589       HandleGenerateDataProcInstruction(/* internal_latency */ true);
590       HandleGenerateDataProcInstruction();
591     } else {
592       last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
593       HandleGenerateDataProc(instruction);
594     }
595   }
596 }
597 
VisitDataProcWithShifterOp(HDataProcWithShifterOp * instruction)598 void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) {
599   const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
600 
601   if (instruction->GetType() == DataType::Type::kInt32) {
602     HandleGenerateDataProcInstruction();
603   } else {
604     DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
605     if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
606       HandleGenerateDataProc(instruction);
607     } else {
608       HandleGenerateLongDataProc(instruction);
609     }
610   }
611 }
612 
VisitIntermediateAddress(HIntermediateAddress * ATTRIBUTE_UNUSED)613 void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) {
614   // Although the code generated is a simple `add` instruction, we found through empirical results
615   // that spacing it from its use in memory accesses was beneficial.
616   last_visited_internal_latency_ = kArmNopLatency;
617   last_visited_latency_ = kArmIntegerOpLatency;
618 }
619 
VisitIntermediateAddressIndex(HIntermediateAddressIndex * ATTRIBUTE_UNUSED)620 void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex(
621     HIntermediateAddressIndex* ATTRIBUTE_UNUSED) {
622   UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM";
623 }
624 
VisitMultiplyAccumulate(HMultiplyAccumulate * ATTRIBUTE_UNUSED)625 void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
626   last_visited_latency_ = kArmMulIntegerLatency;
627 }
628 
VisitArrayGet(HArrayGet * instruction)629 void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
630   DataType::Type type = instruction->GetType();
631   const bool maybe_compressed_char_at =
632       mirror::kUseStringCompression && instruction->IsStringCharAt();
633   HInstruction* array_instr = instruction->GetArray();
634   bool has_intermediate_address = array_instr->IsIntermediateAddress();
635   HInstruction* index = instruction->InputAt(1);
636 
637   switch (type) {
638     case DataType::Type::kBool:
639     case DataType::Type::kUint8:
640     case DataType::Type::kInt8:
641     case DataType::Type::kUint16:
642     case DataType::Type::kInt16:
643     case DataType::Type::kInt32: {
644       if (maybe_compressed_char_at) {
645         last_visited_internal_latency_ += kArmMemoryLoadLatency;
646       }
647       if (index->IsConstant()) {
648         if (maybe_compressed_char_at) {
649           last_visited_internal_latency_ +=
650               kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
651           last_visited_latency_ = kArmBranchLatency;
652         } else {
653           last_visited_latency_ += kArmMemoryLoadLatency;
654         }
655       } else {
656         if (has_intermediate_address) {
657         } else {
658           last_visited_internal_latency_ += kArmIntegerOpLatency;
659         }
660         if (maybe_compressed_char_at) {
661           last_visited_internal_latency_ +=
662               kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
663           last_visited_latency_ = kArmBranchLatency;
664         } else {
665           last_visited_latency_ += kArmMemoryLoadLatency;
666         }
667       }
668       break;
669     }
670 
671     case DataType::Type::kReference: {
672       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
673         last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
674       } else {
675         if (index->IsConstant()) {
676           last_visited_latency_ = kArmMemoryLoadLatency;
677         } else {
678           if (has_intermediate_address) {
679           } else {
680             last_visited_internal_latency_ += kArmIntegerOpLatency;
681           }
682           last_visited_internal_latency_ = kArmMemoryLoadLatency;
683         }
684       }
685       break;
686     }
687 
688     case DataType::Type::kInt64: {
689       if (index->IsConstant()) {
690         last_visited_latency_ = kArmMemoryLoadLatency;
691       } else {
692         last_visited_internal_latency_ += kArmIntegerOpLatency;
693         last_visited_latency_ = kArmMemoryLoadLatency;
694       }
695       break;
696     }
697 
698     case DataType::Type::kFloat32: {
699       if (index->IsConstant()) {
700         last_visited_latency_ = kArmMemoryLoadLatency;
701       } else {
702         last_visited_internal_latency_ += kArmIntegerOpLatency;
703         last_visited_latency_ = kArmMemoryLoadLatency;
704       }
705       break;
706     }
707 
708     case DataType::Type::kFloat64: {
709       if (index->IsConstant()) {
710         last_visited_latency_ = kArmMemoryLoadLatency;
711       } else {
712         last_visited_internal_latency_ += kArmIntegerOpLatency;
713         last_visited_latency_ = kArmMemoryLoadLatency;
714       }
715       break;
716     }
717 
718     default:
719       LOG(FATAL) << "Unreachable type " << type;
720       UNREACHABLE();
721   }
722 }
723 
VisitArrayLength(HArrayLength * instruction)724 void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) {
725   last_visited_latency_ = kArmMemoryLoadLatency;
726   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
727     last_visited_internal_latency_ = kArmMemoryLoadLatency;
728     last_visited_latency_ = kArmIntegerOpLatency;
729   }
730 }
731 
VisitArraySet(HArraySet * instruction)732 void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
733   HInstruction* index = instruction->InputAt(1);
734   DataType::Type value_type = instruction->GetComponentType();
735   HInstruction* array_instr = instruction->GetArray();
736   bool has_intermediate_address = array_instr->IsIntermediateAddress();
737 
738   switch (value_type) {
739     case DataType::Type::kBool:
740     case DataType::Type::kUint8:
741     case DataType::Type::kInt8:
742     case DataType::Type::kUint16:
743     case DataType::Type::kInt16:
744     case DataType::Type::kInt32: {
745       if (index->IsConstant()) {
746         last_visited_latency_ = kArmMemoryStoreLatency;
747       } else {
748         if (has_intermediate_address) {
749         } else {
750           last_visited_internal_latency_ = kArmIntegerOpLatency;
751         }
752         last_visited_latency_ = kArmMemoryStoreLatency;
753       }
754       break;
755     }
756 
757     case DataType::Type::kReference: {
758       if (instruction->InputAt(2)->IsNullConstant()) {
759         if (index->IsConstant()) {
760           last_visited_latency_ = kArmMemoryStoreLatency;
761         } else {
762           last_visited_internal_latency_ = kArmIntegerOpLatency;
763           last_visited_latency_ = kArmMemoryStoreLatency;
764         }
765       } else {
766         // Following the exact instructions of runtime type checks is too complicated,
767         // just giving it a simple slow latency.
768         last_visited_latency_ = kArmRuntimeTypeCheckLatency;
769       }
770       break;
771     }
772 
773     case DataType::Type::kInt64: {
774       if (index->IsConstant()) {
775         last_visited_latency_ = kArmMemoryLoadLatency;
776       } else {
777         last_visited_internal_latency_ = kArmIntegerOpLatency;
778         last_visited_latency_ = kArmMemoryLoadLatency;
779       }
780       break;
781     }
782 
783     case DataType::Type::kFloat32: {
784       if (index->IsConstant()) {
785         last_visited_latency_ = kArmMemoryLoadLatency;
786       } else {
787         last_visited_internal_latency_ = kArmIntegerOpLatency;
788         last_visited_latency_ = kArmMemoryLoadLatency;
789       }
790       break;
791     }
792 
793     case DataType::Type::kFloat64: {
794       if (index->IsConstant()) {
795         last_visited_latency_ = kArmMemoryLoadLatency;
796       } else {
797         last_visited_internal_latency_ = kArmIntegerOpLatency;
798         last_visited_latency_ = kArmMemoryLoadLatency;
799       }
800       break;
801     }
802 
803     default:
804       LOG(FATAL) << "Unreachable type " << value_type;
805       UNREACHABLE();
806   }
807 }
808 
VisitBoundsCheck(HBoundsCheck * ATTRIBUTE_UNUSED)809 void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
810   last_visited_internal_latency_ = kArmIntegerOpLatency;
811   // Users do not use any data results.
812   last_visited_latency_ = 0;
813 }
814 
HandleDivRemConstantIntegralLatencies(int32_t imm)815 void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) {
816   if (imm == 0) {
817     last_visited_internal_latency_ = 0;
818     last_visited_latency_ = 0;
819   } else if (imm == 1 || imm == -1) {
820     last_visited_latency_ = kArmIntegerOpLatency;
821   } else if (IsPowerOfTwo(AbsOrMin(imm))) {
822     last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
823     last_visited_latency_ = kArmIntegerOpLatency;
824   } else {
825     last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency;
826     last_visited_latency_ = kArmIntegerOpLatency;
827   }
828 }
829 
VisitDiv(HDiv * instruction)830 void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) {
831   DataType::Type type = instruction->GetResultType();
832   switch (type) {
833     case DataType::Type::kInt32: {
834       HInstruction* rhs = instruction->GetRight();
835       if (rhs->IsConstant()) {
836         int32_t imm = Int32ConstantFrom(rhs->AsConstant());
837         HandleDivRemConstantIntegralLatencies(imm);
838       } else {
839         last_visited_latency_ = kArmDivIntegerLatency;
840       }
841       break;
842     }
843     case DataType::Type::kFloat32:
844       last_visited_latency_ = kArmDivFloatLatency;
845       break;
846     case DataType::Type::kFloat64:
847       last_visited_latency_ = kArmDivDoubleLatency;
848       break;
849     default:
850       last_visited_internal_latency_ = kArmCallInternalLatency;
851       last_visited_latency_ = kArmCallLatency;
852       break;
853   }
854 }
855 
VisitInstanceFieldGet(HInstanceFieldGet * instruction)856 void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
857   HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
858 }
859 
VisitInstanceFieldSet(HInstanceFieldSet * instruction)860 void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
861   HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
862 }
863 
VisitInstanceOf(HInstanceOf * ATTRIBUTE_UNUSED)864 void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
865   last_visited_internal_latency_ = kArmCallInternalLatency;
866   last_visited_latency_ = kArmIntegerOpLatency;
867 }
868 
VisitInvoke(HInvoke * ATTRIBUTE_UNUSED)869 void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
870   last_visited_internal_latency_ = kArmCallInternalLatency;
871   last_visited_latency_ = kArmCallLatency;
872 }
873 
VisitLoadString(HLoadString * ATTRIBUTE_UNUSED)874 void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
875   last_visited_internal_latency_ = kArmLoadStringInternalLatency;
876   last_visited_latency_ = kArmMemoryLoadLatency;
877 }
878 
VisitNewArray(HNewArray * ATTRIBUTE_UNUSED)879 void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
880   last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
881   last_visited_latency_ = kArmCallLatency;
882 }
883 
VisitNewInstance(HNewInstance * instruction)884 void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) {
885   if (instruction->IsStringAlloc()) {
886     last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency;
887   } else {
888     last_visited_internal_latency_ = kArmCallInternalLatency;
889   }
890   last_visited_latency_ = kArmCallLatency;
891 }
892 
VisitRem(HRem * instruction)893 void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) {
894   DataType::Type type = instruction->GetResultType();
895   switch (type) {
896     case DataType::Type::kInt32: {
897       HInstruction* rhs = instruction->GetRight();
898       if (rhs->IsConstant()) {
899         int32_t imm = Int32ConstantFrom(rhs->AsConstant());
900         HandleDivRemConstantIntegralLatencies(imm);
901       } else {
902         last_visited_internal_latency_ = kArmDivIntegerLatency;
903         last_visited_latency_ = kArmMulIntegerLatency;
904       }
905       break;
906     }
907     default:
908       last_visited_internal_latency_ = kArmCallInternalLatency;
909       last_visited_latency_ = kArmCallLatency;
910       break;
911   }
912 }
913 
HandleFieldGetLatencies(HInstruction * instruction,const FieldInfo & field_info)914 void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction,
915                                                           const FieldInfo& field_info) {
916   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
917   DCHECK(codegen_ != nullptr);
918   bool is_volatile = field_info.IsVolatile();
919   DataType::Type field_type = field_info.GetFieldType();
920   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
921 
922   switch (field_type) {
923     case DataType::Type::kBool:
924     case DataType::Type::kUint8:
925     case DataType::Type::kInt8:
926     case DataType::Type::kUint16:
927     case DataType::Type::kInt16:
928     case DataType::Type::kInt32:
929       last_visited_latency_ = kArmMemoryLoadLatency;
930       break;
931 
932     case DataType::Type::kReference:
933       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
934         last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
935         last_visited_latency_ = kArmMemoryLoadLatency;
936       } else {
937         last_visited_latency_ = kArmMemoryLoadLatency;
938       }
939       break;
940 
941     case DataType::Type::kInt64:
942       if (is_volatile && !atomic_ldrd_strd) {
943         last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
944         last_visited_latency_ = kArmMemoryLoadLatency;
945       } else {
946         last_visited_latency_ = kArmMemoryLoadLatency;
947       }
948       break;
949 
950     case DataType::Type::kFloat32:
951       last_visited_latency_ = kArmMemoryLoadLatency;
952       break;
953 
954     case DataType::Type::kFloat64:
955       if (is_volatile && !atomic_ldrd_strd) {
956         last_visited_internal_latency_ =
957             kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency;
958         last_visited_latency_ = kArmIntegerOpLatency;
959       } else {
960         last_visited_latency_ = kArmMemoryLoadLatency;
961       }
962       break;
963 
964     default:
965       last_visited_latency_ = kArmMemoryLoadLatency;
966       break;
967   }
968 
969   if (is_volatile) {
970     last_visited_internal_latency_ += kArmMemoryBarrierLatency;
971   }
972 }
973 
HandleFieldSetLatencies(HInstruction * instruction,const FieldInfo & field_info)974 void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction,
975                                                           const FieldInfo& field_info) {
976   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
977   DCHECK(codegen_ != nullptr);
978   bool is_volatile = field_info.IsVolatile();
979   DataType::Type field_type = field_info.GetFieldType();
980   bool needs_write_barrier =
981       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
982   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
983 
984   switch (field_type) {
985     case DataType::Type::kBool:
986     case DataType::Type::kUint8:
987     case DataType::Type::kInt8:
988     case DataType::Type::kUint16:
989     case DataType::Type::kInt16:
990       if (is_volatile) {
991         last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency;
992         last_visited_latency_ = kArmMemoryBarrierLatency;
993       } else {
994         last_visited_latency_ = kArmMemoryStoreLatency;
995       }
996       break;
997 
998     case DataType::Type::kInt32:
999     case DataType::Type::kReference:
1000       if (kPoisonHeapReferences && needs_write_barrier) {
1001         last_visited_internal_latency_ += kArmIntegerOpLatency * 2;
1002       }
1003       last_visited_latency_ = kArmMemoryStoreLatency;
1004       break;
1005 
1006     case DataType::Type::kInt64:
1007       if (is_volatile && !atomic_ldrd_strd) {
1008         last_visited_internal_latency_ =
1009             kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
1010         last_visited_latency_ = kArmIntegerOpLatency;
1011       } else {
1012         last_visited_latency_ = kArmMemoryStoreLatency;
1013       }
1014       break;
1015 
1016     case DataType::Type::kFloat32:
1017       last_visited_latency_ = kArmMemoryStoreLatency;
1018       break;
1019 
1020     case DataType::Type::kFloat64:
1021       if (is_volatile && !atomic_ldrd_strd) {
1022         last_visited_internal_latency_ = kArmIntegerOpLatency +
1023             kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
1024         last_visited_latency_ = kArmIntegerOpLatency;
1025       } else {
1026         last_visited_latency_ = kArmMemoryStoreLatency;
1027       }
1028       break;
1029 
1030     default:
1031       last_visited_latency_ = kArmMemoryStoreLatency;
1032       break;
1033   }
1034 }
1035 
VisitStaticFieldGet(HStaticFieldGet * instruction)1036 void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
1037   HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
1038 }
1039 
VisitStaticFieldSet(HStaticFieldSet * instruction)1040 void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
1041   HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
1042 }
1043 
VisitSuspendCheck(HSuspendCheck * instruction)1044 void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
1045   HBasicBlock* block = instruction->GetBlock();
1046   DCHECK((block->GetLoopInformation() != nullptr) ||
1047          (block->IsEntryBlock() && instruction->GetNext()->IsGoto()));
1048   // Users do not use any data results.
1049   last_visited_latency_ = 0;
1050 }
1051 
VisitTypeConversion(HTypeConversion * instr)1052 void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
1053   DataType::Type result_type = instr->GetResultType();
1054   DataType::Type input_type = instr->GetInputType();
1055 
1056   switch (result_type) {
1057     case DataType::Type::kUint8:
1058     case DataType::Type::kInt8:
1059     case DataType::Type::kUint16:
1060     case DataType::Type::kInt16:
1061       last_visited_latency_ = kArmIntegerOpLatency;  // SBFX or UBFX
1062       break;
1063 
1064     case DataType::Type::kInt32:
1065       switch (input_type) {
1066         case DataType::Type::kInt64:
1067           last_visited_latency_ = kArmIntegerOpLatency;  // MOV
1068           break;
1069         case DataType::Type::kFloat32:
1070         case DataType::Type::kFloat64:
1071           last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
1072           last_visited_latency_ = kArmFloatingPointOpLatency;
1073           break;
1074         default:
1075           last_visited_latency_ = kArmIntegerOpLatency;
1076           break;
1077       }
1078       break;
1079 
1080     case DataType::Type::kInt64:
1081       switch (input_type) {
1082         case DataType::Type::kBool:
1083         case DataType::Type::kUint8:
1084         case DataType::Type::kInt8:
1085         case DataType::Type::kUint16:
1086         case DataType::Type::kInt16:
1087         case DataType::Type::kInt32:
1088           // MOV and extension
1089           last_visited_internal_latency_ = kArmIntegerOpLatency;
1090           last_visited_latency_ = kArmIntegerOpLatency;
1091           break;
1092         case DataType::Type::kFloat32:
1093         case DataType::Type::kFloat64:
1094           // invokes runtime
1095           last_visited_internal_latency_ = kArmCallInternalLatency;
1096           break;
1097         default:
1098           last_visited_internal_latency_ = kArmIntegerOpLatency;
1099           last_visited_latency_ = kArmIntegerOpLatency;
1100           break;
1101       }
1102       break;
1103 
1104     case DataType::Type::kFloat32:
1105       switch (input_type) {
1106         case DataType::Type::kBool:
1107         case DataType::Type::kUint8:
1108         case DataType::Type::kInt8:
1109         case DataType::Type::kUint16:
1110         case DataType::Type::kInt16:
1111         case DataType::Type::kInt32:
1112           last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
1113           last_visited_latency_ = kArmFloatingPointOpLatency;
1114           break;
1115         case DataType::Type::kInt64:
1116           // invokes runtime
1117           last_visited_internal_latency_ = kArmCallInternalLatency;
1118           break;
1119         case DataType::Type::kFloat64:
1120           last_visited_latency_ = kArmFloatingPointOpLatency;
1121           break;
1122         default:
1123           last_visited_latency_ = kArmFloatingPointOpLatency;
1124           break;
1125       }
1126       break;
1127 
1128     case DataType::Type::kFloat64:
1129       switch (input_type) {
1130         case DataType::Type::kBool:
1131         case DataType::Type::kUint8:
1132         case DataType::Type::kInt8:
1133         case DataType::Type::kUint16:
1134         case DataType::Type::kInt16:
1135         case DataType::Type::kInt32:
1136           last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
1137           last_visited_latency_ = kArmFloatingPointOpLatency;
1138           break;
1139         case DataType::Type::kInt64:
1140           last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency;
1141           last_visited_latency_ = kArmFloatingPointOpLatency;
1142           break;
1143         case DataType::Type::kFloat32:
1144           last_visited_latency_ = kArmFloatingPointOpLatency;
1145           break;
1146         default:
1147           last_visited_latency_ = kArmFloatingPointOpLatency;
1148           break;
1149       }
1150       break;
1151 
1152     default:
1153       last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
1154       break;
1155   }
1156 }
1157 
1158 }  // namespace arm
1159 }  // namespace art
1160