1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "scheduler_arm.h"
18 
19 #include "arch/arm/instruction_set_features_arm.h"
20 #include "code_generator_utils.h"
21 #include "common_arm.h"
22 #include "heap_poisoning.h"
23 #include "mirror/array-inl.h"
24 #include "mirror/string.h"
25 
26 namespace art {
27 namespace arm {
28 
29 using helpers::Int32ConstantFrom;
30 using helpers::Uint64ConstantFrom;
31 
HandleBinaryOperationLantencies(HBinaryOperation * instr)32 void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) {
33   switch (instr->GetResultType()) {
34     case DataType::Type::kInt64:
35       // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs,
36       // so a bubble (kArmNopLatency) is added to represent the internal carry flag
37       // dependency inside these pairs.
38       last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency;
39       last_visited_latency_ = kArmIntegerOpLatency;
40       break;
41     case DataType::Type::kFloat32:
42     case DataType::Type::kFloat64:
43       last_visited_latency_ = kArmFloatingPointOpLatency;
44       break;
45     default:
46       last_visited_latency_ = kArmIntegerOpLatency;
47       break;
48   }
49 }
50 
// HAdd shares the add/sub latency model.
void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) {
  HandleBinaryOperationLantencies(instr);
}
54 
// HSub shares the add/sub latency model.
void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) {
  HandleBinaryOperationLantencies(instr);
}
58 
VisitMul(HMul * instr)59 void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) {
60   switch (instr->GetResultType()) {
61     case DataType::Type::kInt64:
62       last_visited_internal_latency_ = 3 * kArmMulIntegerLatency;
63       last_visited_latency_ = kArmIntegerOpLatency;
64       break;
65     case DataType::Type::kFloat32:
66     case DataType::Type::kFloat64:
67       last_visited_latency_ = kArmMulFloatingPointLatency;
68       break;
69     default:
70       last_visited_latency_ = kArmMulIntegerLatency;
71       break;
72   }
73 }
74 
HandleBitwiseOperationLantencies(HBinaryOperation * instr)75 void SchedulingLatencyVisitorARM::HandleBitwiseOperationLantencies(HBinaryOperation* instr) {
76   switch (instr->GetResultType()) {
77     case DataType::Type::kInt64:
78       last_visited_internal_latency_ = kArmIntegerOpLatency;
79       last_visited_latency_ = kArmIntegerOpLatency;
80       break;
81     case DataType::Type::kFloat32:
82     case DataType::Type::kFloat64:
83       last_visited_latency_ = kArmFloatingPointOpLatency;
84       break;
85     default:
86       last_visited_latency_ = kArmIntegerOpLatency;
87       break;
88   }
89 }
90 
// HAnd shares the bitwise latency model.
void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) {
  HandleBitwiseOperationLantencies(instr);
}
94 
// HOr shares the bitwise latency model.
void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) {
  HandleBitwiseOperationLantencies(instr);
}
98 
// HXor shares the bitwise latency model.
void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) {
  HandleBitwiseOperationLantencies(instr);
}
102 
// Records rotate latencies. A 32-bit rotate is a single integer op; a long
// rotate depends on whether the distance is a constant (and whether it is
// zero after masking) or a register value, which takes a branchy sequence.
void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) {
  switch (instr->GetResultType()) {
    case DataType::Type::kInt32:
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case DataType::Type::kInt64: {
      // HandleLongRotate
      HInstruction* rhs = instr->GetRight();
      if (rhs->IsConstant()) {
        // Only the low bits of the constant matter; distances are masked.
        uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
        if (rot != 0u) {
          last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
          last_visited_latency_ = kArmIntegerOpLatency;
        } else {
          last_visited_internal_latency_ = kArmIntegerOpLatency;
          last_visited_latency_ = kArmIntegerOpLatency;
        }
      } else {
        // Register-distance long rotate: long instruction sequence plus branches.
        last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency;
        last_visited_latency_ = kArmBranchLatency;
      }
      break;
    }
    default:
      LOG(FATAL) << "Unexpected operation type " << instr->GetResultType();
      UNREACHABLE();
  }
}
131 
// Records shift latencies for Shl/Shr/UShr. Long shifts by a constant are
// cheaper when the (masked) distance is 1 or >= 32, since fewer instructions
// are needed to combine the two register halves.
void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) {
  DataType::Type type = instr->GetResultType();
  HInstruction* rhs = instr->GetRight();
  switch (type) {
    case DataType::Type::kInt32:
      if (!rhs->IsConstant()) {
        // Register shift distances need a masking instruction first.
        last_visited_internal_latency_ = kArmIntegerOpLatency;
      }
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case DataType::Type::kInt64:
      if (!rhs->IsConstant()) {
        last_visited_internal_latency_ = 8 * kArmIntegerOpLatency;
      } else {
        uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
        if (shift_value == 1 || shift_value >= 32) {
          last_visited_internal_latency_ = kArmIntegerOpLatency;
        } else {
          last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
        }
      }
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    default:
      LOG(FATAL) << "Unexpected operation type " << type;
      UNREACHABLE();
  }
}
160 
// HShl shares the shift latency model.
void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) {
  HandleShiftLatencies(instr);
}
164 
// HShr shares the shift latency model.
void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) {
  HandleShiftLatencies(instr);
}
168 
// HUShr shares the shift latency model.
void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) {
  HandleShiftLatencies(instr);
}
172 
// Models codegen's GenerateConditionWithZero: latency of materializing a
// boolean for a comparison against zero. The per-case comments name the
// instruction(s) the corresponding codegen path emits.
void SchedulingLatencyVisitorARM::HandleGenerateConditionWithZero(IfCondition condition) {
  switch (condition) {
    case kCondEQ:
    case kCondBE:
    case kCondNE:
    case kCondA:
      last_visited_internal_latency_ += kArmIntegerOpLatency;
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case kCondGE:
      // Mvn
      last_visited_internal_latency_ += kArmIntegerOpLatency;
      FALLTHROUGH_INTENDED;
    case kCondLT:
      // Lsr
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case kCondAE:
      // Trivially true.
      // Mov
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case kCondB:
      // Trivially false.
      // Mov
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    default:
      LOG(FATAL) << "Unexpected condition " << condition;
      UNREACHABLE();
  }
}
205 
// Models codegen's GenerateLongTestConstant: internal latency of testing an
// int64 value against a constant right-hand side. Only internal latency is
// accumulated here; callers add the final result latency.
void SchedulingLatencyVisitorARM::HandleGenerateLongTestConstant(HCondition* condition) {
  DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);

  IfCondition cond = condition->GetCondition();

  HInstruction* right = condition->InputAt(1);

  // NOTE(review): Uint64ConstantFrom returns an unsigned value that is stored
  // into a signed int64_t here and compared against int64_t max below --
  // verify the signedness round-trip is intended.
  int64_t value = Uint64ConstantFrom(right);

  // Comparisons against 0 are common enough, so codegen has special handling for them.
  if (value == 0) {
    switch (cond) {
      case kCondNE:
      case kCondA:
      case kCondEQ:
      case kCondBE:
        // Orrs
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        return;
      case kCondLT:
      case kCondGE:
        // Cmp
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        return;
      case kCondB:
      case kCondAE:
        // Cmp
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        return;
      default:
        break;
    }
  }

  switch (cond) {
    case kCondEQ:
    case kCondNE:
    case kCondB:
    case kCondBE:
    case kCondA:
    case kCondAE: {
      // Cmp, IT, Cmp
      last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
      break;
    }
    case kCondLE:
    case kCondGT:
      // Trivially true or false.
      if (value == std::numeric_limits<int64_t>::max()) {
        // Cmp
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        break;
      }
      FALLTHROUGH_INTENDED;
    case kCondGE:
    case kCondLT: {
      // Cmp, Sbcs
      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
      break;
    }
    default:
      LOG(FATAL) << "Unreachable";
      UNREACHABLE();
  }
}
271 
HandleGenerateLongTest(HCondition * condition)272 void SchedulingLatencyVisitorARM::HandleGenerateLongTest(HCondition* condition) {
273   DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
274 
275   IfCondition cond = condition->GetCondition();
276 
277   switch (cond) {
278     case kCondEQ:
279     case kCondNE:
280     case kCondB:
281     case kCondBE:
282     case kCondA:
283     case kCondAE: {
284       // Cmp, IT, Cmp
285       last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
286       break;
287     }
288     case kCondLE:
289     case kCondGT:
290     case kCondGE:
291     case kCondLT: {
292       // Cmp, Sbcs
293       last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
294       break;
295     }
296     default:
297       LOG(FATAL) << "Unreachable";
298       UNREACHABLE();
299   }
300 }
301 
302 // The GenerateTest series of function all counted as internal latency.
HandleGenerateTest(HCondition * condition)303 void SchedulingLatencyVisitorARM::HandleGenerateTest(HCondition* condition) {
304   const DataType::Type type = condition->GetLeft()->GetType();
305 
306   if (type == DataType::Type::kInt64) {
307     condition->InputAt(1)->IsConstant()
308         ? HandleGenerateLongTestConstant(condition)
309         : HandleGenerateLongTest(condition);
310   } else if (DataType::IsFloatingPointType(type)) {
311     // GenerateVcmp + Vmrs
312     last_visited_internal_latency_ += 2 * kArmFloatingPointOpLatency;
313   } else {
314     // Cmp
315     last_visited_internal_latency_ += kArmIntegerOpLatency;
316   }
317 }
318 
// Mirrors codegen's decision of whether a flag-setting test can be emitted
// for this condition. For int64 comparisons against a constant, the answer
// depends on whether the constant's high half fits an SBC shifter operand.
bool SchedulingLatencyVisitorARM::CanGenerateTest(HCondition* condition) {
  if (condition->GetLeft()->GetType() == DataType::Type::kInt64) {
    HInstruction* right = condition->InputAt(1);

    if (right->IsConstant()) {
      IfCondition c = condition->GetCondition();
      const uint64_t value = Uint64ConstantFrom(right);

      // NOTE(review): this relies on the IfCondition enum ordering placing
      // kCondLT..kCondGE as the signed-comparison range -- confirm against
      // the enum declaration before reordering anything here.
      if (c < kCondLT || c > kCondGE) {
        // Unsigned/equality conditions: only comparisons against 0 qualify.
        if (value != 0) {
          return false;
        }
      } else if (c == kCondLE || c == kCondGT) {
        // These are rewritten in terms of value + 1, so the incremented
        // high half must be encodable as an SBC shifter operand.
        if (value < std::numeric_limits<int64_t>::max() &&
            !codegen_->GetAssembler()->ShifterOperandCanHold(
                SBC, High32Bits(value + 1), vixl32::FlagsUpdate::SetFlags)) {
          return false;
        }
      } else if (!codegen_->GetAssembler()->ShifterOperandCanHold(
                      SBC, High32Bits(value), vixl32::FlagsUpdate::SetFlags)) {
        return false;
      }
    }
  }

  return true;
}
346 
// Models codegen's generic condition materialization: a test followed by
// conditional move(s) of 0/1 into the result register.
void SchedulingLatencyVisitorARM::HandleGenerateConditionGeneric(HCondition* cond) {
  HandleGenerateTest(cond);

  // Unlike the codegen pass, we cannot check the 'out' register IsLow() here,
  // because scheduling runs before liveness (location builder) and the register
  // allocator, so we can only follow one codegen path by assuming out.IsLow()
  // is true.
  last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
  last_visited_latency_ = kArmIntegerOpLatency;
}
356 
// Models codegen's GenerateEqualLong: equality/inequality of two int64
// values materialized without a flag-setting test.
void SchedulingLatencyVisitorARM::HandleGenerateEqualLong(HCondition* cond) {
  DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);

  IfCondition condition = cond->GetCondition();

  last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;

  if (condition == kCondNE) {
    // Orrs, IT, Mov
    last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
  } else {
    last_visited_internal_latency_ += kArmIntegerOpLatency;
    HandleGenerateConditionWithZero(condition);
  }
}
372 
HandleGenerateLongComparesAndJumps()373 void SchedulingLatencyVisitorARM::HandleGenerateLongComparesAndJumps() {
374   last_visited_internal_latency_ += 4 * kArmIntegerOpLatency;
375   last_visited_internal_latency_ += kArmBranchLatency;
376 }
377 
HandleGenerateConditionLong(HCondition * cond)378 void SchedulingLatencyVisitorARM::HandleGenerateConditionLong(HCondition* cond) {
379   DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
380 
381   IfCondition condition = cond->GetCondition();
382   HInstruction* right = cond->InputAt(1);
383 
384   if (right->IsConstant()) {
385     // Comparisons against 0 are common enough, so codegen has special handling for them.
386     if (Uint64ConstantFrom(right) == 0) {
387       switch (condition) {
388         case kCondNE:
389         case kCondA:
390         case kCondEQ:
391         case kCondBE:
392           // Orr
393           last_visited_internal_latency_ += kArmIntegerOpLatency;
394           HandleGenerateConditionWithZero(condition);
395           return;
396         case kCondLT:
397         case kCondGE:
398           FALLTHROUGH_INTENDED;
399         case kCondAE:
400         case kCondB:
401           HandleGenerateConditionWithZero(condition);
402           return;
403         case kCondLE:
404         case kCondGT:
405         default:
406           break;
407       }
408     }
409   }
410 
411   if ((condition == kCondEQ || condition == kCondNE) &&
412       !CanGenerateTest(cond)) {
413     HandleGenerateEqualLong(cond);
414     return;
415   }
416 
417   if (CanGenerateTest(cond)) {
418     HandleGenerateConditionGeneric(cond);
419     return;
420   }
421 
422   HandleGenerateLongComparesAndJumps();
423 
424   last_visited_internal_latency_ += kArmIntegerOpLatency;
425   last_visited_latency_ = kArmBranchLatency;;
426 }
427 
HandleGenerateConditionIntegralOrNonPrimitive(HCondition * cond)428 void SchedulingLatencyVisitorARM::HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond) {
429   const DataType::Type type = cond->GetLeft()->GetType();
430 
431   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
432 
433   if (type == DataType::Type::kInt64) {
434     HandleGenerateConditionLong(cond);
435     return;
436   }
437 
438   IfCondition condition = cond->GetCondition();
439   HInstruction* right = cond->InputAt(1);
440   int64_t value;
441 
442   if (right->IsConstant()) {
443     value = Uint64ConstantFrom(right);
444 
445     // Comparisons against 0 are common enough, so codegen has special handling for them.
446     if (value == 0) {
447       switch (condition) {
448         case kCondNE:
449         case kCondA:
450         case kCondEQ:
451         case kCondBE:
452         case kCondLT:
453         case kCondGE:
454         case kCondAE:
455         case kCondB:
456           HandleGenerateConditionWithZero(condition);
457           return;
458         case kCondLE:
459         case kCondGT:
460         default:
461           break;
462       }
463     }
464   }
465 
466   if (condition == kCondEQ || condition == kCondNE) {
467     if (condition == kCondNE) {
468       // CMP, IT, MOV.ne
469       last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
470       last_visited_latency_ = kArmIntegerOpLatency;
471     } else {
472       last_visited_internal_latency_ += kArmIntegerOpLatency;
473       HandleGenerateConditionWithZero(condition);
474     }
475     return;
476   }
477 
478   HandleGenerateConditionGeneric(cond);
479 }
480 
// Top-level latency model for HCondition. Conditions emitted at their use
// site cost nothing here; booleans and floating-point/integral conditions
// follow the matching codegen path.
void SchedulingLatencyVisitorARM::HandleCondition(HCondition* cond) {
  if (cond->IsEmittedAtUseSite()) {
    // No code is emitted at the definition point.
    last_visited_latency_ = 0;
    return;
  }

  const DataType::Type type = cond->GetLeft()->GetType();

  if (DataType::IsFloatingPointType(type)) {
    HandleGenerateConditionGeneric(cond);
    return;
  }

  DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;

  const IfCondition condition = cond->GetCondition();

  // Bool == Bool / Bool != Bool reduces to at most two integer ops.
  if (type == DataType::Type::kBool &&
      cond->GetRight()->GetType() == DataType::Type::kBool &&
      (condition == kCondEQ || condition == kCondNE)) {
    if (condition == kCondEQ) {
      last_visited_internal_latency_ = kArmIntegerOpLatency;
    }
    last_visited_latency_ = kArmIntegerOpLatency;
    return;
  }

  HandleGenerateConditionIntegralOrNonPrimitive(cond);
}
510 
// HCondition shares the condition latency model.
void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
  HandleCondition(instr);
}
514 
// Records HCompare latencies keyed on the type of the compared inputs; the
// result itself is always one integer op after the internal sequence.
void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
  DataType::Type type = instr->InputAt(0)->GetType();
  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
      break;
    case DataType::Type::kInt64:
      // Long compare needs extra branches in the emitted sequence.
      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency;
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency;
      break;
    default:
      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
      break;
  }
  last_visited_latency_ = kArmIntegerOpLatency;
}
539 
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instruction)540 void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
541   if (instruction->GetResultType() == DataType::Type::kInt32) {
542     last_visited_latency_ = kArmIntegerOpLatency;
543   } else {
544     last_visited_internal_latency_ = kArmIntegerOpLatency;
545     last_visited_latency_ = kArmIntegerOpLatency;
546   }
547 }
548 
// Accumulates the cost of one data-processing instruction: either as part of
// a longer internal sequence, or as the final (shifter-operand) instruction
// whose latency users see.
void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) {
  if (internal_latency) {
    last_visited_internal_latency_ += kArmIntegerOpLatency;
  } else {
    last_visited_latency_ = kArmDataProcWithShifterOpLatency;
  }
}
556 
HandleGenerateDataProc(HDataProcWithShifterOp * instruction)557 void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) {
558   const HInstruction::InstructionKind kind = instruction->GetInstrKind();
559   if (kind == HInstruction::kAdd) {
560     last_visited_internal_latency_ = kArmIntegerOpLatency;
561     last_visited_latency_ = kArmIntegerOpLatency;
562   } else if (kind == HInstruction::kSub) {
563     last_visited_internal_latency_ = kArmIntegerOpLatency;
564     last_visited_latency_ = kArmIntegerOpLatency;
565   } else {
566     HandleGenerateDataProcInstruction(/* internal_latency= */ true);
567     HandleGenerateDataProcInstruction();
568   }
569 }
570 
// Records the cost of a 64-bit data-processing operation with a shifted
// operand, distinguishing large (>= 32) and small shift distances like the
// codegen pass does.
void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) {
  DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
  DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));

  const uint32_t shift_value = instruction->GetShiftAmount();
  const HInstruction::InstructionKind kind = instruction->GetInstrKind();

  if (shift_value >= 32) {
    // Different shift types actually generate similar code here,
    // no need to differentiate shift types like the codegen pass does,
    // which also avoids handling shift types from different ARM backends.
    HandleGenerateDataProc(instruction);
  } else {
    DCHECK_GT(shift_value, 1U);
    DCHECK_LT(shift_value, 32U);

    if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
      // Two internal instructions plus the final shifter-operand one.
      HandleGenerateDataProcInstruction(/* internal_latency= */ true);
      HandleGenerateDataProcInstruction(/* internal_latency= */ true);
      HandleGenerateDataProcInstruction();
    } else {
      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
      HandleGenerateDataProc(instruction);
    }
  }
}
597 
// Dispatches HDataProcWithShifterOp latency modeling: 32-bit results take a
// single shifter-operand instruction; 64-bit results split on whether the
// operand is an extension or a shift.
void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) {
  const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();

  if (instruction->GetType() == DataType::Type::kInt32) {
    HandleGenerateDataProcInstruction();
  } else {
    DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
    if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
      HandleGenerateDataProc(instruction);
    } else {
      HandleGenerateLongDataProc(instruction);
    }
  }
}
612 
void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) {
  // Although the code generated is a simple `add` instruction, we found through empirical results
  // that spacing it from its use in memory accesses was beneficial.
  last_visited_internal_latency_ = kArmNopLatency;
  last_visited_latency_ = kArmIntegerOpLatency;
}
619 
// Intentionally unsupported on ARM: aborts compilation if reached.
void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex(
    HIntermediateAddressIndex* ATTRIBUTE_UNUSED) {
  UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM";
}
624 
// Multiply-accumulate is modeled as a single integer multiply.
void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
  last_visited_latency_ = kArmMulIntegerLatency;
}
628 
VisitArrayGet(HArrayGet * instruction)629 void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
630   DataType::Type type = instruction->GetType();
631   const bool maybe_compressed_char_at =
632       mirror::kUseStringCompression && instruction->IsStringCharAt();
633   HInstruction* array_instr = instruction->GetArray();
634   bool has_intermediate_address = array_instr->IsIntermediateAddress();
635   HInstruction* index = instruction->InputAt(1);
636 
637   switch (type) {
638     case DataType::Type::kBool:
639     case DataType::Type::kUint8:
640     case DataType::Type::kInt8:
641     case DataType::Type::kUint16:
642     case DataType::Type::kInt16:
643     case DataType::Type::kInt32: {
644       if (maybe_compressed_char_at) {
645         last_visited_internal_latency_ += kArmMemoryLoadLatency;
646       }
647       if (index->IsConstant()) {
648         if (maybe_compressed_char_at) {
649           last_visited_internal_latency_ +=
650               kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
651           last_visited_latency_ = kArmBranchLatency;
652         } else {
653           last_visited_latency_ += kArmMemoryLoadLatency;
654         }
655       } else {
656         if (has_intermediate_address) {
657         } else {
658           last_visited_internal_latency_ += kArmIntegerOpLatency;
659         }
660         if (maybe_compressed_char_at) {
661           last_visited_internal_latency_ +=
662               kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
663           last_visited_latency_ = kArmBranchLatency;
664         } else {
665           last_visited_latency_ += kArmMemoryLoadLatency;
666         }
667       }
668       break;
669     }
670 
671     case DataType::Type::kReference: {
672       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
673         last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
674       } else {
675         if (index->IsConstant()) {
676           last_visited_latency_ = kArmMemoryLoadLatency;
677         } else {
678           if (has_intermediate_address) {
679           } else {
680             last_visited_internal_latency_ += kArmIntegerOpLatency;
681           }
682           last_visited_latency_ = kArmMemoryLoadLatency;
683         }
684       }
685       break;
686     }
687 
688     case DataType::Type::kInt64: {
689       if (index->IsConstant()) {
690         last_visited_latency_ = kArmMemoryLoadLatency;
691       } else {
692         last_visited_internal_latency_ += kArmIntegerOpLatency;
693         last_visited_latency_ = kArmMemoryLoadLatency;
694       }
695       break;
696     }
697 
698     case DataType::Type::kFloat32: {
699       if (index->IsConstant()) {
700         last_visited_latency_ = kArmMemoryLoadLatency;
701       } else {
702         last_visited_internal_latency_ += kArmIntegerOpLatency;
703         last_visited_latency_ = kArmMemoryLoadLatency;
704       }
705       break;
706     }
707 
708     case DataType::Type::kFloat64: {
709       if (index->IsConstant()) {
710         last_visited_latency_ = kArmMemoryLoadLatency;
711       } else {
712         last_visited_internal_latency_ += kArmIntegerOpLatency;
713         last_visited_latency_ = kArmMemoryLoadLatency;
714       }
715       break;
716     }
717 
718     default:
719       LOG(FATAL) << "Unreachable type " << type;
720       UNREACHABLE();
721   }
722 }
723 
VisitArrayLength(HArrayLength * instruction)724 void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) {
725   last_visited_latency_ = kArmMemoryLoadLatency;
726   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
727     last_visited_internal_latency_ = kArmMemoryLoadLatency;
728     last_visited_latency_ = kArmIntegerOpLatency;
729   }
730 }
731 
VisitArraySet(HArraySet * instruction)732 void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
733   HInstruction* index = instruction->InputAt(1);
734   DataType::Type value_type = instruction->GetComponentType();
735   HInstruction* array_instr = instruction->GetArray();
736   bool has_intermediate_address = array_instr->IsIntermediateAddress();
737 
738   switch (value_type) {
739     case DataType::Type::kBool:
740     case DataType::Type::kUint8:
741     case DataType::Type::kInt8:
742     case DataType::Type::kUint16:
743     case DataType::Type::kInt16:
744     case DataType::Type::kInt32: {
745       if (index->IsConstant()) {
746         last_visited_latency_ = kArmMemoryStoreLatency;
747       } else {
748         if (has_intermediate_address) {
749         } else {
750           last_visited_internal_latency_ = kArmIntegerOpLatency;
751         }
752         last_visited_latency_ = kArmMemoryStoreLatency;
753       }
754       break;
755     }
756 
757     case DataType::Type::kReference: {
758       if (instruction->InputAt(2)->IsNullConstant()) {
759         if (index->IsConstant()) {
760           last_visited_latency_ = kArmMemoryStoreLatency;
761         } else {
762           last_visited_internal_latency_ = kArmIntegerOpLatency;
763           last_visited_latency_ = kArmMemoryStoreLatency;
764         }
765       } else {
766         // Following the exact instructions of runtime type checks is too complicated,
767         // just giving it a simple slow latency.
768         last_visited_latency_ = kArmRuntimeTypeCheckLatency;
769       }
770       break;
771     }
772 
773     case DataType::Type::kInt64: {
774       if (index->IsConstant()) {
775         last_visited_latency_ = kArmMemoryLoadLatency;
776       } else {
777         last_visited_internal_latency_ = kArmIntegerOpLatency;
778         last_visited_latency_ = kArmMemoryLoadLatency;
779       }
780       break;
781     }
782 
783     case DataType::Type::kFloat32: {
784       if (index->IsConstant()) {
785         last_visited_latency_ = kArmMemoryLoadLatency;
786       } else {
787         last_visited_internal_latency_ = kArmIntegerOpLatency;
788         last_visited_latency_ = kArmMemoryLoadLatency;
789       }
790       break;
791     }
792 
793     case DataType::Type::kFloat64: {
794       if (index->IsConstant()) {
795         last_visited_latency_ = kArmMemoryLoadLatency;
796       } else {
797         last_visited_internal_latency_ = kArmIntegerOpLatency;
798         last_visited_latency_ = kArmMemoryLoadLatency;
799       }
800       break;
801     }
802 
803     default:
804       LOG(FATAL) << "Unreachable type " << value_type;
805       UNREACHABLE();
806   }
807 }
808 
// Bounds checks cost one internal compare; the check produces no data value.
void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
  last_visited_internal_latency_ = kArmIntegerOpLatency;
  // Users do not use any data results.
  last_visited_latency_ = 0;
}
814 
// Records latency of an integral div/rem by a constant. The test order
// matters: 0 (throws path, no cost here), then +/-1 (single op), then
// powers of two (shift sequence), then the general multiply-based sequence.
void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) {
  if (imm == 0) {
    last_visited_internal_latency_ = 0;
    last_visited_latency_ = 0;
  } else if (imm == 1 || imm == -1) {
    last_visited_latency_ = kArmIntegerOpLatency;
  } else if (IsPowerOfTwo(AbsOrMin(imm))) {
    last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
    last_visited_latency_ = kArmIntegerOpLatency;
  } else {
    last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency;
    last_visited_latency_ = kArmIntegerOpLatency;
  }
}
829 
// Records HDiv latencies: constant int32 divisors use the strength-reduced
// model, other int32 divides use the hardware divider, floats/doubles have
// their own latencies, and remaining types go through a runtime call.
void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) {
  DataType::Type type = instruction->GetResultType();
  switch (type) {
    case DataType::Type::kInt32: {
      HInstruction* rhs = instruction->GetRight();
      if (rhs->IsConstant()) {
        int32_t imm = Int32ConstantFrom(rhs->AsConstant());
        HandleDivRemConstantIntegralLatencies(imm);
      } else {
        last_visited_latency_ = kArmDivIntegerLatency;
      }
      break;
    }
    case DataType::Type::kFloat32:
      last_visited_latency_ = kArmDivFloatLatency;
      break;
    case DataType::Type::kFloat64:
      last_visited_latency_ = kArmDivDoubleLatency;
      break;
    default:
      // E.g. int64 division is done by a runtime call.
      last_visited_internal_latency_ = kArmCallInternalLatency;
      last_visited_latency_ = kArmCallLatency;
      break;
  }
}
855 
// Predicated field gets share the field-get latency model.
void SchedulingLatencyVisitorARM::VisitPredicatedInstanceFieldGet(
    HPredicatedInstanceFieldGet* instruction) {
  HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
}
860 
VisitInstanceFieldGet(HInstanceFieldGet * instruction)861 void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
862   HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
863 }
864 
VisitInstanceFieldSet(HInstanceFieldSet * instruction)865 void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
866   HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
867 }
868 
VisitInstanceOf(HInstanceOf * ATTRIBUTE_UNUSED)869 void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
870   last_visited_internal_latency_ = kArmCallInternalLatency;
871   last_visited_latency_ = kArmIntegerOpLatency;
872 }
873 
VisitInvoke(HInvoke * ATTRIBUTE_UNUSED)874 void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
875   last_visited_internal_latency_ = kArmCallInternalLatency;
876   last_visited_latency_ = kArmCallLatency;
877 }
878 
VisitLoadString(HLoadString * ATTRIBUTE_UNUSED)879 void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
880   last_visited_internal_latency_ = kArmLoadStringInternalLatency;
881   last_visited_latency_ = kArmMemoryLoadLatency;
882 }
883 
VisitNewArray(HNewArray * ATTRIBUTE_UNUSED)884 void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
885   last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
886   last_visited_latency_ = kArmCallLatency;
887 }
888 
VisitNewInstance(HNewInstance * instruction)889 void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) {
890   if (instruction->IsStringAlloc()) {
891     last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency;
892   } else {
893     last_visited_internal_latency_ = kArmCallInternalLatency;
894   }
895   last_visited_latency_ = kArmCallLatency;
896 }
897 
VisitRem(HRem * instruction)898 void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) {
899   DataType::Type type = instruction->GetResultType();
900   switch (type) {
901     case DataType::Type::kInt32: {
902       HInstruction* rhs = instruction->GetRight();
903       if (rhs->IsConstant()) {
904         int32_t imm = Int32ConstantFrom(rhs->AsConstant());
905         HandleDivRemConstantIntegralLatencies(imm);
906       } else {
907         last_visited_internal_latency_ = kArmDivIntegerLatency;
908         last_visited_latency_ = kArmMulIntegerLatency;
909       }
910       break;
911     }
912     default:
913       last_visited_internal_latency_ = kArmCallInternalLatency;
914       last_visited_latency_ = kArmCallLatency;
915       break;
916   }
917 }
918 
// Latency model shared by all field getters (instance, static, predicated).
// Chooses load latencies by field type, with extra internal work when a read
// barrier is needed or when a volatile wide load cannot use LDRD.
void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction,
                                                          const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldGet() ||
         instruction->IsStaticFieldGet() ||
         instruction->IsPredicatedInstanceFieldGet());
  DCHECK(codegen_ != nullptr);
  bool is_volatile = field_info.IsVolatile();
  DataType::Type field_type = field_info.GetFieldType();
  // Whether the target supports atomic 64-bit LDRD/STRD (affects volatile
  // Int64/Float64 accesses below).
  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();

  switch (field_type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      // Single narrow/word load.
      last_visited_latency_ = kArmMemoryLoadLatency;
      break;

    case DataType::Type::kReference:
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // Baker read barrier: extra load + check before the reference is usable.
        last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;

    case DataType::Type::kInt64:
      if (is_volatile && !atomic_ldrd_strd) {
        // Without atomic LDRD the volatile wide load needs an exclusive-load
        // sequence, modeled as an extra load plus integer op.
        last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;

    case DataType::Type::kFloat32:
      last_visited_latency_ = kArmMemoryLoadLatency;
      break;

    case DataType::Type::kFloat64:
      if (is_volatile && !atomic_ldrd_strd) {
        // Volatile double without LDRD: load the two halves into core
        // registers, then move them into the FP register (final integer op).
        last_visited_internal_latency_ =
            kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency;
        last_visited_latency_ = kArmIntegerOpLatency;
      } else {
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;

    default:
      last_visited_latency_ = kArmMemoryLoadLatency;
      break;
  }

  // Volatile loads are followed by a memory barrier; note this is `+=` so it
  // stacks on top of whatever internal latency the switch above assigned.
  if (is_volatile) {
    last_visited_internal_latency_ += kArmMemoryBarrierLatency;
  }
}
980 
// Latency model shared by instance and static field setters. Chooses store
// latencies by field type, accounting for volatile barriers, heap-reference
// poisoning, and targets without atomic STRD.
void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction,
                                                          const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
  DCHECK(codegen_ != nullptr);
  bool is_volatile = field_info.IsVolatile();
  DataType::Type field_type = field_info.GetFieldType();
  // InputAt(1) is the value being stored; reference stores of non-null values
  // need a GC write barrier.
  bool needs_write_barrier =
      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();

  switch (field_type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (is_volatile) {
        // Barrier, store, then a trailing barrier.
        last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency;
        last_visited_latency_ = kArmMemoryBarrierLatency;
      } else {
        last_visited_latency_ = kArmMemoryStoreLatency;
      }
      break;

    case DataType::Type::kInt32:
    case DataType::Type::kReference:
      // NOTE(review): `+=` here (vs `=` elsewhere) relies on the visitor
      // resetting internal latency between instructions — confirm; also the
      // is_volatile barriers are not modeled for this case, unlike the
      // narrow-type case above — presumably intentional, verify vs codegen.
      if (kPoisonHeapReferences && needs_write_barrier) {
        // Poisoning the reference before the store costs two integer ops.
        last_visited_internal_latency_ += kArmIntegerOpLatency * 2;
      }
      last_visited_latency_ = kArmMemoryStoreLatency;
      break;

    case DataType::Type::kInt64:
      if (is_volatile && !atomic_ldrd_strd) {
        // Without atomic STRD the volatile wide store uses an exclusive
        // load/store loop.
        last_visited_internal_latency_ =
            kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
        last_visited_latency_ = kArmIntegerOpLatency;
      } else {
        last_visited_latency_ = kArmMemoryStoreLatency;
      }
      break;

    case DataType::Type::kFloat32:
      last_visited_latency_ = kArmMemoryStoreLatency;
      break;

    case DataType::Type::kFloat64:
      if (is_volatile && !atomic_ldrd_strd) {
        // Move the double into core registers first, then the exclusive
        // load/store sequence as in the Int64 case.
        last_visited_internal_latency_ = kArmIntegerOpLatency +
            kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
        last_visited_latency_ = kArmIntegerOpLatency;
      } else {
        last_visited_latency_ = kArmMemoryStoreLatency;
      }
      break;

    default:
      last_visited_latency_ = kArmMemoryStoreLatency;
      break;
  }
}
1042 
VisitStaticFieldGet(HStaticFieldGet * instruction)1043 void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
1044   HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
1045 }
1046 
VisitStaticFieldSet(HStaticFieldSet * instruction)1047 void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
1048   HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
1049 }
1050 
VisitSuspendCheck(HSuspendCheck * instruction)1051 void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
1052   HBasicBlock* block = instruction->GetBlock();
1053   DCHECK((block->GetLoopInformation() != nullptr) ||
1054          (block->IsEntryBlock() && instruction->GetNext()->IsGoto()));
1055   // Users do not use any data results.
1056   last_visited_latency_ = 0;
1057 }
1058 
// Latency model for primitive type conversions, keyed first on the result
// type and then on the input type. FP<->Int64 conversions go through the
// runtime; most others are single integer or FP instructions.
void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
  DataType::Type result_type = instr->GetResultType();
  DataType::Type input_type = instr->GetInputType();

  switch (result_type) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      last_visited_latency_ = kArmIntegerOpLatency;  // SBFX or UBFX
      break;

    case DataType::Type::kInt32:
      switch (input_type) {
        case DataType::Type::kInt64:
          last_visited_latency_ = kArmIntegerOpLatency;  // MOV (take the low word)
          break;
        case DataType::Type::kFloat32:
        case DataType::Type::kFloat64:
          // VCVT plus a move back to a core register.
          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        default:
          last_visited_latency_ = kArmIntegerOpLatency;
          break;
      }
      break;

    case DataType::Type::kInt64:
      switch (input_type) {
        case DataType::Type::kBool:
        case DataType::Type::kUint8:
        case DataType::Type::kInt8:
        case DataType::Type::kUint16:
        case DataType::Type::kInt16:
        case DataType::Type::kInt32:
          // MOV and extension
          last_visited_internal_latency_ = kArmIntegerOpLatency;
          last_visited_latency_ = kArmIntegerOpLatency;
          break;
        case DataType::Type::kFloat32:
        case DataType::Type::kFloat64:
          // invokes runtime
          last_visited_internal_latency_ = kArmCallInternalLatency;
          break;
        default:
          last_visited_internal_latency_ = kArmIntegerOpLatency;
          last_visited_latency_ = kArmIntegerOpLatency;
          break;
      }
      break;

    case DataType::Type::kFloat32:
      switch (input_type) {
        case DataType::Type::kBool:
        case DataType::Type::kUint8:
        case DataType::Type::kInt8:
        case DataType::Type::kUint16:
        case DataType::Type::kInt16:
        case DataType::Type::kInt32:
          // Move to FP register, then VCVT.
          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        case DataType::Type::kInt64:
          // invokes runtime
          last_visited_internal_latency_ = kArmCallInternalLatency;
          break;
        case DataType::Type::kFloat64:
          // Single VCVT double->float.
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        default:
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
      }
      break;

    case DataType::Type::kFloat64:
      switch (input_type) {
        case DataType::Type::kBool:
        case DataType::Type::kUint8:
        case DataType::Type::kInt8:
        case DataType::Type::kUint16:
        case DataType::Type::kInt16:
        case DataType::Type::kInt32:
          // Move to FP register, then VCVT.
          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        case DataType::Type::kInt64:
          // Long->double is an inline multi-instruction FP sequence.
          last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        case DataType::Type::kFloat32:
          // Single VCVT float->double.
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        default:
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
      }
      break;

    default:
      last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
      break;
  }
}
1164 
1165 }  // namespace arm
1166 }  // namespace art
1167