1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ 18 #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ 19 20 #include "code_generator_arm_vixl.h" 21 #include "scheduler.h" 22 23 namespace art { 24 namespace arm { 25 // TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere? 26 typedef CodeGeneratorARMVIXL CodeGeneratorARMType; 27 28 // AArch32 instruction latencies. 29 // We currently assume that all ARM CPUs share the same instruction latency list. 30 // The following latencies were tuned based on performance experiments and 31 // automatic tuning using differential evolution approach on various benchmarks. 32 static constexpr uint32_t kArmIntegerOpLatency = 2; 33 static constexpr uint32_t kArmFloatingPointOpLatency = 11; 34 static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4; 35 static constexpr uint32_t kArmMulIntegerLatency = 6; 36 static constexpr uint32_t kArmMulFloatingPointLatency = 11; 37 static constexpr uint32_t kArmDivIntegerLatency = 10; 38 static constexpr uint32_t kArmDivFloatLatency = 20; 39 static constexpr uint32_t kArmDivDoubleLatency = 25; 40 static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11; 41 static constexpr uint32_t kArmMemoryLoadLatency = 9; 42 static constexpr uint32_t kArmMemoryStoreLatency = 9; 43 static constexpr uint32_t kArmMemoryBarrierLatency = 6; 44 static constexpr uint32_t kArmBranchLatency = 4; 45 static constexpr uint32_t kArmCallLatency = 5; 46 static constexpr uint32_t kArmCallInternalLatency = 29; 47 static constexpr uint32_t kArmLoadStringInternalLatency = 10; 48 static constexpr uint32_t kArmNopLatency = 2; 49 static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18; 50 static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46; 51 52 class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { 53 public: SchedulingLatencyVisitorARM(CodeGenerator * codegen)54 explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen) 55 : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {} 56 57 // Default visitor for instructions not handled specifically below. VisitInstruction(HInstruction * ATTRIBUTE_UNUSED)58 void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { 59 last_visited_latency_ = kArmIntegerOpLatency; 60 } 61 62 // We add a second unused parameter to be able to use this macro like the others 63 // defined in `nodes.h`. 64 #define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \ 65 M(ArrayGet, unused) \ 66 M(ArrayLength, unused) \ 67 M(ArraySet, unused) \ 68 M(Add, unused) \ 69 M(Sub, unused) \ 70 M(And, unused) \ 71 M(Or, unused) \ 72 M(Ror, unused) \ 73 M(Xor, unused) \ 74 M(Shl, unused) \ 75 M(Shr, unused) \ 76 M(UShr, unused) \ 77 M(Mul, unused) \ 78 M(Div, unused) \ 79 M(Condition, unused) \ 80 M(Compare, unused) \ 81 M(BoundsCheck, unused) \ 82 M(PredicatedInstanceFieldGet, unused) \ 83 M(InstanceFieldGet, unused) \ 84 M(InstanceFieldSet, unused) \ 85 M(InstanceOf, unused) \ 86 M(Invoke, unused) \ 87 M(LoadString, unused) \ 88 M(NewArray, unused) \ 89 M(NewInstance, unused) \ 90 M(Rem, unused) \ 91 M(StaticFieldGet, unused) \ 92 M(StaticFieldSet, unused) \ 93 M(SuspendCheck, unused) \ 94 M(TypeConversion, unused) 95 96 #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ 97 M(BitwiseNegatedRight, unused) \ 98 M(MultiplyAccumulate, unused) \ 99 M(IntermediateAddress, unused) \ 100 M(IntermediateAddressIndex, unused) \ 101 M(DataProcWithShifterOp, unused) 102 103 #define DECLARE_VISIT_INSTRUCTION(type, unused) \ 104 void Visit##type(H##type* instruction) override; 105 106 FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) 107 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) 108 FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) 109 110 #undef DECLARE_VISIT_INSTRUCTION 111 112 private: 113 bool CanGenerateTest(HCondition* cond); 114 void HandleGenerateConditionWithZero(IfCondition cond); 115 void HandleGenerateLongTestConstant(HCondition* cond); 116 void HandleGenerateLongTest(HCondition* cond); 117 void HandleGenerateLongComparesAndJumps(); 118 void HandleGenerateTest(HCondition* cond); 119 void HandleGenerateConditionGeneric(HCondition* cond); 120 void HandleGenerateEqualLong(HCondition* cond); 121 void HandleGenerateConditionLong(HCondition* cond); 122 void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond); 123 void HandleCondition(HCondition* instr); 124 void HandleBinaryOperationLantencies(HBinaryOperation* instr); 125 void HandleBitwiseOperationLantencies(HBinaryOperation* instr); 126 void HandleShiftLatencies(HBinaryOperation* instr); 127 void HandleDivRemConstantIntegralLatencies(int32_t imm); 128 void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info); 129 void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info); 130 void HandleGenerateDataProcInstruction(bool internal_latency = false); 131 void HandleGenerateDataProc(HDataProcWithShifterOp* instruction); 132 void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction); 133 134 // The latency setting for each HInstruction depends on how CodeGenerator may generate code, 135 // latency visitors may query CodeGenerator for such information for accurate latency settings. 136 CodeGeneratorARMType* codegen_; 137 }; 138 139 class HSchedulerARM : public HScheduler { 140 public: HSchedulerARM(SchedulingNodeSelector * selector,SchedulingLatencyVisitorARM * arm_latency_visitor)141 HSchedulerARM(SchedulingNodeSelector* selector, 142 SchedulingLatencyVisitorARM* arm_latency_visitor) 143 : HScheduler(arm_latency_visitor, selector) {} ~HSchedulerARM()144 ~HSchedulerARM() override {} 145 IsSchedulable(const HInstruction * instruction)146 bool IsSchedulable(const HInstruction* instruction) const override { 147 #define CASE_INSTRUCTION_KIND(type, unused) case \ 148 HInstruction::InstructionKind::k##type: 149 switch (instruction->GetKind()) { 150 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND) 151 return true; 152 FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND) 153 return true; 154 default: 155 return HScheduler::IsSchedulable(instruction); 156 } 157 #undef CASE_INSTRUCTION_KIND 158 } 159 160 private: 161 DISALLOW_COPY_AND_ASSIGN(HSchedulerARM); 162 }; 163 164 } // namespace arm 165 } // namespace art 166 167 #endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ 168