1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ 18 #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ 19 20 #include "code_generator_arm_vixl.h" 21 #include "scheduler.h" 22 23 namespace art { 24 namespace arm { 25 // TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere? 26 typedef CodeGeneratorARMVIXL CodeGeneratorARMType; 27 28 // AArch32 instruction latencies. 29 // We currently assume that all ARM CPUs share the same instruction latency list. 30 // The following latencies were tuned based on performance experiments and 31 // automatic tuning using differential evolution approach on various benchmarks. 32 static constexpr uint32_t kArmIntegerOpLatency = 2; 33 static constexpr uint32_t kArmFloatingPointOpLatency = 11; 34 static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4; 35 static constexpr uint32_t kArmMulIntegerLatency = 6; 36 static constexpr uint32_t kArmMulFloatingPointLatency = 11; 37 static constexpr uint32_t kArmDivIntegerLatency = 10; 38 static constexpr uint32_t kArmDivFloatLatency = 20; 39 static constexpr uint32_t kArmDivDoubleLatency = 25; 40 static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11; 41 static constexpr uint32_t kArmMemoryLoadLatency = 9; 42 static constexpr uint32_t kArmMemoryStoreLatency = 9; 43 static constexpr uint32_t kArmMemoryBarrierLatency = 6; 44 static constexpr uint32_t kArmBranchLatency = 4; 45 static constexpr uint32_t kArmCallLatency = 5; 46 static constexpr uint32_t kArmCallInternalLatency = 29; 47 static constexpr uint32_t kArmLoadStringInternalLatency = 10; 48 static constexpr uint32_t kArmNopLatency = 2; 49 static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18; 50 static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46; 51 52 class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { 53 public: SchedulingLatencyVisitorARM(CodeGenerator * codegen)54 explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen) 55 : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {} 56 57 // Default visitor for instructions not handled specifically below. VisitInstruction(HInstruction * ATTRIBUTE_UNUSED)58 void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) { 59 last_visited_latency_ = kArmIntegerOpLatency; 60 } 61 62 // We add a second unused parameter to be able to use this macro like the others 63 // defined in `nodes.h`. 64 #define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \ 65 M(ArrayGet , unused) \ 66 M(ArrayLength , unused) \ 67 M(ArraySet , unused) \ 68 M(Add , unused) \ 69 M(Sub , unused) \ 70 M(And , unused) \ 71 M(Or , unused) \ 72 M(Ror , unused) \ 73 M(Xor , unused) \ 74 M(Shl , unused) \ 75 M(Shr , unused) \ 76 M(UShr , unused) \ 77 M(Mul , unused) \ 78 M(Div , unused) \ 79 M(Condition , unused) \ 80 M(Compare , unused) \ 81 M(BoundsCheck , unused) \ 82 M(InstanceFieldGet , unused) \ 83 M(InstanceFieldSet , unused) \ 84 M(InstanceOf , unused) \ 85 M(Invoke , unused) \ 86 M(LoadString , unused) \ 87 M(NewArray , unused) \ 88 M(NewInstance , unused) \ 89 M(Rem , unused) \ 90 M(StaticFieldGet , unused) \ 91 M(StaticFieldSet , unused) \ 92 M(SuspendCheck , unused) \ 93 M(TypeConversion , unused) 94 95 #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ 96 M(BitwiseNegatedRight, unused) \ 97 M(MultiplyAccumulate, unused) \ 98 M(IntermediateAddress, unused) \ 99 M(IntermediateAddressIndex, unused) \ 100 M(DataProcWithShifterOp, unused) 101 102 #define DECLARE_VISIT_INSTRUCTION(type, unused) \ 103 void Visit##type(H##type* instruction) OVERRIDE; 104 105 FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) 106 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) 107 FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) 108 109 #undef DECLARE_VISIT_INSTRUCTION 110 111 private: 112 bool CanGenerateTest(HCondition* cond); 113 void HandleGenerateConditionWithZero(IfCondition cond); 114 void HandleGenerateLongTestConstant(HCondition* cond); 115 void HandleGenerateLongTest(HCondition* cond); 116 void HandleGenerateLongComparesAndJumps(); 117 void HandleGenerateTest(HCondition* cond); 118 void HandleGenerateConditionGeneric(HCondition* cond); 119 void HandleGenerateEqualLong(HCondition* cond); 120 void HandleGenerateConditionLong(HCondition* cond); 121 void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond); 122 void HandleCondition(HCondition* instr); 123 void HandleBinaryOperationLantencies(HBinaryOperation* instr); 124 void HandleBitwiseOperationLantencies(HBinaryOperation* instr); 125 void HandleShiftLatencies(HBinaryOperation* instr); 126 void HandleDivRemConstantIntegralLatencies(int32_t imm); 127 void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info); 128 void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info); 129 void HandleGenerateDataProcInstruction(bool internal_latency = false); 130 void HandleGenerateDataProc(HDataProcWithShifterOp* instruction); 131 void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction); 132 133 // The latency setting for each HInstruction depends on how CodeGenerator may generate code, 134 // latency visitors may query CodeGenerator for such information for accurate latency settings. 135 CodeGeneratorARMType* codegen_; 136 }; 137 138 class HSchedulerARM : public HScheduler { 139 public: HSchedulerARM(ScopedArenaAllocator * allocator,SchedulingNodeSelector * selector,SchedulingLatencyVisitorARM * arm_latency_visitor)140 HSchedulerARM(ScopedArenaAllocator* allocator, 141 SchedulingNodeSelector* selector, 142 SchedulingLatencyVisitorARM* arm_latency_visitor) 143 : HScheduler(allocator, arm_latency_visitor, selector) {} ~HSchedulerARM()144 ~HSchedulerARM() OVERRIDE {} 145 IsSchedulable(const HInstruction * instruction)146 bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { 147 #define CASE_INSTRUCTION_KIND(type, unused) case \ 148 HInstruction::InstructionKind::k##type: 149 switch (instruction->GetKind()) { 150 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND) 151 return true; 152 FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND) 153 return true; 154 default: 155 return HScheduler::IsSchedulable(instruction); 156 } 157 #undef CASE_INSTRUCTION_KIND 158 } 159 160 private: 161 DISALLOW_COPY_AND_ASSIGN(HSchedulerARM); 162 }; 163 164 } // namespace arm 165 } // namespace art 166 167 #endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ 168