/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_

#include "scheduler.h"

namespace art {
namespace arm64 {

static constexpr uint32_t kArm64MemoryLoadLatency = 5;
static constexpr uint32_t kArm64MemoryStoreLatency = 3;

static constexpr uint32_t kArm64CallInternalLatency = 10;
static constexpr uint32_t kArm64CallLatency = 5;

// AArch64 instruction latency.
// We currently assume that all arm64 CPUs share the same instruction latency list.
static constexpr uint32_t kArm64IntegerOpLatency = 2;
static constexpr uint32_t kArm64FloatingPointOpLatency = 5;

static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
static constexpr uint32_t kArm64DivDoubleLatency = 30;
static constexpr uint32_t kArm64DivFloatLatency = 15;
static constexpr uint32_t kArm64DivIntegerLatency = 5;
static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
static constexpr uint32_t kArm64MulIntegerLatency = 6;
static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;

static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;

class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
 public:
  // Default visitor for instructions not handled specifically below.
  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
    last_visited_latency_ = kArm64IntegerOpLatency;
  }

  // We add a second unused parameter to be able to use this macro like the others
  // defined in `nodes.h`.
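  // For instance, instantiating the list below with `DECLARE_VISIT_INSTRUCTION` turns the entry
  // `M(ArrayGet, unused)` into `void VisitArrayGet(HArrayGet* instruction) override;`.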
#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \
  M(ArrayGet             , unused)               \
  M(ArrayLength          , unused)               \
  M(ArraySet             , unused)               \
  M(BoundsCheck          , unused)               \
  M(Div                  , unused)               \
  M(InstanceFieldGet     , unused)               \
  M(InstanceOf           , unused)               \
  M(LoadString           , unused)               \
  M(Mul                  , unused)               \
  M(NewArray             , unused)               \
  M(NewInstance          , unused)               \
  M(Rem                  , unused)               \
  M(StaticFieldGet       , unused)               \
  M(SuspendCheck         , unused)               \
  M(TypeConversion       , unused)               \
  M(VecReplicateScalar   , unused)               \
  M(VecExtractScalar     , unused)               \
  M(VecReduce            , unused)               \
  M(VecCnv               , unused)               \
  M(VecNeg               , unused)               \
  M(VecAbs               , unused)               \
  M(VecNot               , unused)               \
  M(VecAdd               , unused)               \
  M(VecHalvingAdd        , unused)               \
  M(VecSub               , unused)               \
  M(VecMul               , unused)               \
  M(VecDiv               , unused)               \
  M(VecMin               , unused)               \
  M(VecMax               , unused)               \
  M(VecAnd               , unused)               \
  M(VecAndNot            , unused)               \
  M(VecOr                , unused)               \
  M(VecXor               , unused)               \
  M(VecShl               , unused)               \
  M(VecShr               , unused)               \
  M(VecUShr              , unused)               \
  M(VecSetScalars        , unused)               \
  M(VecMultiplyAccumulate, unused)               \
  M(VecLoad              , unused)               \
  M(VecStore             , unused)

#define FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(M) \
  M(BinaryOperation      , unused)                 \
  M(Invoke               , unused)

#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
  M(BitwiseNegatedRight     , unused)            \
  M(MultiplyAccumulate      , unused)            \
  M(IntermediateAddress     , unused)            \
  M(IntermediateAddressIndex, unused)            \
  M(DataProcWithShifterOp   , unused)

#define DECLARE_VISIT_INSTRUCTION(type, unused) \
  void Visit##type(H##type* instruction) override;

  FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

 private:
  void HandleSimpleArithmeticSIMD(HVecOperation* instr);
  void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
};

class HSchedulerARM64 : public HScheduler {
 public:
  explicit HSchedulerARM64(SchedulingNodeSelector* selector)
      : HScheduler(&arm64_latency_visitor_, selector) {}
  ~HSchedulerARM64() override {}

  bool IsSchedulable(const HInstruction* instruction) const override {
#define CASE_INSTRUCTION_KIND(type, unused) case \
    HInstruction::InstructionKind::k##type:
    switch (instruction->GetKind()) {
      FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      default:
        return HScheduler::IsSchedulable(instruction);
    }
#undef CASE_INSTRUCTION_KIND
  }

  // Treat as scheduling barriers those vector instructions whose live ranges exceed the
  // vectorized loop boundaries. This is a workaround for the compiler's lack of a notion of
  // SIMD registers: around a call we have to save/restore all live SIMD&FP registers (only the
  // lower 64 bits of SIMD&FP registers are callee-saved), so such vector instructions must not
  // be reordered.
  //
  // TODO: remove this once proper support for SIMD registers is introduced in the compiler.
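  // For example, a VecReplicateScalar that broadcasts a loop-invariant value before a vectorized
  // loop typically stays live across the whole loop body, so it is pinned in place here rather
  // than being reordered with its neighbours.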
  bool IsSchedulingBarrier(const HInstruction* instr) const override {
    return HScheduler::IsSchedulingBarrier(instr) ||
           instr->IsVecReduce() ||
           instr->IsVecExtractScalar() ||
           instr->IsVecSetScalars() ||
           instr->IsVecReplicateScalar();
  }

 private:
  SchedulingLatencyVisitorARM64 arm64_latency_visitor_;

  DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
};

}  // namespace arm64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_