/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_

#include "scheduler.h"

namespace art {
namespace arm64 {

static constexpr uint32_t kArm64MemoryLoadLatency = 5;
static constexpr uint32_t kArm64MemoryStoreLatency = 3;

static constexpr uint32_t kArm64CallInternalLatency = 10;
static constexpr uint32_t kArm64CallLatency = 5;

// AArch64 instruction latency.
// We currently assume that all arm64 CPUs share the same instruction latency list.
static constexpr uint32_t kArm64IntegerOpLatency = 2;
static constexpr uint32_t kArm64FloatingPointOpLatency = 5;

static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
static constexpr uint32_t kArm64DivDoubleLatency = 30;
static constexpr uint32_t kArm64DivFloatLatency = 15;
static constexpr uint32_t kArm64DivIntegerLatency = 5;
static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
static constexpr uint32_t kArm64MulIntegerLatency = 6;
static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;

static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;

class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
 public:
  // Default visitor for instructions not handled specifically below.
  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) {
    last_visited_latency_ = kArm64IntegerOpLatency;
  }
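  // The per-instruction overrides declared below are defined out of line (in
  // scheduler_arm64.cc). As a minimal illustrative sketch (not the exact upstream
  // definitions), such an override simply picks one of the latency constants above;
  // compound operations may additionally fill in last_visited_internal_latency_:
  //
  //   void SchedulingLatencyVisitorARM64::VisitMul(HMul* instr) {
  //     last_visited_latency_ = DataType::IsFloatingPointType(instr->GetResultType())
  //         ? kArm64MulFloatingPointLatency
  //         : kArm64MulIntegerLatency;
  //   }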

  // We add a second unused parameter to be able to use this macro like the others
  // defined in `nodes.h`.
#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \
  M(ArrayGet             , unused)               \
  M(ArrayLength          , unused)               \
  M(ArraySet             , unused)               \
  M(BinaryOperation      , unused)               \
  M(BoundsCheck          , unused)               \
  M(Div                  , unused)               \
  M(InstanceFieldGet     , unused)               \
  M(InstanceOf           , unused)               \
  M(Invoke               , unused)               \
  M(LoadString           , unused)               \
  M(Mul                  , unused)               \
  M(NewArray             , unused)               \
  M(NewInstance          , unused)               \
  M(Rem                  , unused)               \
  M(StaticFieldGet       , unused)               \
  M(SuspendCheck         , unused)               \
  M(TypeConversion       , unused)               \
  M(VecReplicateScalar   , unused)               \
  M(VecExtractScalar     , unused)               \
  M(VecReduce            , unused)               \
  M(VecCnv               , unused)               \
  M(VecNeg               , unused)               \
  M(VecAbs               , unused)               \
  M(VecNot               , unused)               \
  M(VecAdd               , unused)               \
  M(VecHalvingAdd        , unused)               \
  M(VecSub               , unused)               \
  M(VecMul               , unused)               \
  M(VecDiv               , unused)               \
  M(VecMin               , unused)               \
  M(VecMax               , unused)               \
  M(VecAnd               , unused)               \
  M(VecAndNot            , unused)               \
  M(VecOr                , unused)               \
  M(VecXor               , unused)               \
  M(VecShl               , unused)               \
  M(VecShr               , unused)               \
  M(VecUShr              , unused)               \
  M(VecSetScalars        , unused)               \
  M(VecMultiplyAccumulate, unused)               \
  M(VecLoad              , unused)               \
  M(VecStore             , unused)

#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
  M(BitwiseNegatedRight, unused)                 \
  M(MultiplyAccumulate, unused)                  \
  M(IntermediateAddress, unused)                 \
  M(IntermediateAddressIndex, unused)            \
  M(DataProcWithShifterOp, unused)

#define DECLARE_VISIT_INSTRUCTION(type, unused) \
  void Visit##type(H##type* instruction) OVERRIDE;

  FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

 private:
  void HandleSimpleArithmeticSIMD(HVecOperation* instr);
  void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
};

class HSchedulerARM64 : public HScheduler {
 public:
  HSchedulerARM64(ScopedArenaAllocator* allocator, SchedulingNodeSelector* selector)
      : HScheduler(allocator, &arm64_latency_visitor_, selector) {}
  ~HSchedulerARM64() OVERRIDE {}

  bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
#define CASE_INSTRUCTION_KIND(type, unused) case \
  HInstruction::InstructionKind::k##type:
    switch (instruction->GetKind()) {
      FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      default:
        return HScheduler::IsSchedulable(instruction);
    }
#undef CASE_INSTRUCTION_KIND
  }
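  // For reference, the instruction scheduling pass drives this class roughly as follows.
  // This is an illustrative sketch based on HInstructionScheduling (scheduler.cc); the
  // selector choice and the loop-blocks flag are assumptions, not mandated by this header:
  //
  //   CriticalPathSchedulingNodeSelector selector;
  //   HSchedulerARM64 scheduler(&allocator, &selector);
  //   scheduler.SetOnlyOptimizeLoopBlocks(true);
  //   scheduler.Schedule(graph);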

  // Treat as scheduling barriers those vector instructions whose live ranges exceed the
  // vectorized loop boundaries. This is a workaround for the lack of a notion of SIMD registers
  // in the compiler; around a call we have to save/restore all live SIMD&FP registers (only the
  // lower 64 bits of SIMD&FP registers are callee-saved), so don't reorder such vector
  // instructions.
  //
  // TODO: remove this when proper support for SIMD registers is introduced to the compiler.
  bool IsSchedulingBarrier(const HInstruction* instr) const OVERRIDE {
    return HScheduler::IsSchedulingBarrier(instr) ||
           instr->IsVecReduce() ||
           instr->IsVecExtractScalar() ||
           instr->IsVecSetScalars() ||
           instr->IsVecReplicateScalar();
  }

 private:
  SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
  DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
};

}  // namespace arm64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_