/* * Copyright (C) 2017 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ #include "code_generator_arm_vixl.h" #include "scheduler.h" namespace art { namespace arm { // TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere? typedef CodeGeneratorARMVIXL CodeGeneratorARMType; // AArch32 instruction latencies. // We currently assume that all ARM CPUs share the same instruction latency list. // The following latencies were tuned based on performance experiments and // automatic tuning using differential evolution approach on various benchmarks. static constexpr uint32_t kArmIntegerOpLatency = 2; static constexpr uint32_t kArmFloatingPointOpLatency = 11; static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4; static constexpr uint32_t kArmMulIntegerLatency = 6; static constexpr uint32_t kArmMulFloatingPointLatency = 11; static constexpr uint32_t kArmDivIntegerLatency = 10; static constexpr uint32_t kArmDivFloatLatency = 20; static constexpr uint32_t kArmDivDoubleLatency = 25; static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11; static constexpr uint32_t kArmMemoryLoadLatency = 9; static constexpr uint32_t kArmMemoryStoreLatency = 9; static constexpr uint32_t kArmMemoryBarrierLatency = 6; static constexpr uint32_t kArmBranchLatency = 4; static constexpr uint32_t kArmCallLatency = 5; static constexpr uint32_t kArmCallInternalLatency = 29; static constexpr uint32_t kArmLoadStringInternalLatency = 10; static constexpr uint32_t kArmNopLatency = 2; static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18; static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46; class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { public: explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen) : codegen_(down_cast(codegen)) {} // Default visitor for instructions not handled specifically below. void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) { last_visited_latency_ = kArmIntegerOpLatency; } // We add a second unused parameter to be able to use this macro like the others // defined in `nodes.h`. #define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \ M(ArrayGet , unused) \ M(ArrayLength , unused) \ M(ArraySet , unused) \ M(Add , unused) \ M(Sub , unused) \ M(And , unused) \ M(Or , unused) \ M(Ror , unused) \ M(Xor , unused) \ M(Shl , unused) \ M(Shr , unused) \ M(UShr , unused) \ M(Mul , unused) \ M(Div , unused) \ M(Condition , unused) \ M(Compare , unused) \ M(BoundsCheck , unused) \ M(InstanceFieldGet , unused) \ M(InstanceFieldSet , unused) \ M(InstanceOf , unused) \ M(Invoke , unused) \ M(LoadString , unused) \ M(NewArray , unused) \ M(NewInstance , unused) \ M(Rem , unused) \ M(StaticFieldGet , unused) \ M(StaticFieldSet , unused) \ M(SuspendCheck , unused) \ M(TypeConversion , unused) #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ M(BitwiseNegatedRight, unused) \ M(MultiplyAccumulate, unused) \ M(IntermediateAddress, unused) \ M(IntermediateAddressIndex, unused) \ M(DataProcWithShifterOp, unused) #define DECLARE_VISIT_INSTRUCTION(type, unused) \ void Visit##type(H##type* instruction) OVERRIDE; FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION private: bool CanGenerateTest(HCondition* cond); void HandleGenerateConditionWithZero(IfCondition cond); void HandleGenerateLongTestConstant(HCondition* cond); void HandleGenerateLongTest(HCondition* cond); void HandleGenerateLongComparesAndJumps(); void HandleGenerateTest(HCondition* cond); void HandleGenerateConditionGeneric(HCondition* cond); void HandleGenerateEqualLong(HCondition* cond); void HandleGenerateConditionLong(HCondition* cond); void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond); void HandleCondition(HCondition* instr); void HandleBinaryOperationLantencies(HBinaryOperation* instr); void HandleBitwiseOperationLantencies(HBinaryOperation* instr); void HandleShiftLatencies(HBinaryOperation* instr); void HandleDivRemConstantIntegralLatencies(int32_t imm); void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info); void HandleGenerateDataProcInstruction(bool internal_latency = false); void HandleGenerateDataProc(HDataProcWithShifterOp* instruction); void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction); // The latency setting for each HInstruction depends on how CodeGenerator may generate code, // latency visitors may query CodeGenerator for such information for accurate latency settings. CodeGeneratorARMType* codegen_; }; class HSchedulerARM : public HScheduler { public: HSchedulerARM(ScopedArenaAllocator* allocator, SchedulingNodeSelector* selector, SchedulingLatencyVisitorARM* arm_latency_visitor) : HScheduler(allocator, arm_latency_visitor, selector) {} ~HSchedulerARM() OVERRIDE {} bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { #define CASE_INSTRUCTION_KIND(type, unused) case \ HInstruction::InstructionKind::k##type: switch (instruction->GetKind()) { FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND) return true; FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND) return true; default: return HScheduler::IsSchedulable(instruction); } #undef CASE_INSTRUCTION_KIND } private: DISALLOW_COPY_AND_ASSIGN(HSchedulerARM); }; } // namespace arm } // namespace art #endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_