1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
18 #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
19 
20 #include "code_generator_arm_vixl.h"
21 #include "scheduler.h"
22 
23 namespace art {
24 namespace arm {
25 // TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere?
26 typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
27 
// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and
// automatic tuning using a differential evolution approach on various benchmarks.

// Data-processing operations.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;

// Multiplication and division.
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;

// Type conversion.
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;

// Memory accesses and barriers.
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;

// Branches and calls.
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
static constexpr uint32_t kArmCallInternalLatency = 29;

// Miscellaneous.
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
51 
52 class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
53  public:
SchedulingLatencyVisitorARM(CodeGenerator * codegen)54   explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
55       : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}
56 
57   // Default visitor for instructions not handled specifically below.
VisitInstruction(HInstruction * ATTRIBUTE_UNUSED)58   void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
59     last_visited_latency_ = kArmIntegerOpLatency;
60   }
61 
62 // We add a second unused parameter to be able to use this macro like the others
63 // defined in `nodes.h`.
64 #define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M)    \
65   M(ArrayGet         , unused)                   \
66   M(ArrayLength      , unused)                   \
67   M(ArraySet         , unused)                   \
68   M(Add              , unused)                   \
69   M(Sub              , unused)                   \
70   M(And              , unused)                   \
71   M(Or               , unused)                   \
72   M(Ror              , unused)                   \
73   M(Xor              , unused)                   \
74   M(Shl              , unused)                   \
75   M(Shr              , unused)                   \
76   M(UShr             , unused)                   \
77   M(Mul              , unused)                   \
78   M(Div              , unused)                   \
79   M(Condition        , unused)                   \
80   M(Compare          , unused)                   \
81   M(BoundsCheck      , unused)                   \
82   M(InstanceFieldGet , unused)                   \
83   M(InstanceFieldSet , unused)                   \
84   M(InstanceOf       , unused)                   \
85   M(Invoke           , unused)                   \
86   M(LoadString       , unused)                   \
87   M(NewArray         , unused)                   \
88   M(NewInstance      , unused)                   \
89   M(Rem              , unused)                   \
90   M(StaticFieldGet   , unused)                   \
91   M(StaticFieldSet   , unused)                   \
92   M(SuspendCheck     , unused)                   \
93   M(TypeConversion   , unused)
94 
95 #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
96   M(BitwiseNegatedRight, unused)                 \
97   M(MultiplyAccumulate, unused)                  \
98   M(IntermediateAddress, unused)                 \
99   M(IntermediateAddressIndex, unused)            \
100   M(DataProcWithShifterOp, unused)
101 
102 #define DECLARE_VISIT_INSTRUCTION(type, unused)  \
103   void Visit##type(H##type* instruction) override;
104 
105   FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
106   FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
107   FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
108 
109 #undef DECLARE_VISIT_INSTRUCTION
110 
111  private:
112   bool CanGenerateTest(HCondition* cond);
113   void HandleGenerateConditionWithZero(IfCondition cond);
114   void HandleGenerateLongTestConstant(HCondition* cond);
115   void HandleGenerateLongTest(HCondition* cond);
116   void HandleGenerateLongComparesAndJumps();
117   void HandleGenerateTest(HCondition* cond);
118   void HandleGenerateConditionGeneric(HCondition* cond);
119   void HandleGenerateEqualLong(HCondition* cond);
120   void HandleGenerateConditionLong(HCondition* cond);
121   void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
122   void HandleCondition(HCondition* instr);
123   void HandleBinaryOperationLantencies(HBinaryOperation* instr);
124   void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
125   void HandleShiftLatencies(HBinaryOperation* instr);
126   void HandleDivRemConstantIntegralLatencies(int32_t imm);
127   void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
128   void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
129   void HandleGenerateDataProcInstruction(bool internal_latency = false);
130   void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
131   void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
132 
133   // The latency setting for each HInstruction depends on how CodeGenerator may generate code,
134   // latency visitors may query CodeGenerator for such information for accurate latency settings.
135   CodeGeneratorARMType* codegen_;
136 };
137 
138 class HSchedulerARM : public HScheduler {
139  public:
HSchedulerARM(SchedulingNodeSelector * selector,SchedulingLatencyVisitorARM * arm_latency_visitor)140   HSchedulerARM(SchedulingNodeSelector* selector,
141                 SchedulingLatencyVisitorARM* arm_latency_visitor)
142       : HScheduler(arm_latency_visitor, selector) {}
~HSchedulerARM()143   ~HSchedulerARM() override {}
144 
IsSchedulable(const HInstruction * instruction)145   bool IsSchedulable(const HInstruction* instruction) const override {
146 #define CASE_INSTRUCTION_KIND(type, unused) case \
147   HInstruction::InstructionKind::k##type:
148     switch (instruction->GetKind()) {
149       FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
150         return true;
151       FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
152         return true;
153       default:
154         return HScheduler::IsSchedulable(instruction);
155     }
156 #undef CASE_INSTRUCTION_KIND
157   }
158 
159  private:
160   DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
161 };
162 
163 }  // namespace arm
164 }  // namespace art
165 
166 #endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
167