1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
18 #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
19 
20 #include "code_generator_arm_vixl.h"
21 #include "scheduler.h"
22 
23 namespace art {
24 namespace arm {
25 // TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere?
26 typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
27 
// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and
// automatic tuning using differential evolution approach on various benchmarks.
// NOTE(review): these are relative scheduling weights (presumably approximate
// cycle counts) — they only need to be consistent with one another for the
// scheduler's ordering decisions, not match any specific core's datasheet.

// Integer and floating-point arithmetic.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
// Memory accesses and barriers.
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;
// Control flow and runtime interaction.
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
static constexpr uint32_t kArmCallInternalLatency = 29;
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
51 
// Assigns a latency estimate to each HInstruction for ARM scheduling.
// Visit methods record their result in `last_visited_latency_` (and, where
// relevant, `last_visited_internal_latency_`), members inherited from
// SchedulingLatencyVisitor — the visitors return void rather than a value.
class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
 public:
  // Non-owning: `codegen` must outlive this visitor. The cast is safe because
  // this visitor is only ever constructed for the ARM (VIXL) code generator.
  explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
      : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}

  // Default visitor for instructions not handled specifically below.
  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
    last_visited_latency_ = kArmIntegerOpLatency;
  }

// We add a second unused parameter to be able to use this macro like the others
// defined in `nodes.h`.
// Instructions with ARM-specific latency modelling; each entry expands to a
// Visit##type declaration below (definitions live in the .cc file).
#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
  M(ArrayGet, unused)                         \
  M(ArrayLength, unused)                      \
  M(ArraySet, unused)                         \
  M(Add, unused)                              \
  M(Sub, unused)                              \
  M(And, unused)                              \
  M(Or, unused)                               \
  M(Ror, unused)                              \
  M(Xor, unused)                              \
  M(Shl, unused)                              \
  M(Shr, unused)                              \
  M(UShr, unused)                             \
  M(Mul, unused)                              \
  M(Div, unused)                              \
  M(Condition, unused)                        \
  M(Compare, unused)                          \
  M(BoundsCheck, unused)                      \
  M(PredicatedInstanceFieldGet, unused)       \
  M(InstanceFieldGet, unused)                 \
  M(InstanceFieldSet, unused)                 \
  M(InstanceOf, unused)                       \
  M(Invoke, unused)                           \
  M(LoadString, unused)                       \
  M(NewArray, unused)                         \
  M(NewInstance, unused)                      \
  M(Rem, unused)                              \
  M(StaticFieldGet, unused)                   \
  M(StaticFieldSet, unused)                   \
  M(SuspendCheck, unused)                     \
  M(TypeConversion, unused)

// Intermediate instructions shared between architectures (see
// FOR_EACH_SCHEDULED_SHARED_INSTRUCTION usage in HSchedulerARM below).
#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
  M(BitwiseNegatedRight, unused)                 \
  M(MultiplyAccumulate, unused)                  \
  M(IntermediateAddress, unused)                 \
  M(IntermediateAddressIndex, unused)            \
  M(DataProcWithShifterOp, unused)

#define DECLARE_VISIT_INSTRUCTION(type, unused)  \
  void Visit##type(H##type* instruction) override;

  FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

 private:
  // Helpers that mirror the code generator's lowering decisions so the
  // latency estimates track the code that will actually be emitted.
  // NOTE(review): names ending in "Lantencies" look like typos for
  // "Latencies", but renaming must be coordinated with the .cc definitions.
  bool CanGenerateTest(HCondition* cond);
  void HandleGenerateConditionWithZero(IfCondition cond);
  void HandleGenerateLongTestConstant(HCondition* cond);
  void HandleGenerateLongTest(HCondition* cond);
  void HandleGenerateLongComparesAndJumps();
  void HandleGenerateTest(HCondition* cond);
  void HandleGenerateConditionGeneric(HCondition* cond);
  void HandleGenerateEqualLong(HCondition* cond);
  void HandleGenerateConditionLong(HCondition* cond);
  void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
  void HandleCondition(HCondition* instr);
  void HandleBinaryOperationLantencies(HBinaryOperation* instr);
  void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
  void HandleShiftLatencies(HBinaryOperation* instr);
  void HandleDivRemConstantIntegralLatencies(int32_t imm);
  void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
  void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
  void HandleGenerateDataProcInstruction(bool internal_latency = false);
  void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
  void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);

  // The latency setting for each HInstruction depends on how CodeGenerator may generate code,
  // latency visitors may query CodeGenerator for such information for accurate latency settings.
  CodeGeneratorARMType* codegen_;
};
138 
139 class HSchedulerARM : public HScheduler {
140  public:
HSchedulerARM(SchedulingNodeSelector * selector,SchedulingLatencyVisitorARM * arm_latency_visitor)141   HSchedulerARM(SchedulingNodeSelector* selector,
142                 SchedulingLatencyVisitorARM* arm_latency_visitor)
143       : HScheduler(arm_latency_visitor, selector) {}
~HSchedulerARM()144   ~HSchedulerARM() override {}
145 
IsSchedulable(const HInstruction * instruction)146   bool IsSchedulable(const HInstruction* instruction) const override {
147 #define CASE_INSTRUCTION_KIND(type, unused) case \
148   HInstruction::InstructionKind::k##type:
149     switch (instruction->GetKind()) {
150       FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
151         return true;
152       FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
153         return true;
154       default:
155         return HScheduler::IsSchedulable(instruction);
156     }
157 #undef CASE_INSTRUCTION_KIND
158   }
159 
160  private:
161   DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
162 };
163 
164 }  // namespace arm
165 }  // namespace art
166 
167 #endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
168