/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_

#include "scheduler.h"

namespace art {
namespace arm64 {

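// Approximate instruction latencies used by the scheduling heuristics to rank
// candidate instructions. These are heuristic cost estimates, nominally in
// cycles, rather than exact timings for any particular core.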
static constexpr uint32_t kArm64MemoryLoadLatency = 5;
static constexpr uint32_t kArm64MemoryStoreLatency = 3;

static constexpr uint32_t kArm64CallInternalLatency = 10;
static constexpr uint32_t kArm64CallLatency = 5;

// AArch64 instruction latency.
// We currently assume that all arm64 CPUs share the same instruction latency list.
static constexpr uint32_t kArm64IntegerOpLatency = 2;
static constexpr uint32_t kArm64FloatingPointOpLatency = 5;

static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
static constexpr uint32_t kArm64DivDoubleLatency = 30;
static constexpr uint32_t kArm64DivFloatLatency = 15;
static constexpr uint32_t kArm64DivIntegerLatency = 5;
static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
static constexpr uint32_t kArm64MulIntegerLatency = 6;
static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;

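// Latencies for SIMD (vector) operations.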
static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;

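// Computes an approximate latency for each visited HInstruction. Results are
// reported through the `last_visited_latency_` (and, where applicable,
// `last_visited_internal_latency_`) fields inherited from SchedulingLatencyVisitor.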
class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
 public:
  // Default visitor for instructions not handled specifically below.
  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) OVERRIDE {
    last_visited_latency_ = kArm64IntegerOpLatency;
  }

// We add a second unused parameter to be able to use this macro like the others
// defined in `nodes.h`.
#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M)     \
  M(ArrayGet             , unused)                   \
  M(ArrayLength          , unused)                   \
  M(ArraySet             , unused)                   \
  M(BinaryOperation      , unused)                   \
  M(BoundsCheck          , unused)                   \
  M(Div                  , unused)                   \
  M(InstanceFieldGet     , unused)                   \
  M(InstanceOf           , unused)                   \
  M(Invoke               , unused)                   \
  M(LoadString           , unused)                   \
  M(Mul                  , unused)                   \
  M(NewArray             , unused)                   \
  M(NewInstance          , unused)                   \
  M(Rem                  , unused)                   \
  M(StaticFieldGet       , unused)                   \
  M(SuspendCheck         , unused)                   \
  M(TypeConversion       , unused)                   \
  M(VecReplicateScalar   , unused)                   \
  M(VecExtractScalar     , unused)                   \
  M(VecReduce            , unused)                   \
  M(VecCnv               , unused)                   \
  M(VecNeg               , unused)                   \
  M(VecAbs               , unused)                   \
  M(VecNot               , unused)                   \
  M(VecAdd               , unused)                   \
  M(VecHalvingAdd        , unused)                   \
  M(VecSub               , unused)                   \
  M(VecMul               , unused)                   \
  M(VecDiv               , unused)                   \
  M(VecMin               , unused)                   \
  M(VecMax               , unused)                   \
  M(VecAnd               , unused)                   \
  M(VecAndNot            , unused)                   \
  M(VecOr                , unused)                   \
  M(VecXor               , unused)                   \
  M(VecShl               , unused)                   \
  M(VecShr               , unused)                   \
  M(VecUShr              , unused)                   \
  M(VecSetScalars        , unused)                   \
  M(VecMultiplyAccumulate, unused)                   \
  M(VecLoad              , unused)                   \
  M(VecStore             , unused)

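// HIR nodes shared between the ARM and ARM64 back ends (defined in `nodes_shared.h`).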
#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
  M(BitwiseNegatedRight, unused)                 \
  M(MultiplyAccumulate, unused)                  \
  M(IntermediateAddress, unused)                 \
  M(IntermediateAddressIndex, unused)            \
  M(DataProcWithShifterOp, unused)

#define DECLARE_VISIT_INSTRUCTION(type, unused)  \
  void Visit##type(H##type* instruction) OVERRIDE;

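  // For example, `M(ArrayGet, unused)` in the lists above expands to:
  //   void VisitArrayGet(HArrayGet* instruction) OVERRIDE;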
  FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

 private:
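  // Helpers used by several Visit* implementations. As a rough sketch of their
  // roles (the definitions live in scheduler_arm64.cc): HandleSimpleArithmeticSIMD
  // picks the integer or floating-point SIMD latency based on the operation's
  // packed type, and HandleVecAddress accounts for the address computation of a
  // vector memory access.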
  void HandleSimpleArithmeticSIMD(HVecOperation* instr);
  void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
};

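// A minimal usage sketch (assuming an existing ScopedArenaAllocator and HGraph;
// see HInstructionScheduling::Run in scheduler.cc for the real call site):
//
//   CriticalPathSchedulingNodeSelector selector;
//   HSchedulerARM64 scheduler(&allocator, &selector);
//   scheduler.Schedule(graph);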
class HSchedulerARM64 : public HScheduler {
 public:
  HSchedulerARM64(ScopedArenaAllocator* allocator, SchedulingNodeSelector* selector)
      : HScheduler(allocator, &arm64_latency_visitor_, selector) {}
  ~HSchedulerARM64() OVERRIDE {}

  bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
#define CASE_INSTRUCTION_KIND(type, unused) case \
  HInstruction::InstructionKind::k##type:
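    // Each FOR_EACH_* list below expands to a run of consecutive `case` labels
    // that all fall through to the shared `return true;` that follows it.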
    switch (instruction->GetKind()) {
      FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      default:
        return HScheduler::IsSchedulable(instruction);
    }
#undef CASE_INSTRUCTION_KIND
  }

  // Treat as scheduling barriers those vector instructions whose live ranges exceed the vectorized
  // loop boundaries. This is a workaround for the compiler's lack of a notion of SIMD registers:
  // around a call we have to save/restore all live SIMD&FP registers (only the lower 64 bits of
  // the SIMD&FP registers are callee-saved), so such vector instructions must not be reordered
  // across calls.
  //
  // TODO: Remove this once proper support for SIMD registers is introduced in the compiler.
  bool IsSchedulingBarrier(const HInstruction* instr) const OVERRIDE {
    return HScheduler::IsSchedulingBarrier(instr) ||
           instr->IsVecReduce() ||
           instr->IsVecExtractScalar() ||
           instr->IsVecSetScalars() ||
           instr->IsVecReplicateScalar();
  }

 private:
  SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
  DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
};

}  // namespace arm64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_