1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "pc_relative_fixups_x86.h"
18 #include "code_generator_x86.h"
19 #include "intrinsics_x86.h"
20 
21 namespace art {
22 namespace x86 {
23 
24 /**
25  * Finds instructions that need the constant area base as an input.
26  */
27 class PCRelativeHandlerVisitor : public HGraphVisitor {
28  public:
PCRelativeHandlerVisitor(HGraph * graph,CodeGenerator * codegen)29   PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen)
30       : HGraphVisitor(graph),
31         codegen_(down_cast<CodeGeneratorX86*>(codegen)),
32         base_(nullptr) {}
33 
MoveBaseIfNeeded()34   void MoveBaseIfNeeded() {
35     if (base_ != nullptr) {
36       // Bring the base closer to the first use (previously, it was in the
37       // entry block) and relieve some pressure on the register allocator
38       // while avoiding recalculation of the base in a loop.
39       base_->MoveBeforeFirstUserAndOutOfLoops();
40     }
41   }
42 
43  private:
VisitAdd(HAdd * add)44   void VisitAdd(HAdd* add) OVERRIDE {
45     BinaryFP(add);
46   }
47 
VisitSub(HSub * sub)48   void VisitSub(HSub* sub) OVERRIDE {
49     BinaryFP(sub);
50   }
51 
VisitMul(HMul * mul)52   void VisitMul(HMul* mul) OVERRIDE {
53     BinaryFP(mul);
54   }
55 
VisitDiv(HDiv * div)56   void VisitDiv(HDiv* div) OVERRIDE {
57     BinaryFP(div);
58   }
59 
VisitCompare(HCompare * compare)60   void VisitCompare(HCompare* compare) OVERRIDE {
61     BinaryFP(compare);
62   }
63 
VisitReturn(HReturn * ret)64   void VisitReturn(HReturn* ret) OVERRIDE {
65     HConstant* value = ret->InputAt(0)->AsConstant();
66     if ((value != nullptr && Primitive::IsFloatingPointType(value->GetType()))) {
67       ReplaceInput(ret, value, 0, true);
68     }
69   }
70 
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)71   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
72     HandleInvoke(invoke);
73   }
74 
VisitInvokeVirtual(HInvokeVirtual * invoke)75   void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
76     HandleInvoke(invoke);
77   }
78 
VisitInvokeInterface(HInvokeInterface * invoke)79   void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE {
80     HandleInvoke(invoke);
81   }
82 
VisitLoadString(HLoadString * load_string)83   void VisitLoadString(HLoadString* load_string) OVERRIDE {
84     HLoadString::LoadKind load_kind = load_string->GetLoadKind();
85     if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
86         load_kind == HLoadString::LoadKind::kDexCachePcRelative) {
87       InitializePCRelativeBasePointer();
88       load_string->AddSpecialInput(base_);
89     }
90   }
91 
BinaryFP(HBinaryOperation * bin)92   void BinaryFP(HBinaryOperation* bin) {
93     HConstant* rhs = bin->InputAt(1)->AsConstant();
94     if (rhs != nullptr && Primitive::IsFloatingPointType(rhs->GetType())) {
95       ReplaceInput(bin, rhs, 1, false);
96     }
97   }
98 
VisitEqual(HEqual * cond)99   void VisitEqual(HEqual* cond) OVERRIDE {
100     BinaryFP(cond);
101   }
102 
VisitNotEqual(HNotEqual * cond)103   void VisitNotEqual(HNotEqual* cond) OVERRIDE {
104     BinaryFP(cond);
105   }
106 
VisitLessThan(HLessThan * cond)107   void VisitLessThan(HLessThan* cond) OVERRIDE {
108     BinaryFP(cond);
109   }
110 
VisitLessThanOrEqual(HLessThanOrEqual * cond)111   void VisitLessThanOrEqual(HLessThanOrEqual* cond) OVERRIDE {
112     BinaryFP(cond);
113   }
114 
VisitGreaterThan(HGreaterThan * cond)115   void VisitGreaterThan(HGreaterThan* cond) OVERRIDE {
116     BinaryFP(cond);
117   }
118 
VisitGreaterThanOrEqual(HGreaterThanOrEqual * cond)119   void VisitGreaterThanOrEqual(HGreaterThanOrEqual* cond) OVERRIDE {
120     BinaryFP(cond);
121   }
122 
VisitNeg(HNeg * neg)123   void VisitNeg(HNeg* neg) OVERRIDE {
124     if (Primitive::IsFloatingPointType(neg->GetType())) {
125       // We need to replace the HNeg with a HX86FPNeg in order to address the constant area.
126       InitializePCRelativeBasePointer();
127       HGraph* graph = GetGraph();
128       HBasicBlock* block = neg->GetBlock();
129       HX86FPNeg* x86_fp_neg = new (graph->GetArena()) HX86FPNeg(
130           neg->GetType(),
131           neg->InputAt(0),
132           base_,
133           neg->GetDexPc());
134       block->ReplaceAndRemoveInstructionWith(neg, x86_fp_neg);
135     }
136   }
137 
VisitPackedSwitch(HPackedSwitch * switch_insn)138   void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
139     if (switch_insn->GetNumEntries() <=
140         InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) {
141       return;
142     }
143     // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
144     // address the constant area.
145     InitializePCRelativeBasePointer();
146     HGraph* graph = GetGraph();
147     HBasicBlock* block = switch_insn->GetBlock();
148     HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
149         switch_insn->GetStartValue(),
150         switch_insn->GetNumEntries(),
151         switch_insn->InputAt(0),
152         base_,
153         switch_insn->GetDexPc());
154     block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
155   }
156 
InitializePCRelativeBasePointer()157   void InitializePCRelativeBasePointer() {
158     // Ensure we only initialize the pointer once.
159     if (base_ != nullptr) {
160       return;
161     }
162     // Insert the base at the start of the entry block, move it to a better
163     // position later in MoveBaseIfNeeded().
164     base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress();
165     HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
166     entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction());
167     DCHECK(base_ != nullptr);
168   }
169 
ReplaceInput(HInstruction * insn,HConstant * value,int input_index,bool materialize)170   void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
171     InitializePCRelativeBasePointer();
172     HX86LoadFromConstantTable* load_constant =
173         new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value);
174     if (!materialize) {
175       load_constant->MarkEmittedAtUseSite();
176     }
177     insn->GetBlock()->InsertInstructionBefore(load_constant, insn);
178     insn->ReplaceInput(load_constant, input_index);
179   }
180 
HandleInvoke(HInvoke * invoke)181   void HandleInvoke(HInvoke* invoke) {
182     // If this is an invoke-static/-direct with PC-relative dex cache array
183     // addressing, we need the PC-relative address base.
184     HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
185     // We can't add a pointer to the constant area if we already have a current
186     // method pointer. This may arise when sharpening doesn't remove the current
187     // method pointer from the invoke.
188     if (invoke_static_or_direct != nullptr &&
189         invoke_static_or_direct->HasCurrentMethodInput()) {
190       DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache());
191       return;
192     }
193 
194     bool base_added = false;
195     if (invoke_static_or_direct != nullptr &&
196         invoke_static_or_direct->HasPcRelativeDexCache() &&
197         !WillHaveCallFreeIntrinsicsCodeGen(invoke)) {
198       InitializePCRelativeBasePointer();
199       // Add the extra parameter base_.
200       invoke_static_or_direct->AddSpecialInput(base_);
201       base_added = true;
202     }
203 
204     // Ensure that we can load FP arguments from the constant area.
205     for (size_t i = 0, e = invoke->InputCount(); i < e; i++) {
206       HConstant* input = invoke->InputAt(i)->AsConstant();
207       if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) {
208         ReplaceInput(invoke, input, i, true);
209       }
210     }
211 
212     // These intrinsics need the constant area.
213     switch (invoke->GetIntrinsic()) {
214       case Intrinsics::kMathAbsDouble:
215       case Intrinsics::kMathAbsFloat:
216       case Intrinsics::kMathMaxDoubleDouble:
217       case Intrinsics::kMathMaxFloatFloat:
218       case Intrinsics::kMathMinDoubleDouble:
219       case Intrinsics::kMathMinFloatFloat:
220         if (!base_added) {
221           DCHECK(invoke_static_or_direct != nullptr);
222           DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
223           InitializePCRelativeBasePointer();
224           invoke_static_or_direct->AddSpecialInput(base_);
225         }
226         break;
227       default:
228         break;
229     }
230   }
231 
WillHaveCallFreeIntrinsicsCodeGen(HInvoke * invoke)232   bool WillHaveCallFreeIntrinsicsCodeGen(HInvoke* invoke) {
233     if (invoke->GetIntrinsic() != Intrinsics::kNone) {
234       // This invoke may have intrinsic code generation defined. However, we must
235       // now also determine if this code generation is truly there and call-free
236       // (not unimplemented, no bail on instruction features, or call on slow path).
237       // This is done by actually calling the locations builder on the instruction
238       // and clearing out the locations once result is known. We assume this
239       // call only has creating locations as side effects!
240       IntrinsicLocationsBuilderX86 builder(codegen_);
241       bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall();
242       invoke->SetLocations(nullptr);
243       return success;
244     }
245     return false;
246   }
247 
248   CodeGeneratorX86* codegen_;
249 
250   // The generated HX86ComputeBaseMethodAddress in the entry block needed as an
251   // input to the HX86LoadFromConstantTable instructions.
252   HX86ComputeBaseMethodAddress* base_;
253 };
254 
Run()255 void PcRelativeFixups::Run() {
256   if (graph_->HasIrreducibleLoops()) {
257     // Do not run this optimization, as irreducible loops do not work with an instruction
258     // that can be live-in at the irreducible loop header.
259     return;
260   }
261   PCRelativeHandlerVisitor visitor(graph_, codegen_);
262   visitor.VisitInsertionOrder();
263   visitor.MoveBaseIfNeeded();
264 }
265 
266 }  // namespace x86
267 }  // namespace art
268