/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef ART_COMPILER_OPTIMIZING_INLINER_H_
#define ART_COMPILER_OPTIMIZING_INLINER_H_

#include "base/macros.h"
#include "dex/dex_file_types.h"
#include "dex/invoke_type.h"
#include "jit/profiling_info.h"
#include "optimization.h"
#include "profile/profile_compilation_info.h"

27 namespace art HIDDEN {
28 
29 class CodeGenerator;
30 class DexCompilationUnit;
31 class HGraph;
32 class HInvoke;
33 class OptimizingCompilerStats;
34 
35 class HInliner : public HOptimization {
36  public:
37   HInliner(HGraph* outer_graph,
38            HGraph* outermost_graph,
39            CodeGenerator* codegen,
40            const DexCompilationUnit& outer_compilation_unit,
41            const DexCompilationUnit& caller_compilation_unit,
42            OptimizingCompilerStats* stats,
43            size_t total_number_of_dex_registers,
44            size_t total_number_of_instructions,
45            HInliner* parent,
46            HEnvironment* caller_environment,
47            size_t depth,
48            bool try_catch_inlining_allowed,
49            const char* name = kInlinerPassName)
HOptimization(outer_graph,name,stats)50       : HOptimization(outer_graph, name, stats),
51         outermost_graph_(outermost_graph),
52         outer_compilation_unit_(outer_compilation_unit),
53         caller_compilation_unit_(caller_compilation_unit),
54         codegen_(codegen),
55         total_number_of_dex_registers_(total_number_of_dex_registers),
56         total_number_of_instructions_(total_number_of_instructions),
57         parent_(parent),
58         caller_environment_(caller_environment),
59         depth_(depth),
60         inlining_budget_(0),
61         try_catch_inlining_allowed_(try_catch_inlining_allowed),
62         run_extra_type_propagation_(false),
63         inline_stats_(nullptr) {}
64 
65   bool Run() override;
66 
67   static constexpr const char* kInlinerPassName = "inliner";
68 
GetParent()69   const HInliner* GetParent() const { return parent_; }
GetCallerEnvironment()70   const HEnvironment* GetCallerEnvironment() const { return caller_environment_; }
71 
GetOutermostGraph()72   const HGraph* GetOutermostGraph() const { return outermost_graph_; }
GetGraph()73   const HGraph* GetGraph() const { return graph_; }
74 
75  private:
76   enum InlineCacheType {
77     kInlineCacheNoData = 0,
78     kInlineCacheUninitialized = 1,
79     kInlineCacheMonomorphic = 2,
80     kInlineCachePolymorphic = 3,
81     kInlineCacheMegamorphic = 4,
82     kInlineCacheMissingTypes = 5
83   };
84 
85   bool TryInline(HInvoke* invoke_instruction);
86 
87   // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
88   // reference type propagation can run after the inlining. If the inlining is successful, this
89   // method will replace and remove the `invoke_instruction`.
90   bool TryInlineAndReplace(HInvoke* invoke_instruction,
91                            ArtMethod* resolved_method,
92                            ReferenceTypeInfo receiver_type,
93                            bool do_rtp,
94                            bool is_speculative)
95     REQUIRES_SHARED(Locks::mutator_lock_);
96 
97   bool TryBuildAndInline(HInvoke* invoke_instruction,
98                          ArtMethod* resolved_method,
99                          ReferenceTypeInfo receiver_type,
100                          HInstruction** return_replacement,
101                          bool is_speculative)
102     REQUIRES_SHARED(Locks::mutator_lock_);
103 
104   bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
105                                ArtMethod* resolved_method,
106                                ReferenceTypeInfo receiver_type,
107                                HInstruction** return_replacement,
108                                bool is_speculative)
109     REQUIRES_SHARED(Locks::mutator_lock_);
110 
111   // Substitutes parameters in the callee graph with their values from the caller.
112   void SubstituteArguments(HGraph* callee_graph,
113                            HInvoke* invoke_instruction,
114                            ReferenceTypeInfo receiver_type,
115                            const DexCompilationUnit& dex_compilation_unit)
116     REQUIRES_SHARED(Locks::mutator_lock_);
117 
118   // Run simple optimizations on `callee_graph`.
119   void RunOptimizations(HGraph* callee_graph,
120                         HEnvironment* caller_environment,
121                         const dex::CodeItem* code_item,
122                         const DexCompilationUnit& dex_compilation_unit,
123                         bool try_catch_inlining_allowed_for_recursive_inline)
124       REQUIRES_SHARED(Locks::mutator_lock_);
125 
126   // Try to recognize known simple patterns and replace invoke call with appropriate instructions.
127   bool TryPatternSubstitution(HInvoke* invoke_instruction,
128                               ArtMethod* method,
129                               const CodeItemDataAccessor& accessor,
130                               HInstruction** return_replacement)
131     REQUIRES_SHARED(Locks::mutator_lock_);
132 
133   // Returns whether inlining is allowed based on ART semantics.
134   bool IsInliningAllowed(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const
135     REQUIRES_SHARED(Locks::mutator_lock_);
136 
137 
138   // Returns whether ART supports inlining this method.
139   //
140   // Some methods are not supported because they have features for which inlining
141   // is not implemented. For example, we do not currently support inlining throw
142   // instructions into a try block.
143   bool IsInliningSupported(const HInvoke* invoke_instruction,
144                            art::ArtMethod* method,
145                            const CodeItemDataAccessor& accessor) const
146     REQUIRES_SHARED(Locks::mutator_lock_);
147 
148   // Returns whether inlining is encouraged.
149   //
150   // For example, this checks whether the function has grown too large and
151   // inlining should be prevented.
152   bool IsInliningEncouraged(const HInvoke* invoke_instruction,
153                             art::ArtMethod* method,
154                             const CodeItemDataAccessor& accessor) const
155       REQUIRES_SHARED(Locks::mutator_lock_);
156 
157   // Inspects the body of a method (callee_graph) and returns whether it can be
158   // inlined.
159   //
160   // This checks for instructions and constructs that we do not support
161   // inlining, such as inlining a throw instruction into a try block.
162   bool CanInlineBody(const HGraph* callee_graph,
163                      HInvoke* invoke,
164                      size_t* out_number_of_instructions,
165                      bool is_speculative) const
166     REQUIRES_SHARED(Locks::mutator_lock_);
167 
168   // Create a new HInstanceFieldGet.
169   HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index,
170                                             ArtMethod* referrer,
171                                             HInstruction* obj);
172   // Create a new HInstanceFieldSet.
173   HInstanceFieldSet* CreateInstanceFieldSet(uint32_t field_index,
174                                             ArtMethod* referrer,
175                                             HInstruction* obj,
176                                             HInstruction* value,
177                                             bool* is_final = nullptr);
178 
179   // Try inlining the invoke instruction using inline caches.
180   bool TryInlineFromInlineCache(HInvoke* invoke_instruction)
181     REQUIRES_SHARED(Locks::mutator_lock_);
182 
183   // Try inlining the invoke instruction using CHA.
184   bool TryInlineFromCHA(HInvoke* invoke_instruction)
185     REQUIRES_SHARED(Locks::mutator_lock_);
186 
187   // When we fail inlining `invoke_instruction`, we will try to devirtualize the
188   // call.
189   bool TryDevirtualize(HInvoke* invoke_instruction,
190                        ArtMethod* method,
191                        HInvoke** replacement)
192     REQUIRES_SHARED(Locks::mutator_lock_);
193 
194   // Try getting the inline cache from JIT code cache.
195   // Return true if the inline cache was successfully allocated and the
196   // invoke info was found in the profile info.
197   InlineCacheType GetInlineCacheJIT(
198       HInvoke* invoke_instruction,
199       /*out*/StackHandleScope<InlineCache::kIndividualCacheSize>* classes)
200     REQUIRES_SHARED(Locks::mutator_lock_);
201 
202   // Try getting the inline cache from AOT offline profile.
203   // Return true if the inline cache was successfully allocated and the
204   // invoke info was found in the profile info.
205   InlineCacheType GetInlineCacheAOT(
206       HInvoke* invoke_instruction,
207       /*out*/StackHandleScope<InlineCache::kIndividualCacheSize>* classes)
208     REQUIRES_SHARED(Locks::mutator_lock_);
209 
210   // Compute the inline cache type.
211   static InlineCacheType GetInlineCacheType(
212       const StackHandleScope<InlineCache::kIndividualCacheSize>& classes)
213     REQUIRES_SHARED(Locks::mutator_lock_);
214 
215   // Try to inline the target of a monomorphic call. If successful, the code
216   // in the graph will look like:
217   // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
218   // ... // inlined code
219   bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
220                                 const StackHandleScope<InlineCache::kIndividualCacheSize>& classes)
221     REQUIRES_SHARED(Locks::mutator_lock_);
222 
223   // Try to inline targets of a polymorphic call.
224   bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
225                                 const StackHandleScope<InlineCache::kIndividualCacheSize>& classes)
226     REQUIRES_SHARED(Locks::mutator_lock_);
227 
228   bool TryInlinePolymorphicCallToSameTarget(
229       HInvoke* invoke_instruction,
230       const StackHandleScope<InlineCache::kIndividualCacheSize>& classes)
231     REQUIRES_SHARED(Locks::mutator_lock_);
232 
233   // Returns whether or not we should use only polymorphic inlining with no deoptimizations.
234   bool UseOnlyPolymorphicInliningWithNoDeopt();
235 
236   // Try CHA-based devirtualization to change virtual method calls into
237   // direct calls.
238   // Returns the actual method that resolved_method can be devirtualized to.
239   ArtMethod* FindMethodFromCHA(ArtMethod* resolved_method)
240     REQUIRES_SHARED(Locks::mutator_lock_);
241 
242   // Add a CHA guard for a CHA-based devirtualized call. A CHA guard checks a
243   // should_deoptimize flag and if it's true, does deoptimization.
244   void AddCHAGuard(HInstruction* invoke_instruction,
245                    uint32_t dex_pc,
246                    HInstruction* cursor,
247                    HBasicBlock* bb_cursor);
248 
249   HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
250                                            HInstruction* receiver,
251                                            uint32_t dex_pc) const
252     REQUIRES_SHARED(Locks::mutator_lock_);
253 
254   void MaybeRunReferenceTypePropagation(HInstruction* replacement,
255                                         HInvoke* invoke_instruction)
256     REQUIRES_SHARED(Locks::mutator_lock_);
257 
258   void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement)
259     REQUIRES_SHARED(Locks::mutator_lock_);
260 
261   bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method)
262     REQUIRES_SHARED(Locks::mutator_lock_);
263 
264   bool ReturnTypeMoreSpecific(HInstruction* return_replacement, HInvoke* invoke_instruction)
265     REQUIRES_SHARED(Locks::mutator_lock_);
266 
267   // Add a type guard on the given `receiver`. This will add to the graph:
268   // i0 = HFieldGet(receiver, klass)
269   // i1 = HLoadClass(class_index, is_referrer)
270   // i2 = HNotEqual(i0, i1)
271   //
272   // And if `with_deoptimization` is true:
273   // HDeoptimize(i2)
274   //
275   // The method returns the `HNotEqual`, that will be used for polymorphic inlining.
276   HInstruction* AddTypeGuard(HInstruction* receiver,
277                              HInstruction* cursor,
278                              HBasicBlock* bb_cursor,
279                              dex::TypeIndex class_index,
280                              Handle<mirror::Class> klass,
281                              HInstruction* invoke_instruction,
282                              bool with_deoptimization)
283     REQUIRES_SHARED(Locks::mutator_lock_);
284 
285   /*
286    * Ad-hoc implementation for implementing a diamond pattern in the graph for
287    * polymorphic inlining:
288    * 1) `compare` becomes the input of the new `HIf`.
289    * 2) Everything up until `invoke_instruction` is in the then branch (could
290    *    contain multiple blocks).
291    * 3) `invoke_instruction` is moved to the otherwise block.
292    * 4) If `return_replacement` is not null, the merge block will have
293    *    a phi whose inputs are `return_replacement` and `invoke_instruction`.
294    *
295    * Before:
296    *             Block1
297    *             compare
298    *              ...
299    *         invoke_instruction
300    *
301    * After:
302    *            Block1
303    *            compare
304    *              if
305    *          /        \
306    *         /          \
307    *   Then block    Otherwise block
308    *      ...       invoke_instruction
309    *       \              /
310    *        \            /
311    *          Merge block
312    *  phi(return_replacement, invoke_instruction)
313    */
314   void CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
315                                                 HInstruction* return_replacement,
316                                                 HInstruction* invoke_instruction);
317 
318   // Update the inlining budget based on `total_number_of_instructions_`.
319   void UpdateInliningBudget();
320 
321   // Count the number of calls of `method` being inlined recursively.
322   size_t CountRecursiveCallsOf(ArtMethod* method) const;
323 
324   // Pretty-print for spaces during logging.
325   std::string DepthString(int line) const;
326 
327   HGraph* const outermost_graph_;
328   const DexCompilationUnit& outer_compilation_unit_;
329   const DexCompilationUnit& caller_compilation_unit_;
330   CodeGenerator* const codegen_;
331   const size_t total_number_of_dex_registers_;
332   size_t total_number_of_instructions_;
333 
334   // The 'parent' inliner, that means the inlining optimization that requested
335   // `graph_` to be inlined.
336   const HInliner* const parent_;
337   const HEnvironment* const caller_environment_;
338   const size_t depth_;
339 
340   // The budget left for inlining, in number of instructions.
341   size_t inlining_budget_;
342 
343   // States if we are allowing try catch inlining to occur at this particular instance of inlining.
344   bool try_catch_inlining_allowed_;
345 
346   // True if we need to run type propagation to type guards we inserted.
347   bool run_extra_type_propagation_;
348 
349   // Used to record stats about optimizations on the inlined graph.
350   // If the inlining is successful, these stats are merged to the caller graph's stats.
351   OptimizingCompilerStats* inline_stats_;
352 
353   DISALLOW_COPY_AND_ASSIGN(HInliner);
354 };
355 
356 }  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_INLINER_H_