1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_OPTIMIZING_INLINER_H_
18 #define ART_COMPILER_OPTIMIZING_INLINER_H_
19 
20 #include "dex/dex_file_types.h"
21 #include "dex/invoke_type.h"
22 #include "optimization.h"
23 #include "profile/profile_compilation_info.h"
24 
25 namespace art {
26 
27 class CodeGenerator;
28 class DexCompilationUnit;
29 class HGraph;
30 class HInvoke;
31 class OptimizingCompilerStats;
32 
33 class HInliner : public HOptimization {
34  public:
35   HInliner(HGraph* outer_graph,
36            HGraph* outermost_graph,
37            CodeGenerator* codegen,
38            const DexCompilationUnit& outer_compilation_unit,
39            const DexCompilationUnit& caller_compilation_unit,
40            VariableSizedHandleScope* handles,
41            OptimizingCompilerStats* stats,
42            size_t total_number_of_dex_registers,
43            size_t total_number_of_instructions,
44            HInliner* parent,
45            size_t depth = 0,
46            const char* name = kInlinerPassName)
HOptimization(outer_graph,name,stats)47       : HOptimization(outer_graph, name, stats),
48         outermost_graph_(outermost_graph),
49         outer_compilation_unit_(outer_compilation_unit),
50         caller_compilation_unit_(caller_compilation_unit),
51         codegen_(codegen),
52         total_number_of_dex_registers_(total_number_of_dex_registers),
53         total_number_of_instructions_(total_number_of_instructions),
54         parent_(parent),
55         depth_(depth),
56         inlining_budget_(0),
57         handles_(handles),
58         inline_stats_(nullptr) {}
59 
60   bool Run() override;
61 
62   static constexpr const char* kInlinerPassName = "inliner";
63 
64  private:
65   enum InlineCacheType {
66     kInlineCacheNoData = 0,
67     kInlineCacheUninitialized = 1,
68     kInlineCacheMonomorphic = 2,
69     kInlineCachePolymorphic = 3,
70     kInlineCacheMegamorphic = 4,
71     kInlineCacheMissingTypes = 5
72   };
73 
74   bool TryInline(HInvoke* invoke_instruction);
75 
76   // Attempt to resolve the target of the invoke instruction to an acutal call
77   // target.
78   //
79   // Returns the target directly in the case of static or direct invokes.
80   // Otherwise, uses CHA devirtualization or other methods to try to find the
81   // call target.
82   ArtMethod* FindActualCallTarget(HInvoke* invoke_instruction, bool* cha_devirtualize)
83     REQUIRES_SHARED(Locks::mutator_lock_);
84 
85   // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
86   // reference type propagation can run after the inlining. If the inlining is successful, this
87   // method will replace and remove the `invoke_instruction`. If `cha_devirtualize` is true,
88   // a CHA guard needs to be added for the inlining.
89   bool TryInlineAndReplace(HInvoke* invoke_instruction,
90                            ArtMethod* resolved_method,
91                            ReferenceTypeInfo receiver_type,
92                            bool do_rtp,
93                            bool cha_devirtualize)
94     REQUIRES_SHARED(Locks::mutator_lock_);
95 
96   bool TryBuildAndInline(HInvoke* invoke_instruction,
97                          ArtMethod* resolved_method,
98                          ReferenceTypeInfo receiver_type,
99                          HInstruction** return_replacement)
100     REQUIRES_SHARED(Locks::mutator_lock_);
101 
102   bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
103                                ArtMethod* resolved_method,
104                                ReferenceTypeInfo receiver_type,
105                                HInstruction** return_replacement)
106     REQUIRES_SHARED(Locks::mutator_lock_);
107 
108   // Substitutes parameters in the callee graph with their values from the caller.
109   void SubstituteArguments(HGraph* callee_graph,
110                            HInvoke* invoke_instruction,
111                            ReferenceTypeInfo receiver_type,
112                            const DexCompilationUnit& dex_compilation_unit)
113     REQUIRES_SHARED(Locks::mutator_lock_);
114 
115   // Run simple optimizations on `callee_graph`.
116   void RunOptimizations(HGraph* callee_graph,
117                         const dex::CodeItem* code_item,
118                         const DexCompilationUnit& dex_compilation_unit)
119     REQUIRES_SHARED(Locks::mutator_lock_);
120 
121   // Try to recognize known simple patterns and replace invoke call with appropriate instructions.
122   bool TryPatternSubstitution(HInvoke* invoke_instruction,
123                               ArtMethod* resolved_method,
124                               HInstruction** return_replacement)
125     REQUIRES_SHARED(Locks::mutator_lock_);
126 
127   // Returns whether inlining is allowed based on ART semantics.
128   bool IsInliningAllowed(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const
129     REQUIRES_SHARED(Locks::mutator_lock_);
130 
131 
132   // Returns whether ART supports inlining this method.
133   //
134   // Some methods are not supported because they have features for which inlining
135   // is not implemented. For example, we do not currently support inlining throw
136   // instructions into a try block.
137   bool IsInliningSupported(const HInvoke* invoke_instruction,
138                            art::ArtMethod* method,
139                            const CodeItemDataAccessor& accessor) const
140     REQUIRES_SHARED(Locks::mutator_lock_);
141 
142   // Returns whether the inlining budget allows inlining method.
143   //
144   // For example, this checks whether the function has grown too large and
145   // inlining should be prevented.
146   bool IsInliningBudgetAvailable(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const
147     REQUIRES_SHARED(Locks::mutator_lock_);
148 
149   // Inspects the body of a method (callee_graph) and returns whether it can be
150   // inlined.
151   //
152   // This checks for instructions and constructs that we do not support
153   // inlining, such as inlining a throw instruction into a try block.
154   bool CanInlineBody(const HGraph* callee_graph,
155                      const HBasicBlock* target_block,
156                      size_t* out_number_of_instructions) const
157     REQUIRES_SHARED(Locks::mutator_lock_);
158 
159   // Create a new HInstanceFieldGet.
160   HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index,
161                                             ArtMethod* referrer,
162                                             HInstruction* obj);
163   // Create a new HInstanceFieldSet.
164   HInstanceFieldSet* CreateInstanceFieldSet(uint32_t field_index,
165                                             ArtMethod* referrer,
166                                             HInstruction* obj,
167                                             HInstruction* value,
168                                             bool* is_final = nullptr);
169 
170   // Try inlining the invoke instruction using inline caches.
171   bool TryInlineFromInlineCache(
172       const DexFile& caller_dex_file,
173       HInvoke* invoke_instruction,
174       ArtMethod* resolved_method)
175     REQUIRES_SHARED(Locks::mutator_lock_);
176 
177   // Try getting the inline cache from JIT code cache.
178   // Return true if the inline cache was successfully allocated and the
179   // invoke info was found in the profile info.
180   InlineCacheType GetInlineCacheJIT(
181       HInvoke* invoke_instruction,
182       StackHandleScope<1>* hs,
183       /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
184     REQUIRES_SHARED(Locks::mutator_lock_);
185 
186   // Try getting the inline cache from AOT offline profile.
187   // Return true if the inline cache was successfully allocated and the
188   // invoke info was found in the profile info.
189   InlineCacheType GetInlineCacheAOT(const DexFile& caller_dex_file,
190       HInvoke* invoke_instruction,
191       StackHandleScope<1>* hs,
192       /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
193     REQUIRES_SHARED(Locks::mutator_lock_);
194 
195   // Extract the mirror classes from the offline profile and add them to the `inline_cache`.
196   // Note that even if we have profile data for the invoke the inline_cache might contain
197   // only null entries if the types cannot be resolved.
198   InlineCacheType ExtractClassesFromOfflineProfile(
199       const HInvoke* invoke_instruction,
200       const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
201       /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
202     REQUIRES_SHARED(Locks::mutator_lock_);
203 
204   // Compute the inline cache type.
205   InlineCacheType GetInlineCacheType(
206       const Handle<mirror::ObjectArray<mirror::Class>>& classes)
207     REQUIRES_SHARED(Locks::mutator_lock_);
208 
209   // Try to inline the target of a monomorphic call. If successful, the code
210   // in the graph will look like:
211   // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
212   // ... // inlined code
213   bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
214                                 ArtMethod* resolved_method,
215                                 Handle<mirror::ObjectArray<mirror::Class>> classes)
216     REQUIRES_SHARED(Locks::mutator_lock_);
217 
218   // Try to inline targets of a polymorphic call.
219   bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
220                                 ArtMethod* resolved_method,
221                                 Handle<mirror::ObjectArray<mirror::Class>> classes)
222     REQUIRES_SHARED(Locks::mutator_lock_);
223 
224   bool TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
225                                             ArtMethod* resolved_method,
226                                             Handle<mirror::ObjectArray<mirror::Class>> classes)
227     REQUIRES_SHARED(Locks::mutator_lock_);
228 
229   // Returns whether or not we should use only polymorphic inlining with no deoptimizations.
230   bool UseOnlyPolymorphicInliningWithNoDeopt();
231 
232   // Try CHA-based devirtualization to change virtual method calls into
233   // direct calls.
234   // Returns the actual method that resolved_method can be devirtualized to.
235   ArtMethod* TryCHADevirtualization(ArtMethod* resolved_method)
236     REQUIRES_SHARED(Locks::mutator_lock_);
237 
238   // Add a CHA guard for a CHA-based devirtualized call. A CHA guard checks a
239   // should_deoptimize flag and if it's true, does deoptimization.
240   void AddCHAGuard(HInstruction* invoke_instruction,
241                    uint32_t dex_pc,
242                    HInstruction* cursor,
243                    HBasicBlock* bb_cursor);
244 
245   HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
246                                            HInstruction* receiver,
247                                            uint32_t dex_pc) const
248     REQUIRES_SHARED(Locks::mutator_lock_);
249 
250   void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement)
251     REQUIRES_SHARED(Locks::mutator_lock_);
252 
253   // Creates an instance of ReferenceTypeInfo from `klass` if `klass` is
254   // admissible (see ReferenceTypePropagation::IsAdmissible for details).
255   // Otherwise returns inexact Object RTI.
256   ReferenceTypeInfo GetClassRTI(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
257 
258   bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method)
259     REQUIRES_SHARED(Locks::mutator_lock_);
260 
261   bool ReturnTypeMoreSpecific(HInvoke* invoke_instruction, HInstruction* return_replacement)
262     REQUIRES_SHARED(Locks::mutator_lock_);
263 
264   // Add a type guard on the given `receiver`. This will add to the graph:
265   // i0 = HFieldGet(receiver, klass)
266   // i1 = HLoadClass(class_index, is_referrer)
267   // i2 = HNotEqual(i0, i1)
268   //
269   // And if `with_deoptimization` is true:
270   // HDeoptimize(i2)
271   //
272   // The method returns the `HNotEqual`, that will be used for polymorphic inlining.
273   HInstruction* AddTypeGuard(HInstruction* receiver,
274                              HInstruction* cursor,
275                              HBasicBlock* bb_cursor,
276                              dex::TypeIndex class_index,
277                              Handle<mirror::Class> klass,
278                              HInstruction* invoke_instruction,
279                              bool with_deoptimization)
280     REQUIRES_SHARED(Locks::mutator_lock_);
281 
282   /*
283    * Ad-hoc implementation for implementing a diamond pattern in the graph for
284    * polymorphic inlining:
285    * 1) `compare` becomes the input of the new `HIf`.
286    * 2) Everything up until `invoke_instruction` is in the then branch (could
287    *    contain multiple blocks).
288    * 3) `invoke_instruction` is moved to the otherwise block.
289    * 4) If `return_replacement` is not null, the merge block will have
290    *    a phi whose inputs are `return_replacement` and `invoke_instruction`.
291    *
292    * Before:
293    *             Block1
294    *             compare
295    *              ...
296    *         invoke_instruction
297    *
298    * After:
299    *            Block1
300    *            compare
301    *              if
302    *          /        \
303    *         /          \
304    *   Then block    Otherwise block
305    *      ...       invoke_instruction
306    *       \              /
307    *        \            /
308    *          Merge block
309    *  phi(return_replacement, invoke_instruction)
310    */
311   void CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
312                                                 HInstruction* return_replacement,
313                                                 HInstruction* invoke_instruction);
314 
315   // Update the inlining budget based on `total_number_of_instructions_`.
316   void UpdateInliningBudget();
317 
318   // Count the number of calls of `method` being inlined recursively.
319   size_t CountRecursiveCallsOf(ArtMethod* method) const;
320 
321   // Pretty-print for spaces during logging.
322   std::string DepthString(int line) const;
323 
324   HGraph* const outermost_graph_;
325   const DexCompilationUnit& outer_compilation_unit_;
326   const DexCompilationUnit& caller_compilation_unit_;
327   CodeGenerator* const codegen_;
328   const size_t total_number_of_dex_registers_;
329   size_t total_number_of_instructions_;
330 
331   // The 'parent' inliner, that means the inlinigng optimization that requested
332   // `graph_` to be inlined.
333   const HInliner* const parent_;
334   const size_t depth_;
335 
336   // The budget left for inlining, in number of instructions.
337   size_t inlining_budget_;
338   VariableSizedHandleScope* const handles_;
339 
340   // Used to record stats about optimizations on the inlined graph.
341   // If the inlining is successful, these stats are merged to the caller graph's stats.
342   OptimizingCompilerStats* inline_stats_;
343 
344   DISALLOW_COPY_AND_ASSIGN(HInliner);
345 };
346 
347 }  // namespace art
348 
349 #endif  // ART_COMPILER_OPTIMIZING_INLINER_H_
350