/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_INLINER_H_
#define ART_COMPILER_OPTIMIZING_INLINER_H_

#include "dex/dex_file_types.h"
#include "dex/invoke_type.h"
#include "jit/profile_compilation_info.h"
#include "optimization.h"

namespace art {

class CodeGenerator;
class CompilerDriver;
class DexCompilationUnit;
class HGraph;
class HInvoke;
class OptimizingCompilerStats;

class HInliner : public HOptimization {
 public:
  HInliner(HGraph* outer_graph,
           HGraph* outermost_graph,
           CodeGenerator* codegen,
           const DexCompilationUnit& outer_compilation_unit,
           const DexCompilationUnit& caller_compilation_unit,
           CompilerDriver* compiler_driver,
           VariableSizedHandleScope* handles,
           OptimizingCompilerStats* stats,
           size_t total_number_of_dex_registers,
           size_t total_number_of_instructions,
           HInliner* parent,
           size_t depth = 0,
           const char* name = kInlinerPassName)
      : HOptimization(outer_graph, name, stats),
        outermost_graph_(outermost_graph),
        outer_compilation_unit_(outer_compilation_unit),
        caller_compilation_unit_(caller_compilation_unit),
        codegen_(codegen),
        compiler_driver_(compiler_driver),
        total_number_of_dex_registers_(total_number_of_dex_registers),
        total_number_of_instructions_(total_number_of_instructions),
        parent_(parent),
        depth_(depth),
        inlining_budget_(0),
        handles_(handles),
        inline_stats_(nullptr) {}
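
  // A hedged usage sketch of how the optimizing compiler might set up the
  // top-level inliner (argument values are illustrative, not the exact code
  // found in the compiler's pass setup):
  //
  //   HInliner inliner(graph,
  //                    graph,                 // outermost graph == graph at the top level
  //                    codegen,
  //                    dex_compilation_unit,  // outer unit
  //                    dex_compilation_unit,  // caller unit == outer unit at the top level
  //                    compiler_driver,
  //                    handles,
  //                    stats,
  //                    number_of_dex_registers,
  //                    /* total_number_of_instructions= */ 0,
  //                    /* parent= */ nullptr);
  //   inliner.Run();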

  void Run() OVERRIDE;

  static constexpr const char* kInlinerPassName = "inliner";

 private:
  enum InlineCacheType {
    kInlineCacheNoData = 0,
    kInlineCacheUninitialized = 1,
    kInlineCacheMonomorphic = 2,
    kInlineCachePolymorphic = 3,
    kInlineCacheMegamorphic = 4,
    kInlineCacheMissingTypes = 5
  };

  bool TryInline(HInvoke* invoke_instruction);

  // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
  // reference type propagation can run after the inlining. If the inlining is successful, this
  // method will replace and remove the `invoke_instruction`. If `cha_devirtualize` is true,
  // a CHA guard needs to be added for the inlining.
  bool TryInlineAndReplace(HInvoke* invoke_instruction,
                           ArtMethod* resolved_method,
                           ReferenceTypeInfo receiver_type,
                           bool do_rtp,
                           bool cha_devirtualize)
    REQUIRES_SHARED(Locks::mutator_lock_);

  bool TryBuildAndInline(HInvoke* invoke_instruction,
                         ArtMethod* resolved_method,
                         ReferenceTypeInfo receiver_type,
                         HInstruction** return_replacement)
    REQUIRES_SHARED(Locks::mutator_lock_);

  bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
                               ArtMethod* resolved_method,
                               ReferenceTypeInfo receiver_type,
                               bool same_dex_file,
                               HInstruction** return_replacement);

  // Run simple optimizations on `callee_graph`.
  void RunOptimizations(HGraph* callee_graph,
                        const DexFile::CodeItem* code_item,
                        const DexCompilationUnit& dex_compilation_unit)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try to recognize known simple patterns and replace the invoke call with appropriate
  // instructions.
  bool TryPatternSubstitution(HInvoke* invoke_instruction,
                              ArtMethod* resolved_method,
                              HInstruction** return_replacement)
    REQUIRES_SHARED(Locks::mutator_lock_);
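  // For instance (an illustrative example, not an exhaustive list of the
  // recognized patterns): a trivial getter such as `int getX() { return x; }`
  // may be replaced by a single HInstanceFieldGet built with
  // CreateInstanceFieldGet() below, and a trivial setter by an
  // HInstanceFieldSet built with CreateInstanceFieldSet().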

  // Create a new HInstanceFieldGet.
  HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index,
                                            ArtMethod* referrer,
                                            HInstruction* obj);
  // Create a new HInstanceFieldSet.
  HInstanceFieldSet* CreateInstanceFieldSet(uint32_t field_index,
                                            ArtMethod* referrer,
                                            HInstruction* obj,
                                            HInstruction* value,
                                            bool* is_final = nullptr);

  // Try inlining the invoke instruction using inline caches.
  bool TryInlineFromInlineCache(
      const DexFile& caller_dex_file,
      HInvoke* invoke_instruction,
      ArtMethod* resolved_method)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try getting the inline cache from the JIT code cache.
  // Returns kInlineCacheNoData unless the inline cache was successfully
  // allocated and the invoke info was found in the JIT profiling info.
  InlineCacheType GetInlineCacheJIT(
      HInvoke* invoke_instruction,
      StackHandleScope<1>* hs,
      /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try getting the inline cache from the AOT offline profile.
  // Returns kInlineCacheNoData unless the inline cache was successfully
  // allocated and the invoke info was found in the offline profile.
  InlineCacheType GetInlineCacheAOT(const DexFile& caller_dex_file,
      HInvoke* invoke_instruction,
      StackHandleScope<1>* hs,
      /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Extract the mirror classes from the offline profile and add them to the `inline_cache`.
  // Note that even if we have profile data for the invoke, the `inline_cache` might contain
  // only null entries if the types cannot be resolved.
  InlineCacheType ExtractClassesFromOfflineProfile(
      const HInvoke* invoke_instruction,
      const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
      /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Compute the inline cache type.
  InlineCacheType GetInlineCacheType(
      const Handle<mirror::ObjectArray<mirror::Class>>& classes)
    REQUIRES_SHARED(Locks::mutator_lock_);
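  // A hedged sketch of the classification (the authoritative logic lives in
  // inliner.cc): count the leading non-null entries of `classes`;
  //   0 entries               -> kInlineCacheUninitialized
  //   1 entry                 -> kInlineCacheMonomorphic
  //   a completely full cache -> kInlineCacheMegamorphic
  //   anything in between     -> kInlineCachePolymorphic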

  // Try to inline the target of a monomorphic call. If successful, the code
  // in the graph will look like:
  // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
  // ... // inlined code
  bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
                                ArtMethod* resolved_method,
                                Handle<mirror::ObjectArray<mirror::Class>> classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try to inline targets of a polymorphic call.
  bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                ArtMethod* resolved_method,
                                Handle<mirror::ObjectArray<mirror::Class>> classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

  bool TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
                                            ArtMethod* resolved_method,
                                            Handle<mirror::ObjectArray<mirror::Class>> classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns whether or not we should use only polymorphic inlining with no deoptimizations.
  bool UseOnlyPolymorphicInliningWithNoDeopt();

  // Try CHA-based devirtualization to change virtual method calls into
  // direct calls.
  // Returns the actual method that resolved_method can be devirtualized to.
  ArtMethod* TryCHADevirtualization(ArtMethod* resolved_method)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Add a CHA guard for a CHA-based devirtualized call. A CHA guard checks the
  // should_deoptimize flag and, if it is set, deoptimizes.
  void AddCHAGuard(HInstruction* invoke_instruction,
                   uint32_t dex_pc,
                   HInstruction* cursor,
                   HBasicBlock* bb_cursor);
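  // A rough sketch of what the guard adds in front of the devirtualized call,
  // in the same spirit as the AddTypeGuard() comment below (illustrative; see
  // the definition in inliner.cc for the authoritative construction):
  //   i0 = HShouldDeoptimizeFlag()
  //   i1 = HNotEqual(i0, 0)
  //   HDeoptimize(i1)  // falls back to the interpreter when CHA assumptions break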

  HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
                                           HInstruction* receiver,
                                           uint32_t dex_pc) const
    REQUIRES_SHARED(Locks::mutator_lock_);

  void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Creates an instance of ReferenceTypeInfo from `klass` if `klass` is
  // admissible (see ReferenceTypePropagation::IsAdmissible for details).
  // Otherwise returns inexact Object RTI.
  ReferenceTypeInfo GetClassRTI(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);

  bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method)
    REQUIRES_SHARED(Locks::mutator_lock_);

  bool ReturnTypeMoreSpecific(HInvoke* invoke_instruction, HInstruction* return_replacement)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Add a type guard on the given `receiver`. This will add to the graph:
  // i0 = HFieldGet(receiver, klass)
  // i1 = HLoadClass(class_index, is_referrer)
  // i2 = HNotEqual(i0, i1)
  //
  // And if `with_deoptimization` is true:
  // HDeoptimize(i2)
  //
  // The method returns the `HNotEqual`, which will be used for polymorphic inlining.
  HInstruction* AddTypeGuard(HInstruction* receiver,
                             HInstruction* cursor,
                             HBasicBlock* bb_cursor,
                             dex::TypeIndex class_index,
                             Handle<mirror::Class> klass,
                             HInstruction* invoke_instruction,
                             bool with_deoptimization)
    REQUIRES_SHARED(Locks::mutator_lock_);

  /*
   * Ad-hoc implementation of a diamond pattern in the graph for
   * polymorphic inlining:
   * 1) `compare` becomes the input of the new `HIf`.
   * 2) Everything up until `invoke_instruction` is in the then branch (could
   *    contain multiple blocks).
   * 3) `invoke_instruction` is moved to the otherwise block.
   * 4) If `return_replacement` is not null, the merge block will have
   *    a phi whose inputs are `return_replacement` and `invoke_instruction`.
   *
   * Before:
   *             Block1
   *             compare
   *              ...
   *         invoke_instruction
   *
   * After:
   *            Block1
   *            compare
   *              if
   *          /        \
   *         /          \
   *   Then block    Otherwise block
   *      ...       invoke_instruction
   *       \              /
   *        \            /
   *          Merge block
   *  phi(return_replacement, invoke_instruction)
   */
  void CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
                                                HInstruction* return_replacement,
                                                HInstruction* invoke_instruction);

  // Update the inlining budget based on `total_number_of_instructions_`.
  void UpdateInliningBudget();

  // Count the number of calls of `method` being inlined recursively.
  size_t CountRecursiveCallsOf(ArtMethod* method) const;

  // Returns an indentation string, based on the inlining depth, for logging.
  std::string DepthString(int line) const;

  HGraph* const outermost_graph_;
  const DexCompilationUnit& outer_compilation_unit_;
  const DexCompilationUnit& caller_compilation_unit_;
  CodeGenerator* const codegen_;
  CompilerDriver* const compiler_driver_;
  const size_t total_number_of_dex_registers_;
  size_t total_number_of_instructions_;

  // The 'parent' inliner, i.e. the inlining optimization that requested
  // `graph_` to be inlined.
  const HInliner* const parent_;
  const size_t depth_;

  // The budget left for inlining, in number of instructions.
  size_t inlining_budget_;
  VariableSizedHandleScope* const handles_;

  // Used to record stats about optimizations on the inlined graph.
  // If the inlining is successful, these stats are merged to the caller graph's stats.
  OptimizingCompilerStats* inline_stats_;

  DISALLOW_COPY_AND_ASSIGN(HInliner);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_INLINER_H_
