1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
18 #define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
19 
20 #include "induction_var_range.h"
21 #include "nodes.h"
22 #include "optimization.h"
23 
24 namespace art {
25 
26 class CompilerDriver;
27 
28 /**
29  * Loop optimizations. Builds a loop hierarchy and applies optimizations to
30  * the detected nested loops, such as removal of dead induction and empty loops
31  * and inner loop vectorization.
32  */
33 class HLoopOptimization : public HOptimization {
34  public:
35   HLoopOptimization(HGraph* graph,
36                     CompilerDriver* compiler_driver,
37                     HInductionVarAnalysis* induction_analysis);
38 
39   void Run() OVERRIDE;
40 
41   static constexpr const char* kLoopOptimizationPassName = "loop_optimization";
42 
43  private:
44   /**
45    * A single loop inside the loop hierarchy representation.
46    */
47   struct LoopNode : public ArenaObject<kArenaAllocLoopOptimization> {
LoopNodeLoopNode48     explicit LoopNode(HLoopInformation* lp_info)
49         : loop_info(lp_info),
50           outer(nullptr),
51           inner(nullptr),
52           previous(nullptr),
53           next(nullptr) {}
54     HLoopInformation* loop_info;
55     LoopNode* outer;
56     LoopNode* inner;
57     LoopNode* previous;
58     LoopNode* next;
59   };
60 
61   /*
62    * Vectorization restrictions (bit mask).
63    */
64   enum VectorRestrictions {
65     kNone            = 0,    // no restrictions
66     kNoMul           = 1,    // no multiplication
67     kNoDiv           = 2,    // no division
68     kNoShift         = 4,    // no shift
69     kNoShr           = 8,    // no arithmetic shift right
70     kNoHiBits        = 16,   // "wider" operations cannot bring in higher order bits
71     kNoSignedHAdd    = 32,   // no signed halving add
72     kNoUnroundedHAdd = 64,   // no unrounded halving add
73     kNoAbs           = 128,  // no absolute value
74   };
75 
76   /*
77    * Vectorization mode during synthesis
78    * (sequential peeling/cleanup loop or vector loop).
79    */
80   enum VectorMode {
81     kSequential,
82     kVector
83   };
84 
85   /*
86    * Representation of a unit-stride array reference.
87    */
88   struct ArrayReference {
ArrayReferenceArrayReference89     ArrayReference(HInstruction* b, HInstruction* o, Primitive::Type t, bool l)
90         : base(b), offset(o), type(t), lhs(l) { }
91     bool operator<(const ArrayReference& other) const {
92       return
93           (base < other.base) ||
94           (base == other.base &&
95            (offset < other.offset || (offset == other.offset &&
96                                       (type < other.type ||
97                                        (type == other.type && lhs < other.lhs)))));
98     }
99     HInstruction* base;    // base address
100     HInstruction* offset;  // offset + i
101     Primitive::Type type;  // component type
102     bool lhs;              // def/use
103   };
104 
105   // Loop setup and traversal.
106   void LocalRun();
107   void AddLoop(HLoopInformation* loop_info);
108   void RemoveLoop(LoopNode* node);
109   void TraverseLoopsInnerToOuter(LoopNode* node);
110 
111   // Optimization.
112   void SimplifyInduction(LoopNode* node);
113   void SimplifyBlocks(LoopNode* node);
114   void OptimizeInnerLoop(LoopNode* node);
115 
116   // Vectorization analysis and synthesis.
117   bool CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
118   void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
119   void GenerateNewLoop(LoopNode* node,
120                        HBasicBlock* block,
121                        HBasicBlock* new_preheader,
122                        HInstruction* lo,
123                        HInstruction* hi,
124                        HInstruction* step);
125   bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code);
126   bool VectorizeUse(LoopNode* node,
127                     HInstruction* instruction,
128                     bool generate_code,
129                     Primitive::Type type,
130                     uint64_t restrictions);
131   bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions);
132   bool TrySetVectorLength(uint32_t length);
133   void GenerateVecInv(HInstruction* org, Primitive::Type type);
134   void GenerateVecSub(HInstruction* org, HInstruction* off);
135   void GenerateVecMem(HInstruction* org,
136                       HInstruction* opa,
137                       HInstruction* opb,
138                       Primitive::Type type);
139   void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
140 
141   // Vectorization idioms.
142   bool VectorizeHalvingAddIdiom(LoopNode* node,
143                                 HInstruction* instruction,
144                                 bool generate_code,
145                                 Primitive::Type type,
146                                 uint64_t restrictions);
147 
148   // Helpers.
149   bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
150   bool TrySetSimpleLoopHeader(HBasicBlock* block);
151   bool IsEmptyBody(HBasicBlock* block);
152   bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
153                            HInstruction* instruction,
154                            bool collect_loop_uses,
155                            /*out*/ int32_t* use_count);
156   bool IsUsedOutsideLoop(HLoopInformation* loop_info,
157                          HInstruction* instruction);
158   bool TryReplaceWithLastValue(HLoopInformation* loop_info,
159                                HInstruction* instruction,
160                                HBasicBlock* block);
161   bool TryAssignLastValue(HLoopInformation* loop_info,
162                           HInstruction* instruction,
163                           HBasicBlock* block,
164                           bool collect_loop_uses);
165   void RemoveDeadInstructions(const HInstructionList& list);
166   bool CanRemoveCycle();  // Whether the current 'iset_' is removable.
167 
168   // Compiler driver (to query ISA features).
169   const CompilerDriver* compiler_driver_;
170 
171   // Range information based on prior induction variable analysis.
172   InductionVarRange induction_range_;
173 
174   // Phase-local heap memory allocator for the loop optimizer. Storage obtained
175   // through this allocator is immediately released when the loop optimizer is done.
176   ArenaAllocator* loop_allocator_;
177 
178   // Global heap memory allocator. Used to build HIR.
179   ArenaAllocator* global_allocator_;
180 
181   // Entries into the loop hierarchy representation. The hierarchy resides
182   // in phase-local heap memory.
183   LoopNode* top_loop_;
184   LoopNode* last_loop_;
185 
186   // Temporary bookkeeping of a set of instructions.
187   // Contents reside in phase-local heap memory.
188   ArenaSet<HInstruction*>* iset_;
189 
190   // Counter that tracks how many induction cycles have been simplified. Useful
191   // to trigger incremental updates of induction variable analysis of outer loops
192   // when the induction of inner loops has changed.
193   uint32_t induction_simplication_count_;
194 
195   // Flag that tracks if any simplifications have occurred.
196   bool simplified_;
197 
198   // Number of "lanes" for selected packed type.
199   uint32_t vector_length_;
200 
201   // Set of array references in the vector loop.
202   // Contents reside in phase-local heap memory.
203   ArenaSet<ArrayReference>* vector_refs_;
204 
205   // Mapping used during vectorization synthesis for both the scalar peeling/cleanup
206   // loop (simd_ is false) and the actual vector loop (simd_ is true). The data
207   // structure maps original instructions into the new instructions.
208   // Contents reside in phase-local heap memory.
209   ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
210 
211   // Temporary vectorization bookkeeping.
212   HBasicBlock* vector_preheader_;  // preheader of the new loop
213   HBasicBlock* vector_header_;  // header of the new loop
214   HBasicBlock* vector_body_;  // body of the new loop
215   HInstruction* vector_runtime_test_a_;
216   HInstruction* vector_runtime_test_b_;  // defines a != b runtime test
217   HPhi* vector_phi_;  // the Phi representing the normalized loop index
218   VectorMode vector_mode_;  // selects synthesis mode
219 
220   friend class LoopOptimizationTest;
221 
222   DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
223 };
224 
225 }  // namespace art
226 
227 #endif  // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
228