1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ 18 #define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ 19 20 #include "induction_var_range.h" 21 #include "nodes.h" 22 #include "optimization.h" 23 24 namespace art { 25 26 class CompilerDriver; 27 28 /** 29 * Loop optimizations. Builds a loop hierarchy and applies optimizations to 30 * the detected nested loops, such as removal of dead induction and empty loops 31 * and inner loop vectorization. 32 */ 33 class HLoopOptimization : public HOptimization { 34 public: 35 HLoopOptimization(HGraph* graph, 36 CompilerDriver* compiler_driver, 37 HInductionVarAnalysis* induction_analysis); 38 39 void Run() OVERRIDE; 40 41 static constexpr const char* kLoopOptimizationPassName = "loop_optimization"; 42 43 private: 44 /** 45 * A single loop inside the loop hierarchy representation. 46 */ 47 struct LoopNode : public ArenaObject<kArenaAllocLoopOptimization> { LoopNodeLoopNode48 explicit LoopNode(HLoopInformation* lp_info) 49 : loop_info(lp_info), 50 outer(nullptr), 51 inner(nullptr), 52 previous(nullptr), 53 next(nullptr) {} 54 HLoopInformation* loop_info; 55 LoopNode* outer; 56 LoopNode* inner; 57 LoopNode* previous; 58 LoopNode* next; 59 }; 60 61 /* 62 * Vectorization restrictions (bit mask). 63 */ 64 enum VectorRestrictions { 65 kNone = 0, // no restrictions 66 kNoMul = 1, // no multiplication 67 kNoDiv = 2, // no division 68 kNoShift = 4, // no shift 69 kNoShr = 8, // no arithmetic shift right 70 kNoHiBits = 16, // "wider" operations cannot bring in higher order bits 71 kNoSignedHAdd = 32, // no signed halving add 72 kNoUnroundedHAdd = 64, // no unrounded halving add 73 kNoAbs = 128, // no absolute value 74 }; 75 76 /* 77 * Vectorization mode during synthesis 78 * (sequential peeling/cleanup loop or vector loop). 79 */ 80 enum VectorMode { 81 kSequential, 82 kVector 83 }; 84 85 /* 86 * Representation of a unit-stride array reference. 87 */ 88 struct ArrayReference { ArrayReferenceArrayReference89 ArrayReference(HInstruction* b, HInstruction* o, Primitive::Type t, bool l) 90 : base(b), offset(o), type(t), lhs(l) { } 91 bool operator<(const ArrayReference& other) const { 92 return 93 (base < other.base) || 94 (base == other.base && 95 (offset < other.offset || (offset == other.offset && 96 (type < other.type || 97 (type == other.type && lhs < other.lhs))))); 98 } 99 HInstruction* base; // base address 100 HInstruction* offset; // offset + i 101 Primitive::Type type; // component type 102 bool lhs; // def/use 103 }; 104 105 // Loop setup and traversal. 106 void LocalRun(); 107 void AddLoop(HLoopInformation* loop_info); 108 void RemoveLoop(LoopNode* node); 109 void TraverseLoopsInnerToOuter(LoopNode* node); 110 111 // Optimization. 112 void SimplifyInduction(LoopNode* node); 113 void SimplifyBlocks(LoopNode* node); 114 void OptimizeInnerLoop(LoopNode* node); 115 116 // Vectorization analysis and synthesis. 117 bool CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count); 118 void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count); 119 void GenerateNewLoop(LoopNode* node, 120 HBasicBlock* block, 121 HBasicBlock* new_preheader, 122 HInstruction* lo, 123 HInstruction* hi, 124 HInstruction* step); 125 bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code); 126 bool VectorizeUse(LoopNode* node, 127 HInstruction* instruction, 128 bool generate_code, 129 Primitive::Type type, 130 uint64_t restrictions); 131 bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions); 132 bool TrySetVectorLength(uint32_t length); 133 void GenerateVecInv(HInstruction* org, Primitive::Type type); 134 void GenerateVecSub(HInstruction* org, HInstruction* off); 135 void GenerateVecMem(HInstruction* org, 136 HInstruction* opa, 137 HInstruction* opb, 138 Primitive::Type type); 139 void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type); 140 141 // Vectorization idioms. 142 bool VectorizeHalvingAddIdiom(LoopNode* node, 143 HInstruction* instruction, 144 bool generate_code, 145 Primitive::Type type, 146 uint64_t restrictions); 147 148 // Helpers. 149 bool TrySetPhiInduction(HPhi* phi, bool restrict_uses); 150 bool TrySetSimpleLoopHeader(HBasicBlock* block); 151 bool IsEmptyBody(HBasicBlock* block); 152 bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info, 153 HInstruction* instruction, 154 bool collect_loop_uses, 155 /*out*/ int32_t* use_count); 156 bool IsUsedOutsideLoop(HLoopInformation* loop_info, 157 HInstruction* instruction); 158 bool TryReplaceWithLastValue(HLoopInformation* loop_info, 159 HInstruction* instruction, 160 HBasicBlock* block); 161 bool TryAssignLastValue(HLoopInformation* loop_info, 162 HInstruction* instruction, 163 HBasicBlock* block, 164 bool collect_loop_uses); 165 void RemoveDeadInstructions(const HInstructionList& list); 166 bool CanRemoveCycle(); // Whether the current 'iset_' is removable. 167 168 // Compiler driver (to query ISA features). 169 const CompilerDriver* compiler_driver_; 170 171 // Range information based on prior induction variable analysis. 172 InductionVarRange induction_range_; 173 174 // Phase-local heap memory allocator for the loop optimizer. Storage obtained 175 // through this allocator is immediately released when the loop optimizer is done. 176 ArenaAllocator* loop_allocator_; 177 178 // Global heap memory allocator. Used to build HIR. 179 ArenaAllocator* global_allocator_; 180 181 // Entries into the loop hierarchy representation. The hierarchy resides 182 // in phase-local heap memory. 183 LoopNode* top_loop_; 184 LoopNode* last_loop_; 185 186 // Temporary bookkeeping of a set of instructions. 187 // Contents reside in phase-local heap memory. 188 ArenaSet<HInstruction*>* iset_; 189 190 // Counter that tracks how many induction cycles have been simplified. Useful 191 // to trigger incremental updates of induction variable analysis of outer loops 192 // when the induction of inner loops has changed. 193 uint32_t induction_simplication_count_; 194 195 // Flag that tracks if any simplifications have occurred. 196 bool simplified_; 197 198 // Number of "lanes" for selected packed type. 199 uint32_t vector_length_; 200 201 // Set of array references in the vector loop. 202 // Contents reside in phase-local heap memory. 203 ArenaSet<ArrayReference>* vector_refs_; 204 205 // Mapping used during vectorization synthesis for both the scalar peeling/cleanup 206 // loop (simd_ is false) and the actual vector loop (simd_ is true). The data 207 // structure maps original instructions into the new instructions. 208 // Contents reside in phase-local heap memory. 209 ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_; 210 211 // Temporary vectorization bookkeeping. 212 HBasicBlock* vector_preheader_; // preheader of the new loop 213 HBasicBlock* vector_header_; // header of the new loop 214 HBasicBlock* vector_body_; // body of the new loop 215 HInstruction* vector_runtime_test_a_; 216 HInstruction* vector_runtime_test_b_; // defines a != b runtime test 217 HPhi* vector_phi_; // the Phi representing the normalized loop index 218 VectorMode vector_mode_; // selects synthesis mode 219 220 friend class LoopOptimizationTest; 221 222 DISALLOW_COPY_AND_ASSIGN(HLoopOptimization); 223 }; 224 225 } // namespace art 226 227 #endif // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ 228