1 //===- LoopGenerators.h - IR helper to create loops -------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains functions to create scalar and OpenMP parallel loops 10 // as LLVM-IR. 11 // 12 //===----------------------------------------------------------------------===// 13 #ifndef POLLY_LOOP_GENERATORS_H 14 #define POLLY_LOOP_GENERATORS_H 15 16 #include "polly/CodeGen/IRBuilder.h" 17 #include "polly/Support/ScopHelper.h" 18 #include "llvm/ADT/SetVector.h" 19 20 namespace polly { 21 using namespace llvm; 22 23 /// General scheduling types of parallel OpenMP for loops. 24 /// Initialization values taken from OpenMP's enum in kmp.h: sched_type. 25 /// Currently, only 'static' scheduling may change from chunked to non-chunked. 26 enum class OMPGeneralSchedulingType { 27 StaticChunked = 33, 28 StaticNonChunked = 34, 29 Dynamic = 35, 30 Guided = 36, 31 Runtime = 37 32 }; 33 34 extern int PollyNumThreads; 35 extern OMPGeneralSchedulingType PollyScheduling; 36 extern int PollyChunkSize; 37 38 /// Create a scalar do/for-style loop. 39 /// 40 /// @param LowerBound The starting value of the induction variable. 41 /// @param UpperBound The upper bound of the induction variable. 42 /// @param Stride The value by which the induction variable 43 /// is incremented. 44 /// 45 /// @param Builder The builder used to create the loop. 46 /// @param P A pointer to the pass that uses this function. 47 /// It is used to update analysis information. 48 /// @param LI The loop info for the current function 49 /// @param DT The dominator tree we need to update 50 /// @param ExitBlock The block the loop will exit to. 51 /// @param Predicate The predicate used to generate the upper loop 52 /// bound. 53 /// @param Annotator This function can (optionally) take 54 /// a ScopAnnotator which 55 /// annotates loops and alias information in the SCoP. 56 /// @param Parallel If this loop should be marked parallel in 57 /// the Annotator. 58 /// @param UseGuard Create a guard in front of the header to check if 59 /// the loop is executed at least once, otherwise just 60 /// assume it. 61 /// @param LoopVectDisabled If the Loop vectorizer should be disabled for this 62 /// loop. 63 /// 64 /// @return Value* The newly created induction variable for this loop. 65 Value *createLoop(Value *LowerBound, Value *UpperBound, Value *Stride, 66 PollyIRBuilder &Builder, LoopInfo &LI, DominatorTree &DT, 67 BasicBlock *&ExitBlock, ICmpInst::Predicate Predicate, 68 ScopAnnotator *Annotator = NULL, bool Parallel = false, 69 bool UseGuard = true, bool LoopVectDisabled = false); 70 71 /// The ParallelLoopGenerator allows to create parallelized loops 72 /// 73 /// To parallelize a loop, we perform the following steps: 74 /// o Generate a subfunction which will hold the loop body. 75 /// o Create a struct to hold all outer values needed in the loop body. 76 /// o Create calls to a runtime library to achieve the actual parallelism. 77 /// These calls will spawn and join threads, define how the work (here the 78 /// iterations) are distributed between them and make sure each has access 79 /// to the struct holding all needed values. 80 /// 81 /// At the moment we support only one parallel runtime, OpenMP. 82 /// 83 /// If we parallelize the outer loop of the following loop nest, 84 /// 85 /// S0; 86 /// for (int i = 0; i < N; i++) 87 /// for (int j = 0; j < M; j++) 88 /// S1(i, j); 89 /// S2; 90 /// 91 /// we will generate the following code (with different runtime function names): 92 /// 93 /// S0; 94 /// auto *values = storeValuesIntoStruct(); 95 /// // Execute subfunction with multiple threads 96 /// spawn_threads(subfunction, values); 97 /// join_threads(); 98 /// S2; 99 /// 100 /// // This function is executed in parallel by different threads 101 /// void subfunction(values) { 102 /// while (auto *WorkItem = getWorkItem()) { 103 /// int LB = WorkItem.begin(); 104 /// int UB = WorkItem.end(); 105 /// for (int i = LB; i < UB; i++) 106 /// for (int j = 0; j < M; j++) 107 /// S1(i, j); 108 /// } 109 /// cleanup_thread(); 110 /// } 111 class ParallelLoopGenerator { 112 public: 113 /// Create a parallel loop generator for the current function. ParallelLoopGenerator(PollyIRBuilder & Builder,LoopInfo & LI,DominatorTree & DT,const DataLayout & DL)114 ParallelLoopGenerator(PollyIRBuilder &Builder, LoopInfo &LI, 115 DominatorTree &DT, const DataLayout &DL) 116 : Builder(Builder), LI(LI), DT(DT), 117 LongType( 118 Type::getIntNTy(Builder.getContext(), DL.getPointerSizeInBits())), 119 M(Builder.GetInsertBlock()->getParent()->getParent()) {} 120 ~ParallelLoopGenerator()121 virtual ~ParallelLoopGenerator() {} 122 123 /// Create a parallel loop. 124 /// 125 /// This function is the main function to automatically generate a parallel 126 /// loop with all its components. 127 /// 128 /// @param LB The lower bound for the loop we parallelize. 129 /// @param UB The upper bound for the loop we parallelize. 130 /// @param Stride The stride of the loop we parallelize. 131 /// @param Values A set of LLVM-IR Values that should be available in 132 /// the new loop body. 133 /// @param VMap A map to allow outside access to the new versions of 134 /// the values in @p Values. 135 /// @param LoopBody A pointer to an iterator that is set to point to the 136 /// body of the created loop. It should be used to insert 137 /// instructions that form the actual loop body. 138 /// 139 /// @return The newly created induction variable for this loop. 140 Value *createParallelLoop(Value *LB, Value *UB, Value *Stride, 141 SetVector<Value *> &Values, ValueMapT &VMap, 142 BasicBlock::iterator *LoopBody); 143 144 protected: 145 /// The IR builder we use to create instructions. 146 PollyIRBuilder &Builder; 147 148 /// The loop info of the current function we need to update. 149 LoopInfo &LI; 150 151 /// The dominance tree of the current function we need to update. 152 DominatorTree &DT; 153 154 /// The type of a "long" on this hardware used for backend calls. 155 Type *LongType; 156 157 /// The current module 158 Module *M; 159 160 public: 161 /// Create a struct for all @p Values and store them in there. 162 /// 163 /// @param Values The values which should be stored in the struct. 164 /// 165 /// @return The created struct. 166 AllocaInst *storeValuesIntoStruct(SetVector<Value *> &Values); 167 168 /// Extract all values from the @p Struct and construct the mapping. 169 /// 170 /// @param Values The values which were stored in the struct. 171 /// @param Struct The struct holding all the values in @p Values. 172 /// @param VMap A map to associate every element of @p Values with the 173 /// new llvm value loaded from the @p Struct. 174 void extractValuesFromStruct(SetVector<Value *> Values, Type *Ty, 175 Value *Struct, ValueMapT &VMap); 176 177 /// Create the definition of the parallel subfunction. 178 /// 179 /// @return A pointer to the subfunction. 180 Function *createSubFnDefinition(); 181 182 /// Create the runtime library calls for spawn and join of the worker threads. 183 /// Additionally, places a call to the specified subfunction. 184 /// 185 /// @param SubFn The subfunction which holds the loop body. 186 /// @param SubFnParam The parameter for the subfunction (basically the struct 187 /// filled with the outside values). 188 /// @param LB The lower bound for the loop we parallelize. 189 /// @param UB The upper bound for the loop we parallelize. 190 /// @param Stride The stride of the loop we parallelize. 191 virtual void deployParallelExecution(Function *SubFn, Value *SubFnParam, 192 Value *LB, Value *UB, Value *Stride) = 0; 193 194 /// Prepare the definition of the parallel subfunction. 195 /// Creates the argument list and names them (as well as the subfunction). 196 /// 197 /// @param F A pointer to the (parallel) subfunction's parent function. 198 /// 199 /// @return The pointer to the (parallel) subfunction. 200 virtual Function *prepareSubFnDefinition(Function *F) const = 0; 201 202 /// Create the parallel subfunction. 203 /// 204 /// @param Stride The induction variable increment. 205 /// @param Struct A struct holding all values in @p Values. 206 /// @param Values A set of LLVM-IR Values that should be available in 207 /// the new loop body. 208 /// @param VMap A map to allow outside access to the new versions of 209 /// the values in @p Values. 210 /// @param SubFn The newly created subfunction is returned here. 211 /// 212 /// @return The newly created induction variable. 213 virtual std::tuple<Value *, Function *> 214 createSubFn(Value *Stride, AllocaInst *Struct, SetVector<Value *> UsedValues, 215 ValueMapT &VMap) = 0; 216 }; 217 } // end namespace polly 218 #endif 219