1 //===- LoopGeneratorsKMP.h - IR helper to create loops ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains functions to create scalar and OpenMP parallel loops
10 // as LLVM-IR.
11 //
12 //===----------------------------------------------------------------------===//
13 #ifndef POLLY_LOOP_GENERATORS_KMP_H
14 #define POLLY_LOOP_GENERATORS_KMP_H
15 
16 #include "polly/CodeGen/IRBuilder.h"
17 #include "polly/CodeGen/LoopGenerators.h"
18 #include "polly/Support/ScopHelper.h"
19 #include "llvm/ADT/SetVector.h"
20 
21 namespace polly {
22 using namespace llvm;
23 
24 /// This ParallelLoopGenerator subclass handles the generation of parallelized
25 /// code, utilizing the LLVM OpenMP library.
26 class ParallelLoopGeneratorKMP : public ParallelLoopGenerator {
27 public:
28   /// Create a parallel loop generator for the current function.
ParallelLoopGeneratorKMP(PollyIRBuilder & Builder,LoopInfo & LI,DominatorTree & DT,const DataLayout & DL)29   ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, LoopInfo &LI,
30                            DominatorTree &DT, const DataLayout &DL)
31       : ParallelLoopGenerator(Builder, LI, DT, DL) {
32     SourceLocationInfo = createSourceLocation();
33   }
34 
35 protected:
36   /// The source location struct of this loop.
37   /// ident_t = type { i32, i32, i32, i32, i8* }
38   GlobalValue *SourceLocationInfo;
39 
40   /// Convert the combination of given chunk size and scheduling type (which
41   /// might have been set via the command line) into the corresponding
42   /// scheduling type. This may result (e.g.) in a 'change' from
43   /// "static chunked" scheduling to "static non-chunked" (regarding the
44   /// provided and returned scheduling types).
45   ///
46   /// @param ChunkSize    The chunk size, set via command line or its default.
47   /// @param Scheduling   The scheduling, set via command line or its default.
48   ///
49   /// @return The corresponding OMPGeneralSchedulingType.
50   OMPGeneralSchedulingType
51   getSchedType(int ChunkSize, OMPGeneralSchedulingType Scheduling) const;
52 
53   /// Returns True if 'LongType' is 64bit wide, otherwise: False.
54   bool is64BitArch();
55 
56 public:
57   // The functions below may be used if one does not want to generate a
58   // specific OpenMP parallel loop, but generate individual parts of it
59   // (e.g. the subfunction definition).
60 
61   /// Create a runtime library call to spawn the worker threads.
62   ///
63   /// @param SubFn      The subfunction which holds the loop body.
64   /// @param SubFnParam The parameter for the subfunction (basically the struct
65   ///                   filled with the outside values).
66   /// @param LB         The lower bound for the loop we parallelize.
67   /// @param UB         The upper bound for the loop we parallelize.
68   /// @param Stride     The stride of the loop we parallelize.
69   void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB,
70                               Value *UB, Value *Stride);
71 
72   void deployParallelExecution(Function *SubFn, Value *SubFnParam, Value *LB,
73                                Value *UB, Value *Stride) override;
74 
75   virtual Function *prepareSubFnDefinition(Function *F) const override;
76 
77   std::tuple<Value *, Function *> createSubFn(Value *Stride, AllocaInst *Struct,
78                                               SetVector<Value *> UsedValues,
79                                               ValueMapT &VMap) override;
80 
81   /// Create a runtime library call to get the current global thread number.
82   ///
83   /// @return A Value ref which holds the current global thread number.
84   Value *createCallGlobalThreadNum();
85 
86   /// Create a runtime library call to request a number of threads.
87   /// Which will be used in the next OpenMP section (by the next fork).
88   ///
89   /// @param GlobalThreadID   The global thread ID.
90   /// @param NumThreads       The number of threads to use.
91   void createCallPushNumThreads(Value *GlobalThreadID, Value *NumThreads);
92 
93   /// Create a runtime library call to prepare the OpenMP runtime.
94   /// For dynamically scheduled loops, saving the loop arguments.
95   ///
96   /// @param GlobalThreadID   The global thread ID.
97   /// @param LB               The loop's lower bound.
98   /// @param UB               The loop's upper bound.
99   /// @param Inc              The loop increment.
100   /// @param ChunkSize        The chunk size of the parallel loop.
101   void createCallDispatchInit(Value *GlobalThreadID, Value *LB, Value *UB,
102                               Value *Inc, Value *ChunkSize);
103 
104   /// Create a runtime library call to retrieve the next (dynamically)
105   /// allocated chunk of work for this thread.
106   ///
107   /// @param GlobalThreadID   The global thread ID.
108   /// @param IsLastPtr        Pointer to a flag, which is set to 1 if this is
109   ///                         the last chunk of work, or 0 otherwise.
110   /// @param LBPtr            Pointer to the lower bound for the next chunk.
111   /// @param UBPtr            Pointer to the upper bound for the next chunk.
112   /// @param StridePtr        Pointer to the stride for the next chunk.
113   ///
114   /// @return A Value which holds 1 if there is work to be done, 0 otherwise.
115   Value *createCallDispatchNext(Value *GlobalThreadID, Value *IsLastPtr,
116                                 Value *LBPtr, Value *UBPtr, Value *StridePtr);
117 
118   /// Create a runtime library call to prepare the OpenMP runtime.
119   /// For statically scheduled loops, saving the loop arguments.
120   ///
121   /// @param GlobalThreadID   The global thread ID.
122   /// @param IsLastPtr        Pointer to a flag, which is set to 1 if this is
123   ///                         the last chunk of work, or 0 otherwise.
124   /// @param LBPtr            Pointer to the lower bound for the next chunk.
125   /// @param UBPtr            Pointer to the upper bound for the next chunk.
126   /// @param StridePtr        Pointer to the stride for the next chunk.
127   /// @param ChunkSize        The chunk size of the parallel loop.
128   void createCallStaticInit(Value *GlobalThreadID, Value *IsLastPtr,
129                             Value *LBPtr, Value *UBPtr, Value *StridePtr,
130                             Value *ChunkSize);
131 
132   /// Create a runtime library call to mark the end of
133   /// a statically scheduled loop.
134   ///
135   /// @param GlobalThreadID   The global thread ID.
136   void createCallStaticFini(Value *GlobalThreadID);
137 
138   /// Create the current source location.
139   ///
140   /// TODO: Generates only(!) dummy values.
141   GlobalVariable *createSourceLocation();
142 };
143 } // end namespace polly
144 #endif
145