1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Decl.h"
19 #include "clang/AST/StmtOpenMP.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/Bitcode/ReaderWriter.h"
22 #include "llvm/IR/CallSite.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/GlobalValue.h"
25 #include "llvm/IR/Value.h"
26 #include "llvm/Support/Format.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cassert>
29 
30 using namespace clang;
31 using namespace CodeGen;
32 
33 namespace {
34 /// \brief Base class for handling code generation inside OpenMP regions.
35 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
36 public:
37   /// \brief Kinds of OpenMP regions used in codegen.
38   enum CGOpenMPRegionKind {
39     /// \brief Region with outlined function for standalone 'parallel'
40     /// directive.
41     ParallelOutlinedRegion,
42     /// \brief Region with outlined function for standalone 'task' directive.
43     TaskOutlinedRegion,
44     /// \brief Region for constructs that do not require function outlining,
45     /// like 'for', 'sections', 'atomic' etc. directives.
46     InlinedRegion,
47     /// \brief Region with outlined function for standalone 'target' directive.
48     TargetRegion,
49   };
50 
CGOpenMPRegionInfo(const CapturedStmt & CS,const CGOpenMPRegionKind RegionKind,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)51   CGOpenMPRegionInfo(const CapturedStmt &CS,
52                      const CGOpenMPRegionKind RegionKind,
53                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
54                      bool HasCancel)
55       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
56         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
57 
CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)58   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
59                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
60                      bool HasCancel)
61       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
62         Kind(Kind), HasCancel(HasCancel) {}
63 
64   /// \brief Get a variable or parameter for storing global thread id
65   /// inside OpenMP construct.
66   virtual const VarDecl *getThreadIDVariable() const = 0;
67 
68   /// \brief Emit the captured statement body.
69   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
70 
71   /// \brief Get an LValue for the current ThreadID variable.
72   /// \return LValue for thread id variable. This LValue always has type int32*.
73   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
74 
emitUntiedSwitch(CodeGenFunction &)75   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
76 
getRegionKind() const77   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
78 
getDirectiveKind() const79   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
80 
hasCancel() const81   bool hasCancel() const { return HasCancel; }
82 
classof(const CGCapturedStmtInfo * Info)83   static bool classof(const CGCapturedStmtInfo *Info) {
84     return Info->getKind() == CR_OpenMP;
85   }
86 
87   ~CGOpenMPRegionInfo() override = default;
88 
89 protected:
90   CGOpenMPRegionKind RegionKind;
91   RegionCodeGenTy CodeGen;
92   OpenMPDirectiveKind Kind;
93   bool HasCancel;
94 };
95 
96 /// \brief API for captured statement code generation in OpenMP constructs.
97 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
98 public:
CGOpenMPOutlinedRegionInfo(const CapturedStmt & CS,const VarDecl * ThreadIDVar,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)99   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
100                              const RegionCodeGenTy &CodeGen,
101                              OpenMPDirectiveKind Kind, bool HasCancel)
102       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
103                            HasCancel),
104         ThreadIDVar(ThreadIDVar) {
105     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
106   }
107 
108   /// \brief Get a variable or parameter for storing global thread id
109   /// inside OpenMP construct.
getThreadIDVariable() const110   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
111 
112   /// \brief Get the name of the capture helper.
getHelperName() const113   StringRef getHelperName() const override { return ".omp_outlined."; }
114 
classof(const CGCapturedStmtInfo * Info)115   static bool classof(const CGCapturedStmtInfo *Info) {
116     return CGOpenMPRegionInfo::classof(Info) &&
117            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
118                ParallelOutlinedRegion;
119   }
120 
121 private:
122   /// \brief A variable or parameter storing global thread id for OpenMP
123   /// constructs.
124   const VarDecl *ThreadIDVar;
125 };
126 
127 /// \brief API for captured statement code generation in OpenMP constructs.
128 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
129 public:
130   class UntiedTaskActionTy final : public PrePostActionTy {
131     bool Untied;
132     const VarDecl *PartIDVar;
133     const RegionCodeGenTy UntiedCodeGen;
134     llvm::SwitchInst *UntiedSwitch = nullptr;
135 
136   public:
UntiedTaskActionTy(bool Tied,const VarDecl * PartIDVar,const RegionCodeGenTy & UntiedCodeGen)137     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
138                        const RegionCodeGenTy &UntiedCodeGen)
139         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
Enter(CodeGenFunction & CGF)140     void Enter(CodeGenFunction &CGF) override {
141       if (Untied) {
142         // Emit task switching point.
143         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
144             CGF.GetAddrOfLocalVar(PartIDVar),
145             PartIDVar->getType()->castAs<PointerType>());
146         auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
147         auto *DoneBB = CGF.createBasicBlock(".untied.done.");
148         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
149         CGF.EmitBlock(DoneBB);
150         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
151         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
152         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
153                               CGF.Builder.GetInsertBlock());
154         emitUntiedSwitch(CGF);
155       }
156     }
emitUntiedSwitch(CodeGenFunction & CGF) const157     void emitUntiedSwitch(CodeGenFunction &CGF) const {
158       if (Untied) {
159         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
160             CGF.GetAddrOfLocalVar(PartIDVar),
161             PartIDVar->getType()->castAs<PointerType>());
162         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
163                               PartIdLVal);
164         UntiedCodeGen(CGF);
165         CodeGenFunction::JumpDest CurPoint =
166             CGF.getJumpDestInCurrentScope(".untied.next.");
167         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
168         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
169         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
170                               CGF.Builder.GetInsertBlock());
171         CGF.EmitBranchThroughCleanup(CurPoint);
172         CGF.EmitBlock(CurPoint.getBlock());
173       }
174     }
getNumberOfParts() const175     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
176   };
CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt & CS,const VarDecl * ThreadIDVar,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel,const UntiedTaskActionTy & Action)177   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
178                                  const VarDecl *ThreadIDVar,
179                                  const RegionCodeGenTy &CodeGen,
180                                  OpenMPDirectiveKind Kind, bool HasCancel,
181                                  const UntiedTaskActionTy &Action)
182       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
183         ThreadIDVar(ThreadIDVar), Action(Action) {
184     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
185   }
186 
187   /// \brief Get a variable or parameter for storing global thread id
188   /// inside OpenMP construct.
getThreadIDVariable() const189   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
190 
191   /// \brief Get an LValue for the current ThreadID variable.
192   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
193 
194   /// \brief Get the name of the capture helper.
getHelperName() const195   StringRef getHelperName() const override { return ".omp_outlined."; }
196 
emitUntiedSwitch(CodeGenFunction & CGF)197   void emitUntiedSwitch(CodeGenFunction &CGF) override {
198     Action.emitUntiedSwitch(CGF);
199   }
200 
classof(const CGCapturedStmtInfo * Info)201   static bool classof(const CGCapturedStmtInfo *Info) {
202     return CGOpenMPRegionInfo::classof(Info) &&
203            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
204                TaskOutlinedRegion;
205   }
206 
207 private:
208   /// \brief A variable or parameter storing global thread id for OpenMP
209   /// constructs.
210   const VarDecl *ThreadIDVar;
211   /// Action for emitting code for untied tasks.
212   const UntiedTaskActionTy &Action;
213 };
214 
215 /// \brief API for inlined captured statement code generation in OpenMP
216 /// constructs.
217 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
218 public:
CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo * OldCSI,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)219   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
220                             const RegionCodeGenTy &CodeGen,
221                             OpenMPDirectiveKind Kind, bool HasCancel)
222       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
223         OldCSI(OldCSI),
224         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
225 
226   // \brief Retrieve the value of the context parameter.
getContextValue() const227   llvm::Value *getContextValue() const override {
228     if (OuterRegionInfo)
229       return OuterRegionInfo->getContextValue();
230     llvm_unreachable("No context value for inlined OpenMP region");
231   }
232 
setContextValue(llvm::Value * V)233   void setContextValue(llvm::Value *V) override {
234     if (OuterRegionInfo) {
235       OuterRegionInfo->setContextValue(V);
236       return;
237     }
238     llvm_unreachable("No context value for inlined OpenMP region");
239   }
240 
241   /// \brief Lookup the captured field decl for a variable.
lookup(const VarDecl * VD) const242   const FieldDecl *lookup(const VarDecl *VD) const override {
243     if (OuterRegionInfo)
244       return OuterRegionInfo->lookup(VD);
245     // If there is no outer outlined region,no need to lookup in a list of
246     // captured variables, we can use the original one.
247     return nullptr;
248   }
249 
getThisFieldDecl() const250   FieldDecl *getThisFieldDecl() const override {
251     if (OuterRegionInfo)
252       return OuterRegionInfo->getThisFieldDecl();
253     return nullptr;
254   }
255 
256   /// \brief Get a variable or parameter for storing global thread id
257   /// inside OpenMP construct.
getThreadIDVariable() const258   const VarDecl *getThreadIDVariable() const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->getThreadIDVariable();
261     return nullptr;
262   }
263 
264   /// \brief Get the name of the capture helper.
getHelperName() const265   StringRef getHelperName() const override {
266     if (auto *OuterRegionInfo = getOldCSI())
267       return OuterRegionInfo->getHelperName();
268     llvm_unreachable("No helper name for inlined OpenMP construct");
269   }
270 
emitUntiedSwitch(CodeGenFunction & CGF)271   void emitUntiedSwitch(CodeGenFunction &CGF) override {
272     if (OuterRegionInfo)
273       OuterRegionInfo->emitUntiedSwitch(CGF);
274   }
275 
getOldCSI() const276   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
277 
classof(const CGCapturedStmtInfo * Info)278   static bool classof(const CGCapturedStmtInfo *Info) {
279     return CGOpenMPRegionInfo::classof(Info) &&
280            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
281   }
282 
283   ~CGOpenMPInlinedRegionInfo() override = default;
284 
285 private:
286   /// \brief CodeGen info about outer OpenMP region.
287   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
288   CGOpenMPRegionInfo *OuterRegionInfo;
289 };
290 
291 /// \brief API for captured statement code generation in OpenMP target
292 /// constructs. For this captures, implicit parameters are used instead of the
293 /// captured fields. The name of the target region has to be unique in a given
294 /// application so it is provided by the client, because only the client has
295 /// the information to generate that.
296 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
297 public:
CGOpenMPTargetRegionInfo(const CapturedStmt & CS,const RegionCodeGenTy & CodeGen,StringRef HelperName)298   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
299                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
300       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
301                            /*HasCancel=*/false),
302         HelperName(HelperName) {}
303 
304   /// \brief This is unused for target regions because each starts executing
305   /// with a single thread.
getThreadIDVariable() const306   const VarDecl *getThreadIDVariable() const override { return nullptr; }
307 
308   /// \brief Get the name of the capture helper.
getHelperName() const309   StringRef getHelperName() const override { return HelperName; }
310 
classof(const CGCapturedStmtInfo * Info)311   static bool classof(const CGCapturedStmtInfo *Info) {
312     return CGOpenMPRegionInfo::classof(Info) &&
313            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
314   }
315 
316 private:
317   StringRef HelperName;
318 };
319 
EmptyCodeGen(CodeGenFunction &,PrePostActionTy &)320 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
321   llvm_unreachable("No codegen for expressions");
322 }
323 /// \brief API for generation of expressions captured in a innermost OpenMP
324 /// region.
325 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
326 public:
CGOpenMPInnerExprInfo(CodeGenFunction & CGF,const CapturedStmt & CS)327   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
328       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
329                                   OMPD_unknown,
330                                   /*HasCancel=*/false),
331         PrivScope(CGF) {
332     // Make sure the globals captured in the provided statement are local by
333     // using the privatization logic. We assume the same variable is not
334     // captured more than once.
335     for (auto &C : CS.captures()) {
336       if (!C.capturesVariable() && !C.capturesVariableByCopy())
337         continue;
338 
339       const VarDecl *VD = C.getCapturedVar();
340       if (VD->isLocalVarDeclOrParm())
341         continue;
342 
343       DeclRefExpr DRE(const_cast<VarDecl *>(VD),
344                       /*RefersToEnclosingVariableOrCapture=*/false,
345                       VD->getType().getNonReferenceType(), VK_LValue,
346                       SourceLocation());
347       PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
348         return CGF.EmitLValue(&DRE).getAddress();
349       });
350     }
351     (void)PrivScope.Privatize();
352   }
353 
354   /// \brief Lookup the captured field decl for a variable.
lookup(const VarDecl * VD) const355   const FieldDecl *lookup(const VarDecl *VD) const override {
356     if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
357       return FD;
358     return nullptr;
359   }
360 
361   /// \brief Emit the captured statement body.
EmitBody(CodeGenFunction & CGF,const Stmt * S)362   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
363     llvm_unreachable("No body for expressions");
364   }
365 
366   /// \brief Get a variable or parameter for storing global thread id
367   /// inside OpenMP construct.
getThreadIDVariable() const368   const VarDecl *getThreadIDVariable() const override {
369     llvm_unreachable("No thread id for expressions");
370   }
371 
372   /// \brief Get the name of the capture helper.
getHelperName() const373   StringRef getHelperName() const override {
374     llvm_unreachable("No helper name for expressions");
375   }
376 
classof(const CGCapturedStmtInfo * Info)377   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
378 
379 private:
380   /// Private scope to capture global variables.
381   CodeGenFunction::OMPPrivateScope PrivScope;
382 };
383 
384 /// \brief RAII for emitting code of OpenMP constructs.
385 class InlinedOpenMPRegionRAII {
386   CodeGenFunction &CGF;
387   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
388   FieldDecl *LambdaThisCaptureField = nullptr;
389 
390 public:
391   /// \brief Constructs region for combined constructs.
392   /// \param CodeGen Code generation sequence for combined directives. Includes
393   /// a list of functions used for code generation of implicitly inlined
394   /// regions.
InlinedOpenMPRegionRAII(CodeGenFunction & CGF,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)395   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
396                           OpenMPDirectiveKind Kind, bool HasCancel)
397       : CGF(CGF) {
398     // Start emission for the construct.
399     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
400         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
401     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
402     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
403     CGF.LambdaThisCaptureField = nullptr;
404   }
405 
~InlinedOpenMPRegionRAII()406   ~InlinedOpenMPRegionRAII() {
407     // Restore original CapturedStmtInfo only if we're done with code emission.
408     auto *OldCSI =
409         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
410     delete CGF.CapturedStmtInfo;
411     CGF.CapturedStmtInfo = OldCSI;
412     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
414   }
415 };
416 
/// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
  /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x0001,
  /// \brief Use c-style ident structure.
  OMP_IDENT_KMPC = 0x0002,
  /// \brief Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x0010,
  /// \brief Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x0020,
  /// \brief Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x0040,
  /// \brief Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x0040,
  /// \brief Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0,
  /// \brief Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140
};
438 
/// \brief Indices of the fields of the ident structure that describes a
/// source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// \brief Might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// \brief Not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// \brief Source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// \brief String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
479 
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// \brief Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// \brief Default schedule; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// \brief dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// \brief Support for OpenMP 4.5 monotonic and nonmonotonic schedule
  /// modifiers. Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// \brief Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
511 
/// \brief Identifiers of the runtime library entry points. Each enumerator is
/// documented with the prototype of the runtime function it stands for.
enum OpenMPRTLFunction {
  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call = 0,
  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// \brief Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// \brief Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// \brief Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// \brief Call to void __kmpc_end_critical(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// \brief Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// \brief Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// \brief Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// \brief Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// \brief Call to void __kmpc_end_serialized_parallel(ident_t *loc,
  /// kmp_int32 global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// \brief Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// \brief Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// \brief Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// \brief Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// \brief Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32
  /// global_tid, int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// \brief Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// \brief Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// \brief Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32
  /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// \brief Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task,
  /// \brief Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32
  /// global_tid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void *,
  /// void *), kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// \brief Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce,
  /// \brief Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// \brief Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// \brief Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// \brief Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// \brief Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32
  /// gtid, kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// \brief Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// \brief Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_ordered,
  /// \brief Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// \brief Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_taskgroup,
  /// \brief Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// \brief Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
  /// global_tid, int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// \brief Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref,
  /// kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps,
  /// kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t
  /// *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// \brief Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// \brief Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc,
  /// kmp_int32 global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// \brief Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// \brief Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// \brief Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// \brief Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t
  /// *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int
  /// nogroup, int sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  /// \brief Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  /// kmp_int32 num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  /// \brief Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  /// \brief Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid,
  /// kmp_int64 *vec);
  OMPRTL__kmpc_doacross_post,
  /// \brief Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid,
  /// kmp_int64 *vec);
  OMPRTL__kmpc_doacross_wait,

  //
  // Offloading related calls
  //
  /// \brief Call to int32_t __tgt_target(int32_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
  /// int32_t *arg_types);
  OMPRTL__tgt_target,
  /// \brief Call to int32_t __tgt_target_teams(int32_t device_id, void
  /// *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  /// *arg_sizes, int32_t *arg_types, int32_t num_teams, int32_t
  /// thread_limit);
  OMPRTL__tgt_target_teams,
  /// \brief Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// \brief Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// \brief Call to void __tgt_target_data_begin(int32_t device_id, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  /// *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// \brief Call to void __tgt_target_data_end(int32_t device_id, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  /// *arg_types);
  OMPRTL__tgt_target_data_end,
  /// \brief Call to void __tgt_target_data_update(int32_t device_id, int32_t
  /// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  /// *arg_types);
  OMPRTL__tgt_target_data_update,
};
669 
670 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
671 /// region.
672 class CleanupTy final : public EHScopeStack::Cleanup {
673   PrePostActionTy *Action;
674 
675 public:
CleanupTy(PrePostActionTy * Action)676   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
Emit(CodeGenFunction & CGF,Flags)677   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
678     if (!CGF.HaveInsertPoint())
679       return;
680     Action->Exit(CGF);
681   }
682 };
683 
684 } // anonymous namespace
685 
operator ()(CodeGenFunction & CGF) const686 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
687   CodeGenFunction::RunCleanupsScope Scope(CGF);
688   if (PrePostAction) {
689     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
690     Callback(CodeGen, CGF, *PrePostAction);
691   } else {
692     PrePostActionTy Action;
693     Callback(CodeGen, CGF, Action);
694   }
695 }
696 
getThreadIDVariableLValue(CodeGenFunction & CGF)697 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
698   return CGF.EmitLoadOfPointerLValue(
699       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
700       getThreadIDVariable()->getType()->castAs<PointerType>());
701 }
702 
EmitBody(CodeGenFunction & CGF,const Stmt *)703 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
704   if (!CGF.HaveInsertPoint())
705     return;
706   // 1.2.2 OpenMP Language Terminology
707   // Structured block - An executable statement with a single entry at the
708   // top and a single exit at the bottom.
709   // The point of exit cannot be a branch out of the structured block.
710   // longjmp() and throw() must not violate the entry/exit criteria.
711   CGF.EHStack.pushTerminate();
712   CodeGen(CGF);
713   CGF.EHStack.popTerminate();
714 }
715 
getThreadIDVariableLValue(CodeGenFunction & CGF)716 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
717     CodeGenFunction &CGF) {
718   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
719                             getThreadIDVariable()->getType(),
720                             AlignmentSource::Decl);
721 }
722 
CGOpenMPRuntime(CodeGenModule & CGM)723 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
724     : CGM(CGM), OffloadEntriesInfoManager(CGM) {
725   IdentTy = llvm::StructType::create(
726       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
727       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
728       CGM.Int8PtrTy /* psource */, nullptr);
729   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
730 
731   loadOffloadInfoMetadata();
732 }
733 
clear()734 void CGOpenMPRuntime::clear() {
735   InternalVars.clear();
736 }
737 
738 static llvm::Function *
emitCombinerOrInitializer(CodeGenModule & CGM,QualType Ty,const Expr * CombinerInitializer,const VarDecl * In,const VarDecl * Out,bool IsCombiner)739 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
740                           const Expr *CombinerInitializer, const VarDecl *In,
741                           const VarDecl *Out, bool IsCombiner) {
742   // void .omp_combiner.(Ty *in, Ty *out);
743   auto &C = CGM.getContext();
744   QualType PtrTy = C.getPointerType(Ty).withRestrict();
745   FunctionArgList Args;
746   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
747                                /*Id=*/nullptr, PtrTy);
748   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
749                               /*Id=*/nullptr, PtrTy);
750   Args.push_back(&OmpOutParm);
751   Args.push_back(&OmpInParm);
752   auto &FnInfo =
753       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
754   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
755   auto *Fn = llvm::Function::Create(
756       FnTy, llvm::GlobalValue::InternalLinkage,
757       IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
758   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
759   Fn->addFnAttr(llvm::Attribute::AlwaysInline);
760   CodeGenFunction CGF(CGM);
761   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
762   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
763   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
764   CodeGenFunction::OMPPrivateScope Scope(CGF);
765   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
766   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
767     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
768         .getAddress();
769   });
770   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
771   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
772     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
773         .getAddress();
774   });
775   (void)Scope.Privatize();
776   CGF.EmitIgnoredExpr(CombinerInitializer);
777   Scope.ForceCleanup();
778   CGF.FinishFunction();
779   return Fn;
780 }
781 
emitUserDefinedReduction(CodeGenFunction * CGF,const OMPDeclareReductionDecl * D)782 void CGOpenMPRuntime::emitUserDefinedReduction(
783     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
784   if (UDRMap.count(D) > 0)
785     return;
786   auto &C = CGM.getContext();
787   if (!In || !Out) {
788     In = &C.Idents.get("omp_in");
789     Out = &C.Idents.get("omp_out");
790   }
791   llvm::Function *Combiner = emitCombinerOrInitializer(
792       CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
793       cast<VarDecl>(D->lookup(Out).front()),
794       /*IsCombiner=*/true);
795   llvm::Function *Initializer = nullptr;
796   if (auto *Init = D->getInitializer()) {
797     if (!Priv || !Orig) {
798       Priv = &C.Idents.get("omp_priv");
799       Orig = &C.Idents.get("omp_orig");
800     }
801     Initializer = emitCombinerOrInitializer(
802         CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
803         cast<VarDecl>(D->lookup(Priv).front()),
804         /*IsCombiner=*/false);
805   }
806   UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
807   if (CGF) {
808     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
809     Decls.second.push_back(D);
810   }
811 }
812 
813 std::pair<llvm::Function *, llvm::Function *>
getUserDefinedReduction(const OMPDeclareReductionDecl * D)814 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
815   auto I = UDRMap.find(D);
816   if (I != UDRMap.end())
817     return I->second;
818   emitUserDefinedReduction(/*CGF=*/nullptr, D);
819   return UDRMap.lookup(D);
820 }
821 
822 // Layout information for ident_t.
getIdentAlign(CodeGenModule & CGM)823 static CharUnits getIdentAlign(CodeGenModule &CGM) {
824   return CGM.getPointerAlign();
825 }
getIdentSize(CodeGenModule & CGM)826 static CharUnits getIdentSize(CodeGenModule &CGM) {
827   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
828   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
829 }
getOffsetOfIdentField(IdentFieldIndex Field)830 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
831   // All the fields except the last are i32, so this works beautifully.
832   return unsigned(Field) * CharUnits::fromQuantity(4);
833 }
createIdentFieldGEP(CodeGenFunction & CGF,Address Addr,IdentFieldIndex Field,const llvm::Twine & Name="")834 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
835                                    IdentFieldIndex Field,
836                                    const llvm::Twine &Name = "") {
837   auto Offset = getOffsetOfIdentField(Field);
838   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
839 }
840 
emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)841 llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
842     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
843     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
844   assert(ThreadIDVar->getType()->isPointerType() &&
845          "thread id variable must be of type kmp_int32 *");
846   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
847   CodeGenFunction CGF(CGM, true);
848   bool HasCancel = false;
849   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
850     HasCancel = OPD->hasCancel();
851   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
852     HasCancel = OPSD->hasCancel();
853   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
854     HasCancel = OPFD->hasCancel();
855   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
856                                     HasCancel);
857   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
858   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
859 }
860 
emitTaskOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,const VarDecl * PartIDVar,const VarDecl * TaskTVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen,bool Tied,unsigned & NumberOfParts)861 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
862     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
863     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
864     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
865     bool Tied, unsigned &NumberOfParts) {
866   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
867                                               PrePostActionTy &) {
868     auto *ThreadID = getThreadID(CGF, D.getLocStart());
869     auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
870     llvm::Value *TaskArgs[] = {
871         UpLoc, ThreadID,
872         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
873                                     TaskTVar->getType()->castAs<PointerType>())
874             .getPointer()};
875     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
876   };
877   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
878                                                             UntiedCodeGen);
879   CodeGen.setAction(Action);
880   assert(!ThreadIDVar->getType()->isPointerType() &&
881          "thread id variable must be of type kmp_int32 for tasks");
882   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
883   auto *TD = dyn_cast<OMPTaskDirective>(&D);
884   CodeGenFunction CGF(CGM, true);
885   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
886                                         InnermostKind,
887                                         TD ? TD->hasCancel() : false, Action);
888   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
889   auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
890   if (!Tied)
891     NumberOfParts = Action.getNumberOfParts();
892   return Res;
893 }
894 
getOrCreateDefaultLocation(unsigned Flags)895 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
896   CharUnits Align = getIdentAlign(CGM);
897   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
898   if (!Entry) {
899     if (!DefaultOpenMPPSource) {
900       // Initialize default location for psource field of ident_t structure of
901       // all ident_t objects. Format is ";file;function;line;column;;".
902       // Taken from
903       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
904       DefaultOpenMPPSource =
905           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
906       DefaultOpenMPPSource =
907           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
908     }
909     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
910         CGM.getModule(), IdentTy, /*isConstant*/ true,
911         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
912     DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
913     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
914 
915     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
916     llvm::Constant *Values[] = {Zero,
917                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
918                                 Zero, Zero, DefaultOpenMPPSource};
919     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
920     DefaultOpenMPLocation->setInitializer(Init);
921     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
922   }
923   return Address(Entry, Align);
924 }
925 
emitUpdateLocation(CodeGenFunction & CGF,SourceLocation Loc,unsigned Flags)926 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
927                                                  SourceLocation Loc,
928                                                  unsigned Flags) {
929   Flags |= OMP_IDENT_KMPC;
930   // If no debug info is generated - return global default location.
931   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
932       Loc.isInvalid())
933     return getOrCreateDefaultLocation(Flags).getPointer();
934 
935   assert(CGF.CurFn && "No function in current CodeGenFunction.");
936 
937   Address LocValue = Address::invalid();
938   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
939   if (I != OpenMPLocThreadIDMap.end())
940     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
941 
942   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
943   // GetOpenMPThreadID was called before this routine.
944   if (!LocValue.isValid()) {
945     // Generate "ident_t .kmpc_loc.addr;"
946     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
947                                       ".kmpc_loc.addr");
948     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
949     Elem.second.DebugLoc = AI.getPointer();
950     LocValue = AI;
951 
952     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
953     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
954     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
955                              CGM.getSize(getIdentSize(CGF.CGM)));
956   }
957 
958   // char **psource = &.kmpc_loc_<flags>.addr.psource;
959   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
960 
961   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
962   if (OMPDebugLoc == nullptr) {
963     SmallString<128> Buffer2;
964     llvm::raw_svector_ostream OS2(Buffer2);
965     // Build debug location
966     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
967     OS2 << ";" << PLoc.getFilename() << ";";
968     if (const FunctionDecl *FD =
969             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
970       OS2 << FD->getQualifiedNameAsString();
971     }
972     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
973     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
974     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
975   }
976   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
977   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
978 
979   // Our callers always pass this to a runtime function, so for
980   // convenience, go ahead and return a naked pointer.
981   return LocValue.getPointer();
982 }
983 
getThreadID(CodeGenFunction & CGF,SourceLocation Loc)984 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
985                                           SourceLocation Loc) {
986   assert(CGF.CurFn && "No function in current CodeGenFunction.");
987 
988   llvm::Value *ThreadID = nullptr;
989   // Check whether we've already cached a load of the thread id in this
990   // function.
991   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
992   if (I != OpenMPLocThreadIDMap.end()) {
993     ThreadID = I->second.ThreadID;
994     if (ThreadID != nullptr)
995       return ThreadID;
996   }
997   if (auto *OMPRegionInfo =
998           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
999     if (OMPRegionInfo->getThreadIDVariable()) {
1000       // Check if this an outlined function with thread id passed as argument.
1001       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1002       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
1003       // If value loaded in entry block, cache it and use it everywhere in
1004       // function.
1005       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1006         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1007         Elem.second.ThreadID = ThreadID;
1008       }
1009       return ThreadID;
1010     }
1011   }
1012 
1013   // This is not an outlined function region - need to call __kmpc_int32
1014   // kmpc_global_thread_num(ident_t *loc).
1015   // Generate thread id value and cache this value for use across the
1016   // function.
1017   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1018   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1019   ThreadID =
1020       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1021                           emitUpdateLocation(CGF, Loc));
1022   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1023   Elem.second.ThreadID = ThreadID;
1024   return ThreadID;
1025 }
1026 
functionFinished(CodeGenFunction & CGF)1027 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1028   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1029   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1030     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1031   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1032     for(auto *D : FunctionUDRMap[CGF.CurFn]) {
1033       UDRMap.erase(D);
1034     }
1035     FunctionUDRMap.erase(CGF.CurFn);
1036   }
1037 }
1038 
getIdentTyPointerTy()1039 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1040   if (!IdentTy) {
1041   }
1042   return llvm::PointerType::getUnqual(IdentTy);
1043 }
1044 
getKmpc_MicroPointerTy()1045 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1046   if (!Kmpc_MicroTy) {
1047     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1048     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1049                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1050     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1051   }
1052   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1053 }
1054 
1055 llvm::Constant *
createRuntimeFunction(unsigned Function)1056 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1057   llvm::Constant *RTLFn = nullptr;
1058   switch (static_cast<OpenMPRTLFunction>(Function)) {
1059   case OMPRTL__kmpc_fork_call: {
1060     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1061     // microtask, ...);
1062     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1063                                 getKmpc_MicroPointerTy()};
1064     llvm::FunctionType *FnTy =
1065         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1066     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1067     break;
1068   }
1069   case OMPRTL__kmpc_global_thread_num: {
1070     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1071     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1072     llvm::FunctionType *FnTy =
1073         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1074     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1075     break;
1076   }
1077   case OMPRTL__kmpc_threadprivate_cached: {
1078     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1079     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1080     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1081                                 CGM.VoidPtrTy, CGM.SizeTy,
1082                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1083     llvm::FunctionType *FnTy =
1084         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1085     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1086     break;
1087   }
1088   case OMPRTL__kmpc_critical: {
1089     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1090     // kmp_critical_name *crit);
1091     llvm::Type *TypeParams[] = {
1092         getIdentTyPointerTy(), CGM.Int32Ty,
1093         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1094     llvm::FunctionType *FnTy =
1095         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1096     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1097     break;
1098   }
1099   case OMPRTL__kmpc_critical_with_hint: {
1100     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1101     // kmp_critical_name *crit, uintptr_t hint);
1102     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1103                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1104                                 CGM.IntPtrTy};
1105     llvm::FunctionType *FnTy =
1106         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1107     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1108     break;
1109   }
1110   case OMPRTL__kmpc_threadprivate_register: {
1111     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1112     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1113     // typedef void *(*kmpc_ctor)(void *);
1114     auto KmpcCtorTy =
1115         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1116                                 /*isVarArg*/ false)->getPointerTo();
1117     // typedef void *(*kmpc_cctor)(void *, void *);
1118     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1119     auto KmpcCopyCtorTy =
1120         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1121                                 /*isVarArg*/ false)->getPointerTo();
1122     // typedef void (*kmpc_dtor)(void *);
1123     auto KmpcDtorTy =
1124         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1125             ->getPointerTo();
1126     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1127                               KmpcCopyCtorTy, KmpcDtorTy};
1128     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1129                                         /*isVarArg*/ false);
1130     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1131     break;
1132   }
1133   case OMPRTL__kmpc_end_critical: {
1134     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1135     // kmp_critical_name *crit);
1136     llvm::Type *TypeParams[] = {
1137         getIdentTyPointerTy(), CGM.Int32Ty,
1138         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1139     llvm::FunctionType *FnTy =
1140         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1141     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1142     break;
1143   }
1144   case OMPRTL__kmpc_cancel_barrier: {
1145     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1146     // global_tid);
1147     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1148     llvm::FunctionType *FnTy =
1149         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1150     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1151     break;
1152   }
1153   case OMPRTL__kmpc_barrier: {
1154     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1155     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1156     llvm::FunctionType *FnTy =
1157         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1158     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1159     break;
1160   }
1161   case OMPRTL__kmpc_for_static_fini: {
1162     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1163     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1164     llvm::FunctionType *FnTy =
1165         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1166     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1167     break;
1168   }
1169   case OMPRTL__kmpc_push_num_threads: {
1170     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1171     // kmp_int32 num_threads)
1172     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1173                                 CGM.Int32Ty};
1174     llvm::FunctionType *FnTy =
1175         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1176     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1177     break;
1178   }
1179   case OMPRTL__kmpc_serialized_parallel: {
1180     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1181     // global_tid);
1182     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1183     llvm::FunctionType *FnTy =
1184         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1185     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1186     break;
1187   }
1188   case OMPRTL__kmpc_end_serialized_parallel: {
1189     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1190     // global_tid);
1191     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1192     llvm::FunctionType *FnTy =
1193         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1194     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1195     break;
1196   }
1197   case OMPRTL__kmpc_flush: {
1198     // Build void __kmpc_flush(ident_t *loc);
1199     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1200     llvm::FunctionType *FnTy =
1201         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1202     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1203     break;
1204   }
1205   case OMPRTL__kmpc_master: {
1206     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1207     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1208     llvm::FunctionType *FnTy =
1209         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1210     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1211     break;
1212   }
1213   case OMPRTL__kmpc_end_master: {
1214     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1215     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1216     llvm::FunctionType *FnTy =
1217         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1218     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1219     break;
1220   }
1221   case OMPRTL__kmpc_omp_taskyield: {
1222     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1223     // int end_part);
1224     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1225     llvm::FunctionType *FnTy =
1226         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1227     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1228     break;
1229   }
1230   case OMPRTL__kmpc_single: {
1231     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1232     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1233     llvm::FunctionType *FnTy =
1234         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1235     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1236     break;
1237   }
1238   case OMPRTL__kmpc_end_single: {
1239     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1240     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1241     llvm::FunctionType *FnTy =
1242         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1243     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1244     break;
1245   }
1246   case OMPRTL__kmpc_omp_task_alloc: {
1247     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1248     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1249     // kmp_routine_entry_t *task_entry);
1250     assert(KmpRoutineEntryPtrTy != nullptr &&
1251            "Type kmp_routine_entry_t must be created.");
1252     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1253                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1254     // Return void * and then cast to particular kmp_task_t type.
1255     llvm::FunctionType *FnTy =
1256         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1257     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1258     break;
1259   }
1260   case OMPRTL__kmpc_omp_task: {
1261     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1262     // *new_task);
1263     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1264                                 CGM.VoidPtrTy};
1265     llvm::FunctionType *FnTy =
1266         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1267     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1268     break;
1269   }
1270   case OMPRTL__kmpc_copyprivate: {
1271     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1272     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1273     // kmp_int32 didit);
1274     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1275     auto *CpyFnTy =
1276         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1277     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1278                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1279                                 CGM.Int32Ty};
1280     llvm::FunctionType *FnTy =
1281         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1282     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1283     break;
1284   }
1285   case OMPRTL__kmpc_reduce: {
1286     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1287     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1288     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1289     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1290     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1291                                                /*isVarArg=*/false);
1292     llvm::Type *TypeParams[] = {
1293         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1294         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1295         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1296     llvm::FunctionType *FnTy =
1297         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1298     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1299     break;
1300   }
1301   case OMPRTL__kmpc_reduce_nowait: {
1302     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1303     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1304     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1305     // *lck);
1306     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1307     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1308                                                /*isVarArg=*/false);
1309     llvm::Type *TypeParams[] = {
1310         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1311         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1312         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1313     llvm::FunctionType *FnTy =
1314         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1315     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1316     break;
1317   }
1318   case OMPRTL__kmpc_end_reduce: {
1319     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1320     // kmp_critical_name *lck);
1321     llvm::Type *TypeParams[] = {
1322         getIdentTyPointerTy(), CGM.Int32Ty,
1323         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1324     llvm::FunctionType *FnTy =
1325         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1326     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1327     break;
1328   }
1329   case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
1332     llvm::Type *TypeParams[] = {
1333         getIdentTyPointerTy(), CGM.Int32Ty,
1334         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1335     llvm::FunctionType *FnTy =
1336         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1337     RTLFn =
1338         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1339     break;
1340   }
1341   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
1344     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1345                                 CGM.VoidPtrTy};
1346     llvm::FunctionType *FnTy =
1347         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1348     RTLFn =
1349         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1350     break;
1351   }
1352   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
1355     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1356                                 CGM.VoidPtrTy};
1357     llvm::FunctionType *FnTy =
1358         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1359     RTLFn = CGM.CreateRuntimeFunction(FnTy,
1360                                       /*Name=*/"__kmpc_omp_task_complete_if0");
1361     break;
1362   }
1363   case OMPRTL__kmpc_ordered: {
1364     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1365     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1366     llvm::FunctionType *FnTy =
1367         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1368     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1369     break;
1370   }
1371   case OMPRTL__kmpc_end_ordered: {
1372     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1373     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1374     llvm::FunctionType *FnTy =
1375         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1376     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1377     break;
1378   }
1379   case OMPRTL__kmpc_omp_taskwait: {
1380     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1381     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1382     llvm::FunctionType *FnTy =
1383         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1384     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1385     break;
1386   }
1387   case OMPRTL__kmpc_taskgroup: {
1388     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1389     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1390     llvm::FunctionType *FnTy =
1391         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1392     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1393     break;
1394   }
1395   case OMPRTL__kmpc_end_taskgroup: {
1396     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1397     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1398     llvm::FunctionType *FnTy =
1399         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1400     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1401     break;
1402   }
1403   case OMPRTL__kmpc_push_proc_bind: {
1404     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1405     // int proc_bind)
1406     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1407     llvm::FunctionType *FnTy =
1408         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1409     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1410     break;
1411   }
1412   case OMPRTL__kmpc_omp_task_with_deps: {
1413     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1414     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1415     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1416     llvm::Type *TypeParams[] = {
1417         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1418         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
1419     llvm::FunctionType *FnTy =
1420         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1421     RTLFn =
1422         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1423     break;
1424   }
1425   case OMPRTL__kmpc_omp_wait_deps: {
1426     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1427     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1428     // kmp_depend_info_t *noalias_dep_list);
1429     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1430                                 CGM.Int32Ty,           CGM.VoidPtrTy,
1431                                 CGM.Int32Ty,           CGM.VoidPtrTy};
1432     llvm::FunctionType *FnTy =
1433         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1434     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1435     break;
1436   }
1437   case OMPRTL__kmpc_cancellationpoint: {
1438     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1439     // global_tid, kmp_int32 cncl_kind)
1440     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1441     llvm::FunctionType *FnTy =
1442         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1443     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1444     break;
1445   }
1446   case OMPRTL__kmpc_cancel: {
1447     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1448     // kmp_int32 cncl_kind)
1449     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1450     llvm::FunctionType *FnTy =
1451         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1452     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1453     break;
1454   }
1455   case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
1458     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1459         CGM.Int32Ty};
1460     llvm::FunctionType *FnTy =
1461         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1462     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1463     break;
1464   }
1465   case OMPRTL__kmpc_fork_teams: {
1466     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1467     // microtask, ...);
1468     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1469                                 getKmpc_MicroPointerTy()};
1470     llvm::FunctionType *FnTy =
1471         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1472     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1473     break;
1474   }
1475   case OMPRTL__kmpc_taskloop: {
1476     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
1477     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
1478     // sched, kmp_uint64 grainsize, void *task_dup);
1479     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1480                                 CGM.IntTy,
1481                                 CGM.VoidPtrTy,
1482                                 CGM.IntTy,
1483                                 CGM.Int64Ty->getPointerTo(),
1484                                 CGM.Int64Ty->getPointerTo(),
1485                                 CGM.Int64Ty,
1486                                 CGM.IntTy,
1487                                 CGM.IntTy,
1488                                 CGM.Int64Ty,
1489                                 CGM.VoidPtrTy};
1490     llvm::FunctionType *FnTy =
1491         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1492     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
1493     break;
1494   }
1495   case OMPRTL__kmpc_doacross_init: {
1496     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
1497     // num_dims, struct kmp_dim *dims);
1498     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1499                                 CGM.Int32Ty,
1500                                 CGM.Int32Ty,
1501                                 CGM.VoidPtrTy};
1502     llvm::FunctionType *FnTy =
1503         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1504     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
1505     break;
1506   }
1507   case OMPRTL__kmpc_doacross_fini: {
1508     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
1509     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1510     llvm::FunctionType *FnTy =
1511         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1512     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
1513     break;
1514   }
1515   case OMPRTL__kmpc_doacross_post: {
1516     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
1517     // *vec);
1518     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1519                                 CGM.Int64Ty->getPointerTo()};
1520     llvm::FunctionType *FnTy =
1521         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1522     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
1523     break;
1524   }
1525   case OMPRTL__kmpc_doacross_wait: {
1526     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
1527     // *vec);
1528     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1529                                 CGM.Int64Ty->getPointerTo()};
1530     llvm::FunctionType *FnTy =
1531         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1532     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
1533     break;
1534   }
1535   case OMPRTL__tgt_target: {
1536     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
1537     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
1538     // *arg_types);
1539     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1540                                 CGM.VoidPtrTy,
1541                                 CGM.Int32Ty,
1542                                 CGM.VoidPtrPtrTy,
1543                                 CGM.VoidPtrPtrTy,
1544                                 CGM.SizeTy->getPointerTo(),
1545                                 CGM.Int32Ty->getPointerTo()};
1546     llvm::FunctionType *FnTy =
1547         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1548     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
1549     break;
1550   }
1551   case OMPRTL__tgt_target_teams: {
1552     // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
1553     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
1554     // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
1555     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1556                                 CGM.VoidPtrTy,
1557                                 CGM.Int32Ty,
1558                                 CGM.VoidPtrPtrTy,
1559                                 CGM.VoidPtrPtrTy,
1560                                 CGM.SizeTy->getPointerTo(),
1561                                 CGM.Int32Ty->getPointerTo(),
1562                                 CGM.Int32Ty,
1563                                 CGM.Int32Ty};
1564     llvm::FunctionType *FnTy =
1565         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1566     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
1567     break;
1568   }
1569   case OMPRTL__tgt_register_lib: {
1570     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
1571     QualType ParamTy =
1572         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1573     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1574     llvm::FunctionType *FnTy =
1575         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1576     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
1577     break;
1578   }
1579   case OMPRTL__tgt_unregister_lib: {
1580     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
1581     QualType ParamTy =
1582         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1583     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1584     llvm::FunctionType *FnTy =
1585         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1586     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
1587     break;
1588   }
1589   case OMPRTL__tgt_target_data_begin: {
1590     // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
1591     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1592     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1593                                 CGM.Int32Ty,
1594                                 CGM.VoidPtrPtrTy,
1595                                 CGM.VoidPtrPtrTy,
1596                                 CGM.SizeTy->getPointerTo(),
1597                                 CGM.Int32Ty->getPointerTo()};
1598     llvm::FunctionType *FnTy =
1599         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1600     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
1601     break;
1602   }
1603   case OMPRTL__tgt_target_data_end: {
1604     // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
1605     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1606     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1607                                 CGM.Int32Ty,
1608                                 CGM.VoidPtrPtrTy,
1609                                 CGM.VoidPtrPtrTy,
1610                                 CGM.SizeTy->getPointerTo(),
1611                                 CGM.Int32Ty->getPointerTo()};
1612     llvm::FunctionType *FnTy =
1613         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1614     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
1615     break;
1616   }
1617   case OMPRTL__tgt_target_data_update: {
1618     // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
1619     // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1620     llvm::Type *TypeParams[] = {CGM.Int32Ty,
1621                                 CGM.Int32Ty,
1622                                 CGM.VoidPtrPtrTy,
1623                                 CGM.VoidPtrPtrTy,
1624                                 CGM.SizeTy->getPointerTo(),
1625                                 CGM.Int32Ty->getPointerTo()};
1626     llvm::FunctionType *FnTy =
1627         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1628     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
1629     break;
1630   }
1631   }
1632   assert(RTLFn && "Unable to find OpenMP runtime function");
1633   return RTLFn;
1634 }
1635 
createForStaticInitFunction(unsigned IVSize,bool IVSigned)1636 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
1637                                                              bool IVSigned) {
1638   assert((IVSize == 32 || IVSize == 64) &&
1639          "IV size is not compatible with the omp runtime");
1640   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1641                                        : "__kmpc_for_static_init_4u")
1642                            : (IVSigned ? "__kmpc_for_static_init_8"
1643                                        : "__kmpc_for_static_init_8u");
1644   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1645   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1646   llvm::Type *TypeParams[] = {
1647     getIdentTyPointerTy(),                     // loc
1648     CGM.Int32Ty,                               // tid
1649     CGM.Int32Ty,                               // schedtype
1650     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1651     PtrTy,                                     // p_lower
1652     PtrTy,                                     // p_upper
1653     PtrTy,                                     // p_stride
1654     ITy,                                       // incr
1655     ITy                                        // chunk
1656   };
1657   llvm::FunctionType *FnTy =
1658       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1659   return CGM.CreateRuntimeFunction(FnTy, Name);
1660 }
1661 
createDispatchInitFunction(unsigned IVSize,bool IVSigned)1662 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
1663                                                             bool IVSigned) {
1664   assert((IVSize == 32 || IVSize == 64) &&
1665          "IV size is not compatible with the omp runtime");
1666   auto Name =
1667       IVSize == 32
1668           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1669           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1670   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1671   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1672                                CGM.Int32Ty,           // tid
1673                                CGM.Int32Ty,           // schedtype
1674                                ITy,                   // lower
1675                                ITy,                   // upper
1676                                ITy,                   // stride
1677                                ITy                    // chunk
1678   };
1679   llvm::FunctionType *FnTy =
1680       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1681   return CGM.CreateRuntimeFunction(FnTy, Name);
1682 }
1683 
createDispatchFiniFunction(unsigned IVSize,bool IVSigned)1684 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
1685                                                             bool IVSigned) {
1686   assert((IVSize == 32 || IVSize == 64) &&
1687          "IV size is not compatible with the omp runtime");
1688   auto Name =
1689       IVSize == 32
1690           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1691           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1692   llvm::Type *TypeParams[] = {
1693       getIdentTyPointerTy(), // loc
1694       CGM.Int32Ty,           // tid
1695   };
1696   llvm::FunctionType *FnTy =
1697       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1698   return CGM.CreateRuntimeFunction(FnTy, Name);
1699 }
1700 
createDispatchNextFunction(unsigned IVSize,bool IVSigned)1701 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1702                                                             bool IVSigned) {
1703   assert((IVSize == 32 || IVSize == 64) &&
1704          "IV size is not compatible with the omp runtime");
1705   auto Name =
1706       IVSize == 32
1707           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1708           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1709   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1710   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1711   llvm::Type *TypeParams[] = {
1712     getIdentTyPointerTy(),                     // loc
1713     CGM.Int32Ty,                               // tid
1714     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1715     PtrTy,                                     // p_lower
1716     PtrTy,                                     // p_upper
1717     PtrTy                                      // p_stride
1718   };
1719   llvm::FunctionType *FnTy =
1720       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1721   return CGM.CreateRuntimeFunction(FnTy, Name);
1722 }
1723 
1724 llvm::Constant *
getOrCreateThreadPrivateCache(const VarDecl * VD)1725 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1726   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1727          !CGM.getContext().getTargetInfo().isTLSSupported());
1728   // Lookup the entry, lazily creating it if necessary.
1729   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1730                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1731 }
1732 
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)1733 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1734                                                 const VarDecl *VD,
1735                                                 Address VDAddr,
1736                                                 SourceLocation Loc) {
1737   if (CGM.getLangOpts().OpenMPUseTLS &&
1738       CGM.getContext().getTargetInfo().isTLSSupported())
1739     return VDAddr;
1740 
1741   auto VarTy = VDAddr.getElementType();
1742   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1743                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1744                                                        CGM.Int8PtrTy),
1745                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1746                          getOrCreateThreadPrivateCache(VD)};
1747   return Address(CGF.EmitRuntimeCall(
1748       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1749                  VDAddr.getAlignment());
1750 }
1751 
emitThreadPrivateVarInit(CodeGenFunction & CGF,Address VDAddr,llvm::Value * Ctor,llvm::Value * CopyCtor,llvm::Value * Dtor,SourceLocation Loc)1752 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1753     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1754     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1755   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1756   // library.
1757   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1758   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1759                       OMPLoc);
1760   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1761   // to register constructor/destructor for variable.
1762   llvm::Value *Args[] = {OMPLoc,
1763                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1764                                                        CGM.VoidPtrTy),
1765                          Ctor, CopyCtor, Dtor};
1766   CGF.EmitRuntimeCall(
1767       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1768 }
1769 
emitThreadPrivateVarDefinition(const VarDecl * VD,Address VDAddr,SourceLocation Loc,bool PerformInit,CodeGenFunction * CGF)1770 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1771     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1772     bool PerformInit, CodeGenFunction *CGF) {
1773   if (CGM.getLangOpts().OpenMPUseTLS &&
1774       CGM.getContext().getTargetInfo().isTLSSupported())
1775     return nullptr;
1776 
1777   VD = VD->getDefinition(CGM.getContext());
1778   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1779     ThreadPrivateWithDefinition.insert(VD);
1780     QualType ASTTy = VD->getType();
1781 
1782     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1783     auto Init = VD->getAnyInitializer();
1784     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1785       // Generate function that re-emits the declaration's initializer into the
1786       // threadprivate copy of the variable VD
1787       CodeGenFunction CtorCGF(CGM);
1788       FunctionArgList Args;
1789       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1790                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1791       Args.push_back(&Dst);
1792 
1793       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1794           CGM.getContext().VoidPtrTy, Args);
1795       auto FTy = CGM.getTypes().GetFunctionType(FI);
1796       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1797           FTy, ".__kmpc_global_ctor_.", FI, Loc);
1798       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1799                             Args, SourceLocation());
1800       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1801           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1802           CGM.getContext().VoidPtrTy, Dst.getLocation());
1803       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1804       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1805                                              CtorCGF.ConvertTypeForMem(ASTTy));
1806       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1807                                /*IsInitializer=*/true);
1808       ArgVal = CtorCGF.EmitLoadOfScalar(
1809           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1810           CGM.getContext().VoidPtrTy, Dst.getLocation());
1811       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1812       CtorCGF.FinishFunction();
1813       Ctor = Fn;
1814     }
1815     if (VD->getType().isDestructedType() != QualType::DK_none) {
1816       // Generate function that emits destructor call for the threadprivate copy
1817       // of the variable VD
1818       CodeGenFunction DtorCGF(CGM);
1819       FunctionArgList Args;
1820       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1821                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1822       Args.push_back(&Dst);
1823 
1824       auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1825           CGM.getContext().VoidTy, Args);
1826       auto FTy = CGM.getTypes().GetFunctionType(FI);
1827       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1828           FTy, ".__kmpc_global_dtor_.", FI, Loc);
1829       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1830       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1831                             SourceLocation());
1832       // Create a scope with an artificial location for the body of this function.
1833       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1834       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1835           DtorCGF.GetAddrOfLocalVar(&Dst),
1836           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1837       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1838                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1839                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1840       DtorCGF.FinishFunction();
1841       Dtor = Fn;
1842     }
1843     // Do not emit init function if it is not required.
1844     if (!Ctor && !Dtor)
1845       return nullptr;
1846 
1847     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1848     auto CopyCtorTy =
1849         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1850                                 /*isVarArg=*/false)->getPointerTo();
1851     // Copying constructor for the threadprivate variable.
1852     // Must be NULL - reserved by runtime, but currently it requires that this
1853     // parameter is always NULL. Otherwise it fires assertion.
1854     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1855     if (Ctor == nullptr) {
1856       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1857                                             /*isVarArg=*/false)->getPointerTo();
1858       Ctor = llvm::Constant::getNullValue(CtorTy);
1859     }
1860     if (Dtor == nullptr) {
1861       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1862                                             /*isVarArg=*/false)->getPointerTo();
1863       Dtor = llvm::Constant::getNullValue(DtorTy);
1864     }
1865     if (!CGF) {
1866       auto InitFunctionTy =
1867           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1868       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1869           InitFunctionTy, ".__omp_threadprivate_init_.",
1870           CGM.getTypes().arrangeNullaryFunction());
1871       CodeGenFunction InitCGF(CGM);
1872       FunctionArgList ArgList;
1873       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1874                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1875                             Loc);
1876       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1877       InitCGF.FinishFunction();
1878       return InitFunction;
1879     }
1880     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1881   }
1882   return nullptr;
1883 }
1884 
1885 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1886 /// function. Here is the logic:
1887 /// if (Cond) {
1888 ///   ThenGen();
1889 /// } else {
1890 ///   ElseGen();
1891 /// }
emitOMPIfClause(CodeGenFunction & CGF,const Expr * Cond,const RegionCodeGenTy & ThenGen,const RegionCodeGenTy & ElseGen)1892 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1893                             const RegionCodeGenTy &ThenGen,
1894                             const RegionCodeGenTy &ElseGen) {
1895   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1896 
1897   // If the condition constant folds and can be elided, try to avoid emitting
1898   // the condition and the dead arm of the if/else.
1899   bool CondConstant;
1900   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1901     if (CondConstant)
1902       ThenGen(CGF);
1903     else
1904       ElseGen(CGF);
1905     return;
1906   }
1907 
1908   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1909   // emit the conditional branch.
1910   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1911   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1912   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1913   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1914 
1915   // Emit the 'then' code.
1916   CGF.EmitBlock(ThenBlock);
1917   ThenGen(CGF);
1918   CGF.EmitBranch(ContBlock);
1919   // Emit the 'else' code if present.
1920   // There is no need to emit line number for unconditional branch.
1921   (void)ApplyDebugLocation::CreateEmpty(CGF);
1922   CGF.EmitBlock(ElseBlock);
1923   ElseGen(CGF);
1924   // There is no need to emit line number for unconditional branch.
1925   (void)ApplyDebugLocation::CreateEmpty(CGF);
1926   CGF.EmitBranch(ContBlock);
1927   // Emit the continuation block for code after the if.
1928   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1929 }
1930 
emitParallelCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::Value * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars,const Expr * IfCond)1931 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1932                                        llvm::Value *OutlinedFn,
1933                                        ArrayRef<llvm::Value *> CapturedVars,
1934                                        const Expr *IfCond) {
1935   if (!CGF.HaveInsertPoint())
1936     return;
1937   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1938   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
1939                                                      PrePostActionTy &) {
1940     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1941     auto &RT = CGF.CGM.getOpenMPRuntime();
1942     llvm::Value *Args[] = {
1943         RTLoc,
1944         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1945         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1946     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1947     RealArgs.append(std::begin(Args), std::end(Args));
1948     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1949 
1950     auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
1951     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1952   };
1953   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
1954                                                           PrePostActionTy &) {
1955     auto &RT = CGF.CGM.getOpenMPRuntime();
1956     auto ThreadID = RT.getThreadID(CGF, Loc);
1957     // Build calls:
1958     // __kmpc_serialized_parallel(&Loc, GTid);
1959     llvm::Value *Args[] = {RTLoc, ThreadID};
1960     CGF.EmitRuntimeCall(
1961         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
1962 
1963     // OutlinedFn(&GTid, &zero, CapturedStruct);
1964     auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1965     Address ZeroAddr =
1966         CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1967                              /*Name*/ ".zero.addr");
1968     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1969     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1970     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1971     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1972     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1973     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1974 
1975     // __kmpc_end_serialized_parallel(&Loc, GTid);
1976     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1977     CGF.EmitRuntimeCall(
1978         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
1979         EndArgs);
1980   };
1981   if (IfCond)
1982     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1983   else {
1984     RegionCodeGenTy ThenRCG(ThenGen);
1985     ThenRCG(CGF);
1986   }
1987 }
1988 
1989 // If we're inside an (outlined) parallel region, use the region info's
1990 // thread-ID variable (it is passed in a first argument of the outlined function
1991 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1992 // regular serial code region, get thread ID by calling kmp_int32
1993 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1994 // return the address of that temp.
emitThreadIDAddress(CodeGenFunction & CGF,SourceLocation Loc)1995 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1996                                              SourceLocation Loc) {
1997   if (auto *OMPRegionInfo =
1998           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1999     if (OMPRegionInfo->getThreadIDVariable())
2000       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2001 
2002   auto ThreadID = getThreadID(CGF, Loc);
2003   auto Int32Ty =
2004       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2005   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2006   CGF.EmitStoreOfScalar(ThreadID,
2007                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2008 
2009   return ThreadIDTemp;
2010 }
2011 
2012 llvm::Constant *
getOrCreateInternalVariable(llvm::Type * Ty,const llvm::Twine & Name)2013 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2014                                              const llvm::Twine &Name) {
2015   SmallString<256> Buffer;
2016   llvm::raw_svector_ostream Out(Buffer);
2017   Out << Name;
2018   auto RuntimeName = Out.str();
2019   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
2020   if (Elem.second) {
2021     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2022            "OMP internal variable has different type than requested");
2023     return &*Elem.second;
2024   }
2025 
2026   return Elem.second = new llvm::GlobalVariable(
2027              CGM.getModule(), Ty, /*IsConstant*/ false,
2028              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2029              Elem.first());
2030 }
2031 
getCriticalRegionLock(StringRef CriticalName)2032 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2033   llvm::Twine Name(".gomp_critical_user_", CriticalName);
2034   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
2035 }
2036 
2037 namespace {
2038 /// Common pre(post)-action for different OpenMP constructs.
2039 class CommonActionTy final : public PrePostActionTy {
2040   llvm::Value *EnterCallee;
2041   ArrayRef<llvm::Value *> EnterArgs;
2042   llvm::Value *ExitCallee;
2043   ArrayRef<llvm::Value *> ExitArgs;
2044   bool Conditional;
2045   llvm::BasicBlock *ContBlock = nullptr;
2046 
2047 public:
CommonActionTy(llvm::Value * EnterCallee,ArrayRef<llvm::Value * > EnterArgs,llvm::Value * ExitCallee,ArrayRef<llvm::Value * > ExitArgs,bool Conditional=false)2048   CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2049                  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2050                  bool Conditional = false)
2051       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2052         ExitArgs(ExitArgs), Conditional(Conditional) {}
Enter(CodeGenFunction & CGF)2053   void Enter(CodeGenFunction &CGF) override {
2054     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2055     if (Conditional) {
2056       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2057       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2058       ContBlock = CGF.createBasicBlock("omp_if.end");
2059       // Generate the branch (If-stmt)
2060       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2061       CGF.EmitBlock(ThenBlock);
2062     }
2063   }
Done(CodeGenFunction & CGF)2064   void Done(CodeGenFunction &CGF) {
2065     // Emit the rest of blocks/branches
2066     CGF.EmitBranch(ContBlock);
2067     CGF.EmitBlock(ContBlock, true);
2068   }
Exit(CodeGenFunction & CGF)2069   void Exit(CodeGenFunction &CGF) override {
2070     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2071   }
2072 };
2073 } // anonymous namespace
2074 
emitCriticalRegion(CodeGenFunction & CGF,StringRef CriticalName,const RegionCodeGenTy & CriticalOpGen,SourceLocation Loc,const Expr * Hint)2075 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2076                                          StringRef CriticalName,
2077                                          const RegionCodeGenTy &CriticalOpGen,
2078                                          SourceLocation Loc, const Expr *Hint) {
2079   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2080   // CriticalOpGen();
2081   // __kmpc_end_critical(ident_t *, gtid, Lock);
2082   // Prepare arguments and build a call to __kmpc_critical
2083   if (!CGF.HaveInsertPoint())
2084     return;
2085   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2086                          getCriticalRegionLock(CriticalName)};
2087   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2088                                                 std::end(Args));
2089   if (Hint) {
2090     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2091         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2092   }
2093   CommonActionTy Action(
2094       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2095                                  : OMPRTL__kmpc_critical),
2096       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2097   CriticalOpGen.setAction(Action);
2098   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2099 }
2100 
emitMasterRegion(CodeGenFunction & CGF,const RegionCodeGenTy & MasterOpGen,SourceLocation Loc)2101 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2102                                        const RegionCodeGenTy &MasterOpGen,
2103                                        SourceLocation Loc) {
2104   if (!CGF.HaveInsertPoint())
2105     return;
2106   // if(__kmpc_master(ident_t *, gtid)) {
2107   //   MasterOpGen();
2108   //   __kmpc_end_master(ident_t *, gtid);
2109   // }
2110   // Prepare arguments and build a call to __kmpc_master
2111   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2112   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2113                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2114                         /*Conditional=*/true);
2115   MasterOpGen.setAction(Action);
2116   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2117   Action.Done(CGF);
2118 }
2119 
emitTaskyieldCall(CodeGenFunction & CGF,SourceLocation Loc)2120 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2121                                         SourceLocation Loc) {
2122   if (!CGF.HaveInsertPoint())
2123     return;
2124   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2125   llvm::Value *Args[] = {
2126       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2127       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2128   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
2129   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2130     Region->emitUntiedSwitch(CGF);
2131 }
2132 
emitTaskgroupRegion(CodeGenFunction & CGF,const RegionCodeGenTy & TaskgroupOpGen,SourceLocation Loc)2133 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2134                                           const RegionCodeGenTy &TaskgroupOpGen,
2135                                           SourceLocation Loc) {
2136   if (!CGF.HaveInsertPoint())
2137     return;
2138   // __kmpc_taskgroup(ident_t *, gtid);
2139   // TaskgroupOpGen();
2140   // __kmpc_end_taskgroup(ident_t *, gtid);
2141   // Prepare arguments and build a call to __kmpc_taskgroup
2142   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2143   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2144                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
2145                         Args);
2146   TaskgroupOpGen.setAction(Action);
2147   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2148 }
2149 
2150 /// Given an array of pointers to variables, project the address of a
2151 /// given variable.
emitAddrOfVarFromArray(CodeGenFunction & CGF,Address Array,unsigned Index,const VarDecl * Var)2152 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2153                                       unsigned Index, const VarDecl *Var) {
2154   // Pull out the pointer to the variable.
2155   Address PtrAddr =
2156       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
2157   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2158 
2159   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2160   Addr = CGF.Builder.CreateElementBitCast(
2161       Addr, CGF.ConvertTypeForMem(Var->getType()));
2162   return Addr;
2163 }
2164 
emitCopyprivateCopyFunction(CodeGenModule & CGM,llvm::Type * ArgsType,ArrayRef<const Expr * > CopyprivateVars,ArrayRef<const Expr * > DestExprs,ArrayRef<const Expr * > SrcExprs,ArrayRef<const Expr * > AssignmentOps)2165 static llvm::Value *emitCopyprivateCopyFunction(
2166     CodeGenModule &CGM, llvm::Type *ArgsType,
2167     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2168     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
2169   auto &C = CGM.getContext();
2170   // void copy_func(void *LHSArg, void *RHSArg);
2171   FunctionArgList Args;
2172   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2173                            C.VoidPtrTy);
2174   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2175                            C.VoidPtrTy);
2176   Args.push_back(&LHSArg);
2177   Args.push_back(&RHSArg);
2178   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2179   auto *Fn = llvm::Function::Create(
2180       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2181       ".omp.copyprivate.copy_func", &CGM.getModule());
2182   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2183   CodeGenFunction CGF(CGM);
2184   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2185   // Dest = (void*[n])(LHSArg);
2186   // Src = (void*[n])(RHSArg);
2187   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2188       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2189       ArgsType), CGF.getPointerAlign());
2190   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2191       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2192       ArgsType), CGF.getPointerAlign());
2193   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2194   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2195   // ...
2196   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2197   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2198     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2199     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2200 
2201     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2202     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2203 
2204     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2205     QualType Type = VD->getType();
2206     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2207   }
2208   CGF.FinishFunction();
2209   return Fn;
2210 }
2211 
emitSingleRegion(CodeGenFunction & CGF,const RegionCodeGenTy & SingleOpGen,SourceLocation Loc,ArrayRef<const Expr * > CopyprivateVars,ArrayRef<const Expr * > SrcExprs,ArrayRef<const Expr * > DstExprs,ArrayRef<const Expr * > AssignmentOps)2212 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2213                                        const RegionCodeGenTy &SingleOpGen,
2214                                        SourceLocation Loc,
2215                                        ArrayRef<const Expr *> CopyprivateVars,
2216                                        ArrayRef<const Expr *> SrcExprs,
2217                                        ArrayRef<const Expr *> DstExprs,
2218                                        ArrayRef<const Expr *> AssignmentOps) {
2219   if (!CGF.HaveInsertPoint())
2220     return;
2221   assert(CopyprivateVars.size() == SrcExprs.size() &&
2222          CopyprivateVars.size() == DstExprs.size() &&
2223          CopyprivateVars.size() == AssignmentOps.size());
2224   auto &C = CGM.getContext();
2225   // int32 did_it = 0;
2226   // if(__kmpc_single(ident_t *, gtid)) {
2227   //   SingleOpGen();
2228   //   __kmpc_end_single(ident_t *, gtid);
2229   //   did_it = 1;
2230   // }
2231   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2232   // <copy_func>, did_it);
2233 
2234   Address DidIt = Address::invalid();
2235   if (!CopyprivateVars.empty()) {
2236     // int32 did_it = 0;
2237     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2238     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2239     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2240   }
2241   // Prepare arguments and build a call to __kmpc_single
2242   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2243   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
2244                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
2245                         /*Conditional=*/true);
2246   SingleOpGen.setAction(Action);
2247   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2248   if (DidIt.isValid()) {
2249     // did_it = 1;
2250     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2251   }
2252   Action.Done(CGF);
2253   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2254   // <copy_func>, did_it);
2255   if (DidIt.isValid()) {
2256     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2257     auto CopyprivateArrayTy =
2258         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2259                                /*IndexTypeQuals=*/0);
2260     // Create a list of all private variables for copyprivate.
2261     Address CopyprivateList =
2262         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2263     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2264       Address Elem = CGF.Builder.CreateConstArrayGEP(
2265           CopyprivateList, I, CGF.getPointerSize());
2266       CGF.Builder.CreateStore(
2267           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2268               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
2269           Elem);
2270     }
2271     // Build function that copies private values from single region to all other
2272     // threads in the corresponding parallel region.
2273     auto *CpyFn = emitCopyprivateCopyFunction(
2274         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2275         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
2276     auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2277     Address CL =
2278       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2279                                                       CGF.VoidPtrTy);
2280     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
2281     llvm::Value *Args[] = {
2282         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2283         getThreadID(CGF, Loc),        // i32 <gtid>
2284         BufSize,                      // size_t <buf_size>
2285         CL.getPointer(),              // void *<copyprivate list>
2286         CpyFn,                        // void (*) (void *, void *) <copy_func>
2287         DidItVal                      // i32 did_it
2288     };
2289     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
2290   }
2291 }
2292 
emitOrderedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & OrderedOpGen,SourceLocation Loc,bool IsThreads)2293 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2294                                         const RegionCodeGenTy &OrderedOpGen,
2295                                         SourceLocation Loc, bool IsThreads) {
2296   if (!CGF.HaveInsertPoint())
2297     return;
2298   // __kmpc_ordered(ident_t *, gtid);
2299   // OrderedOpGen();
2300   // __kmpc_end_ordered(ident_t *, gtid);
2301   // Prepare arguments and build a call to __kmpc_ordered
2302   if (IsThreads) {
2303     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2304     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
2305                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
2306                           Args);
2307     OrderedOpGen.setAction(Action);
2308     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2309     return;
2310   }
2311   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2312 }
2313 
emitBarrierCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind Kind,bool EmitChecks,bool ForceSimpleCall)2314 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2315                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2316                                       bool ForceSimpleCall) {
2317   if (!CGF.HaveInsertPoint())
2318     return;
2319   // Build call __kmpc_cancel_barrier(loc, thread_id);
2320   // Build call __kmpc_barrier(loc, thread_id);
2321   unsigned Flags;
2322   if (Kind == OMPD_for)
2323     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2324   else if (Kind == OMPD_sections)
2325     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2326   else if (Kind == OMPD_single)
2327     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2328   else if (Kind == OMPD_barrier)
2329     Flags = OMP_IDENT_BARRIER_EXPL;
2330   else
2331     Flags = OMP_IDENT_BARRIER_IMPL;
2332   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2333   // thread_id);
2334   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2335                          getThreadID(CGF, Loc)};
2336   if (auto *OMPRegionInfo =
2337           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2338     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2339       auto *Result = CGF.EmitRuntimeCall(
2340           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
2341       if (EmitChecks) {
2342         // if (__kmpc_cancel_barrier()) {
2343         //   exit from construct;
2344         // }
2345         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2346         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2347         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2348         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2349         CGF.EmitBlock(ExitBB);
2350         //   exit from construct;
2351         auto CancelDestination =
2352             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2353         CGF.EmitBranchThroughCleanup(CancelDestination);
2354         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2355       }
2356       return;
2357     }
2358   }
2359   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
2360 }
2361 
2362 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,bool Chunked,bool Ordered)2363 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2364                                           bool Chunked, bool Ordered) {
2365   switch (ScheduleKind) {
2366   case OMPC_SCHEDULE_static:
2367     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2368                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2369   case OMPC_SCHEDULE_dynamic:
2370     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2371   case OMPC_SCHEDULE_guided:
2372     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2373   case OMPC_SCHEDULE_runtime:
2374     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2375   case OMPC_SCHEDULE_auto:
2376     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2377   case OMPC_SCHEDULE_unknown:
2378     assert(!Chunked && "chunk was specified but schedule kind not known");
2379     return Ordered ? OMP_ord_static : OMP_sch_static;
2380   }
2381   llvm_unreachable("Unexpected runtime schedule");
2382 }
2383 
2384 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
2385 static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked)2386 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2387   // only static is allowed for dist_schedule
2388   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2389 }
2390 
isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const2391 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2392                                          bool Chunked) const {
2393   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2394   return Schedule == OMP_sch_static;
2395 }
2396 
isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked) const2397 bool CGOpenMPRuntime::isStaticNonchunked(
2398     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2399   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2400   return Schedule == OMP_dist_sch_static;
2401 }
2402 
2403 
isDynamic(OpenMPScheduleClauseKind ScheduleKind) const2404 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2405   auto Schedule =
2406       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2407   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2408   return Schedule != OMP_sch_static;
2409 }
2410 
addMonoNonMonoModifier(OpenMPSchedType Schedule,OpenMPScheduleClauseModifier M1,OpenMPScheduleClauseModifier M2)2411 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
2412                                   OpenMPScheduleClauseModifier M1,
2413                                   OpenMPScheduleClauseModifier M2) {
2414   int Modifier = 0;
2415   switch (M1) {
2416   case OMPC_SCHEDULE_MODIFIER_monotonic:
2417     Modifier = OMP_sch_modifier_monotonic;
2418     break;
2419   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2420     Modifier = OMP_sch_modifier_nonmonotonic;
2421     break;
2422   case OMPC_SCHEDULE_MODIFIER_simd:
2423     if (Schedule == OMP_sch_static_chunked)
2424       Schedule = OMP_sch_static_balanced_chunked;
2425     break;
2426   case OMPC_SCHEDULE_MODIFIER_last:
2427   case OMPC_SCHEDULE_MODIFIER_unknown:
2428     break;
2429   }
2430   switch (M2) {
2431   case OMPC_SCHEDULE_MODIFIER_monotonic:
2432     Modifier = OMP_sch_modifier_monotonic;
2433     break;
2434   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2435     Modifier = OMP_sch_modifier_nonmonotonic;
2436     break;
2437   case OMPC_SCHEDULE_MODIFIER_simd:
2438     if (Schedule == OMP_sch_static_chunked)
2439       Schedule = OMP_sch_static_balanced_chunked;
2440     break;
2441   case OMPC_SCHEDULE_MODIFIER_last:
2442   case OMPC_SCHEDULE_MODIFIER_unknown:
2443     break;
2444   }
2445   return Schedule | Modifier;
2446 }
2447 
emitForDispatchInit(CodeGenFunction & CGF,SourceLocation Loc,const OpenMPScheduleTy & ScheduleKind,unsigned IVSize,bool IVSigned,bool Ordered,llvm::Value * UB,llvm::Value * Chunk)2448 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
2449                                           SourceLocation Loc,
2450                                           const OpenMPScheduleTy &ScheduleKind,
2451                                           unsigned IVSize, bool IVSigned,
2452                                           bool Ordered, llvm::Value *UB,
2453                                           llvm::Value *Chunk) {
2454   if (!CGF.HaveInsertPoint())
2455     return;
2456   OpenMPSchedType Schedule =
2457       getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
2458   assert(Ordered ||
2459          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2460           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2461           Schedule != OMP_sch_static_balanced_chunked));
2462   // Call __kmpc_dispatch_init(
2463   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2464   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2465   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2466 
2467   // If the Chunk was not specified in the clause - use default value 1.
2468   if (Chunk == nullptr)
2469     Chunk = CGF.Builder.getIntN(IVSize, 1);
2470   llvm::Value *Args[] = {
2471       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2472       CGF.Builder.getInt32(addMonoNonMonoModifier(
2473           Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2474       CGF.Builder.getIntN(IVSize, 0),                   // Lower
2475       UB,                                               // Upper
2476       CGF.Builder.getIntN(IVSize, 1),                   // Stride
2477       Chunk                                             // Chunk
2478   };
2479   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2480 }
2481 
emitForStaticInitCall(CodeGenFunction & CGF,llvm::Value * UpdateLocation,llvm::Value * ThreadId,llvm::Constant * ForStaticInitFunction,OpenMPSchedType Schedule,OpenMPScheduleClauseModifier M1,OpenMPScheduleClauseModifier M2,unsigned IVSize,bool Ordered,Address IL,Address LB,Address UB,Address ST,llvm::Value * Chunk)2482 static void emitForStaticInitCall(
2483     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2484     llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
2485     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2486     unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB,
2487     Address ST, llvm::Value *Chunk) {
2488   if (!CGF.HaveInsertPoint())
2489      return;
2490 
2491    assert(!Ordered);
2492    assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2493           Schedule == OMP_sch_static_balanced_chunked ||
2494           Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2495           Schedule == OMP_dist_sch_static ||
2496           Schedule == OMP_dist_sch_static_chunked);
2497 
2498    // Call __kmpc_for_static_init(
2499    //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2500    //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2501    //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2502    //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2503    if (Chunk == nullptr) {
2504      assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2505              Schedule == OMP_dist_sch_static) &&
2506             "expected static non-chunked schedule");
2507      // If the Chunk was not specified in the clause - use default value 1.
2508        Chunk = CGF.Builder.getIntN(IVSize, 1);
2509    } else {
2510      assert((Schedule == OMP_sch_static_chunked ||
2511              Schedule == OMP_sch_static_balanced_chunked ||
2512              Schedule == OMP_ord_static_chunked ||
2513              Schedule == OMP_dist_sch_static_chunked) &&
2514             "expected static chunked schedule");
2515    }
2516    llvm::Value *Args[] = {
2517        UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier(
2518                                      Schedule, M1, M2)), // Schedule type
2519        IL.getPointer(),                                  // &isLastIter
2520        LB.getPointer(),                                  // &LB
2521        UB.getPointer(),                                  // &UB
2522        ST.getPointer(),                                  // &Stride
2523        CGF.Builder.getIntN(IVSize, 1),                   // Incr
2524        Chunk                                             // Chunk
2525    };
2526    CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2527 }
2528 
emitForStaticInit(CodeGenFunction & CGF,SourceLocation Loc,const OpenMPScheduleTy & ScheduleKind,unsigned IVSize,bool IVSigned,bool Ordered,Address IL,Address LB,Address UB,Address ST,llvm::Value * Chunk)2529 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2530                                         SourceLocation Loc,
2531                                         const OpenMPScheduleTy &ScheduleKind,
2532                                         unsigned IVSize, bool IVSigned,
2533                                         bool Ordered, Address IL, Address LB,
2534                                         Address UB, Address ST,
2535                                         llvm::Value *Chunk) {
2536   OpenMPSchedType ScheduleNum =
2537       getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
2538   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2539   auto *ThreadId = getThreadID(CGF, Loc);
2540   auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2541   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2542                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize,
2543                         Ordered, IL, LB, UB, ST, Chunk);
2544 }
2545 
emitDistributeStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDistScheduleClauseKind SchedKind,unsigned IVSize,bool IVSigned,bool Ordered,Address IL,Address LB,Address UB,Address ST,llvm::Value * Chunk)2546 void CGOpenMPRuntime::emitDistributeStaticInit(
2547     CodeGenFunction &CGF, SourceLocation Loc,
2548     OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned,
2549     bool Ordered, Address IL, Address LB, Address UB, Address ST,
2550     llvm::Value *Chunk) {
2551   OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
2552   auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2553   auto *ThreadId = getThreadID(CGF, Loc);
2554   auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2555   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2556                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2557                         OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB,
2558                         UB, ST, Chunk);
2559 }
2560 
emitForStaticFinish(CodeGenFunction & CGF,SourceLocation Loc)2561 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2562                                           SourceLocation Loc) {
2563   if (!CGF.HaveInsertPoint())
2564     return;
2565   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2566   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2567   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
2568                       Args);
2569 }
2570 
emitForOrderedIterationEnd(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned)2571 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2572                                                  SourceLocation Loc,
2573                                                  unsigned IVSize,
2574                                                  bool IVSigned) {
2575   if (!CGF.HaveInsertPoint())
2576     return;
2577   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2578   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2579   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2580 }
2581 
emitForNext(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned,Address IL,Address LB,Address UB,Address ST)2582 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2583                                           SourceLocation Loc, unsigned IVSize,
2584                                           bool IVSigned, Address IL,
2585                                           Address LB, Address UB,
2586                                           Address ST) {
2587   // Call __kmpc_dispatch_next(
2588   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2589   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2590   //          kmp_int[32|64] *p_stride);
2591   llvm::Value *Args[] = {
2592       emitUpdateLocation(CGF, Loc),
2593       getThreadID(CGF, Loc),
2594       IL.getPointer(), // &isLastIter
2595       LB.getPointer(), // &Lower
2596       UB.getPointer(), // &Upper
2597       ST.getPointer()  // &Stride
2598   };
2599   llvm::Value *Call =
2600       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2601   return CGF.EmitScalarConversion(
2602       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
2603       CGF.getContext().BoolTy, Loc);
2604 }
2605 
emitNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)2606 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2607                                            llvm::Value *NumThreads,
2608                                            SourceLocation Loc) {
2609   if (!CGF.HaveInsertPoint())
2610     return;
2611   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2612   llvm::Value *Args[] = {
2613       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2614       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2615   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
2616                       Args);
2617 }
2618 
emitProcBindClause(CodeGenFunction & CGF,OpenMPProcBindClauseKind ProcBind,SourceLocation Loc)2619 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2620                                          OpenMPProcBindClauseKind ProcBind,
2621                                          SourceLocation Loc) {
2622   if (!CGF.HaveInsertPoint())
2623     return;
2624   // Constants for proc bind value accepted by the runtime.
2625   enum ProcBindTy {
2626     ProcBindFalse = 0,
2627     ProcBindTrue,
2628     ProcBindMaster,
2629     ProcBindClose,
2630     ProcBindSpread,
2631     ProcBindIntel,
2632     ProcBindDefault
2633   } RuntimeProcBind;
2634   switch (ProcBind) {
2635   case OMPC_PROC_BIND_master:
2636     RuntimeProcBind = ProcBindMaster;
2637     break;
2638   case OMPC_PROC_BIND_close:
2639     RuntimeProcBind = ProcBindClose;
2640     break;
2641   case OMPC_PROC_BIND_spread:
2642     RuntimeProcBind = ProcBindSpread;
2643     break;
2644   case OMPC_PROC_BIND_unknown:
2645     llvm_unreachable("Unsupported proc_bind value.");
2646   }
2647   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2648   llvm::Value *Args[] = {
2649       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2650       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
2651   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
2652 }
2653 
emitFlush(CodeGenFunction & CGF,ArrayRef<const Expr * >,SourceLocation Loc)2654 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2655                                 SourceLocation Loc) {
2656   if (!CGF.HaveInsertPoint())
2657     return;
2658   // Build call void __kmpc_flush(ident_t *loc)
2659   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
2660                       emitUpdateLocation(CGF, Loc));
2661 }
2662 
namespace {
/// \brief Indexes of fields for type kmp_task_t. The values are consecutive
/// and start at zero.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds = 0,
  /// \brief Task routine.
  KmpTaskTRoutine = 1,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// Function with call of destructors for private variables.
  Data1 = 3,
  /// Task priority.
  Data2 = 4,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound = 5,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound = 6,
  /// (Taskloops only) Stride.
  KmpTaskTStride = 7,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter = 8,
};
} // anonymous namespace
2686 
empty() const2687 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2688   // FIXME: Add other entries type when they become supported.
2689   return OffloadEntriesTargetRegion.empty();
2690 }
2691 
2692 /// \brief Initialize target region entry.
2693 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
initializeTargetRegionEntryInfo(unsigned DeviceID,unsigned FileID,StringRef ParentName,unsigned LineNum,unsigned Order)2694     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2695                                     StringRef ParentName, unsigned LineNum,
2696                                     unsigned Order) {
2697   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2698                                              "only required for the device "
2699                                              "code generation.");
2700   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2701       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
2702   ++OffloadingEntriesNum;
2703 }
2704 
2705 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
registerTargetRegionEntryInfo(unsigned DeviceID,unsigned FileID,StringRef ParentName,unsigned LineNum,llvm::Constant * Addr,llvm::Constant * ID)2706     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2707                                   StringRef ParentName, unsigned LineNum,
2708                                   llvm::Constant *Addr, llvm::Constant *ID) {
2709   // If we are emitting code for a target, the entry is already initialized,
2710   // only has to be registered.
2711   if (CGM.getLangOpts().OpenMPIsDevice) {
2712     assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2713            "Entry must exist.");
2714     auto &Entry =
2715         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2716     assert(Entry.isValid() && "Entry not initialized!");
2717     Entry.setAddress(Addr);
2718     Entry.setID(ID);
2719     return;
2720   } else {
2721     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
2722     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2723   }
2724 }
2725 
hasTargetRegionEntryInfo(unsigned DeviceID,unsigned FileID,StringRef ParentName,unsigned LineNum) const2726 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2727     unsigned DeviceID, unsigned FileID, StringRef ParentName,
2728     unsigned LineNum) const {
2729   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2730   if (PerDevice == OffloadEntriesTargetRegion.end())
2731     return false;
2732   auto PerFile = PerDevice->second.find(FileID);
2733   if (PerFile == PerDevice->second.end())
2734     return false;
2735   auto PerParentName = PerFile->second.find(ParentName);
2736   if (PerParentName == PerFile->second.end())
2737     return false;
2738   auto PerLine = PerParentName->second.find(LineNum);
2739   if (PerLine == PerParentName->second.end())
2740     return false;
2741   // Fail if this entry is already registered.
2742   if (PerLine->second.getAddress() || PerLine->second.getID())
2743     return false;
2744   return true;
2745 }
2746 
actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy & Action)2747 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2748     const OffloadTargetRegionEntryInfoActTy &Action) {
2749   // Scan all target region entries and perform the provided action.
2750   for (auto &D : OffloadEntriesTargetRegion)
2751     for (auto &F : D.second)
2752       for (auto &P : F.second)
2753         for (auto &L : P.second)
2754           Action(D.first, F.first, P.first(), L.first, L.second);
2755 }
2756 
2757 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
2758 /// \a Codegen. This is used to emit the two functions that register and
2759 /// unregister the descriptor of the current compilation unit.
2760 static llvm::Function *
createOffloadingBinaryDescriptorFunction(CodeGenModule & CGM,StringRef Name,const RegionCodeGenTy & Codegen)2761 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
2762                                          const RegionCodeGenTy &Codegen) {
2763   auto &C = CGM.getContext();
2764   FunctionArgList Args;
2765   ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
2766                              /*Id=*/nullptr, C.VoidPtrTy);
2767   Args.push_back(&DummyPtr);
2768 
2769   CodeGenFunction CGF(CGM);
2770   GlobalDecl();
2771   auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2772   auto FTy = CGM.getTypes().GetFunctionType(FI);
2773   auto *Fn =
2774       CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
2775   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
2776   Codegen(CGF);
2777   CGF.FinishFunction();
2778   return Fn;
2779 }
2780 
2781 llvm::Function *
createOffloadingBinaryDescriptorRegistration()2782 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
2783 
2784   // If we don't have entries or if we are emitting code for the device, we
2785   // don't need to do anything.
2786   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
2787     return nullptr;
2788 
2789   auto &M = CGM.getModule();
2790   auto &C = CGM.getContext();
2791 
2792   // Get list of devices we care about
2793   auto &Devices = CGM.getLangOpts().OMPTargetTriples;
2794 
2795   // We should be creating an offloading descriptor only if there are devices
2796   // specified.
2797   assert(!Devices.empty() && "No OpenMP offloading devices??");
2798 
2799   // Create the external variables that will point to the begin and end of the
2800   // host entries section. These will be defined by the linker.
2801   auto *OffloadEntryTy =
2802       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
2803   llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
2804       M, OffloadEntryTy, /*isConstant=*/true,
2805       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2806       ".omp_offloading.entries_begin");
2807   llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
2808       M, OffloadEntryTy, /*isConstant=*/true,
2809       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2810       ".omp_offloading.entries_end");
2811 
2812   // Create all device images
2813   llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires;
2814   auto *DeviceImageTy = cast<llvm::StructType>(
2815       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
2816 
2817   for (unsigned i = 0; i < Devices.size(); ++i) {
2818     StringRef T = Devices[i].getTriple();
2819     auto *ImgBegin = new llvm::GlobalVariable(
2820         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2821         /*Initializer=*/nullptr,
2822         Twine(".omp_offloading.img_start.") + Twine(T));
2823     auto *ImgEnd = new llvm::GlobalVariable(
2824         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2825         /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
2826 
2827     llvm::Constant *Dev =
2828         llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd,
2829                                   HostEntriesBegin, HostEntriesEnd, nullptr);
2830     DeviceImagesEntires.push_back(Dev);
2831   }
2832 
2833   // Create device images global array.
2834   llvm::ArrayType *DeviceImagesInitTy =
2835       llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size());
2836   llvm::Constant *DeviceImagesInit =
2837       llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires);
2838 
2839   llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable(
2840       M, DeviceImagesInitTy, /*isConstant=*/true,
2841       llvm::GlobalValue::InternalLinkage, DeviceImagesInit,
2842       ".omp_offloading.device_images");
2843   DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2844 
2845   // This is a Zero array to be used in the creation of the constant expressions
2846   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
2847                              llvm::Constant::getNullValue(CGM.Int32Ty)};
2848 
2849   // Create the target region descriptor.
2850   auto *BinaryDescriptorTy = cast<llvm::StructType>(
2851       CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
2852   llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get(
2853       BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
2854       llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages,
2855                                            Index),
2856       HostEntriesBegin, HostEntriesEnd, nullptr);
2857 
2858   auto *Desc = new llvm::GlobalVariable(
2859       M, BinaryDescriptorTy, /*isConstant=*/true,
2860       llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit,
2861       ".omp_offloading.descriptor");
2862 
2863   // Emit code to register or unregister the descriptor at execution
2864   // startup or closing, respectively.
2865 
2866   // Create a variable to drive the registration and unregistration of the
2867   // descriptor, so we can reuse the logic that emits Ctors and Dtors.
2868   auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
2869   ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
2870                                 IdentInfo, C.CharTy);
2871 
2872   auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
2873       CGM, ".omp_offloading.descriptor_unreg",
2874       [&](CodeGenFunction &CGF, PrePostActionTy &) {
2875         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
2876                              Desc);
2877       });
2878   auto *RegFn = createOffloadingBinaryDescriptorFunction(
2879       CGM, ".omp_offloading.descriptor_reg",
2880       [&](CodeGenFunction &CGF, PrePostActionTy &) {
2881         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
2882                              Desc);
2883         CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
2884       });
2885   return RegFn;
2886 }
2887 
createOffloadEntry(llvm::Constant * ID,llvm::Constant * Addr,uint64_t Size)2888 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
2889                                          llvm::Constant *Addr, uint64_t Size) {
2890   StringRef Name = Addr->getName();
2891   auto *TgtOffloadEntryType = cast<llvm::StructType>(
2892       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
2893   llvm::LLVMContext &C = CGM.getModule().getContext();
2894   llvm::Module &M = CGM.getModule();
2895 
2896   // Make sure the address has the right type.
2897   llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
2898 
2899   // Create constant string with the name.
2900   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
2901 
2902   llvm::GlobalVariable *Str =
2903       new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
2904                                llvm::GlobalValue::InternalLinkage, StrPtrInit,
2905                                ".omp_offloading.entry_name");
2906   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2907   llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
2908 
2909   // Create the entry struct.
2910   llvm::Constant *EntryInit = llvm::ConstantStruct::get(
2911       TgtOffloadEntryType, AddrPtr, StrPtr,
2912       llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr);
2913   llvm::GlobalVariable *Entry = new llvm::GlobalVariable(
2914       M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage,
2915       EntryInit, ".omp_offloading.entry");
2916 
2917   // The entry has to be created in the section the linker expects it to be.
2918   Entry->setSection(".omp_offloading.entries");
2919   // We can't have any padding between symbols, so we need to have 1-byte
2920   // alignment.
2921   Entry->setAlignment(1);
2922 }
2923 
createOffloadEntriesAndInfoMetadata()2924 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2925   // Emit the offloading entries and metadata so that the device codegen side
2926   // can
2927   // easily figure out what to emit. The produced metadata looks like this:
2928   //
2929   // !omp_offload.info = !{!1, ...}
2930   //
2931   // Right now we only generate metadata for function that contain target
2932   // regions.
2933 
2934   // If we do not have entries, we dont need to do anything.
2935   if (OffloadEntriesInfoManager.empty())
2936     return;
2937 
2938   llvm::Module &M = CGM.getModule();
2939   llvm::LLVMContext &C = M.getContext();
2940   SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
2941       OrderedEntries(OffloadEntriesInfoManager.size());
2942 
2943   // Create the offloading info metadata node.
2944   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
2945 
2946   // Auxiliar methods to create metadata values and strings.
2947   auto getMDInt = [&](unsigned v) {
2948     return llvm::ConstantAsMetadata::get(
2949         llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
2950   };
2951 
2952   auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
2953 
2954   // Create function that emits metadata for each target region entry;
2955   auto &&TargetRegionMetadataEmitter = [&](
2956       unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
2957       OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
2958     llvm::SmallVector<llvm::Metadata *, 32> Ops;
2959     // Generate metadata for target regions. Each entry of this metadata
2960     // contains:
2961     // - Entry 0 -> Kind of this type of metadata (0).
2962     // - Entry 1 -> Device ID of the file where the entry was identified.
2963     // - Entry 2 -> File ID of the file where the entry was identified.
2964     // - Entry 3 -> Mangled name of the function where the entry was identified.
2965     // - Entry 4 -> Line in the file where the entry was identified.
2966     // - Entry 5 -> Order the entry was created.
2967     // The first element of the metadata node is the kind.
2968     Ops.push_back(getMDInt(E.getKind()));
2969     Ops.push_back(getMDInt(DeviceID));
2970     Ops.push_back(getMDInt(FileID));
2971     Ops.push_back(getMDString(ParentName));
2972     Ops.push_back(getMDInt(Line));
2973     Ops.push_back(getMDInt(E.getOrder()));
2974 
2975     // Save this entry in the right position of the ordered entries array.
2976     OrderedEntries[E.getOrder()] = &E;
2977 
2978     // Add metadata to the named metadata node.
2979     MD->addOperand(llvm::MDNode::get(C, Ops));
2980   };
2981 
2982   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
2983       TargetRegionMetadataEmitter);
2984 
2985   for (auto *E : OrderedEntries) {
2986     assert(E && "All ordered entries must exist!");
2987     if (auto *CE =
2988             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
2989                 E)) {
2990       assert(CE->getID() && CE->getAddress() &&
2991              "Entry ID and Addr are invalid!");
2992       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
2993     } else
2994       llvm_unreachable("Unsupported entry kind.");
2995   }
2996 }
2997 
2998 /// \brief Loads all the offload entries information from the host IR
2999 /// metadata.
loadOffloadInfoMetadata()3000 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3001   // If we are in target mode, load the metadata from the host IR. This code has
3002   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3003 
3004   if (!CGM.getLangOpts().OpenMPIsDevice)
3005     return;
3006 
3007   if (CGM.getLangOpts().OMPHostIRFile.empty())
3008     return;
3009 
3010   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3011   if (Buf.getError())
3012     return;
3013 
3014   llvm::LLVMContext C;
3015   auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C);
3016 
3017   if (ME.getError())
3018     return;
3019 
3020   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3021   if (!MD)
3022     return;
3023 
3024   for (auto I : MD->operands()) {
3025     llvm::MDNode *MN = cast<llvm::MDNode>(I);
3026 
3027     auto getMDInt = [&](unsigned Idx) {
3028       llvm::ConstantAsMetadata *V =
3029           cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3030       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3031     };
3032 
3033     auto getMDString = [&](unsigned Idx) {
3034       llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
3035       return V->getString();
3036     };
3037 
3038     switch (getMDInt(0)) {
3039     default:
3040       llvm_unreachable("Unexpected metadata!");
3041       break;
3042     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3043         OFFLOAD_ENTRY_INFO_TARGET_REGION:
3044       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3045           /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
3046           /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
3047           /*Order=*/getMDInt(5));
3048       break;
3049     }
3050   }
3051 }
3052 
emitKmpRoutineEntryT(QualType KmpInt32Ty)3053 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3054   if (!KmpRoutineEntryPtrTy) {
3055     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3056     auto &C = CGM.getContext();
3057     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3058     FunctionProtoType::ExtProtoInfo EPI;
3059     KmpRoutineEntryPtrQTy = C.getPointerType(
3060         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3061     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3062   }
3063 }
3064 
addFieldToRecordDecl(ASTContext & C,DeclContext * DC,QualType FieldTy)3065 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
3066                                        QualType FieldTy) {
3067   auto *Field = FieldDecl::Create(
3068       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
3069       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
3070       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
3071   Field->setAccess(AS_public);
3072   DC->addDecl(Field);
3073   return Field;
3074 }
3075 
getTgtOffloadEntryQTy()3076 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3077 
3078   // Make sure the type of the entry is already created. This is the type we
3079   // have to create:
3080   // struct __tgt_offload_entry{
3081   //   void      *addr;       // Pointer to the offload entry info.
3082   //                          // (function or global)
3083   //   char      *name;       // Name of the function or global.
3084   //   size_t     size;       // Size of the entry info (0 if it a function).
3085   // };
3086   if (TgtOffloadEntryQTy.isNull()) {
3087     ASTContext &C = CGM.getContext();
3088     auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
3089     RD->startDefinition();
3090     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3091     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3092     addFieldToRecordDecl(C, RD, C.getSizeType());
3093     RD->completeDefinition();
3094     TgtOffloadEntryQTy = C.getRecordType(RD);
3095   }
3096   return TgtOffloadEntryQTy;
3097 }
3098 
getTgtDeviceImageQTy()3099 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
3100   // These are the types we need to build:
3101   // struct __tgt_device_image{
3102   // void   *ImageStart;       // Pointer to the target code start.
3103   // void   *ImageEnd;         // Pointer to the target code end.
3104   // // We also add the host entries to the device image, as it may be useful
3105   // // for the target runtime to have access to that information.
3106   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
3107   //                                       // the entries.
3108   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3109   //                                       // entries (non inclusive).
3110   // };
3111   if (TgtDeviceImageQTy.isNull()) {
3112     ASTContext &C = CGM.getContext();
3113     auto *RD = C.buildImplicitRecord("__tgt_device_image");
3114     RD->startDefinition();
3115     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3116     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3117     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3118     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3119     RD->completeDefinition();
3120     TgtDeviceImageQTy = C.getRecordType(RD);
3121   }
3122   return TgtDeviceImageQTy;
3123 }
3124 
getTgtBinaryDescriptorQTy()3125 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
3126   // struct __tgt_bin_desc{
3127   //   int32_t              NumDevices;      // Number of devices supported.
3128   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
3129   //                                         // (one per device).
3130   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
3131   //                                         // entries.
3132   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3133   //                                         // entries (non inclusive).
3134   // };
3135   if (TgtBinaryDescriptorQTy.isNull()) {
3136     ASTContext &C = CGM.getContext();
3137     auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
3138     RD->startDefinition();
3139     addFieldToRecordDecl(
3140         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3141     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
3142     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3143     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3144     RD->completeDefinition();
3145     TgtBinaryDescriptorQTy = C.getRecordType(RD);
3146   }
3147   return TgtBinaryDescriptorQTy;
3148 }
3149 
3150 namespace {
3151 struct PrivateHelpersTy {
PrivateHelpersTy__anondef231dd1111::PrivateHelpersTy3152   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
3153                    const VarDecl *PrivateElemInit)
3154       : Original(Original), PrivateCopy(PrivateCopy),
3155         PrivateElemInit(PrivateElemInit) {}
3156   const VarDecl *Original;
3157   const VarDecl *PrivateCopy;
3158   const VarDecl *PrivateElemInit;
3159 };
3160 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3161 } // anonymous namespace
3162 
3163 static RecordDecl *
createPrivatesRecordDecl(CodeGenModule & CGM,ArrayRef<PrivateDataTy> Privates)3164 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3165   if (!Privates.empty()) {
3166     auto &C = CGM.getContext();
3167     // Build struct .kmp_privates_t. {
3168     //         /*  private vars  */
3169     //       };
3170     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
3171     RD->startDefinition();
3172     for (auto &&Pair : Privates) {
3173       auto *VD = Pair.second.Original;
3174       auto Type = VD->getType();
3175       Type = Type.getNonReferenceType();
3176       auto *FD = addFieldToRecordDecl(C, RD, Type);
3177       if (VD->hasAttrs()) {
3178         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3179              E(VD->getAttrs().end());
3180              I != E; ++I)
3181           FD->addAttr(*I);
3182       }
3183     }
3184     RD->completeDefinition();
3185     return RD;
3186   }
3187   return nullptr;
3188 }
3189 
3190 static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule & CGM,OpenMPDirectiveKind Kind,QualType KmpInt32Ty,QualType KmpRoutineEntryPointerQTy)3191 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3192                          QualType KmpInt32Ty,
3193                          QualType KmpRoutineEntryPointerQTy) {
3194   auto &C = CGM.getContext();
3195   // Build struct kmp_task_t {
3196   //         void *              shareds;
3197   //         kmp_routine_entry_t routine;
3198   //         kmp_int32           part_id;
3199   //         kmp_cmplrdata_t data1;
3200   //         kmp_cmplrdata_t data2;
3201   // For taskloops additional fields:
3202   //         kmp_uint64          lb;
3203   //         kmp_uint64          ub;
3204   //         kmp_int64           st;
3205   //         kmp_int32           liter;
3206   //       };
3207   auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3208   UD->startDefinition();
3209   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3210   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3211   UD->completeDefinition();
3212   QualType KmpCmplrdataTy = C.getRecordType(UD);
3213   auto *RD = C.buildImplicitRecord("kmp_task_t");
3214   RD->startDefinition();
3215   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3216   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3217   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3218   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3219   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3220   if (isOpenMPTaskLoopDirective(Kind)) {
3221     QualType KmpUInt64Ty =
3222         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3223     QualType KmpInt64Ty =
3224         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3225     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3226     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3227     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3228     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3229   }
3230   RD->completeDefinition();
3231   return RD;
3232 }
3233 
3234 static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule & CGM,QualType KmpTaskTQTy,ArrayRef<PrivateDataTy> Privates)3235 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3236                                      ArrayRef<PrivateDataTy> Privates) {
3237   auto &C = CGM.getContext();
3238   // Build struct kmp_task_t_with_privates {
3239   //         kmp_task_t task_data;
3240   //         .kmp_privates_t. privates;
3241   //       };
3242   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3243   RD->startDefinition();
3244   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3245   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
3246     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3247   }
3248   RD->completeDefinition();
3249   return RD;
3250 }
3251 
3252 /// \brief Emit a proxy function which accepts kmp_task_t as the second
3253 /// argument.
3254 /// \code
3255 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3256 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3257 ///   For taskloops:
3258 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3259 ///   tt->shareds);
3260 ///   return 0;
3261 /// }
3262 /// \endcode
3263 static llvm::Value *
emitProxyTaskFunction(CodeGenModule & CGM,SourceLocation Loc,OpenMPDirectiveKind Kind,QualType KmpInt32Ty,QualType KmpTaskTWithPrivatesPtrQTy,QualType KmpTaskTWithPrivatesQTy,QualType KmpTaskTQTy,QualType SharedsPtrTy,llvm::Value * TaskFunction,llvm::Value * TaskPrivatesMap)3264 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3265                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3266                       QualType KmpTaskTWithPrivatesPtrQTy,
3267                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3268                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
3269                       llvm::Value *TaskPrivatesMap) {
3270   auto &C = CGM.getContext();
3271   FunctionArgList Args;
3272   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
3273   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
3274                                 /*Id=*/nullptr,
3275                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
3276   Args.push_back(&GtidArg);
3277   Args.push_back(&TaskTypeArg);
3278   auto &TaskEntryFnInfo =
3279       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3280   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3281   auto *TaskEntry =
3282       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
3283                              ".omp_task_entry.", &CGM.getModule());
3284   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
3285   CodeGenFunction CGF(CGM);
3286   CGF.disableDebugInfo();
3287   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
3288 
3289   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3290   // tt,
3291   // For taskloops:
3292   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3293   // tt->task_data.shareds);
3294   auto *GtidParam = CGF.EmitLoadOfScalar(
3295       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3296   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3297       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3298       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3299   auto *KmpTaskTWithPrivatesQTyRD =
3300       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3301   LValue Base =
3302       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3303   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3304   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3305   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3306   auto *PartidParam = PartIdLVal.getPointer();
3307 
3308   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3309   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3310   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3311       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
3312       CGF.ConvertTypeForMem(SharedsPtrTy));
3313 
3314   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3315   llvm::Value *PrivatesParam;
3316   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3317     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3318     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3319         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
3320   } else
3321     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3322 
3323   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3324                                TaskPrivatesMap,
3325                                CGF.Builder
3326                                    .CreatePointerBitCastOrAddrSpaceCast(
3327                                        TDBase.getAddress(), CGF.VoidPtrTy)
3328                                    .getPointer()};
3329   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3330                                           std::end(CommonArgs));
3331   if (isOpenMPTaskLoopDirective(Kind)) {
3332     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3333     auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3334     auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
3335     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3336     auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3337     auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
3338     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3339     auto StLVal = CGF.EmitLValueForField(Base, *StFI);
3340     auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
3341     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3342     auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
3343     auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
3344     CallArgs.push_back(LBParam);
3345     CallArgs.push_back(UBParam);
3346     CallArgs.push_back(StParam);
3347     CallArgs.push_back(LIParam);
3348   }
3349   CallArgs.push_back(SharedsParam);
3350 
3351   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
3352   CGF.EmitStoreThroughLValue(
3353       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3354       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3355   CGF.FinishFunction();
3356   return TaskEntry;
3357 }
3358 
emitDestructorsFunction(CodeGenModule & CGM,SourceLocation Loc,QualType KmpInt32Ty,QualType KmpTaskTWithPrivatesPtrQTy,QualType KmpTaskTWithPrivatesQTy)3359 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3360                                             SourceLocation Loc,
3361                                             QualType KmpInt32Ty,
3362                                             QualType KmpTaskTWithPrivatesPtrQTy,
3363                                             QualType KmpTaskTWithPrivatesQTy) {
3364   auto &C = CGM.getContext();
3365   FunctionArgList Args;
3366   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
3367   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
3368                                 /*Id=*/nullptr,
3369                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
3370   Args.push_back(&GtidArg);
3371   Args.push_back(&TaskTypeArg);
3372   FunctionType::ExtInfo Info;
3373   auto &DestructorFnInfo =
3374       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3375   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
3376   auto *DestructorFn =
3377       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3378                              ".omp_task_destructor.", &CGM.getModule());
3379   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
3380                                     DestructorFnInfo);
3381   CodeGenFunction CGF(CGM);
3382   CGF.disableDebugInfo();
3383   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3384                     Args);
3385 
3386   LValue Base = CGF.EmitLoadOfPointerLValue(
3387       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3388       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3389   auto *KmpTaskTWithPrivatesQTyRD =
3390       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3391   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3392   Base = CGF.EmitLValueForField(Base, *FI);
3393   for (auto *Field :
3394        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3395     if (auto DtorKind = Field->getType().isDestructedType()) {
3396       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
3397       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3398     }
3399   }
3400   CGF.FinishFunction();
3401   return DestructorFn;
3402 }
3403 
3404 /// \brief Emit a privates mapping function for correct handling of private and
3405 /// firstprivate variables.
3406 /// \code
3407 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3408 /// **noalias priv1,...,  <tyn> **noalias privn) {
3409 ///   *priv1 = &.privates.priv1;
3410 ///   ...;
3411 ///   *privn = &.privates.privn;
3412 /// }
3413 /// \endcode
3414 static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule & CGM,SourceLocation Loc,ArrayRef<const Expr * > PrivateVars,ArrayRef<const Expr * > FirstprivateVars,ArrayRef<const Expr * > LastprivateVars,QualType PrivatesQTy,ArrayRef<PrivateDataTy> Privates)3415 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3416                                ArrayRef<const Expr *> PrivateVars,
3417                                ArrayRef<const Expr *> FirstprivateVars,
3418                                ArrayRef<const Expr *> LastprivateVars,
3419                                QualType PrivatesQTy,
3420                                ArrayRef<PrivateDataTy> Privates) {
3421   auto &C = CGM.getContext();
3422   FunctionArgList Args;
3423   ImplicitParamDecl TaskPrivatesArg(
3424       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3425       C.getPointerType(PrivatesQTy).withConst().withRestrict());
3426   Args.push_back(&TaskPrivatesArg);
3427   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3428   unsigned Counter = 1;
3429   for (auto *E: PrivateVars) {
3430     Args.push_back(ImplicitParamDecl::Create(
3431         C, /*DC=*/nullptr, Loc,
3432         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3433                             .withConst()
3434                             .withRestrict()));
3435     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3436     PrivateVarsPos[VD] = Counter;
3437     ++Counter;
3438   }
3439   for (auto *E : FirstprivateVars) {
3440     Args.push_back(ImplicitParamDecl::Create(
3441         C, /*DC=*/nullptr, Loc,
3442         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3443                             .withConst()
3444                             .withRestrict()));
3445     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3446     PrivateVarsPos[VD] = Counter;
3447     ++Counter;
3448   }
3449   for (auto *E: LastprivateVars) {
3450     Args.push_back(ImplicitParamDecl::Create(
3451         C, /*DC=*/nullptr, Loc,
3452         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3453                             .withConst()
3454                             .withRestrict()));
3455     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3456     PrivateVarsPos[VD] = Counter;
3457     ++Counter;
3458   }
3459   auto &TaskPrivatesMapFnInfo =
3460       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3461   auto *TaskPrivatesMapTy =
3462       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3463   auto *TaskPrivatesMap = llvm::Function::Create(
3464       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
3465       ".omp_task_privates_map.", &CGM.getModule());
3466   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
3467                                     TaskPrivatesMapFnInfo);
3468   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3469   CodeGenFunction CGF(CGM);
3470   CGF.disableDebugInfo();
3471   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3472                     TaskPrivatesMapFnInfo, Args);
3473 
3474   // *privi = &.privates.privi;
3475   LValue Base = CGF.EmitLoadOfPointerLValue(
3476       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3477       TaskPrivatesArg.getType()->castAs<PointerType>());
3478   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3479   Counter = 0;
3480   for (auto *Field : PrivatesQTyRD->fields()) {
3481     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
3482     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3483     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3484     auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3485         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3486     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
3487     ++Counter;
3488   }
3489   CGF.FinishFunction();
3490   return TaskPrivatesMap;
3491 }
3492 
array_pod_sort_comparator(const PrivateDataTy * P1,const PrivateDataTy * P2)3493 static int array_pod_sort_comparator(const PrivateDataTy *P1,
3494                                      const PrivateDataTy *P2) {
3495   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
3496 }
3497 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \a Privates in step with the fields of the privates record (the
/// second field of \a KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into its field.
///
/// \param D Directive whose associated captured statement is used to look up
/// the captured field of an original variable.
/// \param KmpTaskSharedsPtr Address of the shareds block; only read when
/// \a Data has firstprivate variables.
/// \param TDBase LValue of the kmp_task_t_with_privates object being filled.
/// \param KmpTaskTWithPrivatesQTyRD Record of the task object; it must have
/// the privates field.
/// \param SharedsTy Type of the shareds block.
/// \param SharedsPtrTy Pointer type to \a SharedsTy.
/// \param Data Task data; only its firstprivate list is inspected here.
/// \param Privates Privatized variables, in privates-record field order.
/// \param ForDup When true, only copies whose initializer is a non-trivial
/// C++ constructor call are initialized; when false, every copy that has an
/// initializer is.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  auto &C = CGF.getContext();
  // FI first designates the privates field of the task record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // The shareds block is the initialization source for firstprivates, so it
  // is only materialized when there are any.
  LValue SrcBase;
  if (!Data.FirstprivateVars.empty()) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
      cast<CapturedStmt>(*D.getAssociatedStmt()));
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (auto &&Pair : Privates) {
    auto *VD = Pair.second.PrivateCopy;
    auto *Init = VD->getAnyInitializer();
    // For the dup function only non-trivial constructor calls are emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the copy is initialized from the
      // captured original variable.
      if (auto *Elem = Pair.second.PrivateElemInit) {
        auto *OriginalVD = Pair.second.Original;
        auto *SharedField = CapturesInfo.lookup(OriginalVD);
        auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
        // Re-wrap the same pointer using the alignment of the original
        // declaration.
        SharedRefLValue = CGF.MakeAddrLValue(
            Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
            SharedRefLValue.getType(), AlignmentSource::Decl);
        QualType Type = OriginalVD->getType();
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                    SharedRefLValue.getAddress(), Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Map the helper variable to the current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: map the helper variable to the shared original and
          // emit the initializer directly into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
    }
    // Advance even when nothing was emitted so fields stay in step with
    // Privates.
    ++FI;
  }
}
3575 
3576 /// Check if duplication function is required for taskloops.
checkInitIsRequired(CodeGenFunction & CGF,ArrayRef<PrivateDataTy> Privates)3577 static bool checkInitIsRequired(CodeGenFunction &CGF,
3578                                 ArrayRef<PrivateDataTy> Privates) {
3579   bool InitRequired = false;
3580   for (auto &&Pair : Privates) {
3581     auto *VD = Pair.second.PrivateCopy;
3582     auto *Init = VD->getAnyInitializer();
3583     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3584                                     !CGF.isTrivialInitializer(Init));
3585   }
3586   return InitRequired;
3587 }
3588 
3589 
3590 /// Emit task_dup function (for initialization of
3591 /// private/firstprivate/lastprivate vars and last_iter flag)
3592 /// \code
3593 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3594 /// lastpriv) {
3595 /// // setup lastprivate flag
3596 ///    task_dst->last = lastpriv;
3597 /// // could be constructor calls here...
3598 /// }
3599 /// \endcode
3600 static llvm::Value *
emitTaskDupFunction(CodeGenModule & CGM,SourceLocation Loc,const OMPExecutableDirective & D,QualType KmpTaskTWithPrivatesPtrQTy,const RecordDecl * KmpTaskTWithPrivatesQTyRD,const RecordDecl * KmpTaskTQTyRD,QualType SharedsTy,QualType SharedsPtrTy,const OMPTaskDataTy & Data,ArrayRef<PrivateDataTy> Privates,bool WithLastIter)3601 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3602                     const OMPExecutableDirective &D,
3603                     QualType KmpTaskTWithPrivatesPtrQTy,
3604                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3605                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3606                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3607                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3608   auto &C = CGM.getContext();
3609   FunctionArgList Args;
3610   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc,
3611                            /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
3612   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc,
3613                            /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
3614   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc,
3615                                 /*Id=*/nullptr, C.IntTy);
3616   Args.push_back(&DstArg);
3617   Args.push_back(&SrcArg);
3618   Args.push_back(&LastprivArg);
3619   auto &TaskDupFnInfo =
3620       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3621   auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3622   auto *TaskDup =
3623       llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
3624                              ".omp_task_dup.", &CGM.getModule());
3625   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
3626   CodeGenFunction CGF(CGM);
3627   CGF.disableDebugInfo();
3628   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
3629 
3630   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3631       CGF.GetAddrOfLocalVar(&DstArg),
3632       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3633   // task_dst->liter = lastpriv;
3634   if (WithLastIter) {
3635     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3636     LValue Base = CGF.EmitLValueForField(
3637         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3638     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3639     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3640         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3641     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3642   }
3643 
3644   // Emit initial values for private copies (if any).
3645   assert(!Privates.empty());
3646   Address KmpTaskSharedsPtr = Address::invalid();
3647   if (!Data.FirstprivateVars.empty()) {
3648     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3649         CGF.GetAddrOfLocalVar(&SrcArg),
3650         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3651     LValue Base = CGF.EmitLValueForField(
3652         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3653     KmpTaskSharedsPtr = Address(
3654         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3655                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3656                                                   KmpTaskTShareds)),
3657                              Loc),
3658         CGF.getNaturalTypeAlignment(SharedsTy));
3659   }
3660   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3661                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3662   CGF.FinishFunction();
3663   return TaskDup;
3664 }
3665 
3666 /// Checks if destructor function is required to be generated.
3667 /// \return true if cleanups are required, false otherwise.
3668 static bool
checkDestructorsRequired(const RecordDecl * KmpTaskTWithPrivatesQTyRD)3669 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
3670   bool NeedsCleanup = false;
3671   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3672   auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
3673   for (auto *FD : PrivateRD->fields()) {
3674     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
3675     if (NeedsCleanup)
3676       break;
3677   }
3678   return NeedsCleanup;
3679 }
3680 
3681 CGOpenMPRuntime::TaskResultTy
emitTaskInit(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Value * TaskFunction,QualType SharedsTy,Address Shareds,const OMPTaskDataTy & Data)3682 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3683                               const OMPExecutableDirective &D,
3684                               llvm::Value *TaskFunction, QualType SharedsTy,
3685                               Address Shareds, const OMPTaskDataTy &Data) {
3686   auto &C = CGM.getContext();
3687   llvm::SmallVector<PrivateDataTy, 4> Privates;
3688   // Aggregate privates and sort them by the alignment.
3689   auto I = Data.PrivateCopies.begin();
3690   for (auto *E : Data.PrivateVars) {
3691     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3692     Privates.push_back(std::make_pair(
3693         C.getDeclAlign(VD),
3694         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3695                          /*PrivateElemInit=*/nullptr)));
3696     ++I;
3697   }
3698   I = Data.FirstprivateCopies.begin();
3699   auto IElemInitRef = Data.FirstprivateInits.begin();
3700   for (auto *E : Data.FirstprivateVars) {
3701     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3702     Privates.push_back(std::make_pair(
3703         C.getDeclAlign(VD),
3704         PrivateHelpersTy(
3705             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3706             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
3707     ++I;
3708     ++IElemInitRef;
3709   }
3710   I = Data.LastprivateCopies.begin();
3711   for (auto *E : Data.LastprivateVars) {
3712     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3713     Privates.push_back(std::make_pair(
3714         C.getDeclAlign(VD),
3715         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3716                          /*PrivateElemInit=*/nullptr)));
3717     ++I;
3718   }
3719   llvm::array_pod_sort(Privates.begin(), Privates.end(),
3720                        array_pod_sort_comparator);
3721   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3722   // Build type kmp_routine_entry_t (if not built yet).
3723   emitKmpRoutineEntryT(KmpInt32Ty);
3724   // Build type kmp_task_t (if not built yet).
3725   if (KmpTaskTQTy.isNull()) {
3726     KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3727         CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3728   }
3729   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3730   // Build particular struct kmp_task_t for the given task.
3731   auto *KmpTaskTWithPrivatesQTyRD =
3732       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3733   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3734   QualType KmpTaskTWithPrivatesPtrQTy =
3735       C.getPointerType(KmpTaskTWithPrivatesQTy);
3736   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3737   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
3738   auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3739   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3740 
3741   // Emit initial values for private copies (if any).
3742   llvm::Value *TaskPrivatesMap = nullptr;
3743   auto *TaskPrivatesMapTy =
3744       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
3745                 3)
3746           ->getType();
3747   if (!Privates.empty()) {
3748     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3749     TaskPrivatesMap = emitTaskPrivateMappingFunction(
3750         CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
3751         FI->getType(), Privates);
3752     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3753         TaskPrivatesMap, TaskPrivatesMapTy);
3754   } else {
3755     TaskPrivatesMap = llvm::ConstantPointerNull::get(
3756         cast<llvm::PointerType>(TaskPrivatesMapTy));
3757   }
3758   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3759   // kmp_task_t *tt);
3760   auto *TaskEntry = emitProxyTaskFunction(
3761       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3762       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3763       TaskPrivatesMap);
3764 
3765   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3766   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3767   // kmp_routine_entry_t *task_entry);
3768   // Task flags. Format is taken from
3769   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
3770   // description of kmp_tasking_flags struct.
3771   enum {
3772     TiedFlag = 0x1,
3773     FinalFlag = 0x2,
3774     DestructorsFlag = 0x8,
3775     PriorityFlag = 0x20
3776   };
3777   unsigned Flags = Data.Tied ? TiedFlag : 0;
3778   bool NeedsCleanup = false;
3779   if (!Privates.empty()) {
3780     NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
3781     if (NeedsCleanup)
3782       Flags = Flags | DestructorsFlag;
3783   }
3784   if (Data.Priority.getInt())
3785     Flags = Flags | PriorityFlag;
3786   auto *TaskFlags =
3787       Data.Final.getPointer()
3788           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3789                                      CGF.Builder.getInt32(FinalFlag),
3790                                      CGF.Builder.getInt32(/*C=*/0))
3791           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3792   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3793   auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3794   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
3795                               getThreadID(CGF, Loc), TaskFlags,
3796                               KmpTaskTWithPrivatesTySize, SharedsSize,
3797                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3798                                   TaskEntry, KmpRoutineEntryPtrTy)};
3799   auto *NewTask = CGF.EmitRuntimeCall(
3800       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
3801   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3802       NewTask, KmpTaskTWithPrivatesPtrTy);
3803   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
3804                                                KmpTaskTWithPrivatesQTy);
3805   LValue TDBase =
3806       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3807   // Fill the data in the resulting kmp_task_t record.
3808   // Copy shareds if there are any.
3809   Address KmpTaskSharedsPtr = Address::invalid();
3810   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3811     KmpTaskSharedsPtr =
3812         Address(CGF.EmitLoadOfScalar(
3813                     CGF.EmitLValueForField(
3814                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
3815                                            KmpTaskTShareds)),
3816                     Loc),
3817                 CGF.getNaturalTypeAlignment(SharedsTy));
3818     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
3819   }
3820   // Emit initial values for private copies (if any).
3821   TaskResultTy Result;
3822   if (!Privates.empty()) {
3823     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3824                      SharedsTy, SharedsPtrTy, Data, Privates,
3825                      /*ForDup=*/false);
3826     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3827         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3828       Result.TaskDupFn = emitTaskDupFunction(
3829           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3830           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3831           /*WithLastIter=*/!Data.LastprivateVars.empty());
3832     }
3833   }
3834   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3835   enum { Priority = 0, Destructors = 1 };
3836   // Provide pointer to function with destructors for privates.
3837   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3838   auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
3839   if (NeedsCleanup) {
3840     llvm::Value *DestructorFn = emitDestructorsFunction(
3841         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3842         KmpTaskTWithPrivatesQTy);
3843     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3844     LValue DestructorsLV = CGF.EmitLValueForField(
3845         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3846     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3847                               DestructorFn, KmpRoutineEntryPtrTy),
3848                           DestructorsLV);
3849   }
3850   // Set priority.
3851   if (Data.Priority.getInt()) {
3852     LValue Data2LV = CGF.EmitLValueForField(
3853         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3854     LValue PriorityLV = CGF.EmitLValueForField(
3855         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3856     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3857   }
3858   Result.NewTask = NewTask;
3859   Result.TaskEntry = TaskEntry;
3860   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3861   Result.TDBase = TDBase;
3862   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3863   return Result;
3864 }
3865 
emitTaskCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Value * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,const OMPTaskDataTy & Data)3866 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
3867                                    const OMPExecutableDirective &D,
3868                                    llvm::Value *TaskFunction,
3869                                    QualType SharedsTy, Address Shareds,
3870                                    const Expr *IfCond,
3871                                    const OMPTaskDataTy &Data) {
3872   if (!CGF.HaveInsertPoint())
3873     return;
3874 
3875   TaskResultTy Result =
3876       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
3877   llvm::Value *NewTask = Result.NewTask;
3878   llvm::Value *TaskEntry = Result.TaskEntry;
3879   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
3880   LValue TDBase = Result.TDBase;
3881   RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
3882   auto &C = CGM.getContext();
3883   // Process list of dependences.
3884   Address DependenciesArray = Address::invalid();
3885   unsigned NumDependencies = Data.Dependences.size();
3886   if (NumDependencies) {
3887     // Dependence kind for RTL.
3888     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
3889     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
3890     RecordDecl *KmpDependInfoRD;
3891     QualType FlagsTy =
3892         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
3893     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
3894     if (KmpDependInfoTy.isNull()) {
3895       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
3896       KmpDependInfoRD->startDefinition();
3897       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
3898       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
3899       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
3900       KmpDependInfoRD->completeDefinition();
3901       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
3902     } else
3903       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
3904     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
3905     // Define type kmp_depend_info[<Dependences.size()>];
3906     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
3907         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
3908         ArrayType::Normal, /*IndexTypeQuals=*/0);
3909     // kmp_depend_info[<Dependences.size()>] deps;
3910     DependenciesArray =
3911         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
3912     for (unsigned i = 0; i < NumDependencies; ++i) {
3913       const Expr *E = Data.Dependences[i].second;
3914       auto Addr = CGF.EmitLValue(E);
3915       llvm::Value *Size;
3916       QualType Ty = E->getType();
3917       if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3918         LValue UpAddrLVal =
3919             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
3920         llvm::Value *UpAddr =
3921             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
3922         llvm::Value *LowIntPtr =
3923             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
3924         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
3925         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3926       } else
3927         Size = CGF.getTypeSize(Ty);
3928       auto Base = CGF.MakeAddrLValue(
3929           CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
3930           KmpDependInfoTy);
3931       // deps[i].base_addr = &<Dependences[i].second>;
3932       auto BaseAddrLVal = CGF.EmitLValueForField(
3933           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
3934       CGF.EmitStoreOfScalar(
3935           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
3936           BaseAddrLVal);
3937       // deps[i].len = sizeof(<Dependences[i].second>);
3938       auto LenLVal = CGF.EmitLValueForField(
3939           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
3940       CGF.EmitStoreOfScalar(Size, LenLVal);
3941       // deps[i].flags = <Dependences[i].first>;
3942       RTLDependenceKindTy DepKind;
3943       switch (Data.Dependences[i].first) {
3944       case OMPC_DEPEND_in:
3945         DepKind = DepIn;
3946         break;
3947       // Out and InOut dependencies must use the same code.
3948       case OMPC_DEPEND_out:
3949       case OMPC_DEPEND_inout:
3950         DepKind = DepInOut;
3951         break;
3952       case OMPC_DEPEND_source:
3953       case OMPC_DEPEND_sink:
3954       case OMPC_DEPEND_unknown:
3955         llvm_unreachable("Unknown task dependence type");
3956       }
3957       auto FlagsLVal = CGF.EmitLValueForField(
3958           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
3959       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
3960                             FlagsLVal);
3961     }
3962     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3963         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
3964         CGF.VoidPtrTy);
3965   }
3966 
3967   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
3968   // libcall.
3969   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
3970   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
3971   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
3972   // list is not empty
3973   auto *ThreadID = getThreadID(CGF, Loc);
3974   auto *UpLoc = emitUpdateLocation(CGF, Loc);
3975   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
3976   llvm::Value *DepTaskArgs[7];
3977   if (NumDependencies) {
3978     DepTaskArgs[0] = UpLoc;
3979     DepTaskArgs[1] = ThreadID;
3980     DepTaskArgs[2] = NewTask;
3981     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
3982     DepTaskArgs[4] = DependenciesArray.getPointer();
3983     DepTaskArgs[5] = CGF.Builder.getInt32(0);
3984     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3985   }
3986   auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD,
3987                         NumDependencies, &TaskArgs,
3988                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
3989     if (!Data.Tied) {
3990       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3991       auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
3992       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
3993     }
3994     if (NumDependencies) {
3995       CGF.EmitRuntimeCall(
3996           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
3997     } else {
3998       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
3999                           TaskArgs);
4000     }
4001     // Check if parent region is untied and build return for untied task;
4002     if (auto *Region =
4003             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4004       Region->emitUntiedSwitch(CGF);
4005   };
4006 
4007   llvm::Value *DepWaitTaskArgs[6];
4008   if (NumDependencies) {
4009     DepWaitTaskArgs[0] = UpLoc;
4010     DepWaitTaskArgs[1] = ThreadID;
4011     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
4012     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4013     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4014     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4015   }
4016   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
4017                         NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
4018                                                            PrePostActionTy &) {
4019     auto &RT = CGF.CGM.getOpenMPRuntime();
4020     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4021     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4022     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4023     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4024     // is specified.
4025     if (NumDependencies)
4026       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
4027                           DepWaitTaskArgs);
4028     // Call proxy_task_entry(gtid, new_task);
4029     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
4030         CodeGenFunction &CGF, PrePostActionTy &Action) {
4031       Action.Enter(CGF);
4032       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4033       CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
4034     };
4035 
4036     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4037     // kmp_task_t *new_task);
4038     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4039     // kmp_task_t *new_task);
4040     RegionCodeGenTy RCG(CodeGen);
4041     CommonActionTy Action(
4042         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
4043         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
4044     RCG.setAction(Action);
4045     RCG(CGF);
4046   };
4047 
4048   if (IfCond)
4049     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4050   else {
4051     RegionCodeGenTy ThenRCG(ThenCodeGen);
4052     ThenRCG(CGF);
4053   }
4054 }
4055 
emitTaskLoopCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPLoopDirective & D,llvm::Value * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,const OMPTaskDataTy & Data)4056 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4057                                        const OMPLoopDirective &D,
4058                                        llvm::Value *TaskFunction,
4059                                        QualType SharedsTy, Address Shareds,
4060                                        const Expr *IfCond,
4061                                        const OMPTaskDataTy &Data) {
4062   if (!CGF.HaveInsertPoint())
4063     return;
4064   TaskResultTy Result =
4065       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4066   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
4067   // libcall.
4068   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4069   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4070   // sched, kmp_uint64 grainsize, void *task_dup);
4071   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4072   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4073   llvm::Value *IfVal;
4074   if (IfCond) {
4075     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4076                                       /*isSigned=*/true);
4077   } else
4078     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4079 
4080   LValue LBLVal = CGF.EmitLValueForField(
4081       Result.TDBase,
4082       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4083   auto *LBVar =
4084       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4085   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4086                        /*IsInitializer=*/true);
4087   LValue UBLVal = CGF.EmitLValueForField(
4088       Result.TDBase,
4089       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4090   auto *UBVar =
4091       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4092   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4093                        /*IsInitializer=*/true);
4094   LValue StLVal = CGF.EmitLValueForField(
4095       Result.TDBase,
4096       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4097   auto *StVar =
4098       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4099   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4100                        /*IsInitializer=*/true);
4101   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4102   llvm::Value *TaskArgs[] = {
4103       UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
4104       UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
4105       llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
4106       llvm::ConstantInt::getSigned(
4107           CGF.IntTy, Data.Schedule.getPointer()
4108                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
4109                          : NoSchedule),
4110       Data.Schedule.getPointer()
4111           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4112                                       /*isSigned=*/false)
4113           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4114       Result.TaskDupFn
4115           ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
4116                                                             CGF.VoidPtrTy)
4117           : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4118   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
4119 }
4120 
4121 /// \brief Emit reduction operation for each element of array (required for
4122 /// array sections) LHS op = RHS.
4123 /// \param Type Type of array.
4124 /// \param LHSVar Variable on the left side of the reduction operation
4125 /// (references element of array in original variable).
4126 /// \param RHSVar Variable on the right side of the reduction operation
4127 /// (references element of array in original variable).
4128 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4129 /// RHSVar.
EmitOMPAggregateReduction(CodeGenFunction & CGF,QualType Type,const VarDecl * LHSVar,const VarDecl * RHSVar,const llvm::function_ref<void (CodeGenFunction & CGF,const Expr *,const Expr *,const Expr *)> & RedOpGen,const Expr * XExpr=nullptr,const Expr * EExpr=nullptr,const Expr * UpExpr=nullptr)4130 static void EmitOMPAggregateReduction(
4131     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4132     const VarDecl *RHSVar,
4133     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4134                                   const Expr *, const Expr *)> &RedOpGen,
4135     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4136     const Expr *UpExpr = nullptr) {
4137   // Perform element-by-element initialization.
4138   QualType ElementTy;
4139   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4140   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4141 
4142   // Drill down to the base element type on both arrays.
4143   auto ArrayTy = Type->getAsArrayTypeUnsafe();
4144   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4145 
4146   auto RHSBegin = RHSAddr.getPointer();
4147   auto LHSBegin = LHSAddr.getPointer();
4148   // Cast from pointer to array type to pointer to single element.
4149   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
4150   // The basic structure here is a while-do loop.
4151   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4152   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4153   auto IsEmpty =
4154       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4155   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4156 
4157   // Enter the loop body, making that address the current address.
4158   auto EntryBB = CGF.Builder.GetInsertBlock();
4159   CGF.EmitBlock(BodyBB);
4160 
4161   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4162 
4163   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4164       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4165   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4166   Address RHSElementCurrent =
4167       Address(RHSElementPHI,
4168               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4169 
4170   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4171       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4172   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4173   Address LHSElementCurrent =
4174       Address(LHSElementPHI,
4175               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4176 
4177   // Emit copy.
4178   CodeGenFunction::OMPPrivateScope Scope(CGF);
4179   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
4180   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
4181   Scope.Privatize();
4182   RedOpGen(CGF, XExpr, EExpr, UpExpr);
4183   Scope.ForceCleanup();
4184 
4185   // Shift the address forward by one element.
4186   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4187       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
4188   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4189       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
4190   // Check whether we've reached the end.
4191   auto Done =
4192       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4193   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4194   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4195   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4196 
4197   // Done.
4198   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4199 }
4200 
4201 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4202 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4203 /// UDR combiner function.
emitReductionCombiner(CodeGenFunction & CGF,const Expr * ReductionOp)4204 static void emitReductionCombiner(CodeGenFunction &CGF,
4205                                   const Expr *ReductionOp) {
4206   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
4207     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4208       if (auto *DRE =
4209               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4210         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4211           std::pair<llvm::Function *, llvm::Function *> Reduction =
4212               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4213           RValue Func = RValue::get(Reduction.first);
4214           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4215           CGF.EmitIgnoredExpr(ReductionOp);
4216           return;
4217         }
4218   CGF.EmitIgnoredExpr(ReductionOp);
4219 }
4220 
emitReductionFunction(CodeGenModule & CGM,llvm::Type * ArgsType,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,ArrayRef<const Expr * > ReductionOps)4221 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
4222                                           llvm::Type *ArgsType,
4223                                           ArrayRef<const Expr *> Privates,
4224                                           ArrayRef<const Expr *> LHSExprs,
4225                                           ArrayRef<const Expr *> RHSExprs,
4226                                           ArrayRef<const Expr *> ReductionOps) {
4227   auto &C = CGM.getContext();
4228 
4229   // void reduction_func(void *LHSArg, void *RHSArg);
4230   FunctionArgList Args;
4231   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
4232                            C.VoidPtrTy);
4233   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
4234                            C.VoidPtrTy);
4235   Args.push_back(&LHSArg);
4236   Args.push_back(&RHSArg);
4237   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4238   auto *Fn = llvm::Function::Create(
4239       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
4240       ".omp.reduction.reduction_func", &CGM.getModule());
4241   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
4242   CodeGenFunction CGF(CGM);
4243   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
4244 
4245   // Dst = (void*[n])(LHSArg);
4246   // Src = (void*[n])(RHSArg);
4247   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4248       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4249       ArgsType), CGF.getPointerAlign());
4250   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4251       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4252       ArgsType), CGF.getPointerAlign());
4253 
4254   //  ...
4255   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4256   //  ...
4257   CodeGenFunction::OMPPrivateScope Scope(CGF);
4258   auto IPriv = Privates.begin();
4259   unsigned Idx = 0;
4260   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4261     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4262     Scope.addPrivate(RHSVar, [&]() -> Address {
4263       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
4264     });
4265     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4266     Scope.addPrivate(LHSVar, [&]() -> Address {
4267       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
4268     });
4269     QualType PrivTy = (*IPriv)->getType();
4270     if (PrivTy->isVariablyModifiedType()) {
4271       // Get array size and emit VLA type.
4272       ++Idx;
4273       Address Elem =
4274           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
4275       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4276       auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
4277       auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4278       CodeGenFunction::OpaqueValueMapping OpaqueMap(
4279           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4280       CGF.EmitVariablyModifiedType(PrivTy);
4281     }
4282   }
4283   Scope.Privatize();
4284   IPriv = Privates.begin();
4285   auto ILHS = LHSExprs.begin();
4286   auto IRHS = RHSExprs.begin();
4287   for (auto *E : ReductionOps) {
4288     if ((*IPriv)->getType()->isArrayType()) {
4289       // Emit reduction for array section.
4290       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4291       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4292       EmitOMPAggregateReduction(
4293           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4294           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4295             emitReductionCombiner(CGF, E);
4296           });
4297     } else
4298       // Emit reduction for array subscript or single variable.
4299       emitReductionCombiner(CGF, E);
4300     ++IPriv;
4301     ++ILHS;
4302     ++IRHS;
4303   }
4304   Scope.ForceCleanup();
4305   CGF.FinishFunction();
4306   return Fn;
4307 }
4308 
emitSingleReductionCombiner(CodeGenFunction & CGF,const Expr * ReductionOp,const Expr * PrivateRef,const DeclRefExpr * LHS,const DeclRefExpr * RHS)4309 static void emitSingleReductionCombiner(CodeGenFunction &CGF,
4310                                         const Expr *ReductionOp,
4311                                         const Expr *PrivateRef,
4312                                         const DeclRefExpr *LHS,
4313                                         const DeclRefExpr *RHS) {
4314   if (PrivateRef->getType()->isArrayType()) {
4315     // Emit reduction for array section.
4316     auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4317     auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4318     EmitOMPAggregateReduction(
4319         CGF, PrivateRef->getType(), LHSVar, RHSVar,
4320         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4321           emitReductionCombiner(CGF, ReductionOp);
4322         });
4323   } else
4324     // Emit reduction for array subscript or single variable.
4325     emitReductionCombiner(CGF, ReductionOp);
4326 }
4327 
emitReduction(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,ArrayRef<const Expr * > ReductionOps,bool WithNowait,bool SimpleReduction)4328 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4329                                     ArrayRef<const Expr *> Privates,
4330                                     ArrayRef<const Expr *> LHSExprs,
4331                                     ArrayRef<const Expr *> RHSExprs,
4332                                     ArrayRef<const Expr *> ReductionOps,
4333                                     bool WithNowait, bool SimpleReduction) {
4334   if (!CGF.HaveInsertPoint())
4335     return;
4336   // Next code should be emitted for reduction:
4337   //
4338   // static kmp_critical_name lock = { 0 };
4339   //
4340   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4341   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4342   //  ...
4343   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4344   //  *(Type<n>-1*)rhs[<n>-1]);
4345   // }
4346   //
4347   // ...
4348   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4349   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4350   // RedList, reduce_func, &<lock>)) {
4351   // case 1:
4352   //  ...
4353   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4354   //  ...
4355   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4356   // break;
4357   // case 2:
4358   //  ...
4359   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4360   //  ...
4361   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4362   // break;
4363   // default:;
4364   // }
4365   //
4366   // if SimpleReduction is true, only the next code is generated:
4367   //  ...
4368   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4369   //  ...
4370 
4371   auto &C = CGM.getContext();
4372 
4373   if (SimpleReduction) {
4374     CodeGenFunction::RunCleanupsScope Scope(CGF);
4375     auto IPriv = Privates.begin();
4376     auto ILHS = LHSExprs.begin();
4377     auto IRHS = RHSExprs.begin();
4378     for (auto *E : ReductionOps) {
4379       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4380                                   cast<DeclRefExpr>(*IRHS));
4381       ++IPriv;
4382       ++ILHS;
4383       ++IRHS;
4384     }
4385     return;
4386   }
4387 
4388   // 1. Build a list of reduction variables.
4389   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4390   auto Size = RHSExprs.size();
4391   for (auto *E : Privates) {
4392     if (E->getType()->isVariablyModifiedType())
4393       // Reserve place for array size.
4394       ++Size;
4395   }
4396   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4397   QualType ReductionArrayTy =
4398       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
4399                              /*IndexTypeQuals=*/0);
4400   Address ReductionList =
4401       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4402   auto IPriv = Privates.begin();
4403   unsigned Idx = 0;
4404   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4405     Address Elem =
4406       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
4407     CGF.Builder.CreateStore(
4408         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4409             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
4410         Elem);
4411     if ((*IPriv)->getType()->isVariablyModifiedType()) {
4412       // Store array size.
4413       ++Idx;
4414       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
4415                                              CGF.getPointerSize());
4416       llvm::Value *Size = CGF.Builder.CreateIntCast(
4417           CGF.getVLASize(
4418                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
4419               .first,
4420           CGF.SizeTy, /*isSigned=*/false);
4421       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
4422                               Elem);
4423     }
4424   }
4425 
4426   // 2. Emit reduce_func().
4427   auto *ReductionFn = emitReductionFunction(
4428       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
4429       LHSExprs, RHSExprs, ReductionOps);
4430 
4431   // 3. Create static kmp_critical_name lock = { 0 };
4432   auto *Lock = getCriticalRegionLock(".reduction");
4433 
4434   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4435   // RedList, reduce_func, &<lock>);
4436   auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4437   auto *ThreadId = getThreadID(CGF, Loc);
4438   auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
4439   auto *RL =
4440     CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
4441                                                     CGF.VoidPtrTy);
4442   llvm::Value *Args[] = {
4443       IdentTLoc,                             // ident_t *<loc>
4444       ThreadId,                              // i32 <gtid>
4445       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
4446       ReductionArrayTySize,                  // size_type sizeof(RedList)
4447       RL,                                    // void *RedList
4448       ReductionFn, // void (*) (void *, void *) <reduce_func>
4449       Lock         // kmp_critical_name *&<lock>
4450   };
4451   auto Res = CGF.EmitRuntimeCall(
4452       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
4453                                        : OMPRTL__kmpc_reduce),
4454       Args);
4455 
4456   // 5. Build switch(res)
4457   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
4458   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
4459 
4460   // 6. Build case 1:
4461   //  ...
4462   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4463   //  ...
4464   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4465   // break;
4466   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
4467   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
4468   CGF.EmitBlock(Case1BB);
4469 
4470   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4471   llvm::Value *EndArgs[] = {
4472       IdentTLoc, // ident_t *<loc>
4473       ThreadId,  // i32 <gtid>
4474       Lock       // kmp_critical_name *&<lock>
4475   };
4476   auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
4477       CodeGenFunction &CGF, PrePostActionTy &Action) {
4478     auto IPriv = Privates.begin();
4479     auto ILHS = LHSExprs.begin();
4480     auto IRHS = RHSExprs.begin();
4481     for (auto *E : ReductionOps) {
4482       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4483                                   cast<DeclRefExpr>(*IRHS));
4484       ++IPriv;
4485       ++ILHS;
4486       ++IRHS;
4487     }
4488   };
4489   RegionCodeGenTy RCG(CodeGen);
4490   CommonActionTy Action(
4491       nullptr, llvm::None,
4492       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
4493                                        : OMPRTL__kmpc_end_reduce),
4494       EndArgs);
4495   RCG.setAction(Action);
4496   RCG(CGF);
4497 
4498   CGF.EmitBranch(DefaultBB);
4499 
4500   // 7. Build case 2:
4501   //  ...
4502   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4503   //  ...
4504   // break;
4505   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
4506   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
4507   CGF.EmitBlock(Case2BB);
4508 
4509   auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
4510       CodeGenFunction &CGF, PrePostActionTy &Action) {
4511     auto ILHS = LHSExprs.begin();
4512     auto IRHS = RHSExprs.begin();
4513     auto IPriv = Privates.begin();
4514     for (auto *E : ReductionOps) {
4515       const Expr *XExpr = nullptr;
4516       const Expr *EExpr = nullptr;
4517       const Expr *UpExpr = nullptr;
4518       BinaryOperatorKind BO = BO_Comma;
4519       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
4520         if (BO->getOpcode() == BO_Assign) {
4521           XExpr = BO->getLHS();
4522           UpExpr = BO->getRHS();
4523         }
4524       }
4525       // Try to emit update expression as a simple atomic.
4526       auto *RHSExpr = UpExpr;
4527       if (RHSExpr) {
4528         // Analyze RHS part of the whole expression.
4529         if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
4530                 RHSExpr->IgnoreParenImpCasts())) {
4531           // If this is a conditional operator, analyze its condition for
4532           // min/max reduction operator.
4533           RHSExpr = ACO->getCond();
4534         }
4535         if (auto *BORHS =
4536                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
4537           EExpr = BORHS->getRHS();
4538           BO = BORHS->getOpcode();
4539         }
4540       }
4541       if (XExpr) {
4542         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4543         auto &&AtomicRedGen = [BO, VD, IPriv,
4544                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
4545                                     const Expr *EExpr, const Expr *UpExpr) {
4546           LValue X = CGF.EmitLValue(XExpr);
4547           RValue E;
4548           if (EExpr)
4549             E = CGF.EmitAnyExpr(EExpr);
4550           CGF.EmitOMPAtomicSimpleUpdateExpr(
4551               X, E, BO, /*IsXLHSInRHSPart=*/true,
4552               llvm::AtomicOrdering::Monotonic, Loc,
4553               [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
4554                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4555                 PrivateScope.addPrivate(
4556                     VD, [&CGF, VD, XRValue, Loc]() -> Address {
4557                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
4558                       CGF.emitOMPSimpleStore(
4559                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
4560                           VD->getType().getNonReferenceType(), Loc);
4561                       return LHSTemp;
4562                     });
4563                 (void)PrivateScope.Privatize();
4564                 return CGF.EmitAnyExpr(UpExpr);
4565               });
4566         };
4567         if ((*IPriv)->getType()->isArrayType()) {
4568           // Emit atomic reduction for array section.
4569           auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4570           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
4571                                     AtomicRedGen, XExpr, EExpr, UpExpr);
4572         } else
4573           // Emit atomic reduction for array subscript or single variable.
4574           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
4575       } else {
4576         // Emit as a critical region.
4577         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
4578                                      const Expr *, const Expr *) {
4579           auto &RT = CGF.CGM.getOpenMPRuntime();
4580           RT.emitCriticalRegion(
4581               CGF, ".atomic_reduction",
4582               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
4583                 Action.Enter(CGF);
4584                 emitReductionCombiner(CGF, E);
4585               },
4586               Loc);
4587         };
4588         if ((*IPriv)->getType()->isArrayType()) {
4589           auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4590           auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4591           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4592                                     CritRedGen);
4593         } else
4594           CritRedGen(CGF, nullptr, nullptr, nullptr);
4595       }
4596       ++ILHS;
4597       ++IRHS;
4598       ++IPriv;
4599     }
4600   };
4601   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
4602   if (!WithNowait) {
4603     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
4604     llvm::Value *EndArgs[] = {
4605         IdentTLoc, // ident_t *<loc>
4606         ThreadId,  // i32 <gtid>
4607         Lock       // kmp_critical_name *&<lock>
4608     };
4609     CommonActionTy Action(nullptr, llvm::None,
4610                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
4611                           EndArgs);
4612     AtomicRCG.setAction(Action);
4613     AtomicRCG(CGF);
4614   } else
4615     AtomicRCG(CGF);
4616 
4617   CGF.EmitBranch(DefaultBB);
4618   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
4619 }
4620 
emitTaskwaitCall(CodeGenFunction & CGF,SourceLocation Loc)4621 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
4622                                        SourceLocation Loc) {
4623   if (!CGF.HaveInsertPoint())
4624     return;
4625   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
4626   // global_tid);
4627   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
4628   // Ignore return result until untied tasks are supported.
4629   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
4630   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4631     Region->emitUntiedSwitch(CGF);
4632 }
4633 
emitInlinedDirective(CodeGenFunction & CGF,OpenMPDirectiveKind InnerKind,const RegionCodeGenTy & CodeGen,bool HasCancel)4634 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
4635                                            OpenMPDirectiveKind InnerKind,
4636                                            const RegionCodeGenTy &CodeGen,
4637                                            bool HasCancel) {
4638   if (!CGF.HaveInsertPoint())
4639     return;
4640   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
4641   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
4642 }
4643 
namespace {
/// \brief Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls (see
/// getCancellationKind() for the mapping from directive kinds).
// NOTE(review): the numeric values presumably have to match the runtime
// library's own cancellation enumeration - confirm against the runtime headers.
enum RTCancelKind {
  /// No cancellation kind; only used as an initial value.
  CancelNoreq = 0,
  /// Cancellation of a 'parallel' region.
  CancelParallel = 1,
  /// Cancellation of a 'for' region.
  CancelLoop = 2,
  /// Cancellation of a 'sections' region.
  CancelSections = 3,
  /// Cancellation of a 'taskgroup' region.
  CancelTaskgroup = 4
};
} // anonymous namespace
4653 
getCancellationKind(OpenMPDirectiveKind CancelRegion)4654 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
4655   RTCancelKind CancelKind = CancelNoreq;
4656   if (CancelRegion == OMPD_parallel)
4657     CancelKind = CancelParallel;
4658   else if (CancelRegion == OMPD_for)
4659     CancelKind = CancelLoop;
4660   else if (CancelRegion == OMPD_sections)
4661     CancelKind = CancelSections;
4662   else {
4663     assert(CancelRegion == OMPD_taskgroup);
4664     CancelKind = CancelTaskgroup;
4665   }
4666   return CancelKind;
4667 }
4668 
emitCancellationPointCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind CancelRegion)4669 void CGOpenMPRuntime::emitCancellationPointCall(
4670     CodeGenFunction &CGF, SourceLocation Loc,
4671     OpenMPDirectiveKind CancelRegion) {
4672   if (!CGF.HaveInsertPoint())
4673     return;
4674   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
4675   // global_tid, kmp_int32 cncl_kind);
4676   if (auto *OMPRegionInfo =
4677           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4678     if (OMPRegionInfo->hasCancel()) {
4679       llvm::Value *Args[] = {
4680           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
4681           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4682       // Ignore return result until untied tasks are supported.
4683       auto *Result = CGF.EmitRuntimeCall(
4684           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
4685       // if (__kmpc_cancellationpoint()) {
4686       //  __kmpc_cancel_barrier();
4687       //   exit from construct;
4688       // }
4689       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4690       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4691       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4692       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4693       CGF.EmitBlock(ExitBB);
4694       // __kmpc_cancel_barrier();
4695       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4696       // exit from construct;
4697       auto CancelDest =
4698           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4699       CGF.EmitBranchThroughCleanup(CancelDest);
4700       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4701     }
4702   }
4703 }
4704 
emitCancelCall(CodeGenFunction & CGF,SourceLocation Loc,const Expr * IfCond,OpenMPDirectiveKind CancelRegion)4705 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
4706                                      const Expr *IfCond,
4707                                      OpenMPDirectiveKind CancelRegion) {
4708   if (!CGF.HaveInsertPoint())
4709     return;
4710   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
4711   // kmp_int32 cncl_kind);
4712   if (auto *OMPRegionInfo =
4713           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4714     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
4715                                                         PrePostActionTy &) {
4716       auto &RT = CGF.CGM.getOpenMPRuntime();
4717       llvm::Value *Args[] = {
4718           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
4719           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4720       // Ignore return result until untied tasks are supported.
4721       auto *Result = CGF.EmitRuntimeCall(
4722           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
4723       // if (__kmpc_cancel()) {
4724       //  __kmpc_cancel_barrier();
4725       //   exit from construct;
4726       // }
4727       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4728       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4729       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4730       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4731       CGF.EmitBlock(ExitBB);
4732       // __kmpc_cancel_barrier();
4733       RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4734       // exit from construct;
4735       auto CancelDest =
4736           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4737       CGF.EmitBranchThroughCleanup(CancelDest);
4738       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4739     };
4740     if (IfCond)
4741       emitOMPIfClause(CGF, IfCond, ThenGen,
4742                       [](CodeGenFunction &, PrePostActionTy &) {});
4743     else {
4744       RegionCodeGenTy ThenRCG(ThenGen);
4745       ThenRCG(CGF);
4746     }
4747   }
4748 }
4749 
4750 /// \brief Obtain information that uniquely identifies a target entry. This
4751 /// consists of the file and device IDs as well as line number associated with
4752 /// the relevant entry source location.
getTargetEntryUniqueInfo(ASTContext & C,SourceLocation Loc,unsigned & DeviceID,unsigned & FileID,unsigned & LineNum)4753 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
4754                                      unsigned &DeviceID, unsigned &FileID,
4755                                      unsigned &LineNum) {
4756 
4757   auto &SM = C.getSourceManager();
4758 
4759   // The loc should be always valid and have a file ID (the user cannot use
4760   // #pragma directives in macros)
4761 
4762   assert(Loc.isValid() && "Source location is expected to be always valid.");
4763   assert(Loc.isFileID() && "Source location is expected to refer to a file.");
4764 
4765   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
4766   assert(PLoc.isValid() && "Source location is expected to be always valid.");
4767 
4768   llvm::sys::fs::UniqueID ID;
4769   if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
4770     llvm_unreachable("Source file with target region no longer exists!");
4771 
4772   DeviceID = ID.getDevice();
4773   FileID = ID.getFile();
4774   LineNum = PLoc.getLine();
4775 }
4776 
/// \brief Emit the outlined function for the target directive \a D.
///
/// Checks that \a ParentName is not empty and forwards all arguments
/// unchanged to emitTargetOutlinedFunctionHelper(), which sets \a OutlinedFn
/// and, for offload entries, \a OutlinedFnID.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // The parent name becomes part of the generated entry function name.
  assert(!ParentName.empty() && "Invalid target region parent name!");

  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
4786 
emitTargetOutlinedFunctionHelper(const OMPExecutableDirective & D,StringRef ParentName,llvm::Function * & OutlinedFn,llvm::Constant * & OutlinedFnID,bool IsOffloadEntry,const RegionCodeGenTy & CodeGen)4787 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
4788     const OMPExecutableDirective &D, StringRef ParentName,
4789     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
4790     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
4791   // Create a unique name for the entry function using the source location
4792   // information of the current target region. The name will be something like:
4793   //
4794   // __omp_offloading_DD_FFFF_PP_lBB
4795   //
4796   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
4797   // mangled name of the function that encloses the target region and BB is the
4798   // line number of the target region.
4799 
4800   unsigned DeviceID;
4801   unsigned FileID;
4802   unsigned Line;
4803   getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
4804                            Line);
4805   SmallString<64> EntryFnName;
4806   {
4807     llvm::raw_svector_ostream OS(EntryFnName);
4808     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
4809        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
4810   }
4811 
4812   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4813 
4814   CodeGenFunction CGF(CGM, true);
4815   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
4816   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4817 
4818   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
4819 
4820   // If this target outline function is not an offload entry, we don't need to
4821   // register it.
4822   if (!IsOffloadEntry)
4823     return;
4824 
4825   // The target region ID is used by the runtime library to identify the current
4826   // target region, so it only has to be unique and not necessarily point to
4827   // anything. It could be the pointer to the outlined function that implements
4828   // the target region, but we aren't using that so that the compiler doesn't
4829   // need to keep that, and could therefore inline the host function if proven
4830   // worthwhile during optimization. In the other hand, if emitting code for the
4831   // device, the ID has to be the function address so that it can retrieved from
4832   // the offloading entry and launched by the runtime library. We also mark the
4833   // outlined function to have external linkage in case we are emitting code for
4834   // the device, because these functions will be entry points to the device.
4835 
4836   if (CGM.getLangOpts().OpenMPIsDevice) {
4837     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
4838     OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
4839   } else
4840     OutlinedFnID = new llvm::GlobalVariable(
4841         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
4842         llvm::GlobalValue::PrivateLinkage,
4843         llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
4844 
4845   // Register the information for the entry associated with this target region.
4846   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
4847       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
4848 }
4849 
4850 /// discard all CompoundStmts intervening between two constructs
ignoreCompoundStmts(const Stmt * Body)4851 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
4852   while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
4853     Body = CS->body_front();
4854 
4855   return Body;
4856 }
4857 
4858 /// \brief Emit the num_teams clause of an enclosed teams directive at the
4859 /// target region scope. If there is no teams directive associated with the
4860 /// target directive, or if there is no num_teams clause associated with the
4861 /// enclosed teams directive, return nullptr.
4862 static llvm::Value *
emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime & OMPRuntime,CodeGenFunction & CGF,const OMPExecutableDirective & D)4863 emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4864                                      CodeGenFunction &CGF,
4865                                      const OMPExecutableDirective &D) {
4866 
4867   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4868                                               "teams directive expected to be "
4869                                               "emitted only for the host!");
4870 
4871   // FIXME: For the moment we do not support combined directives with target and
4872   // teams, so we do not expect to get any num_teams clause in the provided
4873   // directive. Once we support that, this assertion can be replaced by the
4874   // actual emission of the clause expression.
4875   assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
4876          "Not expecting clause in directive.");
4877 
4878   // If the current target region has a teams region enclosed, we need to get
4879   // the number of teams to pass to the runtime function call. This is done
4880   // by generating the expression in a inlined region. This is required because
4881   // the expression is captured in the enclosing target environment when the
4882   // teams directive is not combined with target.
4883 
4884   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4885 
4886   // FIXME: Accommodate other combined directives with teams when they become
4887   // available.
4888   if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
4889           ignoreCompoundStmts(CS.getCapturedStmt()))) {
4890     if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
4891       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4892       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4893       llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
4894       return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
4895                                        /*IsSigned=*/true);
4896     }
4897 
4898     // If we have an enclosed teams directive but no num_teams clause we use
4899     // the default value 0.
4900     return CGF.Builder.getInt32(0);
4901   }
4902 
4903   // No teams associated with the directive.
4904   return nullptr;
4905 }
4906 
4907 /// \brief Emit the thread_limit clause of an enclosed teams directive at the
4908 /// target region scope. If there is no teams directive associated with the
4909 /// target directive, or if there is no thread_limit clause associated with the
4910 /// enclosed teams directive, return nullptr.
4911 static llvm::Value *
emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime & OMPRuntime,CodeGenFunction & CGF,const OMPExecutableDirective & D)4912 emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4913                                         CodeGenFunction &CGF,
4914                                         const OMPExecutableDirective &D) {
4915 
4916   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4917                                               "teams directive expected to be "
4918                                               "emitted only for the host!");
4919 
4920   // FIXME: For the moment we do not support combined directives with target and
4921   // teams, so we do not expect to get any thread_limit clause in the provided
4922   // directive. Once we support that, this assertion can be replaced by the
4923   // actual emission of the clause expression.
4924   assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
4925          "Not expecting clause in directive.");
4926 
4927   // If the current target region has a teams region enclosed, we need to get
4928   // the thread limit to pass to the runtime function call. This is done
4929   // by generating the expression in a inlined region. This is required because
4930   // the expression is captured in the enclosing target environment when the
4931   // teams directive is not combined with target.
4932 
4933   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4934 
4935   // FIXME: Accommodate other combined directives with teams when they become
4936   // available.
4937   if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
4938           ignoreCompoundStmts(CS.getCapturedStmt()))) {
4939     if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
4940       CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4941       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4942       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
4943       return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
4944                                        /*IsSigned=*/true);
4945     }
4946 
4947     // If we have an enclosed teams directive but no thread_limit clause we use
4948     // the default value 0.
4949     return CGF.Builder.getInt32(0);
4950   }
4951 
4952   // No teams associated with the directive.
4953   return nullptr;
4954 }
4955 
4956 namespace {
4957 // \brief Utility to handle information from clauses associated with a given
4958 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
4959 // It provides a convenient interface to obtain the information and generate
4960 // code for that information.
4961 class MappableExprsHandler {
4962 public:
  /// \brief Values for bit flags used to specify the mapping type for
  /// offloading. The flags are combined with bitwise OR (see getMapTypeBits()).
  enum OpenMPOffloadMappingFlags {
    /// \brief Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// \brief Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// \brief Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// \brief Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// \brief The element being mapped is a pointer, therefore the pointee
    /// should be mapped as well.
    OMP_MAP_IS_PTR = 0x10,
    /// \brief This flag signals that an argument is the first one relating to
    /// a map/private clause expression. In some cases a single
    /// map/privatization results in multiple arguments being passed to the
    /// runtime library.
    OMP_MAP_FIRST_REF = 0x20,
    /// \brief This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE_PTR = 0x80,
    /// \brief Pass the element to the device by value.
    OMP_MAP_PRIVATE_VAL = 0x100,
  };
4990 
4991   typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
4992   typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
4993 
4994 private:
4995   /// \brief Directive from where the map clauses were extracted.
4996   const OMPExecutableDirective &Directive;
4997 
4998   /// \brief Function the directive is being generated for.
4999   CodeGenFunction &CGF;
5000 
5001   /// \brief Set of all first private variables in the current directive.
5002   llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
5003 
getExprTypeSize(const Expr * E) const5004   llvm::Value *getExprTypeSize(const Expr *E) const {
5005     auto ExprTy = E->getType().getCanonicalType();
5006 
5007     // Reference types are ignored for mapping purposes.
5008     if (auto *RefTy = ExprTy->getAs<ReferenceType>())
5009       ExprTy = RefTy->getPointeeType().getCanonicalType();
5010 
5011     // Given that an array section is considered a built-in type, we need to
5012     // do the calculation based on the length of the section instead of relying
5013     // on CGF.getTypeSize(E->getType()).
5014     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
5015       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
5016                             OAE->getBase()->IgnoreParenImpCasts())
5017                             .getCanonicalType();
5018 
5019       // If there is no length associated with the expression, that means we
5020       // are using the whole length of the base.
5021       if (!OAE->getLength() && OAE->getColonLoc().isValid())
5022         return CGF.getTypeSize(BaseTy);
5023 
5024       llvm::Value *ElemSize;
5025       if (auto *PTy = BaseTy->getAs<PointerType>())
5026         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
5027       else {
5028         auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
5029         assert(ATy && "Expecting array type if not a pointer type.");
5030         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
5031       }
5032 
5033       // If we don't have a length at this point, that is because we have an
5034       // array section with a single element.
5035       if (!OAE->getLength())
5036         return ElemSize;
5037 
5038       auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
5039       LengthVal =
5040           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
5041       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
5042     }
5043     return CGF.getTypeSize(ExprTy);
5044   }
5045 
5046   /// \brief Return the corresponding bits for a given map clause modifier. Add
5047   /// a flag marking the map as a pointer if requested. Add a flag marking the
5048   /// map as the first one of a series of maps that relate to the same map
5049   /// expression.
getMapTypeBits(OpenMPMapClauseKind MapType,OpenMPMapClauseKind MapTypeModifier,bool AddPtrFlag,bool AddIsFirstFlag) const5050   unsigned getMapTypeBits(OpenMPMapClauseKind MapType,
5051                           OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
5052                           bool AddIsFirstFlag) const {
5053     unsigned Bits = 0u;
5054     switch (MapType) {
5055     case OMPC_MAP_alloc:
5056     case OMPC_MAP_release:
5057       // alloc and release is the default behavior in the runtime library,  i.e.
5058       // if we don't pass any bits alloc/release that is what the runtime is
5059       // going to do. Therefore, we don't need to signal anything for these two
5060       // type modifiers.
5061       break;
5062     case OMPC_MAP_to:
5063       Bits = OMP_MAP_TO;
5064       break;
5065     case OMPC_MAP_from:
5066       Bits = OMP_MAP_FROM;
5067       break;
5068     case OMPC_MAP_tofrom:
5069       Bits = OMP_MAP_TO | OMP_MAP_FROM;
5070       break;
5071     case OMPC_MAP_delete:
5072       Bits = OMP_MAP_DELETE;
5073       break;
5074     default:
5075       llvm_unreachable("Unexpected map type!");
5076       break;
5077     }
5078     if (AddPtrFlag)
5079       Bits |= OMP_MAP_IS_PTR;
5080     if (AddIsFirstFlag)
5081       Bits |= OMP_MAP_FIRST_REF;
5082     if (MapTypeModifier == OMPC_MAP_always)
5083       Bits |= OMP_MAP_ALWAYS;
5084     return Bits;
5085   }
5086 
5087   /// \brief Return true if the provided expression is a final array section. A
5088   /// final array section, is one whose length can't be proved to be one.
isFinalArraySectionExpression(const Expr * E) const5089   bool isFinalArraySectionExpression(const Expr *E) const {
5090     auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
5091 
5092     // It is not an array section and therefore not a unity-size one.
5093     if (!OASE)
5094       return false;
5095 
5096     // An array section with no colon always refer to a single element.
5097     if (OASE->getColonLoc().isInvalid())
5098       return false;
5099 
5100     auto *Length = OASE->getLength();
5101 
5102     // If we don't have a length we have to check if the array has size 1
5103     // for this dimension. Also, we should always expect a length if the
5104     // base type is pointer.
5105     if (!Length) {
5106       auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
5107                          OASE->getBase()->IgnoreParenImpCasts())
5108                          .getCanonicalType();
5109       if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
5110         return ATy->getSize().getSExtValue() != 1;
5111       // If we don't have a constant dimension length, we have to consider
5112       // the current section as having any size, so it is not necessarily
5113       // unitary. If it happen to be unity size, that's user fault.
5114       return true;
5115     }
5116 
5117     // Check if the length evaluates to 1.
5118     llvm::APSInt ConstLength;
5119     if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
5120       return true; // Can have more that size 1.
5121 
5122     return ConstLength.getSExtValue() != 1;
5123   }
5124 
5125   /// \brief Generate the base pointers, section pointers, sizes and map type
5126   /// bits for the provided map type, map modifier, and expression components.
5127   /// \a IsFirstComponent should be set to true if the provided set of
5128   /// components is the first associated with a capture.
generateInfoForComponentList(OpenMPMapClauseKind MapType,OpenMPMapClauseKind MapTypeModifier,OMPClauseMappableExprCommon::MappableExprComponentListRef Components,MapValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types,bool IsFirstComponentList) const5129   void generateInfoForComponentList(
5130       OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
5131       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
5132       MapValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
5133       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
5134       bool IsFirstComponentList) const {
5135 
5136     // The following summarizes what has to be generated for each map and the
5137     // types bellow. The generated information is expressed in this order:
5138     // base pointer, section pointer, size, flags
5139     // (to add to the ones that come from the map type and modifier).
5140     //
5141     // double d;
5142     // int i[100];
5143     // float *p;
5144     //
5145     // struct S1 {
5146     //   int i;
5147     //   float f[50];
5148     // }
5149     // struct S2 {
5150     //   int i;
5151     //   float f[50];
5152     //   S1 s;
5153     //   double *p;
5154     //   struct S2 *ps;
5155     // }
5156     // S2 s;
5157     // S2 *ps;
5158     //
5159     // map(d)
5160     // &d, &d, sizeof(double), noflags
5161     //
5162     // map(i)
5163     // &i, &i, 100*sizeof(int), noflags
5164     //
5165     // map(i[1:23])
5166     // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
5167     //
5168     // map(p)
5169     // &p, &p, sizeof(float*), noflags
5170     //
5171     // map(p[1:24])
5172     // p, &p[1], 24*sizeof(float), noflags
5173     //
5174     // map(s)
5175     // &s, &s, sizeof(S2), noflags
5176     //
5177     // map(s.i)
5178     // &s, &(s.i), sizeof(int), noflags
5179     //
5180     // map(s.s.f)
5181     // &s, &(s.i.f), 50*sizeof(int), noflags
5182     //
5183     // map(s.p)
5184     // &s, &(s.p), sizeof(double*), noflags
5185     //
5186     // map(s.p[:22], s.a s.b)
5187     // &s, &(s.p), sizeof(double*), noflags
5188     // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
5189     //
5190     // map(s.ps)
5191     // &s, &(s.ps), sizeof(S2*), noflags
5192     //
5193     // map(s.ps->s.i)
5194     // &s, &(s.ps), sizeof(S2*), noflags
5195     // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
5196     //
5197     // map(s.ps->ps)
5198     // &s, &(s.ps), sizeof(S2*), noflags
5199     // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
5200     //
5201     // map(s.ps->ps->ps)
5202     // &s, &(s.ps), sizeof(S2*), noflags
5203     // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
5204     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5205     //
5206     // map(s.ps->ps->s.f[:22])
5207     // &s, &(s.ps), sizeof(S2*), noflags
5208     // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
5209     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
5210     //
5211     // map(ps)
5212     // &ps, &ps, sizeof(S2*), noflags
5213     //
5214     // map(ps->i)
5215     // ps, &(ps->i), sizeof(int), noflags
5216     //
5217     // map(ps->s.f)
5218     // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
5219     //
5220     // map(ps->p)
5221     // ps, &(ps->p), sizeof(double*), noflags
5222     //
5223     // map(ps->p[:22])
5224     // ps, &(ps->p), sizeof(double*), noflags
5225     // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
5226     //
5227     // map(ps->ps)
5228     // ps, &(ps->ps), sizeof(S2*), noflags
5229     //
5230     // map(ps->ps->s.i)
5231     // ps, &(ps->ps), sizeof(S2*), noflags
5232     // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
5233     //
5234     // map(ps->ps->ps)
5235     // ps, &(ps->ps), sizeof(S2*), noflags
5236     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5237     //
5238     // map(ps->ps->ps->ps)
5239     // ps, &(ps->ps), sizeof(S2*), noflags
5240     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5241     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5242     //
5243     // map(ps->ps->ps->s.f[:22])
5244     // ps, &(ps->ps), sizeof(S2*), noflags
5245     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5246     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
5247     // extra_flag
5248 
5249     // Track if the map information being generated is the first for a capture.
5250     bool IsCaptureFirstInfo = IsFirstComponentList;
5251 
5252     // Scan the components from the base to the complete expression.
5253     auto CI = Components.rbegin();
5254     auto CE = Components.rend();
5255     auto I = CI;
5256 
5257     // Track if the map information being generated is the first for a list of
5258     // components.
5259     bool IsExpressionFirstInfo = true;
5260     llvm::Value *BP = nullptr;
5261 
5262     if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
5263       // The base is the 'this' pointer. The content of the pointer is going
5264       // to be the base of the field being mapped.
5265       BP = CGF.EmitScalarExpr(ME->getBase());
5266     } else {
5267       // The base is the reference to the variable.
5268       // BP = &Var.
5269       BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression()))
5270                .getPointer();
5271 
5272       // If the variable is a pointer and is being dereferenced (i.e. is not
5273       // the last component), the base has to be the pointer itself, not its
5274       // reference.
5275       if (I->getAssociatedDeclaration()->getType()->isAnyPointerType() &&
5276           std::next(I) != CE) {
5277         auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(
5278             BP, I->getAssociatedDeclaration()->getType());
5279         BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
5280                                          I->getAssociatedDeclaration()
5281                                              ->getType()
5282                                              ->getAs<PointerType>())
5283                  .getPointer();
5284 
5285         // We do not need to generate individual map information for the
5286         // pointer, it can be associated with the combined storage.
5287         ++I;
5288       }
5289     }
5290 
5291     for (; I != CE; ++I) {
5292       auto Next = std::next(I);
5293 
5294       // We need to generate the addresses and sizes if this is the last
5295       // component, if the component is a pointer or if it is an array section
5296       // whose length can't be proved to be one. If this is a pointer, it
5297       // becomes the base address for the following components.
5298 
5299       // A final array section, is one whose length can't be proved to be one.
5300       bool IsFinalArraySection =
5301           isFinalArraySectionExpression(I->getAssociatedExpression());
5302 
5303       // Get information on whether the element is a pointer. Have to do a
5304       // special treatment for array sections given that they are built-in
5305       // types.
5306       const auto *OASE =
5307           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
5308       bool IsPointer =
5309           (OASE &&
5310            OMPArraySectionExpr::getBaseOriginalType(OASE)
5311                .getCanonicalType()
5312                ->isAnyPointerType()) ||
5313           I->getAssociatedExpression()->getType()->isAnyPointerType();
5314 
5315       if (Next == CE || IsPointer || IsFinalArraySection) {
5316 
5317         // If this is not the last component, we expect the pointer to be
5318         // associated with an array expression or member expression.
5319         assert((Next == CE ||
5320                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
5321                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
5322                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
5323                "Unexpected expression");
5324 
5325         // Save the base we are currently using.
5326         BasePointers.push_back(BP);
5327 
5328         auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer();
5329         auto *Size = getExprTypeSize(I->getAssociatedExpression());
5330 
5331         Pointers.push_back(LB);
5332         Sizes.push_back(Size);
5333         // We need to add a pointer flag for each map that comes from the
5334         // same expression except for the first one. We also need to signal
5335         // this map is the first one that relates with the current capture
5336         // (there is a set of entries for each capture).
5337         Types.push_back(getMapTypeBits(MapType, MapTypeModifier,
5338                                        !IsExpressionFirstInfo,
5339                                        IsCaptureFirstInfo));
5340 
5341         // If we have a final array section, we are done with this expression.
5342         if (IsFinalArraySection)
5343           break;
5344 
5345         // The pointer becomes the base for the next element.
5346         if (Next != CE)
5347           BP = LB;
5348 
5349         IsExpressionFirstInfo = false;
5350         IsCaptureFirstInfo = false;
5351         continue;
5352       }
5353     }
5354   }
5355 
5356   /// \brief Return the adjusted map modifiers if the declaration a capture
5357   /// refers to appears in a first-private clause. This is expected to be used
5358   /// only with directives that start with 'target'.
adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture & Cap,unsigned CurrentModifiers)5359   unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
5360                                                unsigned CurrentModifiers) {
5361     assert(Cap.capturesVariable() && "Expected capture by reference only!");
5362 
5363     // A first private variable captured by reference will use only the
5364     // 'private ptr' and 'map to' flag. Return the right flags if the captured
5365     // declaration is known as first-private in this handler.
5366     if (FirstPrivateDecls.count(Cap.getCapturedVar()))
5367       return MappableExprsHandler::OMP_MAP_PRIVATE_PTR |
5368              MappableExprsHandler::OMP_MAP_TO;
5369 
5370     // We didn't modify anything.
5371     return CurrentModifiers;
5372   }
5373 
5374 public:
MappableExprsHandler(const OMPExecutableDirective & Dir,CodeGenFunction & CGF)5375   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
5376       : Directive(Dir), CGF(CGF) {
5377     // Extract firstprivate clause information.
5378     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
5379       for (const auto *D : C->varlists())
5380         FirstPrivateDecls.insert(
5381             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
5382   }
5383 
5384   /// \brief Generate all the base pointers, section pointers, sizes and map
5385   /// types for the extracted mappable expressions.
generateAllInfo(MapValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types) const5386   void generateAllInfo(MapValuesArrayTy &BasePointers,
5387                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
5388                        MapFlagsArrayTy &Types) const {
5389     BasePointers.clear();
5390     Pointers.clear();
5391     Sizes.clear();
5392     Types.clear();
5393 
5394     struct MapInfo {
5395       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
5396       OpenMPMapClauseKind MapType;
5397       OpenMPMapClauseKind MapTypeModifier;
5398     };
5399 
5400     // We have to process the component lists that relate with the same
5401     // declaration in a single chunk so that we can generate the map flags
5402     // correctly. Therefore, we organize all lists in a map.
5403     llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
5404 
5405     // Helper function to fill the information map for the different supported
5406     // clauses.
5407     auto &&InfoGen =
5408         [&Info](const ValueDecl *D,
5409                 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
5410                 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier) {
5411           const ValueDecl *VD =
5412               D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
5413           Info[VD].push_back({L, MapType, MapModifier});
5414         };
5415 
5416     for (auto *C : Directive.getClausesOfKind<OMPMapClause>())
5417       for (auto L : C->component_lists())
5418         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier());
5419     for (auto *C : Directive.getClausesOfKind<OMPToClause>())
5420       for (auto L : C->component_lists())
5421         InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown);
5422     for (auto *C : Directive.getClausesOfKind<OMPFromClause>())
5423       for (auto L : C->component_lists())
5424         InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown);
5425 
5426     for (auto &M : Info) {
5427       // We need to know when we generate information for the first component
5428       // associated with a capture, because the mapping flags depend on it.
5429       bool IsFirstComponentList = true;
5430       for (MapInfo &L : M.second) {
5431         assert(!L.Components.empty() &&
5432                "Not expecting declaration with no component lists.");
5433         generateInfoForComponentList(L.MapType, L.MapTypeModifier, L.Components,
5434                                      BasePointers, Pointers, Sizes, Types,
5435                                      IsFirstComponentList);
5436         IsFirstComponentList = false;
5437       }
5438     }
5439   }
5440 
5441   /// \brief Generate the base pointers, section pointers, sizes and map types
5442   /// associated to a given capture.
generateInfoForCapture(const CapturedStmt::Capture * Cap,MapValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types) const5443   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
5444                               MapValuesArrayTy &BasePointers,
5445                               MapValuesArrayTy &Pointers,
5446                               MapValuesArrayTy &Sizes,
5447                               MapFlagsArrayTy &Types) const {
5448     assert(!Cap->capturesVariableArrayType() &&
5449            "Not expecting to generate map info for a variable array type!");
5450 
5451     BasePointers.clear();
5452     Pointers.clear();
5453     Sizes.clear();
5454     Types.clear();
5455 
5456     const ValueDecl *VD =
5457         Cap->capturesThis()
5458             ? nullptr
5459             : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());
5460 
5461     // We need to know when we generating information for the first component
5462     // associated with a capture, because the mapping flags depend on it.
5463     bool IsFirstComponentList = true;
5464     for (auto *C : Directive.getClausesOfKind<OMPMapClause>())
5465       for (auto L : C->decl_component_lists(VD)) {
5466         assert(L.first == VD &&
5467                "We got information for the wrong declaration??");
5468         assert(!L.second.empty() &&
5469                "Not expecting declaration with no component lists.");
5470         generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
5471                                      L.second, BasePointers, Pointers, Sizes,
5472                                      Types, IsFirstComponentList);
5473         IsFirstComponentList = false;
5474       }
5475 
5476     return;
5477   }
5478 
5479   /// \brief Generate the default map information for a given capture \a CI,
5480   /// record field declaration \a RI and captured value \a CV.
generateDefaultMapInfo(const CapturedStmt::Capture & CI,const FieldDecl & RI,llvm::Value * CV,MappableExprsHandler::MapValuesArrayTy & CurBasePointers,MappableExprsHandler::MapValuesArrayTy & CurPointers,MappableExprsHandler::MapValuesArrayTy & CurSizes,MappableExprsHandler::MapFlagsArrayTy & CurMapTypes)5481   void generateDefaultMapInfo(
5482       const CapturedStmt::Capture &CI, const FieldDecl &RI, llvm::Value *CV,
5483       MappableExprsHandler::MapValuesArrayTy &CurBasePointers,
5484       MappableExprsHandler::MapValuesArrayTy &CurPointers,
5485       MappableExprsHandler::MapValuesArrayTy &CurSizes,
5486       MappableExprsHandler::MapFlagsArrayTy &CurMapTypes) {
5487 
5488     // Do the default mapping.
5489     if (CI.capturesThis()) {
5490       CurBasePointers.push_back(CV);
5491       CurPointers.push_back(CV);
5492       const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
5493       CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
5494       // Default map type.
5495       CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_TO |
5496                             MappableExprsHandler::OMP_MAP_FROM);
5497     } else if (CI.capturesVariableByCopy()) {
5498       CurBasePointers.push_back(CV);
5499       CurPointers.push_back(CV);
5500       if (!RI.getType()->isAnyPointerType()) {
5501         // We have to signal to the runtime captures passed by value that are
5502         // not pointers.
5503         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL);
5504         CurSizes.push_back(CGF.getTypeSize(RI.getType()));
5505       } else {
5506         // Pointers are implicitly mapped with a zero size and no flags
5507         // (other than first map that is added for all implicit maps).
5508         CurMapTypes.push_back(0u);
5509         CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
5510       }
5511     } else {
5512       assert(CI.capturesVariable() && "Expected captured reference.");
5513       CurBasePointers.push_back(CV);
5514       CurPointers.push_back(CV);
5515 
5516       const ReferenceType *PtrTy =
5517           cast<ReferenceType>(RI.getType().getTypePtr());
5518       QualType ElementType = PtrTy->getPointeeType();
5519       CurSizes.push_back(CGF.getTypeSize(ElementType));
5520       // The default map type for a scalar/complex type is 'to' because by
5521       // default the value doesn't have to be retrieved. For an aggregate
5522       // type, the default is 'tofrom'.
5523       CurMapTypes.push_back(ElementType->isAggregateType()
5524                                 ? (MappableExprsHandler::OMP_MAP_TO |
5525                                    MappableExprsHandler::OMP_MAP_FROM)
5526                                 : MappableExprsHandler::OMP_MAP_TO);
5527 
5528       // If we have a capture by reference we may need to add the private
5529       // pointer flag if the base declaration shows in some first-private
5530       // clause.
5531       CurMapTypes.back() =
5532           adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
5533     }
5534     // Every default map produces a single argument, so, it is always the
5535     // first one.
5536     CurMapTypes.back() |= MappableExprsHandler::OMP_MAP_FIRST_REF;
5537   }
5538 };
5539 
/// \brief Reserved offloading device IDs.
enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID to use when no device was defined; the runtime should
  /// then get the device from the environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
5545 } // anonymous namespace
5546 
5547 /// \brief Emit the arrays used to pass the captures and map information to the
5548 /// offloading runtime library. If there is no map or capture information,
5549 /// return nullptr by reference.
5550 static void
emitOffloadingArrays(CodeGenFunction & CGF,llvm::Value * & BasePointersArray,llvm::Value * & PointersArray,llvm::Value * & SizesArray,llvm::Value * & MapTypesArray,MappableExprsHandler::MapValuesArrayTy & BasePointers,MappableExprsHandler::MapValuesArrayTy & Pointers,MappableExprsHandler::MapValuesArrayTy & Sizes,MappableExprsHandler::MapFlagsArrayTy & MapTypes)5551 emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray,
5552                      llvm::Value *&PointersArray, llvm::Value *&SizesArray,
5553                      llvm::Value *&MapTypesArray,
5554                      MappableExprsHandler::MapValuesArrayTy &BasePointers,
5555                      MappableExprsHandler::MapValuesArrayTy &Pointers,
5556                      MappableExprsHandler::MapValuesArrayTy &Sizes,
5557                      MappableExprsHandler::MapFlagsArrayTy &MapTypes) {
5558   auto &CGM = CGF.CGM;
5559   auto &Ctx = CGF.getContext();
5560 
5561   BasePointersArray = PointersArray = SizesArray = MapTypesArray = nullptr;
5562 
5563   if (unsigned PointerNumVal = BasePointers.size()) {
5564     // Detect if we have any capture size requiring runtime evaluation of the
5565     // size so that a constant array could be eventually used.
5566     bool hasRuntimeEvaluationCaptureSize = false;
5567     for (auto *S : Sizes)
5568       if (!isa<llvm::Constant>(S)) {
5569         hasRuntimeEvaluationCaptureSize = true;
5570         break;
5571       }
5572 
5573     llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
5574     QualType PointerArrayType =
5575         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
5576                                  /*IndexTypeQuals=*/0);
5577 
5578     BasePointersArray =
5579         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
5580     PointersArray =
5581         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
5582 
5583     // If we don't have any VLA types or other types that require runtime
5584     // evaluation, we can use a constant array for the map sizes, otherwise we
5585     // need to fill up the arrays as we do for the pointers.
5586     if (hasRuntimeEvaluationCaptureSize) {
5587       QualType SizeArrayType = Ctx.getConstantArrayType(
5588           Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
5589           /*IndexTypeQuals=*/0);
5590       SizesArray =
5591           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
5592     } else {
5593       // We expect all the sizes to be constant, so we collect them to create
5594       // a constant array.
5595       SmallVector<llvm::Constant *, 16> ConstSizes;
5596       for (auto S : Sizes)
5597         ConstSizes.push_back(cast<llvm::Constant>(S));
5598 
5599       auto *SizesArrayInit = llvm::ConstantArray::get(
5600           llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
5601       auto *SizesArrayGbl = new llvm::GlobalVariable(
5602           CGM.getModule(), SizesArrayInit->getType(),
5603           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
5604           SizesArrayInit, ".offload_sizes");
5605       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
5606       SizesArray = SizesArrayGbl;
5607     }
5608 
5609     // The map types are always constant so we don't need to generate code to
5610     // fill arrays. Instead, we create an array constant.
5611     llvm::Constant *MapTypesArrayInit =
5612         llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
5613     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
5614         CGM.getModule(), MapTypesArrayInit->getType(),
5615         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
5616         MapTypesArrayInit, ".offload_maptypes");
5617     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
5618     MapTypesArray = MapTypesArrayGbl;
5619 
5620     for (unsigned i = 0; i < PointerNumVal; ++i) {
5621       llvm::Value *BPVal = BasePointers[i];
5622       if (BPVal->getType()->isPointerTy())
5623         BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
5624       else {
5625         assert(BPVal->getType()->isIntegerTy() &&
5626                "If not a pointer, the value type must be an integer.");
5627         BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
5628       }
5629       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
5630           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
5631           0, i);
5632       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
5633       CGF.Builder.CreateStore(BPVal, BPAddr);
5634 
5635       llvm::Value *PVal = Pointers[i];
5636       if (PVal->getType()->isPointerTy())
5637         PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
5638       else {
5639         assert(PVal->getType()->isIntegerTy() &&
5640                "If not a pointer, the value type must be an integer.");
5641         PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
5642       }
5643       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
5644           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0,
5645           i);
5646       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
5647       CGF.Builder.CreateStore(PVal, PAddr);
5648 
5649       if (hasRuntimeEvaluationCaptureSize) {
5650         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
5651             llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
5652             /*Idx0=*/0,
5653             /*Idx1=*/i);
5654         Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
5655         CGF.Builder.CreateStore(
5656             CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
5657             SAddr);
5658       }
5659     }
5660   }
5661 }
5662 /// \brief Emit the arguments to be passed to the runtime library based on the
5663 /// arrays of pointers, sizes and map types.
emitOffloadingArraysArgument(CodeGenFunction & CGF,llvm::Value * & BasePointersArrayArg,llvm::Value * & PointersArrayArg,llvm::Value * & SizesArrayArg,llvm::Value * & MapTypesArrayArg,llvm::Value * BasePointersArray,llvm::Value * PointersArray,llvm::Value * SizesArray,llvm::Value * MapTypesArray,unsigned NumElems)5664 static void emitOffloadingArraysArgument(
5665     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
5666     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
5667     llvm::Value *&MapTypesArrayArg, llvm::Value *BasePointersArray,
5668     llvm::Value *PointersArray, llvm::Value *SizesArray,
5669     llvm::Value *MapTypesArray, unsigned NumElems) {
5670   auto &CGM = CGF.CGM;
5671   if (NumElems) {
5672     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5673         llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), BasePointersArray,
5674         /*Idx0=*/0, /*Idx1=*/0);
5675     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5676         llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), PointersArray,
5677         /*Idx0=*/0,
5678         /*Idx1=*/0);
5679     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5680         llvm::ArrayType::get(CGM.SizeTy, NumElems), SizesArray,
5681         /*Idx0=*/0, /*Idx1=*/0);
5682     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5683         llvm::ArrayType::get(CGM.Int32Ty, NumElems), MapTypesArray,
5684         /*Idx0=*/0,
5685         /*Idx1=*/0);
5686   } else {
5687     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
5688     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
5689     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
5690     MapTypesArrayArg =
5691         llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
5692   }
5693 }
5694 
emitTargetCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::Value * OutlinedFn,llvm::Value * OutlinedFnID,const Expr * IfCond,const Expr * Device,ArrayRef<llvm::Value * > CapturedVars)5695 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
5696                                      const OMPExecutableDirective &D,
5697                                      llvm::Value *OutlinedFn,
5698                                      llvm::Value *OutlinedFnID,
5699                                      const Expr *IfCond, const Expr *Device,
5700                                      ArrayRef<llvm::Value *> CapturedVars) {
5701   if (!CGF.HaveInsertPoint())
5702     return;
5703 
5704   assert(OutlinedFn && "Invalid outlined function!");
5705 
5706   auto &Ctx = CGF.getContext();
5707 
5708   // Fill up the arrays with all the captured variables.
5709   MappableExprsHandler::MapValuesArrayTy KernelArgs;
5710   MappableExprsHandler::MapValuesArrayTy BasePointers;
5711   MappableExprsHandler::MapValuesArrayTy Pointers;
5712   MappableExprsHandler::MapValuesArrayTy Sizes;
5713   MappableExprsHandler::MapFlagsArrayTy MapTypes;
5714 
5715   MappableExprsHandler::MapValuesArrayTy CurBasePointers;
5716   MappableExprsHandler::MapValuesArrayTy CurPointers;
5717   MappableExprsHandler::MapValuesArrayTy CurSizes;
5718   MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
5719 
5720   // Get mappable expression information.
5721   MappableExprsHandler MEHandler(D, CGF);
5722 
5723   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5724   auto RI = CS.getCapturedRecordDecl()->field_begin();
5725   auto CV = CapturedVars.begin();
5726   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
5727                                             CE = CS.capture_end();
5728        CI != CE; ++CI, ++RI, ++CV) {
5729     StringRef Name;
5730     QualType Ty;
5731 
5732     CurBasePointers.clear();
5733     CurPointers.clear();
5734     CurSizes.clear();
5735     CurMapTypes.clear();
5736 
5737     // VLA sizes are passed to the outlined region by copy and do not have map
5738     // information associated.
5739     if (CI->capturesVariableArrayType()) {
5740       CurBasePointers.push_back(*CV);
5741       CurPointers.push_back(*CV);
5742       CurSizes.push_back(CGF.getTypeSize(RI->getType()));
5743       // Copy to the device as an argument. No need to retrieve it.
5744       CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL |
5745                             MappableExprsHandler::OMP_MAP_FIRST_REF);
5746     } else {
5747       // If we have any information in the map clause, we use it, otherwise we
5748       // just do a default mapping.
5749       MEHandler.generateInfoForCapture(CI, CurBasePointers, CurPointers,
5750                                        CurSizes, CurMapTypes);
5751       if (CurBasePointers.empty())
5752         MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
5753                                          CurPointers, CurSizes, CurMapTypes);
5754     }
5755     // We expect to have at least an element of information for this capture.
5756     assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
5757     assert(CurBasePointers.size() == CurPointers.size() &&
5758            CurBasePointers.size() == CurSizes.size() &&
5759            CurBasePointers.size() == CurMapTypes.size() &&
5760            "Inconsistent map information sizes!");
5761 
5762     // The kernel args are always the first elements of the base pointers
5763     // associated with a capture.
5764     KernelArgs.push_back(CurBasePointers.front());
5765     // We need to append the results of this capture to what we already have.
5766     BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
5767     Pointers.append(CurPointers.begin(), CurPointers.end());
5768     Sizes.append(CurSizes.begin(), CurSizes.end());
5769     MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
5770   }
5771 
5772   // Keep track on whether the host function has to be executed.
5773   auto OffloadErrorQType =
5774       Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
5775   auto OffloadError = CGF.MakeAddrLValue(
5776       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
5777       OffloadErrorQType);
5778   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
5779                         OffloadError);
5780 
5781   // Fill up the pointer arrays and transfer execution to the device.
5782   auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
5783                     OutlinedFnID, OffloadError, OffloadErrorQType,
5784                     &D](CodeGenFunction &CGF, PrePostActionTy &) {
5785     auto &RT = CGF.CGM.getOpenMPRuntime();
5786     // Emit the offloading arrays.
5787     llvm::Value *BasePointersArray;
5788     llvm::Value *PointersArray;
5789     llvm::Value *SizesArray;
5790     llvm::Value *MapTypesArray;
5791     emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray,
5792                          MapTypesArray, BasePointers, Pointers, Sizes,
5793                          MapTypes);
5794     emitOffloadingArraysArgument(CGF, BasePointersArray, PointersArray,
5795                                  SizesArray, MapTypesArray, BasePointersArray,
5796                                  PointersArray, SizesArray, MapTypesArray,
5797                                  BasePointers.size());
5798 
5799     // On top of the arrays that were filled up, the target offloading call
5800     // takes as arguments the device id as well as the host pointer. The host
5801     // pointer is used by the runtime library to identify the current target
5802     // region, so it only has to be unique and not necessarily point to
5803     // anything. It could be the pointer to the outlined function that
5804     // implements the target region, but we aren't using that so that the
5805     // compiler doesn't need to keep that, and could therefore inline the host
5806     // function if proven worthwhile during optimization.
5807 
5808     // From this point on, we need to have an ID of the target region defined.
5809     assert(OutlinedFnID && "Invalid outlined function ID!");
5810 
5811     // Emit device ID if any.
5812     llvm::Value *DeviceID;
5813     if (Device)
5814       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5815                                            CGF.Int32Ty, /*isSigned=*/true);
5816     else
5817       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
5818 
5819     // Emit the number of elements in the offloading arrays.
5820     llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
5821 
5822     // Return value of the runtime offloading call.
5823     llvm::Value *Return;
5824 
5825     auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
5826     auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);
5827 
5828     // If we have NumTeams defined this means that we have an enclosed teams
5829     // region. Therefore we also expect to have ThreadLimit defined. These two
5830     // values should be defined in the presence of a teams directive, regardless
5831     // of having any clauses associated. If the user is using teams but no
5832     // clauses, these two values will be the default that should be passed to
5833     // the runtime library - a 32-bit integer with the value zero.
5834     if (NumTeams) {
5835       assert(ThreadLimit && "Thread limit expression should be available along "
5836                             "with number of teams.");
5837       llvm::Value *OffloadingArgs[] = {
5838           DeviceID,          OutlinedFnID,  PointerNum,
5839           BasePointersArray, PointersArray, SizesArray,
5840           MapTypesArray,     NumTeams,      ThreadLimit};
5841       Return = CGF.EmitRuntimeCall(
5842           RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
5843     } else {
5844       llvm::Value *OffloadingArgs[] = {
5845           DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
5846           PointersArray, SizesArray,   MapTypesArray};
5847       Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
5848                                    OffloadingArgs);
5849     }
5850 
5851     CGF.EmitStoreOfScalar(Return, OffloadError);
5852   };
5853 
5854   // Notify that the host version must be executed.
5855   auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
5856     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
5857                           OffloadError);
5858   };
5859 
5860   // If we have a target function ID it means that we need to support
5861   // offloading, otherwise, just execute on the host. We need to execute on host
5862   // regardless of the conditional in the if clause if, e.g., the user do not
5863   // specify target triples.
5864   if (OutlinedFnID) {
5865     if (IfCond)
5866       emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
5867     else {
5868       RegionCodeGenTy ThenRCG(ThenGen);
5869       ThenRCG(CGF);
5870     }
5871   } else {
5872     RegionCodeGenTy ElseRCG(ElseGen);
5873     ElseRCG(CGF);
5874   }
5875 
5876   // Check the error code and execute the host version if required.
5877   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
5878   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
5879   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
5880   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
5881   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
5882 
5883   CGF.EmitBlock(OffloadFailedBlock);
5884   CGF.Builder.CreateCall(OutlinedFn, KernelArgs);
5885   CGF.EmitBranch(OffloadContBlock);
5886 
5887   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
5888 }
5889 
scanForTargetRegionsFunctions(const Stmt * S,StringRef ParentName)5890 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
5891                                                     StringRef ParentName) {
5892   if (!S)
5893     return;
5894 
5895   // If we find a OMP target directive, codegen the outline function and
5896   // register the result.
5897   // FIXME: Add other directives with target when they become supported.
5898   bool isTargetDirective = isa<OMPTargetDirective>(S);
5899 
5900   if (isTargetDirective) {
5901     auto *E = cast<OMPExecutableDirective>(S);
5902     unsigned DeviceID;
5903     unsigned FileID;
5904     unsigned Line;
5905     getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
5906                              FileID, Line);
5907 
5908     // Is this a target region that should not be emitted as an entry point? If
5909     // so just signal we are done with this target region.
5910     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
5911                                                             ParentName, Line))
5912       return;
5913 
5914     llvm::Function *Fn;
5915     llvm::Constant *Addr;
5916     std::tie(Fn, Addr) =
5917         CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
5918             CGM, cast<OMPTargetDirective>(*E), ParentName,
5919             /*isOffloadEntry=*/true);
5920     assert(Fn && Addr && "Target region emission failed.");
5921     return;
5922   }
5923 
5924   if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
5925     if (!E->hasAssociatedStmt())
5926       return;
5927 
5928     scanForTargetRegionsFunctions(
5929         cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
5930         ParentName);
5931     return;
5932   }
5933 
5934   // If this is a lambda function, look into its body.
5935   if (auto *L = dyn_cast<LambdaExpr>(S))
5936     S = L->getBody();
5937 
5938   // Keep looking for target regions recursively.
5939   for (auto *II : S->children())
5940     scanForTargetRegionsFunctions(II, ParentName);
5941 }
5942 
emitTargetFunctions(GlobalDecl GD)5943 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
5944   auto &FD = *cast<FunctionDecl>(GD.getDecl());
5945 
5946   // If emitting code for the host, we do not process FD here. Instead we do
5947   // the normal code generation.
5948   if (!CGM.getLangOpts().OpenMPIsDevice)
5949     return false;
5950 
5951   // Try to detect target regions in the function.
5952   scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
5953 
5954   // We should not emit any function othen that the ones created during the
5955   // scanning. Therefore, we signal that this function is completely dealt
5956   // with.
5957   return true;
5958 }
5959 
emitTargetGlobalVariable(GlobalDecl GD)5960 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
5961   if (!CGM.getLangOpts().OpenMPIsDevice)
5962     return false;
5963 
5964   // Check if there are Ctors/Dtors in this declaration and look for target
5965   // regions in it. We use the complete variant to produce the kernel name
5966   // mangling.
5967   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
5968   if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
5969     for (auto *Ctor : RD->ctors()) {
5970       StringRef ParentName =
5971           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
5972       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
5973     }
5974     auto *Dtor = RD->getDestructor();
5975     if (Dtor) {
5976       StringRef ParentName =
5977           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
5978       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
5979     }
5980   }
5981 
5982   // If we are in target mode we do not emit any global (declare target is not
5983   // implemented yet). Therefore we signal that GD was processed in this case.
5984   return true;
5985 }
5986 
emitTargetGlobal(GlobalDecl GD)5987 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
5988   auto *VD = GD.getDecl();
5989   if (isa<FunctionDecl>(VD))
5990     return emitTargetFunctions(GD);
5991 
5992   return emitTargetGlobalVariable(GD);
5993 }
5994 
emitRegistrationFunction()5995 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
5996   // If we have offloading in the current module, we need to emit the entries
5997   // now and register the offloading descriptor.
5998   createOffloadEntriesAndInfoMetadata();
5999 
6000   // Create and register the offloading binary descriptors. This is the main
6001   // entity that captures all the information about offloading in the current
6002   // compilation unit.
6003   return createOffloadingBinaryDescriptorRegistration();
6004 }
6005 
emitTeamsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,SourceLocation Loc,llvm::Value * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars)6006 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
6007                                     const OMPExecutableDirective &D,
6008                                     SourceLocation Loc,
6009                                     llvm::Value *OutlinedFn,
6010                                     ArrayRef<llvm::Value *> CapturedVars) {
6011   if (!CGF.HaveInsertPoint())
6012     return;
6013 
6014   auto *RTLoc = emitUpdateLocation(CGF, Loc);
6015   CodeGenFunction::RunCleanupsScope Scope(CGF);
6016 
6017   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
6018   llvm::Value *Args[] = {
6019       RTLoc,
6020       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
6021       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
6022   llvm::SmallVector<llvm::Value *, 16> RealArgs;
6023   RealArgs.append(std::begin(Args), std::end(Args));
6024   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
6025 
6026   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
6027   CGF.EmitRuntimeCall(RTLFn, RealArgs);
6028 }
6029 
emitNumTeamsClause(CodeGenFunction & CGF,const Expr * NumTeams,const Expr * ThreadLimit,SourceLocation Loc)6030 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
6031                                          const Expr *NumTeams,
6032                                          const Expr *ThreadLimit,
6033                                          SourceLocation Loc) {
6034   if (!CGF.HaveInsertPoint())
6035     return;
6036 
6037   auto *RTLoc = emitUpdateLocation(CGF, Loc);
6038 
6039   llvm::Value *NumTeamsVal =
6040       (NumTeams)
6041           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
6042                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
6043           : CGF.Builder.getInt32(0);
6044 
6045   llvm::Value *ThreadLimitVal =
6046       (ThreadLimit)
6047           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
6048                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
6049           : CGF.Builder.getInt32(0);
6050 
6051   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
6052   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
6053                                      ThreadLimitVal};
6054   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
6055                       PushNumTeamsArgs);
6056 }
6057 
emitTargetDataCalls(CodeGenFunction & CGF,const OMPExecutableDirective & D,const Expr * IfCond,const Expr * Device,const RegionCodeGenTy & CodeGen)6058 void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF,
6059                                           const OMPExecutableDirective &D,
6060                                           const Expr *IfCond,
6061                                           const Expr *Device,
6062                                           const RegionCodeGenTy &CodeGen) {
6063 
6064   if (!CGF.HaveInsertPoint())
6065     return;
6066 
6067   llvm::Value *BasePointersArray = nullptr;
6068   llvm::Value *PointersArray = nullptr;
6069   llvm::Value *SizesArray = nullptr;
6070   llvm::Value *MapTypesArray = nullptr;
6071   unsigned NumOfPtrs = 0;
6072 
6073   // Generate the code for the opening of the data environment. Capture all the
6074   // arguments of the runtime call by reference because they are used in the
6075   // closing of the region.
6076   auto &&BeginThenGen = [&D, &CGF, &BasePointersArray, &PointersArray,
6077                          &SizesArray, &MapTypesArray, Device,
6078                          &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
6079     // Fill up the arrays with all the mapped variables.
6080     MappableExprsHandler::MapValuesArrayTy BasePointers;
6081     MappableExprsHandler::MapValuesArrayTy Pointers;
6082     MappableExprsHandler::MapValuesArrayTy Sizes;
6083     MappableExprsHandler::MapFlagsArrayTy MapTypes;
6084 
6085     // Get map clause information.
6086     MappableExprsHandler MCHandler(D, CGF);
6087     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
6088     NumOfPtrs = BasePointers.size();
6089 
6090     // Fill up the arrays and create the arguments.
6091     emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray,
6092                          MapTypesArray, BasePointers, Pointers, Sizes,
6093                          MapTypes);
6094 
6095     llvm::Value *BasePointersArrayArg = nullptr;
6096     llvm::Value *PointersArrayArg = nullptr;
6097     llvm::Value *SizesArrayArg = nullptr;
6098     llvm::Value *MapTypesArrayArg = nullptr;
6099     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
6100                                  SizesArrayArg, MapTypesArrayArg,
6101                                  BasePointersArray, PointersArray, SizesArray,
6102                                  MapTypesArray, NumOfPtrs);
6103 
6104     // Emit device ID if any.
6105     llvm::Value *DeviceID = nullptr;
6106     if (Device)
6107       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6108                                            CGF.Int32Ty, /*isSigned=*/true);
6109     else
6110       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6111 
6112     // Emit the number of elements in the offloading arrays.
6113     auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs);
6114 
6115     llvm::Value *OffloadingArgs[] = {
6116         DeviceID,         PointerNum,    BasePointersArrayArg,
6117         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6118     auto &RT = CGF.CGM.getOpenMPRuntime();
6119     CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
6120                         OffloadingArgs);
6121   };
6122 
6123   // Generate code for the closing of the data region.
6124   auto &&EndThenGen = [&CGF, &BasePointersArray, &PointersArray, &SizesArray,
6125                        &MapTypesArray, Device,
6126                        &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
6127     assert(BasePointersArray && PointersArray && SizesArray && MapTypesArray &&
6128            NumOfPtrs && "Invalid data environment closing arguments.");
6129 
6130     llvm::Value *BasePointersArrayArg = nullptr;
6131     llvm::Value *PointersArrayArg = nullptr;
6132     llvm::Value *SizesArrayArg = nullptr;
6133     llvm::Value *MapTypesArrayArg = nullptr;
6134     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
6135                                  SizesArrayArg, MapTypesArrayArg,
6136                                  BasePointersArray, PointersArray, SizesArray,
6137                                  MapTypesArray, NumOfPtrs);
6138 
6139     // Emit device ID if any.
6140     llvm::Value *DeviceID = nullptr;
6141     if (Device)
6142       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6143                                            CGF.Int32Ty, /*isSigned=*/true);
6144     else
6145       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6146 
6147     // Emit the number of elements in the offloading arrays.
6148     auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs);
6149 
6150     llvm::Value *OffloadingArgs[] = {
6151         DeviceID,         PointerNum,    BasePointersArrayArg,
6152         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6153     auto &RT = CGF.CGM.getOpenMPRuntime();
6154     CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end),
6155                         OffloadingArgs);
6156   };
6157 
6158   // In the event we get an if clause, we don't have to take any action on the
6159   // else side.
6160   auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
6161 
6162   if (IfCond) {
6163     emitOMPIfClause(CGF, IfCond, BeginThenGen, ElseGen);
6164   } else {
6165     RegionCodeGenTy BeginThenRCG(BeginThenGen);
6166     BeginThenRCG(CGF);
6167   }
6168 
6169   CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, CodeGen);
6170 
6171   if (IfCond) {
6172     emitOMPIfClause(CGF, IfCond, EndThenGen, ElseGen);
6173   } else {
6174     RegionCodeGenTy EndThenRCG(EndThenGen);
6175     EndThenRCG(CGF);
6176   }
6177 }
6178 
emitTargetDataStandAloneCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,const Expr * IfCond,const Expr * Device)6179 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
6180     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
6181     const Expr *Device) {
6182   if (!CGF.HaveInsertPoint())
6183     return;
6184 
6185   assert((isa<OMPTargetEnterDataDirective>(D) ||
6186           isa<OMPTargetExitDataDirective>(D) ||
6187           isa<OMPTargetUpdateDirective>(D)) &&
6188          "Expecting either target enter, exit data, or update directives.");
6189 
6190   // Generate the code for the opening of the data environment.
6191   auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) {
6192     // Fill up the arrays with all the mapped variables.
6193     MappableExprsHandler::MapValuesArrayTy BasePointers;
6194     MappableExprsHandler::MapValuesArrayTy Pointers;
6195     MappableExprsHandler::MapValuesArrayTy Sizes;
6196     MappableExprsHandler::MapFlagsArrayTy MapTypes;
6197 
6198     // Get map clause information.
6199     MappableExprsHandler MEHandler(D, CGF);
6200     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
6201 
6202     llvm::Value *BasePointersArrayArg = nullptr;
6203     llvm::Value *PointersArrayArg = nullptr;
6204     llvm::Value *SizesArrayArg = nullptr;
6205     llvm::Value *MapTypesArrayArg = nullptr;
6206 
6207     // Fill up the arrays and create the arguments.
6208     emitOffloadingArrays(CGF, BasePointersArrayArg, PointersArrayArg,
6209                          SizesArrayArg, MapTypesArrayArg, BasePointers,
6210                          Pointers, Sizes, MapTypes);
6211     emitOffloadingArraysArgument(
6212         CGF, BasePointersArrayArg, PointersArrayArg, SizesArrayArg,
6213         MapTypesArrayArg, BasePointersArrayArg, PointersArrayArg, SizesArrayArg,
6214         MapTypesArrayArg, BasePointers.size());
6215 
6216     // Emit device ID if any.
6217     llvm::Value *DeviceID = nullptr;
6218     if (Device)
6219       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6220                                            CGF.Int32Ty, /*isSigned=*/true);
6221     else
6222       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6223 
6224     // Emit the number of elements in the offloading arrays.
6225     auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
6226 
6227     llvm::Value *OffloadingArgs[] = {
6228         DeviceID,         PointerNum,    BasePointersArrayArg,
6229         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6230 
6231     auto &RT = CGF.CGM.getOpenMPRuntime();
6232     // Select the right runtime function call for each expected standalone
6233     // directive.
6234     OpenMPRTLFunction RTLFn;
6235     switch (D.getDirectiveKind()) {
6236     default:
6237       llvm_unreachable("Unexpected standalone target data directive.");
6238       break;
6239     case OMPD_target_enter_data:
6240       RTLFn = OMPRTL__tgt_target_data_begin;
6241       break;
6242     case OMPD_target_exit_data:
6243       RTLFn = OMPRTL__tgt_target_data_end;
6244       break;
6245     case OMPD_target_update:
6246       RTLFn = OMPRTL__tgt_target_data_update;
6247       break;
6248     }
6249     CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs);
6250   };
6251 
6252   // In the event we get an if clause, we don't have to take any action on the
6253   // else side.
6254   auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
6255 
6256   if (IfCond) {
6257     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
6258   } else {
6259     RegionCodeGenTy ThenGenRCG(ThenGen);
6260     ThenGenRCG(CGF);
6261   }
6262 }
6263 
6264 namespace {
6265   /// Kind of parameter in a function with 'declare simd' directive.
6266   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
6267   /// Attribute set of the parameter.
6268   struct ParamAttrTy {
6269     ParamKindTy Kind = Vector;
6270     llvm::APSInt StrideOrArg;
6271     llvm::APSInt Alignment;
6272   };
6273 } // namespace
6274 
evaluateCDTSize(const FunctionDecl * FD,ArrayRef<ParamAttrTy> ParamAttrs)6275 static unsigned evaluateCDTSize(const FunctionDecl *FD,
6276                                 ArrayRef<ParamAttrTy> ParamAttrs) {
6277   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
6278   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
6279   // of that clause. The VLEN value must be power of 2.
6280   // In other case the notion of the function`s "characteristic data type" (CDT)
6281   // is used to compute the vector length.
6282   // CDT is defined in the following order:
6283   //   a) For non-void function, the CDT is the return type.
6284   //   b) If the function has any non-uniform, non-linear parameters, then the
6285   //   CDT is the type of the first such parameter.
6286   //   c) If the CDT determined by a) or b) above is struct, union, or class
6287   //   type which is pass-by-value (except for the type that maps to the
6288   //   built-in complex data type), the characteristic data type is int.
6289   //   d) If none of the above three cases is applicable, the CDT is int.
6290   // The VLEN is then determined based on the CDT and the size of vector
6291   // register of that ISA for which current vector version is generated. The
6292   // VLEN is computed using the formula below:
6293   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
6294   // where vector register size specified in section 3.2.1 Registers and the
6295   // Stack Frame of original AMD64 ABI document.
6296   QualType RetType = FD->getReturnType();
6297   if (RetType.isNull())
6298     return 0;
6299   ASTContext &C = FD->getASTContext();
6300   QualType CDT;
6301   if (!RetType.isNull() && !RetType->isVoidType())
6302     CDT = RetType;
6303   else {
6304     unsigned Offset = 0;
6305     if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
6306       if (ParamAttrs[Offset].Kind == Vector)
6307         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
6308       ++Offset;
6309     }
6310     if (CDT.isNull()) {
6311       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
6312         if (ParamAttrs[I + Offset].Kind == Vector) {
6313           CDT = FD->getParamDecl(I)->getType();
6314           break;
6315         }
6316       }
6317     }
6318   }
6319   if (CDT.isNull())
6320     CDT = C.IntTy;
6321   CDT = CDT->getCanonicalTypeUnqualified();
6322   if (CDT->isRecordType() || CDT->isUnionType())
6323     CDT = C.IntTy;
6324   return C.getTypeSize(CDT);
6325 }
6326 
6327 static void
emitX86DeclareSimdFunction(const FunctionDecl * FD,llvm::Function * Fn,llvm::APSInt VLENVal,ArrayRef<ParamAttrTy> ParamAttrs,OMPDeclareSimdDeclAttr::BranchStateTy State)6328 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
6329                            llvm::APSInt VLENVal,
6330                            ArrayRef<ParamAttrTy> ParamAttrs,
6331                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
6332   struct ISADataTy {
6333     char ISA;
6334     unsigned VecRegSize;
6335   };
6336   ISADataTy ISAData[] = {
6337       {
6338           'b', 128
6339       }, // SSE
6340       {
6341           'c', 256
6342       }, // AVX
6343       {
6344           'd', 256
6345       }, // AVX2
6346       {
6347           'e', 512
6348       }, // AVX512
6349   };
6350   llvm::SmallVector<char, 2> Masked;
6351   switch (State) {
6352   case OMPDeclareSimdDeclAttr::BS_Undefined:
6353     Masked.push_back('N');
6354     Masked.push_back('M');
6355     break;
6356   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
6357     Masked.push_back('N');
6358     break;
6359   case OMPDeclareSimdDeclAttr::BS_Inbranch:
6360     Masked.push_back('M');
6361     break;
6362   }
6363   for (auto Mask : Masked) {
6364     for (auto &Data : ISAData) {
6365       SmallString<256> Buffer;
6366       llvm::raw_svector_ostream Out(Buffer);
6367       Out << "_ZGV" << Data.ISA << Mask;
6368       if (!VLENVal) {
6369         Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
6370                                          evaluateCDTSize(FD, ParamAttrs));
6371       } else
6372         Out << VLENVal;
6373       for (auto &ParamAttr : ParamAttrs) {
6374         switch (ParamAttr.Kind){
6375         case LinearWithVarStride:
6376           Out << 's' << ParamAttr.StrideOrArg;
6377           break;
6378         case Linear:
6379           Out << 'l';
6380           if (!!ParamAttr.StrideOrArg)
6381             Out << ParamAttr.StrideOrArg;
6382           break;
6383         case Uniform:
6384           Out << 'u';
6385           break;
6386         case Vector:
6387           Out << 'v';
6388           break;
6389         }
6390         if (!!ParamAttr.Alignment)
6391           Out << 'a' << ParamAttr.Alignment;
6392       }
6393       Out << '_' << Fn->getName();
6394       Fn->addFnAttr(Out.str());
6395     }
6396   }
6397 }
6398 
emitDeclareSimdFunction(const FunctionDecl * FD,llvm::Function * Fn)6399 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
6400                                               llvm::Function *Fn) {
6401   ASTContext &C = CGM.getContext();
6402   FD = FD->getCanonicalDecl();
6403   // Map params to their positions in function decl.
6404   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
6405   if (isa<CXXMethodDecl>(FD))
6406     ParamPositions.insert({FD, 0});
6407   unsigned ParamPos = ParamPositions.size();
6408   for (auto *P : FD->parameters()) {
6409     ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
6410     ++ParamPos;
6411   }
6412   for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
6413     llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
6414     // Mark uniform parameters.
6415     for (auto *E : Attr->uniforms()) {
6416       E = E->IgnoreParenImpCasts();
6417       unsigned Pos;
6418       if (isa<CXXThisExpr>(E))
6419         Pos = ParamPositions[FD];
6420       else {
6421         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6422                         ->getCanonicalDecl();
6423         Pos = ParamPositions[PVD];
6424       }
6425       ParamAttrs[Pos].Kind = Uniform;
6426     }
6427     // Get alignment info.
6428     auto NI = Attr->alignments_begin();
6429     for (auto *E : Attr->aligneds()) {
6430       E = E->IgnoreParenImpCasts();
6431       unsigned Pos;
6432       QualType ParmTy;
6433       if (isa<CXXThisExpr>(E)) {
6434         Pos = ParamPositions[FD];
6435         ParmTy = E->getType();
6436       } else {
6437         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6438                         ->getCanonicalDecl();
6439         Pos = ParamPositions[PVD];
6440         ParmTy = PVD->getType();
6441       }
6442       ParamAttrs[Pos].Alignment =
6443           (*NI) ? (*NI)->EvaluateKnownConstInt(C)
6444                 : llvm::APSInt::getUnsigned(
6445                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
6446                           .getQuantity());
6447       ++NI;
6448     }
6449     // Mark linear parameters.
6450     auto SI = Attr->steps_begin();
6451     auto MI = Attr->modifiers_begin();
6452     for (auto *E : Attr->linears()) {
6453       E = E->IgnoreParenImpCasts();
6454       unsigned Pos;
6455       if (isa<CXXThisExpr>(E))
6456         Pos = ParamPositions[FD];
6457       else {
6458         auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6459                         ->getCanonicalDecl();
6460         Pos = ParamPositions[PVD];
6461       }
6462       auto &ParamAttr = ParamAttrs[Pos];
6463       ParamAttr.Kind = Linear;
6464       if (*SI) {
6465         if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
6466                                   Expr::SE_AllowSideEffects)) {
6467           if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
6468             if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
6469               ParamAttr.Kind = LinearWithVarStride;
6470               ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
6471                   ParamPositions[StridePVD->getCanonicalDecl()]);
6472             }
6473           }
6474         }
6475       }
6476       ++SI;
6477       ++MI;
6478     }
6479     llvm::APSInt VLENVal;
6480     if (const Expr *VLEN = Attr->getSimdlen())
6481       VLENVal = VLEN->EvaluateKnownConstInt(C);
6482     OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
6483     if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
6484         CGM.getTriple().getArch() == llvm::Triple::x86_64)
6485       emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
6486   }
6487 }
6488 
6489 namespace {
6490 /// Cleanup action for doacross support.
6491 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
6492 public:
6493   static const int DoacrossFinArgs = 2;
6494 
6495 private:
6496   llvm::Value *RTLFn;
6497   llvm::Value *Args[DoacrossFinArgs];
6498 
6499 public:
DoacrossCleanupTy(llvm::Value * RTLFn,ArrayRef<llvm::Value * > CallArgs)6500   DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
6501       : RTLFn(RTLFn) {
6502     assert(CallArgs.size() == DoacrossFinArgs);
6503     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
6504   }
Emit(CodeGenFunction & CGF,Flags)6505   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
6506     if (!CGF.HaveInsertPoint())
6507       return;
6508     CGF.EmitRuntimeCall(RTLFn, Args);
6509   }
6510 };
6511 } // namespace
6512 
emitDoacrossInit(CodeGenFunction & CGF,const OMPLoopDirective & D)6513 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
6514                                        const OMPLoopDirective &D) {
6515   if (!CGF.HaveInsertPoint())
6516     return;
6517 
6518   ASTContext &C = CGM.getContext();
6519   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
6520   RecordDecl *RD;
6521   if (KmpDimTy.isNull()) {
6522     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
6523     //  kmp_int64 lo; // lower
6524     //  kmp_int64 up; // upper
6525     //  kmp_int64 st; // stride
6526     // };
6527     RD = C.buildImplicitRecord("kmp_dim");
6528     RD->startDefinition();
6529     addFieldToRecordDecl(C, RD, Int64Ty);
6530     addFieldToRecordDecl(C, RD, Int64Ty);
6531     addFieldToRecordDecl(C, RD, Int64Ty);
6532     RD->completeDefinition();
6533     KmpDimTy = C.getRecordType(RD);
6534   } else
6535     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
6536 
6537   Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
6538   CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
6539   enum { LowerFD = 0, UpperFD, StrideFD };
6540   // Fill dims with data.
6541   LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
6542   // dims.upper = num_iterations;
6543   LValue UpperLVal =
6544       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
6545   llvm::Value *NumIterVal = CGF.EmitScalarConversion(
6546       CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
6547       Int64Ty, D.getNumIterations()->getExprLoc());
6548   CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
6549   // dims.stride = 1;
6550   LValue StrideLVal =
6551       CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
6552   CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
6553                         StrideLVal);
6554 
6555   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
6556   // kmp_int32 num_dims, struct kmp_dim * dims);
6557   llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
6558                          getThreadID(CGF, D.getLocStart()),
6559                          llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
6560                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6561                              DimsAddr.getPointer(), CGM.VoidPtrTy)};
6562 
6563   llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
6564   CGF.EmitRuntimeCall(RTLFn, Args);
6565   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
6566       emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
6567   llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
6568   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
6569                                              llvm::makeArrayRef(FiniArgs));
6570 }
6571 
emitDoacrossOrdered(CodeGenFunction & CGF,const OMPDependClause * C)6572 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
6573                                           const OMPDependClause *C) {
6574   QualType Int64Ty =
6575       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
6576   const Expr *CounterVal = C->getCounterValue();
6577   assert(CounterVal);
6578   llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
6579                                                  CounterVal->getType(), Int64Ty,
6580                                                  CounterVal->getExprLoc());
6581   Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
6582   CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
6583   llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
6584                          getThreadID(CGF, C->getLocStart()),
6585                          CntAddr.getPointer()};
6586   llvm::Value *RTLFn;
6587   if (C->getDependencyKind() == OMPC_DEPEND_source)
6588     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
6589   else {
6590     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
6591     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
6592   }
6593   CGF.EmitRuntimeCall(RTLFn, Args);
6594 }
6595 
6596