1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CGCleanup.h"
17 #include "clang/AST/Decl.h"
18 #include "clang/AST/StmtOpenMP.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cassert>
26 
27 using namespace clang;
28 using namespace CodeGen;
29 
30 namespace {
31 /// \brief Base class for handling code generation inside OpenMP regions.
32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
33 public:
34   /// \brief Kinds of OpenMP regions used in codegen.
35   enum CGOpenMPRegionKind {
36     /// \brief Region with outlined function for standalone 'parallel'
37     /// directive.
38     ParallelOutlinedRegion,
39     /// \brief Region with outlined function for standalone 'task' directive.
40     TaskOutlinedRegion,
41     /// \brief Region for constructs that do not require function outlining,
42     /// like 'for', 'sections', 'atomic' etc. directives.
43     InlinedRegion,
44     /// \brief Region with outlined function for standalone 'target' directive.
45     TargetRegion,
46   };
47 
CGOpenMPRegionInfo(const CapturedStmt & CS,const CGOpenMPRegionKind RegionKind,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)48   CGOpenMPRegionInfo(const CapturedStmt &CS,
49                      const CGOpenMPRegionKind RegionKind,
50                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
51                      bool HasCancel)
52       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
53         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
54 
CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)55   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
56                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
57                      bool HasCancel)
58       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
59         Kind(Kind), HasCancel(HasCancel) {}
60 
61   /// \brief Get a variable or parameter for storing global thread id
62   /// inside OpenMP construct.
63   virtual const VarDecl *getThreadIDVariable() const = 0;
64 
65   /// \brief Emit the captured statement body.
66   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
67 
68   /// \brief Get an LValue for the current ThreadID variable.
69   /// \return LValue for thread id variable. This LValue always has type int32*.
70   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
71 
getRegionKind() const72   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
73 
getDirectiveKind() const74   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
75 
hasCancel() const76   bool hasCancel() const { return HasCancel; }
77 
classof(const CGCapturedStmtInfo * Info)78   static bool classof(const CGCapturedStmtInfo *Info) {
79     return Info->getKind() == CR_OpenMP;
80   }
81 
82 protected:
83   CGOpenMPRegionKind RegionKind;
84   const RegionCodeGenTy &CodeGen;
85   OpenMPDirectiveKind Kind;
86   bool HasCancel;
87 };
88 
89 /// \brief API for captured statement code generation in OpenMP constructs.
90 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
91 public:
CGOpenMPOutlinedRegionInfo(const CapturedStmt & CS,const VarDecl * ThreadIDVar,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)92   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
93                              const RegionCodeGenTy &CodeGen,
94                              OpenMPDirectiveKind Kind, bool HasCancel)
95       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
96                            HasCancel),
97         ThreadIDVar(ThreadIDVar) {
98     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
99   }
100   /// \brief Get a variable or parameter for storing global thread id
101   /// inside OpenMP construct.
getThreadIDVariable() const102   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
103 
104   /// \brief Get the name of the capture helper.
getHelperName() const105   StringRef getHelperName() const override { return ".omp_outlined."; }
106 
classof(const CGCapturedStmtInfo * Info)107   static bool classof(const CGCapturedStmtInfo *Info) {
108     return CGOpenMPRegionInfo::classof(Info) &&
109            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
110                ParallelOutlinedRegion;
111   }
112 
113 private:
114   /// \brief A variable or parameter storing global thread id for OpenMP
115   /// constructs.
116   const VarDecl *ThreadIDVar;
117 };
118 
119 /// \brief API for captured statement code generation in OpenMP constructs.
120 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
121 public:
CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt & CS,const VarDecl * ThreadIDVar,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)122   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
123                                  const VarDecl *ThreadIDVar,
124                                  const RegionCodeGenTy &CodeGen,
125                                  OpenMPDirectiveKind Kind, bool HasCancel)
126       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
127         ThreadIDVar(ThreadIDVar) {
128     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
129   }
130   /// \brief Get a variable or parameter for storing global thread id
131   /// inside OpenMP construct.
getThreadIDVariable() const132   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
133 
134   /// \brief Get an LValue for the current ThreadID variable.
135   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
136 
137   /// \brief Get the name of the capture helper.
getHelperName() const138   StringRef getHelperName() const override { return ".omp_outlined."; }
139 
classof(const CGCapturedStmtInfo * Info)140   static bool classof(const CGCapturedStmtInfo *Info) {
141     return CGOpenMPRegionInfo::classof(Info) &&
142            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
143                TaskOutlinedRegion;
144   }
145 
146 private:
147   /// \brief A variable or parameter storing global thread id for OpenMP
148   /// constructs.
149   const VarDecl *ThreadIDVar;
150 };
151 
152 /// \brief API for inlined captured statement code generation in OpenMP
153 /// constructs.
154 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
155 public:
CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo * OldCSI,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)156   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
157                             const RegionCodeGenTy &CodeGen,
158                             OpenMPDirectiveKind Kind, bool HasCancel)
159       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
160         OldCSI(OldCSI),
161         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
162   // \brief Retrieve the value of the context parameter.
getContextValue() const163   llvm::Value *getContextValue() const override {
164     if (OuterRegionInfo)
165       return OuterRegionInfo->getContextValue();
166     llvm_unreachable("No context value for inlined OpenMP region");
167   }
setContextValue(llvm::Value * V)168   void setContextValue(llvm::Value *V) override {
169     if (OuterRegionInfo) {
170       OuterRegionInfo->setContextValue(V);
171       return;
172     }
173     llvm_unreachable("No context value for inlined OpenMP region");
174   }
175   /// \brief Lookup the captured field decl for a variable.
lookup(const VarDecl * VD) const176   const FieldDecl *lookup(const VarDecl *VD) const override {
177     if (OuterRegionInfo)
178       return OuterRegionInfo->lookup(VD);
179     // If there is no outer outlined region,no need to lookup in a list of
180     // captured variables, we can use the original one.
181     return nullptr;
182   }
getThisFieldDecl() const183   FieldDecl *getThisFieldDecl() const override {
184     if (OuterRegionInfo)
185       return OuterRegionInfo->getThisFieldDecl();
186     return nullptr;
187   }
188   /// \brief Get a variable or parameter for storing global thread id
189   /// inside OpenMP construct.
getThreadIDVariable() const190   const VarDecl *getThreadIDVariable() const override {
191     if (OuterRegionInfo)
192       return OuterRegionInfo->getThreadIDVariable();
193     return nullptr;
194   }
195 
196   /// \brief Get the name of the capture helper.
getHelperName() const197   StringRef getHelperName() const override {
198     if (auto *OuterRegionInfo = getOldCSI())
199       return OuterRegionInfo->getHelperName();
200     llvm_unreachable("No helper name for inlined OpenMP construct");
201   }
202 
getOldCSI() const203   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
204 
classof(const CGCapturedStmtInfo * Info)205   static bool classof(const CGCapturedStmtInfo *Info) {
206     return CGOpenMPRegionInfo::classof(Info) &&
207            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
208   }
209 
210 private:
211   /// \brief CodeGen info about outer OpenMP region.
212   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
213   CGOpenMPRegionInfo *OuterRegionInfo;
214 };
215 
216 /// \brief API for captured statement code generation in OpenMP target
217 /// constructs. For this captures, implicit parameters are used instead of the
218 /// captured fields.
219 class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
220 public:
CGOpenMPTargetRegionInfo(const CapturedStmt & CS,const RegionCodeGenTy & CodeGen)221   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
222                            const RegionCodeGenTy &CodeGen)
223       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
224                            /*HasCancel = */ false) {}
225 
226   /// \brief This is unused for target regions because each starts executing
227   /// with a single thread.
getThreadIDVariable() const228   const VarDecl *getThreadIDVariable() const override { return nullptr; }
229 
230   /// \brief Get the name of the capture helper.
getHelperName() const231   StringRef getHelperName() const override { return ".omp_offloading."; }
232 
classof(const CGCapturedStmtInfo * Info)233   static bool classof(const CGCapturedStmtInfo *Info) {
234     return CGOpenMPRegionInfo::classof(Info) &&
235            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
236   }
237 };
238 
239 /// \brief RAII for emitting code of OpenMP constructs.
240 class InlinedOpenMPRegionRAII {
241   CodeGenFunction &CGF;
242 
243 public:
244   /// \brief Constructs region for combined constructs.
245   /// \param CodeGen Code generation sequence for combined directives. Includes
246   /// a list of functions used for code generation of implicitly inlined
247   /// regions.
InlinedOpenMPRegionRAII(CodeGenFunction & CGF,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)248   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
249                           OpenMPDirectiveKind Kind, bool HasCancel)
250       : CGF(CGF) {
251     // Start emission for the construct.
252     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
253         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
254   }
~InlinedOpenMPRegionRAII()255   ~InlinedOpenMPRegionRAII() {
256     // Restore original CapturedStmtInfo only if we're done with code emission.
257     auto *OldCSI =
258         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
259     delete CGF.CapturedStmtInfo;
260     CGF.CapturedStmtInfo = OldCSI;
261   }
262 };
263 
264 } // anonymous namespace
265 
emitLoadOfPointerLValue(CodeGenFunction & CGF,Address PtrAddr,QualType Ty)266 static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr,
267                                       QualType Ty) {
268   AlignmentSource Source;
269   CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source);
270   return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align),
271                             Ty->getPointeeType(), Source);
272 }
273 
getThreadIDVariableLValue(CodeGenFunction & CGF)274 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
275   return emitLoadOfPointerLValue(CGF,
276                                  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
277                                  getThreadIDVariable()->getType());
278 }
279 
EmitBody(CodeGenFunction & CGF,const Stmt *)280 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
281   if (!CGF.HaveInsertPoint())
282     return;
283   // 1.2.2 OpenMP Language Terminology
284   // Structured block - An executable statement with a single entry at the
285   // top and a single exit at the bottom.
286   // The point of exit cannot be a branch out of the structured block.
287   // longjmp() and throw() must not violate the entry/exit criteria.
288   CGF.EHStack.pushTerminate();
289   {
290     CodeGenFunction::RunCleanupsScope Scope(CGF);
291     CodeGen(CGF);
292   }
293   CGF.EHStack.popTerminate();
294 }
295 
getThreadIDVariableLValue(CodeGenFunction & CGF)296 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
297     CodeGenFunction &CGF) {
298   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
299                             getThreadIDVariable()->getType(),
300                             AlignmentSource::Decl);
301 }
302 
CGOpenMPRuntime(CodeGenModule & CGM)303 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
304     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
305   IdentTy = llvm::StructType::create(
306       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
307       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
308       CGM.Int8PtrTy /* psource */, nullptr);
309   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
310   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
311                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
312   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
313   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
314 }
315 
clear()316 void CGOpenMPRuntime::clear() {
317   InternalVars.clear();
318 }
319 
320 // Layout information for ident_t.
getIdentAlign(CodeGenModule & CGM)321 static CharUnits getIdentAlign(CodeGenModule &CGM) {
322   return CGM.getPointerAlign();
323 }
getIdentSize(CodeGenModule & CGM)324 static CharUnits getIdentSize(CodeGenModule &CGM) {
325   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
326   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
327 }
getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field)328 static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) {
329   // All the fields except the last are i32, so this works beautifully.
330   return unsigned(Field) * CharUnits::fromQuantity(4);
331 }
createIdentFieldGEP(CodeGenFunction & CGF,Address Addr,CGOpenMPRuntime::IdentFieldIndex Field,const llvm::Twine & Name="")332 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
333                                    CGOpenMPRuntime::IdentFieldIndex Field,
334                                    const llvm::Twine &Name = "") {
335   auto Offset = getOffsetOfIdentField(Field);
336   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
337 }
338 
emitParallelOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)339 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
340     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
341     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
342   assert(ThreadIDVar->getType()->isPointerType() &&
343          "thread id variable must be of type kmp_int32 *");
344   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
345   CodeGenFunction CGF(CGM, true);
346   bool HasCancel = false;
347   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
348     HasCancel = OPD->hasCancel();
349   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
350     HasCancel = OPSD->hasCancel();
351   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
352     HasCancel = OPFD->hasCancel();
353   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
354                                     HasCancel);
355   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
356   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
357 }
358 
emitTaskOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)359 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
360     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
361     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
362   assert(!ThreadIDVar->getType()->isPointerType() &&
363          "thread id variable must be of type kmp_int32 for tasks");
364   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
365   CodeGenFunction CGF(CGM, true);
366   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
367                                         InnermostKind,
368                                         cast<OMPTaskDirective>(D).hasCancel());
369   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
370   return CGF.GenerateCapturedStmtFunction(*CS);
371 }
372 
getOrCreateDefaultLocation(OpenMPLocationFlags Flags)373 Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
374   CharUnits Align = getIdentAlign(CGM);
375   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
376   if (!Entry) {
377     if (!DefaultOpenMPPSource) {
378       // Initialize default location for psource field of ident_t structure of
379       // all ident_t objects. Format is ";file;function;line;column;;".
380       // Taken from
381       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
382       DefaultOpenMPPSource =
383           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
384       DefaultOpenMPPSource =
385           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
386     }
387     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
388         CGM.getModule(), IdentTy, /*isConstant*/ true,
389         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
390     DefaultOpenMPLocation->setUnnamedAddr(true);
391     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
392 
393     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
394     llvm::Constant *Values[] = {Zero,
395                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
396                                 Zero, Zero, DefaultOpenMPPSource};
397     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
398     DefaultOpenMPLocation->setInitializer(Init);
399     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
400   }
401   return Address(Entry, Align);
402 }
403 
emitUpdateLocation(CodeGenFunction & CGF,SourceLocation Loc,OpenMPLocationFlags Flags)404 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
405                                                  SourceLocation Loc,
406                                                  OpenMPLocationFlags Flags) {
407   // If no debug info is generated - return global default location.
408   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
409       Loc.isInvalid())
410     return getOrCreateDefaultLocation(Flags).getPointer();
411 
412   assert(CGF.CurFn && "No function in current CodeGenFunction.");
413 
414   Address LocValue = Address::invalid();
415   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
416   if (I != OpenMPLocThreadIDMap.end())
417     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
418 
419   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
420   // GetOpenMPThreadID was called before this routine.
421   if (!LocValue.isValid()) {
422     // Generate "ident_t .kmpc_loc.addr;"
423     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
424                                       ".kmpc_loc.addr");
425     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
426     Elem.second.DebugLoc = AI.getPointer();
427     LocValue = AI;
428 
429     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
430     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
431     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
432                              CGM.getSize(getIdentSize(CGF.CGM)));
433   }
434 
435   // char **psource = &.kmpc_loc_<flags>.addr.psource;
436   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
437 
438   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
439   if (OMPDebugLoc == nullptr) {
440     SmallString<128> Buffer2;
441     llvm::raw_svector_ostream OS2(Buffer2);
442     // Build debug location
443     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
444     OS2 << ";" << PLoc.getFilename() << ";";
445     if (const FunctionDecl *FD =
446             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
447       OS2 << FD->getQualifiedNameAsString();
448     }
449     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
450     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
451     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
452   }
453   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
454   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
455 
456   // Our callers always pass this to a runtime function, so for
457   // convenience, go ahead and return a naked pointer.
458   return LocValue.getPointer();
459 }
460 
getThreadID(CodeGenFunction & CGF,SourceLocation Loc)461 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
462                                           SourceLocation Loc) {
463   assert(CGF.CurFn && "No function in current CodeGenFunction.");
464 
465   llvm::Value *ThreadID = nullptr;
466   // Check whether we've already cached a load of the thread id in this
467   // function.
468   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
469   if (I != OpenMPLocThreadIDMap.end()) {
470     ThreadID = I->second.ThreadID;
471     if (ThreadID != nullptr)
472       return ThreadID;
473   }
474   if (auto OMPRegionInfo =
475           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
476     if (OMPRegionInfo->getThreadIDVariable()) {
477       // Check if this an outlined function with thread id passed as argument.
478       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
479       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
480       // If value loaded in entry block, cache it and use it everywhere in
481       // function.
482       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
483         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
484         Elem.second.ThreadID = ThreadID;
485       }
486       return ThreadID;
487     }
488   }
489 
490   // This is not an outlined function region - need to call __kmpc_int32
491   // kmpc_global_thread_num(ident_t *loc).
492   // Generate thread id value and cache this value for use across the
493   // function.
494   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
495   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
496   ThreadID =
497       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
498                           emitUpdateLocation(CGF, Loc));
499   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
500   Elem.second.ThreadID = ThreadID;
501   return ThreadID;
502 }
503 
functionFinished(CodeGenFunction & CGF)504 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
505   assert(CGF.CurFn && "No function in current CodeGenFunction.");
506   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
507     OpenMPLocThreadIDMap.erase(CGF.CurFn);
508 }
509 
getIdentTyPointerTy()510 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
511   return llvm::PointerType::getUnqual(IdentTy);
512 }
513 
getKmpc_MicroPointerTy()514 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
515   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
516 }
517 
518 llvm::Constant *
createRuntimeFunction(OpenMPRTLFunction Function)519 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
520   llvm::Constant *RTLFn = nullptr;
521   switch (Function) {
522   case OMPRTL__kmpc_fork_call: {
523     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
524     // microtask, ...);
525     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
526                                 getKmpc_MicroPointerTy()};
527     llvm::FunctionType *FnTy =
528         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
529     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
530     break;
531   }
532   case OMPRTL__kmpc_global_thread_num: {
533     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
534     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
535     llvm::FunctionType *FnTy =
536         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
537     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
538     break;
539   }
540   case OMPRTL__kmpc_threadprivate_cached: {
541     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
542     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
543     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
544                                 CGM.VoidPtrTy, CGM.SizeTy,
545                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
546     llvm::FunctionType *FnTy =
547         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
548     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
549     break;
550   }
551   case OMPRTL__kmpc_critical: {
552     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
553     // kmp_critical_name *crit);
554     llvm::Type *TypeParams[] = {
555         getIdentTyPointerTy(), CGM.Int32Ty,
556         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
557     llvm::FunctionType *FnTy =
558         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
559     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
560     break;
561   }
562   case OMPRTL__kmpc_critical_with_hint: {
563     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
564     // kmp_critical_name *crit, uintptr_t hint);
565     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
566                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
567                                 CGM.IntPtrTy};
568     llvm::FunctionType *FnTy =
569         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
570     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
571     break;
572   }
573   case OMPRTL__kmpc_threadprivate_register: {
574     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
575     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
576     // typedef void *(*kmpc_ctor)(void *);
577     auto KmpcCtorTy =
578         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
579                                 /*isVarArg*/ false)->getPointerTo();
580     // typedef void *(*kmpc_cctor)(void *, void *);
581     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
582     auto KmpcCopyCtorTy =
583         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
584                                 /*isVarArg*/ false)->getPointerTo();
585     // typedef void (*kmpc_dtor)(void *);
586     auto KmpcDtorTy =
587         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
588             ->getPointerTo();
589     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
590                               KmpcCopyCtorTy, KmpcDtorTy};
591     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
592                                         /*isVarArg*/ false);
593     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
594     break;
595   }
596   case OMPRTL__kmpc_end_critical: {
597     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
598     // kmp_critical_name *crit);
599     llvm::Type *TypeParams[] = {
600         getIdentTyPointerTy(), CGM.Int32Ty,
601         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
602     llvm::FunctionType *FnTy =
603         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
604     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
605     break;
606   }
607   case OMPRTL__kmpc_cancel_barrier: {
608     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
609     // global_tid);
610     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
611     llvm::FunctionType *FnTy =
612         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
613     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
614     break;
615   }
616   case OMPRTL__kmpc_barrier: {
617     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
618     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
619     llvm::FunctionType *FnTy =
620         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
621     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
622     break;
623   }
624   case OMPRTL__kmpc_for_static_fini: {
625     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
626     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
627     llvm::FunctionType *FnTy =
628         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
629     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
630     break;
631   }
632   case OMPRTL__kmpc_push_num_threads: {
633     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
634     // kmp_int32 num_threads)
635     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
636                                 CGM.Int32Ty};
637     llvm::FunctionType *FnTy =
638         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
639     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
640     break;
641   }
642   case OMPRTL__kmpc_serialized_parallel: {
643     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
644     // global_tid);
645     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
646     llvm::FunctionType *FnTy =
647         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
648     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
649     break;
650   }
651   case OMPRTL__kmpc_end_serialized_parallel: {
652     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
653     // global_tid);
654     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
655     llvm::FunctionType *FnTy =
656         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
657     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
658     break;
659   }
660   case OMPRTL__kmpc_flush: {
661     // Build void __kmpc_flush(ident_t *loc);
662     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
663     llvm::FunctionType *FnTy =
664         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
665     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
666     break;
667   }
668   case OMPRTL__kmpc_master: {
669     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
670     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
671     llvm::FunctionType *FnTy =
672         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
673     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
674     break;
675   }
676   case OMPRTL__kmpc_end_master: {
677     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
678     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
679     llvm::FunctionType *FnTy =
680         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
681     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
682     break;
683   }
684   case OMPRTL__kmpc_omp_taskyield: {
685     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
686     // int end_part);
687     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
688     llvm::FunctionType *FnTy =
689         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
690     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
691     break;
692   }
693   case OMPRTL__kmpc_single: {
694     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
695     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
696     llvm::FunctionType *FnTy =
697         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
698     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
699     break;
700   }
701   case OMPRTL__kmpc_end_single: {
702     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
703     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
704     llvm::FunctionType *FnTy =
705         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
706     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
707     break;
708   }
709   case OMPRTL__kmpc_omp_task_alloc: {
710     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
711     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
712     // kmp_routine_entry_t *task_entry);
713     assert(KmpRoutineEntryPtrTy != nullptr &&
714            "Type kmp_routine_entry_t must be created.");
715     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
716                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
717     // Return void * and then cast to particular kmp_task_t type.
718     llvm::FunctionType *FnTy =
719         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
720     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
721     break;
722   }
723   case OMPRTL__kmpc_omp_task: {
724     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
725     // *new_task);
726     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
727                                 CGM.VoidPtrTy};
728     llvm::FunctionType *FnTy =
729         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
730     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
731     break;
732   }
733   case OMPRTL__kmpc_copyprivate: {
734     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
735     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
736     // kmp_int32 didit);
737     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
738     auto *CpyFnTy =
739         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
740     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
741                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
742                                 CGM.Int32Ty};
743     llvm::FunctionType *FnTy =
744         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
745     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
746     break;
747   }
748   case OMPRTL__kmpc_reduce: {
749     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
750     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
751     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
752     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
753     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
754                                                /*isVarArg=*/false);
755     llvm::Type *TypeParams[] = {
756         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
757         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
758         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
759     llvm::FunctionType *FnTy =
760         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
761     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
762     break;
763   }
764   case OMPRTL__kmpc_reduce_nowait: {
765     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
766     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
767     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
768     // *lck);
769     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
770     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
771                                                /*isVarArg=*/false);
772     llvm::Type *TypeParams[] = {
773         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
774         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
775         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
776     llvm::FunctionType *FnTy =
777         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
778     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
779     break;
780   }
781   case OMPRTL__kmpc_end_reduce: {
782     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
783     // kmp_critical_name *lck);
784     llvm::Type *TypeParams[] = {
785         getIdentTyPointerTy(), CGM.Int32Ty,
786         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
787     llvm::FunctionType *FnTy =
788         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
789     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
790     break;
791   }
792   case OMPRTL__kmpc_end_reduce_nowait: {
793     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
794     // kmp_critical_name *lck);
795     llvm::Type *TypeParams[] = {
796         getIdentTyPointerTy(), CGM.Int32Ty,
797         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
798     llvm::FunctionType *FnTy =
799         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
800     RTLFn =
801         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
802     break;
803   }
804   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
807     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
808                                 CGM.VoidPtrTy};
809     llvm::FunctionType *FnTy =
810         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
811     RTLFn =
812         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
813     break;
814   }
815   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
818     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
819                                 CGM.VoidPtrTy};
820     llvm::FunctionType *FnTy =
821         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
822     RTLFn = CGM.CreateRuntimeFunction(FnTy,
823                                       /*Name=*/"__kmpc_omp_task_complete_if0");
824     break;
825   }
826   case OMPRTL__kmpc_ordered: {
827     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
828     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
829     llvm::FunctionType *FnTy =
830         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
831     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
832     break;
833   }
834   case OMPRTL__kmpc_end_ordered: {
835     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
836     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
837     llvm::FunctionType *FnTy =
838         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
839     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
840     break;
841   }
842   case OMPRTL__kmpc_omp_taskwait: {
843     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
844     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
845     llvm::FunctionType *FnTy =
846         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
847     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
848     break;
849   }
850   case OMPRTL__kmpc_taskgroup: {
851     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
852     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
853     llvm::FunctionType *FnTy =
854         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
855     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
856     break;
857   }
858   case OMPRTL__kmpc_end_taskgroup: {
859     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
860     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
861     llvm::FunctionType *FnTy =
862         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
863     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
864     break;
865   }
866   case OMPRTL__kmpc_push_proc_bind: {
867     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
868     // int proc_bind)
869     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
870     llvm::FunctionType *FnTy =
871         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
872     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
873     break;
874   }
875   case OMPRTL__kmpc_omp_task_with_deps: {
876     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
877     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
878     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
879     llvm::Type *TypeParams[] = {
880         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
881         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
882     llvm::FunctionType *FnTy =
883         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
884     RTLFn =
885         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
886     break;
887   }
888   case OMPRTL__kmpc_omp_wait_deps: {
889     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
890     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
891     // kmp_depend_info_t *noalias_dep_list);
892     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
893                                 CGM.Int32Ty,           CGM.VoidPtrTy,
894                                 CGM.Int32Ty,           CGM.VoidPtrTy};
895     llvm::FunctionType *FnTy =
896         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
897     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
898     break;
899   }
900   case OMPRTL__kmpc_cancellationpoint: {
901     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
902     // global_tid, kmp_int32 cncl_kind)
903     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
904     llvm::FunctionType *FnTy =
905         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
906     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
907     break;
908   }
909   case OMPRTL__kmpc_cancel: {
910     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
911     // kmp_int32 cncl_kind)
912     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
913     llvm::FunctionType *FnTy =
914         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
915     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
916     break;
917   }
918   case OMPRTL__tgt_target: {
919     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
920     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
921     // *arg_types);
922     llvm::Type *TypeParams[] = {CGM.Int32Ty,
923                                 CGM.VoidPtrTy,
924                                 CGM.Int32Ty,
925                                 CGM.VoidPtrPtrTy,
926                                 CGM.VoidPtrPtrTy,
927                                 CGM.SizeTy->getPointerTo(),
928                                 CGM.Int32Ty->getPointerTo()};
929     llvm::FunctionType *FnTy =
930         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
931     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
932     break;
933   }
934   }
935   return RTLFn;
936 }
937 
getTypeSize(CodeGenFunction & CGF,QualType Ty)938 static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) {
939   auto &C = CGF.getContext();
940   llvm::Value *Size = nullptr;
941   auto SizeInChars = C.getTypeSizeInChars(Ty);
942   if (SizeInChars.isZero()) {
943     // getTypeSizeInChars() returns 0 for a VLA.
944     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
945       llvm::Value *ArraySize;
946       std::tie(ArraySize, Ty) = CGF.getVLASize(VAT);
947       Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
948     }
949     SizeInChars = C.getTypeSizeInChars(Ty);
950     assert(!SizeInChars.isZero());
951     Size = CGF.Builder.CreateNUWMul(
952         Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()));
953   } else
954     Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity());
955   return Size;
956 }
957 
createForStaticInitFunction(unsigned IVSize,bool IVSigned)958 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
959                                                              bool IVSigned) {
960   assert((IVSize == 32 || IVSize == 64) &&
961          "IV size is not compatible with the omp runtime");
962   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
963                                        : "__kmpc_for_static_init_4u")
964                            : (IVSigned ? "__kmpc_for_static_init_8"
965                                        : "__kmpc_for_static_init_8u");
966   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
967   auto PtrTy = llvm::PointerType::getUnqual(ITy);
968   llvm::Type *TypeParams[] = {
969     getIdentTyPointerTy(),                     // loc
970     CGM.Int32Ty,                               // tid
971     CGM.Int32Ty,                               // schedtype
972     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
973     PtrTy,                                     // p_lower
974     PtrTy,                                     // p_upper
975     PtrTy,                                     // p_stride
976     ITy,                                       // incr
977     ITy                                        // chunk
978   };
979   llvm::FunctionType *FnTy =
980       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
981   return CGM.CreateRuntimeFunction(FnTy, Name);
982 }
983 
createDispatchInitFunction(unsigned IVSize,bool IVSigned)984 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
985                                                             bool IVSigned) {
986   assert((IVSize == 32 || IVSize == 64) &&
987          "IV size is not compatible with the omp runtime");
988   auto Name =
989       IVSize == 32
990           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
991           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
992   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
993   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
994                                CGM.Int32Ty,           // tid
995                                CGM.Int32Ty,           // schedtype
996                                ITy,                   // lower
997                                ITy,                   // upper
998                                ITy,                   // stride
999                                ITy                    // chunk
1000   };
1001   llvm::FunctionType *FnTy =
1002       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1003   return CGM.CreateRuntimeFunction(FnTy, Name);
1004 }
1005 
createDispatchFiniFunction(unsigned IVSize,bool IVSigned)1006 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
1007                                                             bool IVSigned) {
1008   assert((IVSize == 32 || IVSize == 64) &&
1009          "IV size is not compatible with the omp runtime");
1010   auto Name =
1011       IVSize == 32
1012           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1013           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1014   llvm::Type *TypeParams[] = {
1015       getIdentTyPointerTy(), // loc
1016       CGM.Int32Ty,           // tid
1017   };
1018   llvm::FunctionType *FnTy =
1019       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1020   return CGM.CreateRuntimeFunction(FnTy, Name);
1021 }
1022 
createDispatchNextFunction(unsigned IVSize,bool IVSigned)1023 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1024                                                             bool IVSigned) {
1025   assert((IVSize == 32 || IVSize == 64) &&
1026          "IV size is not compatible with the omp runtime");
1027   auto Name =
1028       IVSize == 32
1029           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1030           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1031   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1032   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1033   llvm::Type *TypeParams[] = {
1034     getIdentTyPointerTy(),                     // loc
1035     CGM.Int32Ty,                               // tid
1036     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1037     PtrTy,                                     // p_lower
1038     PtrTy,                                     // p_upper
1039     PtrTy                                      // p_stride
1040   };
1041   llvm::FunctionType *FnTy =
1042       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1043   return CGM.CreateRuntimeFunction(FnTy, Name);
1044 }
1045 
1046 llvm::Constant *
getOrCreateThreadPrivateCache(const VarDecl * VD)1047 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1048   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1049          !CGM.getContext().getTargetInfo().isTLSSupported());
1050   // Lookup the entry, lazily creating it if necessary.
1051   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1052                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1053 }
1054 
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)1055 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1056                                                 const VarDecl *VD,
1057                                                 Address VDAddr,
1058                                                 SourceLocation Loc) {
1059   if (CGM.getLangOpts().OpenMPUseTLS &&
1060       CGM.getContext().getTargetInfo().isTLSSupported())
1061     return VDAddr;
1062 
1063   auto VarTy = VDAddr.getElementType();
1064   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1065                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1066                                                        CGM.Int8PtrTy),
1067                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1068                          getOrCreateThreadPrivateCache(VD)};
1069   return Address(CGF.EmitRuntimeCall(
1070       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1071                  VDAddr.getAlignment());
1072 }
1073 
emitThreadPrivateVarInit(CodeGenFunction & CGF,Address VDAddr,llvm::Value * Ctor,llvm::Value * CopyCtor,llvm::Value * Dtor,SourceLocation Loc)1074 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1075     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1076     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1077   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1078   // library.
1079   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1080   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1081                       OMPLoc);
1082   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1083   // to register constructor/destructor for variable.
1084   llvm::Value *Args[] = {OMPLoc,
1085                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1086                                                        CGM.VoidPtrTy),
1087                          Ctor, CopyCtor, Dtor};
1088   CGF.EmitRuntimeCall(
1089       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1090 }
1091 
emitThreadPrivateVarDefinition(const VarDecl * VD,Address VDAddr,SourceLocation Loc,bool PerformInit,CodeGenFunction * CGF)1092 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1093     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1094     bool PerformInit, CodeGenFunction *CGF) {
1095   if (CGM.getLangOpts().OpenMPUseTLS &&
1096       CGM.getContext().getTargetInfo().isTLSSupported())
1097     return nullptr;
1098 
1099   VD = VD->getDefinition(CGM.getContext());
1100   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1101     ThreadPrivateWithDefinition.insert(VD);
1102     QualType ASTTy = VD->getType();
1103 
1104     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1105     auto Init = VD->getAnyInitializer();
1106     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1107       // Generate function that re-emits the declaration's initializer into the
1108       // threadprivate copy of the variable VD
1109       CodeGenFunction CtorCGF(CGM);
1110       FunctionArgList Args;
1111       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1112                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1113       Args.push_back(&Dst);
1114 
1115       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1116           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
1117           /*isVariadic=*/false);
1118       auto FTy = CGM.getTypes().GetFunctionType(FI);
1119       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1120           FTy, ".__kmpc_global_ctor_.", FI, Loc);
1121       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1122                             Args, SourceLocation());
1123       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1124           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1125           CGM.getContext().VoidPtrTy, Dst.getLocation());
1126       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1127       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1128                                              CtorCGF.ConvertTypeForMem(ASTTy));
1129       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1130                                /*IsInitializer=*/true);
1131       ArgVal = CtorCGF.EmitLoadOfScalar(
1132           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1133           CGM.getContext().VoidPtrTy, Dst.getLocation());
1134       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1135       CtorCGF.FinishFunction();
1136       Ctor = Fn;
1137     }
1138     if (VD->getType().isDestructedType() != QualType::DK_none) {
1139       // Generate function that emits destructor call for the threadprivate copy
1140       // of the variable VD
1141       CodeGenFunction DtorCGF(CGM);
1142       FunctionArgList Args;
1143       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1144                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1145       Args.push_back(&Dst);
1146 
1147       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1148           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
1149           /*isVariadic=*/false);
1150       auto FTy = CGM.getTypes().GetFunctionType(FI);
1151       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1152           FTy, ".__kmpc_global_dtor_.", FI, Loc);
1153       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1154                             SourceLocation());
1155       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1156           DtorCGF.GetAddrOfLocalVar(&Dst),
1157           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1158       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1159                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1160                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1161       DtorCGF.FinishFunction();
1162       Dtor = Fn;
1163     }
1164     // Do not emit init function if it is not required.
1165     if (!Ctor && !Dtor)
1166       return nullptr;
1167 
1168     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1169     auto CopyCtorTy =
1170         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1171                                 /*isVarArg=*/false)->getPointerTo();
1172     // Copying constructor for the threadprivate variable.
1173     // Must be NULL - reserved by runtime, but currently it requires that this
1174     // parameter is always NULL. Otherwise it fires assertion.
1175     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1176     if (Ctor == nullptr) {
1177       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1178                                             /*isVarArg=*/false)->getPointerTo();
1179       Ctor = llvm::Constant::getNullValue(CtorTy);
1180     }
1181     if (Dtor == nullptr) {
1182       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1183                                             /*isVarArg=*/false)->getPointerTo();
1184       Dtor = llvm::Constant::getNullValue(DtorTy);
1185     }
1186     if (!CGF) {
1187       auto InitFunctionTy =
1188           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1189       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1190           InitFunctionTy, ".__omp_threadprivate_init_.",
1191           CGM.getTypes().arrangeNullaryFunction());
1192       CodeGenFunction InitCGF(CGM);
1193       FunctionArgList ArgList;
1194       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1195                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1196                             Loc);
1197       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1198       InitCGF.FinishFunction();
1199       return InitFunction;
1200     }
1201     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1202   }
1203   return nullptr;
1204 }
1205 
1206 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1207 /// function. Here is the logic:
1208 /// if (Cond) {
1209 ///   ThenGen();
1210 /// } else {
1211 ///   ElseGen();
1212 /// }
emitOMPIfClause(CodeGenFunction & CGF,const Expr * Cond,const RegionCodeGenTy & ThenGen,const RegionCodeGenTy & ElseGen)1213 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1214                             const RegionCodeGenTy &ThenGen,
1215                             const RegionCodeGenTy &ElseGen) {
1216   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1217 
1218   // If the condition constant folds and can be elided, try to avoid emitting
1219   // the condition and the dead arm of the if/else.
1220   bool CondConstant;
1221   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1222     CodeGenFunction::RunCleanupsScope Scope(CGF);
1223     if (CondConstant) {
1224       ThenGen(CGF);
1225     } else {
1226       ElseGen(CGF);
1227     }
1228     return;
1229   }
1230 
1231   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1232   // emit the conditional branch.
1233   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1234   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1235   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1236   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1237 
1238   // Emit the 'then' code.
1239   CGF.EmitBlock(ThenBlock);
1240   {
1241     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1242     ThenGen(CGF);
1243   }
1244   CGF.EmitBranch(ContBlock);
1245   // Emit the 'else' code if present.
1246   {
1247     // There is no need to emit line number for unconditional branch.
1248     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1249     CGF.EmitBlock(ElseBlock);
1250   }
1251   {
1252     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1253     ElseGen(CGF);
1254   }
1255   {
1256     // There is no need to emit line number for unconditional branch.
1257     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1258     CGF.EmitBranch(ContBlock);
1259   }
1260   // Emit the continuation block for code after the if.
1261   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1262 }
1263 
emitParallelCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::Value * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars,const Expr * IfCond)1264 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1265                                        llvm::Value *OutlinedFn,
1266                                        ArrayRef<llvm::Value *> CapturedVars,
1267                                        const Expr *IfCond) {
1268   if (!CGF.HaveInsertPoint())
1269     return;
1270   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1271   auto &&ThenGen = [this, OutlinedFn, CapturedVars,
1272                     RTLoc](CodeGenFunction &CGF) {
1273     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1274     llvm::Value *Args[] = {
1275         RTLoc,
1276         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1277         CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
1278     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1279     RealArgs.append(std::begin(Args), std::end(Args));
1280     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1281 
1282     auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1283     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1284   };
1285   auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc,
1286                     Loc](CodeGenFunction &CGF) {
1287     auto ThreadID = getThreadID(CGF, Loc);
1288     // Build calls:
1289     // __kmpc_serialized_parallel(&Loc, GTid);
1290     llvm::Value *Args[] = {RTLoc, ThreadID};
1291     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1292                         Args);
1293 
1294     // OutlinedFn(&GTid, &zero, CapturedStruct);
1295     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
1296     Address ZeroAddr =
1297       CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1298                            /*Name*/ ".zero.addr");
1299     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1300     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1301     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1302     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1303     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1304     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1305 
1306     // __kmpc_end_serialized_parallel(&Loc, GTid);
1307     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1308     CGF.EmitRuntimeCall(
1309         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1310   };
1311   if (IfCond) {
1312     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1313   } else {
1314     CodeGenFunction::RunCleanupsScope Scope(CGF);
1315     ThenGen(CGF);
1316   }
1317 }
1318 
1319 // If we're inside an (outlined) parallel region, use the region info's
1320 // thread-ID variable (it is passed in a first argument of the outlined function
1321 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1322 // regular serial code region, get thread ID by calling kmp_int32
1323 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1324 // return the address of that temp.
emitThreadIDAddress(CodeGenFunction & CGF,SourceLocation Loc)1325 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1326                                              SourceLocation Loc) {
1327   if (auto OMPRegionInfo =
1328           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1329     if (OMPRegionInfo->getThreadIDVariable())
1330       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1331 
1332   auto ThreadID = getThreadID(CGF, Loc);
1333   auto Int32Ty =
1334       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1335   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1336   CGF.EmitStoreOfScalar(ThreadID,
1337                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1338 
1339   return ThreadIDTemp;
1340 }
1341 
1342 llvm::Constant *
getOrCreateInternalVariable(llvm::Type * Ty,const llvm::Twine & Name)1343 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1344                                              const llvm::Twine &Name) {
1345   SmallString<256> Buffer;
1346   llvm::raw_svector_ostream Out(Buffer);
1347   Out << Name;
1348   auto RuntimeName = Out.str();
1349   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1350   if (Elem.second) {
1351     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1352            "OMP internal variable has different type than requested");
1353     return &*Elem.second;
1354   }
1355 
1356   return Elem.second = new llvm::GlobalVariable(
1357              CGM.getModule(), Ty, /*IsConstant*/ false,
1358              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1359              Elem.first());
1360 }
1361 
getCriticalRegionLock(StringRef CriticalName)1362 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1363   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1364   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1365 }
1366 
1367 namespace {
1368 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
1369   llvm::Value *Callee;
1370   llvm::Value *Args[N];
1371 
1372 public:
CallEndCleanup(llvm::Value * Callee,ArrayRef<llvm::Value * > CleanupArgs)1373   CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
1374       : Callee(Callee) {
1375     assert(CleanupArgs.size() == N);
1376     std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
1377   }
Emit(CodeGenFunction & CGF,Flags)1378   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1379     if (!CGF.HaveInsertPoint())
1380       return;
1381     CGF.EmitRuntimeCall(Callee, Args);
1382   }
1383 };
1384 } // anonymous namespace
1385 
emitCriticalRegion(CodeGenFunction & CGF,StringRef CriticalName,const RegionCodeGenTy & CriticalOpGen,SourceLocation Loc,const Expr * Hint)1386 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1387                                          StringRef CriticalName,
1388                                          const RegionCodeGenTy &CriticalOpGen,
1389                                          SourceLocation Loc, const Expr *Hint) {
1390   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1391   // CriticalOpGen();
1392   // __kmpc_end_critical(ident_t *, gtid, Lock);
1393   // Prepare arguments and build a call to __kmpc_critical
1394   if (!CGF.HaveInsertPoint())
1395     return;
1396   CodeGenFunction::RunCleanupsScope Scope(CGF);
1397   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1398                          getCriticalRegionLock(CriticalName)};
1399   if (Hint) {
1400     llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args),
1401                                                      std::end(Args));
1402     auto *HintVal = CGF.EmitScalarExpr(Hint);
1403     ArgsWithHint.push_back(
1404         CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false));
1405     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint),
1406                         ArgsWithHint);
1407   } else
1408     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1409   // Build a call to __kmpc_end_critical
1410   CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1411       NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1412       llvm::makeArrayRef(Args));
1413   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
1414 }
1415 
emitIfStmt(CodeGenFunction & CGF,llvm::Value * IfCond,OpenMPDirectiveKind Kind,SourceLocation Loc,const RegionCodeGenTy & BodyOpGen)1416 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1417                        OpenMPDirectiveKind Kind, SourceLocation Loc,
1418                        const RegionCodeGenTy &BodyOpGen) {
1419   llvm::Value *CallBool = CGF.EmitScalarConversion(
1420       IfCond,
1421       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1422       CGF.getContext().BoolTy, Loc);
1423 
1424   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1425   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1426   // Generate the branch (If-stmt)
1427   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1428   CGF.EmitBlock(ThenBlock);
1429   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
1430   // Emit the rest of bblocks/branches
1431   CGF.EmitBranch(ContBlock);
1432   CGF.EmitBlock(ContBlock, true);
1433 }
1434 
emitMasterRegion(CodeGenFunction & CGF,const RegionCodeGenTy & MasterOpGen,SourceLocation Loc)1435 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1436                                        const RegionCodeGenTy &MasterOpGen,
1437                                        SourceLocation Loc) {
1438   if (!CGF.HaveInsertPoint())
1439     return;
1440   // if(__kmpc_master(ident_t *, gtid)) {
1441   //   MasterOpGen();
1442   //   __kmpc_end_master(ident_t *, gtid);
1443   // }
1444   // Prepare arguments and build a call to __kmpc_master
1445   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1446   auto *IsMaster =
1447       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1448   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1449       MasterCallEndCleanup;
1450   emitIfStmt(
1451       CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
1452         CodeGenFunction::RunCleanupsScope Scope(CGF);
1453         CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
1454             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1455             llvm::makeArrayRef(Args));
1456         MasterOpGen(CGF);
1457       });
1458 }
1459 
emitTaskyieldCall(CodeGenFunction & CGF,SourceLocation Loc)1460 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1461                                         SourceLocation Loc) {
1462   if (!CGF.HaveInsertPoint())
1463     return;
1464   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1465   llvm::Value *Args[] = {
1466       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1467       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1468   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1469 }
1470 
emitTaskgroupRegion(CodeGenFunction & CGF,const RegionCodeGenTy & TaskgroupOpGen,SourceLocation Loc)1471 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
1472                                           const RegionCodeGenTy &TaskgroupOpGen,
1473                                           SourceLocation Loc) {
1474   if (!CGF.HaveInsertPoint())
1475     return;
1476   // __kmpc_taskgroup(ident_t *, gtid);
1477   // TaskgroupOpGen();
1478   // __kmpc_end_taskgroup(ident_t *, gtid);
1479   // Prepare arguments and build a call to __kmpc_taskgroup
1480   {
1481     CodeGenFunction::RunCleanupsScope Scope(CGF);
1482     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1483     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
1484     // Build a call to __kmpc_end_taskgroup
1485     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1486         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
1487         llvm::makeArrayRef(Args));
1488     emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
1489   }
1490 }
1491 
1492 /// Given an array of pointers to variables, project the address of a
1493 /// given variable.
emitAddrOfVarFromArray(CodeGenFunction & CGF,Address Array,unsigned Index,const VarDecl * Var)1494 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
1495                                       unsigned Index, const VarDecl *Var) {
1496   // Pull out the pointer to the variable.
1497   Address PtrAddr =
1498       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
1499   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
1500 
1501   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
1502   Addr = CGF.Builder.CreateElementBitCast(
1503       Addr, CGF.ConvertTypeForMem(Var->getType()));
1504   return Addr;
1505 }
1506 
emitCopyprivateCopyFunction(CodeGenModule & CGM,llvm::Type * ArgsType,ArrayRef<const Expr * > CopyprivateVars,ArrayRef<const Expr * > DestExprs,ArrayRef<const Expr * > SrcExprs,ArrayRef<const Expr * > AssignmentOps)1507 static llvm::Value *emitCopyprivateCopyFunction(
1508     CodeGenModule &CGM, llvm::Type *ArgsType,
1509     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1510     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1511   auto &C = CGM.getContext();
1512   // void copy_func(void *LHSArg, void *RHSArg);
1513   FunctionArgList Args;
1514   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1515                            C.VoidPtrTy);
1516   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1517                            C.VoidPtrTy);
1518   Args.push_back(&LHSArg);
1519   Args.push_back(&RHSArg);
1520   FunctionType::ExtInfo EI;
1521   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1522       C.VoidTy, Args, EI, /*isVariadic=*/false);
1523   auto *Fn = llvm::Function::Create(
1524       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1525       ".omp.copyprivate.copy_func", &CGM.getModule());
1526   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
1527   CodeGenFunction CGF(CGM);
1528   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1529   // Dest = (void*[n])(LHSArg);
1530   // Src = (void*[n])(RHSArg);
1531   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1532       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
1533       ArgsType), CGF.getPointerAlign());
1534   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1535       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
1536       ArgsType), CGF.getPointerAlign());
1537   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1538   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1539   // ...
1540   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1541   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1542     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
1543     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
1544 
1545     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
1546     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
1547 
1548     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
1549     QualType Type = VD->getType();
1550     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
1551   }
1552   CGF.FinishFunction();
1553   return Fn;
1554 }
1555 
emitSingleRegion(CodeGenFunction & CGF,const RegionCodeGenTy & SingleOpGen,SourceLocation Loc,ArrayRef<const Expr * > CopyprivateVars,ArrayRef<const Expr * > SrcExprs,ArrayRef<const Expr * > DstExprs,ArrayRef<const Expr * > AssignmentOps)1556 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1557                                        const RegionCodeGenTy &SingleOpGen,
1558                                        SourceLocation Loc,
1559                                        ArrayRef<const Expr *> CopyprivateVars,
1560                                        ArrayRef<const Expr *> SrcExprs,
1561                                        ArrayRef<const Expr *> DstExprs,
1562                                        ArrayRef<const Expr *> AssignmentOps) {
1563   if (!CGF.HaveInsertPoint())
1564     return;
1565   assert(CopyprivateVars.size() == SrcExprs.size() &&
1566          CopyprivateVars.size() == DstExprs.size() &&
1567          CopyprivateVars.size() == AssignmentOps.size());
1568   auto &C = CGM.getContext();
1569   // int32 did_it = 0;
1570   // if(__kmpc_single(ident_t *, gtid)) {
1571   //   SingleOpGen();
1572   //   __kmpc_end_single(ident_t *, gtid);
1573   //   did_it = 1;
1574   // }
1575   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1576   // <copy_func>, did_it);
1577 
1578   Address DidIt = Address::invalid();
1579   if (!CopyprivateVars.empty()) {
1580     // int32 did_it = 0;
1581     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1582     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1583     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
1584   }
1585   // Prepare arguments and build a call to __kmpc_single
1586   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1587   auto *IsSingle =
1588       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1589   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1590       SingleCallEndCleanup;
1591   emitIfStmt(
1592       CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
1593         CodeGenFunction::RunCleanupsScope Scope(CGF);
1594         CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
1595             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1596             llvm::makeArrayRef(Args));
1597         SingleOpGen(CGF);
1598         if (DidIt.isValid()) {
1599           // did_it = 1;
1600           CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
1601         }
1602       });
1603   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1604   // <copy_func>, did_it);
1605   if (DidIt.isValid()) {
1606     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1607     auto CopyprivateArrayTy =
1608         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1609                                /*IndexTypeQuals=*/0);
1610     // Create a list of all private variables for copyprivate.
1611     Address CopyprivateList =
1612         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1613     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1614       Address Elem = CGF.Builder.CreateConstArrayGEP(
1615           CopyprivateList, I, CGF.getPointerSize());
1616       CGF.Builder.CreateStore(
1617           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1618               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
1619           Elem);
1620     }
1621     // Build function that copies private values from single region to all other
1622     // threads in the corresponding parallel region.
1623     auto *CpyFn = emitCopyprivateCopyFunction(
1624         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1625         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1626     auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy);
1627     Address CL =
1628       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1629                                                       CGF.VoidPtrTy);
1630     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
1631     llvm::Value *Args[] = {
1632         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1633         getThreadID(CGF, Loc),        // i32 <gtid>
1634         BufSize,                      // size_t <buf_size>
1635         CL.getPointer(),              // void *<copyprivate list>
1636         CpyFn,                        // void (*) (void *, void *) <copy_func>
1637         DidItVal                      // i32 did_it
1638     };
1639     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1640   }
1641 }
1642 
emitOrderedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & OrderedOpGen,SourceLocation Loc,bool IsThreads)1643 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1644                                         const RegionCodeGenTy &OrderedOpGen,
1645                                         SourceLocation Loc, bool IsThreads) {
1646   if (!CGF.HaveInsertPoint())
1647     return;
1648   // __kmpc_ordered(ident_t *, gtid);
1649   // OrderedOpGen();
1650   // __kmpc_end_ordered(ident_t *, gtid);
1651   // Prepare arguments and build a call to __kmpc_ordered
1652   CodeGenFunction::RunCleanupsScope Scope(CGF);
1653   if (IsThreads) {
1654     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1655     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1656     // Build a call to __kmpc_end_ordered
1657     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1658         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1659         llvm::makeArrayRef(Args));
1660   }
1661   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
1662 }
1663 
emitBarrierCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind Kind,bool EmitChecks,bool ForceSimpleCall)1664 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1665                                       OpenMPDirectiveKind Kind, bool EmitChecks,
1666                                       bool ForceSimpleCall) {
1667   if (!CGF.HaveInsertPoint())
1668     return;
1669   // Build call __kmpc_cancel_barrier(loc, thread_id);
1670   // Build call __kmpc_barrier(loc, thread_id);
1671   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1672   if (Kind == OMPD_for) {
1673     Flags =
1674         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1675   } else if (Kind == OMPD_sections) {
1676     Flags = static_cast<OpenMPLocationFlags>(Flags |
1677                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1678   } else if (Kind == OMPD_single) {
1679     Flags =
1680         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1681   } else if (Kind == OMPD_barrier) {
1682     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1683   } else {
1684     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1685   }
1686   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
1687   // thread_id);
1688   auto *OMPRegionInfo =
1689       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
1690   // Do not emit barrier call in the single directive emitted in some rare cases
1691   // for sections directives.
1692   if (OMPRegionInfo && OMPRegionInfo->getDirectiveKind() == OMPD_single)
1693     return;
1694   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1695                          getThreadID(CGF, Loc)};
1696   if (OMPRegionInfo) {
1697     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
1698       auto *Result = CGF.EmitRuntimeCall(
1699           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1700       if (EmitChecks) {
1701         // if (__kmpc_cancel_barrier()) {
1702         //   exit from construct;
1703         // }
1704         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
1705         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
1706         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
1707         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
1708         CGF.EmitBlock(ExitBB);
1709         //   exit from construct;
1710         auto CancelDestination =
1711             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
1712         CGF.EmitBranchThroughCleanup(CancelDestination);
1713         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
1714       }
1715       return;
1716     }
1717   }
1718   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
1719 }
1720 
/// \brief Schedule types for 'omp for' loops. The numeric values mirror the
/// enum sched_type in kmp.h and are passed to the runtime as-is, so they must
/// not be renumbered.
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  /// \brief schedule(static, chunk).
  OMP_sch_static_chunked = 33,
  /// \brief schedule(static) without a chunk.
  OMP_sch_static = 34,
  /// \brief schedule(dynamic).
  OMP_sch_dynamic_chunked = 35,
  /// \brief schedule(guided).
  OMP_sch_guided_chunked = 36,
  /// \brief schedule(runtime).
  OMP_sch_runtime = 37,
  /// \brief schedule(auto).
  OMP_sch_auto = 38,

  // 'ordered' versions: same kinds, used when the loop is ordered.
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  /// \brief Ordered schedule(static, chunk).
  OMP_ord_static_chunked = 65,
  /// \brief Ordered schedule(static) without a chunk.
  OMP_ord_static = 66,
  /// \brief Ordered schedule(dynamic).
  OMP_ord_dynamic_chunked = 67,
  /// \brief Ordered schedule(guided).
  OMP_ord_guided_chunked = 68,
  /// \brief Ordered schedule(runtime).
  OMP_ord_runtime = 69,
  /// \brief Ordered schedule(auto).
  OMP_ord_auto = 70,

  /// \brief Alias for the non-chunked static schedule.
  OMP_sch_default = OMP_sch_static,
};
1742 
1743 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,bool Chunked,bool Ordered)1744 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1745                                           bool Chunked, bool Ordered) {
1746   switch (ScheduleKind) {
1747   case OMPC_SCHEDULE_static:
1748     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
1749                    : (Ordered ? OMP_ord_static : OMP_sch_static);
1750   case OMPC_SCHEDULE_dynamic:
1751     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
1752   case OMPC_SCHEDULE_guided:
1753     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
1754   case OMPC_SCHEDULE_runtime:
1755     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
1756   case OMPC_SCHEDULE_auto:
1757     return Ordered ? OMP_ord_auto : OMP_sch_auto;
1758   case OMPC_SCHEDULE_unknown:
1759     assert(!Chunked && "chunk was specified but schedule kind not known");
1760     return Ordered ? OMP_ord_static : OMP_sch_static;
1761   }
1762   llvm_unreachable("Unexpected runtime schedule");
1763 }
1764 
isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const1765 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1766                                          bool Chunked) const {
1767   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
1768   return Schedule == OMP_sch_static;
1769 }
1770 
isDynamic(OpenMPScheduleClauseKind ScheduleKind) const1771 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1772   auto Schedule =
1773       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
1774   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1775   return Schedule != OMP_sch_static;
1776 }
1777 
emitForDispatchInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPScheduleClauseKind ScheduleKind,unsigned IVSize,bool IVSigned,bool Ordered,llvm::Value * UB,llvm::Value * Chunk)1778 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
1779                                           SourceLocation Loc,
1780                                           OpenMPScheduleClauseKind ScheduleKind,
1781                                           unsigned IVSize, bool IVSigned,
1782                                           bool Ordered, llvm::Value *UB,
1783                                           llvm::Value *Chunk) {
1784   if (!CGF.HaveInsertPoint())
1785     return;
1786   OpenMPSchedType Schedule =
1787       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1788   assert(Ordered ||
1789          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
1790           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
1791   // Call __kmpc_dispatch_init(
1792   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1793   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1794   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1795 
1796   // If the Chunk was not specified in the clause - use default value 1.
1797   if (Chunk == nullptr)
1798     Chunk = CGF.Builder.getIntN(IVSize, 1);
1799   llvm::Value *Args[] = {
1800     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1801     getThreadID(CGF, Loc),
1802     CGF.Builder.getInt32(Schedule), // Schedule type
1803     CGF.Builder.getIntN(IVSize, 0), // Lower
1804     UB,                             // Upper
1805     CGF.Builder.getIntN(IVSize, 1), // Stride
1806     Chunk                           // Chunk
1807   };
1808   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1809 }
1810 
emitForStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPScheduleClauseKind ScheduleKind,unsigned IVSize,bool IVSigned,bool Ordered,Address IL,Address LB,Address UB,Address ST,llvm::Value * Chunk)1811 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
1812                                         SourceLocation Loc,
1813                                         OpenMPScheduleClauseKind ScheduleKind,
1814                                         unsigned IVSize, bool IVSigned,
1815                                         bool Ordered, Address IL, Address LB,
1816                                         Address UB, Address ST,
1817                                         llvm::Value *Chunk) {
1818   if (!CGF.HaveInsertPoint())
1819     return;
1820   OpenMPSchedType Schedule =
1821     getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1822   assert(!Ordered);
1823   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
1824          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);
1825 
1826   // Call __kmpc_for_static_init(
1827   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1828   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1829   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1830   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1831   if (Chunk == nullptr) {
1832     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
1833            "expected static non-chunked schedule");
1834     // If the Chunk was not specified in the clause - use default value 1.
1835       Chunk = CGF.Builder.getIntN(IVSize, 1);
1836   } else {
1837     assert((Schedule == OMP_sch_static_chunked ||
1838             Schedule == OMP_ord_static_chunked) &&
1839            "expected static chunked schedule");
1840   }
1841   llvm::Value *Args[] = {
1842     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1843     getThreadID(CGF, Loc),
1844     CGF.Builder.getInt32(Schedule), // Schedule type
1845     IL.getPointer(),                // &isLastIter
1846     LB.getPointer(),                // &LB
1847     UB.getPointer(),                // &UB
1848     ST.getPointer(),                // &Stride
1849     CGF.Builder.getIntN(IVSize, 1), // Incr
1850     Chunk                           // Chunk
1851   };
1852   CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1853 }
1854 
emitForStaticFinish(CodeGenFunction & CGF,SourceLocation Loc)1855 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1856                                           SourceLocation Loc) {
1857   if (!CGF.HaveInsertPoint())
1858     return;
1859   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1860   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1861                          getThreadID(CGF, Loc)};
1862   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1863                       Args);
1864 }
1865 
emitForOrderedIterationEnd(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned)1866 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
1867                                                  SourceLocation Loc,
1868                                                  unsigned IVSize,
1869                                                  bool IVSigned) {
1870   if (!CGF.HaveInsertPoint())
1871     return;
1872   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1873   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1874                          getThreadID(CGF, Loc)};
1875   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
1876 }
1877 
emitForNext(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned,Address IL,Address LB,Address UB,Address ST)1878 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1879                                           SourceLocation Loc, unsigned IVSize,
1880                                           bool IVSigned, Address IL,
1881                                           Address LB, Address UB,
1882                                           Address ST) {
1883   // Call __kmpc_dispatch_next(
1884   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1885   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1886   //          kmp_int[32|64] *p_stride);
1887   llvm::Value *Args[] = {
1888       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1889       IL.getPointer(), // &isLastIter
1890       LB.getPointer(), // &Lower
1891       UB.getPointer(), // &Upper
1892       ST.getPointer()  // &Stride
1893   };
1894   llvm::Value *Call =
1895       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1896   return CGF.EmitScalarConversion(
1897       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1898       CGF.getContext().BoolTy, Loc);
1899 }
1900 
emitNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)1901 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1902                                            llvm::Value *NumThreads,
1903                                            SourceLocation Loc) {
1904   if (!CGF.HaveInsertPoint())
1905     return;
1906   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1907   llvm::Value *Args[] = {
1908       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1909       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1910   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1911                       Args);
1912 }
1913 
emitProcBindClause(CodeGenFunction & CGF,OpenMPProcBindClauseKind ProcBind,SourceLocation Loc)1914 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
1915                                          OpenMPProcBindClauseKind ProcBind,
1916                                          SourceLocation Loc) {
1917   if (!CGF.HaveInsertPoint())
1918     return;
1919   // Constants for proc bind value accepted by the runtime.
1920   enum ProcBindTy {
1921     ProcBindFalse = 0,
1922     ProcBindTrue,
1923     ProcBindMaster,
1924     ProcBindClose,
1925     ProcBindSpread,
1926     ProcBindIntel,
1927     ProcBindDefault
1928   } RuntimeProcBind;
1929   switch (ProcBind) {
1930   case OMPC_PROC_BIND_master:
1931     RuntimeProcBind = ProcBindMaster;
1932     break;
1933   case OMPC_PROC_BIND_close:
1934     RuntimeProcBind = ProcBindClose;
1935     break;
1936   case OMPC_PROC_BIND_spread:
1937     RuntimeProcBind = ProcBindSpread;
1938     break;
1939   case OMPC_PROC_BIND_unknown:
1940     llvm_unreachable("Unsupported proc_bind value.");
1941   }
1942   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
1943   llvm::Value *Args[] = {
1944       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1945       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
1946   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
1947 }
1948 
emitFlush(CodeGenFunction & CGF,ArrayRef<const Expr * >,SourceLocation Loc)1949 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1950                                 SourceLocation Loc) {
1951   if (!CGF.HaveInsertPoint())
1952     return;
1953   // Build call void __kmpc_flush(ident_t *loc)
1954   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1955                       emitUpdateLocation(CGF, Loc));
1956 }
1957 
namespace {
/// \brief Positions of the fields inside the kmp_task_t record.
///
/// The values are used as offsets from the record's first field, so they must
/// follow the order in which the fields are added to the record.
enum KmpTaskTFields {
  /// \brief Pointer to the block of shared variables.
  KmpTaskTShareds = 0,
  /// \brief Task entry routine.
  KmpTaskTRoutine = 1,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// \brief Routine that calls the destructors of the private variables.
  KmpTaskTDestructors = 3,
};
} // anonymous namespace
1971 
emitKmpRoutineEntryT(QualType KmpInt32Ty)1972 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1973   if (!KmpRoutineEntryPtrTy) {
1974     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
1975     auto &C = CGM.getContext();
1976     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1977     FunctionProtoType::ExtProtoInfo EPI;
1978     KmpRoutineEntryPtrQTy = C.getPointerType(
1979         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1980     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
1981   }
1982 }
1983 
addFieldToRecordDecl(ASTContext & C,DeclContext * DC,QualType FieldTy)1984 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1985                                        QualType FieldTy) {
1986   auto *Field = FieldDecl::Create(
1987       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1988       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1989       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1990   Field->setAccess(AS_public);
1991   DC->addDecl(Field);
1992   return Field;
1993 }
1994 
1995 namespace {
1996 struct PrivateHelpersTy {
PrivateHelpersTy__anondef231dd0811::PrivateHelpersTy1997   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
1998                    const VarDecl *PrivateElemInit)
1999       : Original(Original), PrivateCopy(PrivateCopy),
2000         PrivateElemInit(PrivateElemInit) {}
2001   const VarDecl *Original;
2002   const VarDecl *PrivateCopy;
2003   const VarDecl *PrivateElemInit;
2004 };
2005 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2006 } // anonymous namespace
2007 
2008 static RecordDecl *
createPrivatesRecordDecl(CodeGenModule & CGM,ArrayRef<PrivateDataTy> Privates)2009 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2010   if (!Privates.empty()) {
2011     auto &C = CGM.getContext();
2012     // Build struct .kmp_privates_t. {
2013     //         /*  private vars  */
2014     //       };
2015     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
2016     RD->startDefinition();
2017     for (auto &&Pair : Privates) {
2018       auto *VD = Pair.second.Original;
2019       auto Type = VD->getType();
2020       Type = Type.getNonReferenceType();
2021       auto *FD = addFieldToRecordDecl(C, RD, Type);
2022       if (VD->hasAttrs()) {
2023         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2024              E(VD->getAttrs().end());
2025              I != E; ++I)
2026           FD->addAttr(*I);
2027       }
2028     }
2029     RD->completeDefinition();
2030     return RD;
2031   }
2032   return nullptr;
2033 }
2034 
2035 static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule & CGM,QualType KmpInt32Ty,QualType KmpRoutineEntryPointerQTy)2036 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
2037                          QualType KmpRoutineEntryPointerQTy) {
2038   auto &C = CGM.getContext();
2039   // Build struct kmp_task_t {
2040   //         void *              shareds;
2041   //         kmp_routine_entry_t routine;
2042   //         kmp_int32           part_id;
2043   //         kmp_routine_entry_t destructors;
2044   //       };
2045   auto *RD = C.buildImplicitRecord("kmp_task_t");
2046   RD->startDefinition();
2047   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2048   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2049   addFieldToRecordDecl(C, RD, KmpInt32Ty);
2050   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2051   RD->completeDefinition();
2052   return RD;
2053 }
2054 
2055 static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule & CGM,QualType KmpTaskTQTy,ArrayRef<PrivateDataTy> Privates)2056 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2057                                      ArrayRef<PrivateDataTy> Privates) {
2058   auto &C = CGM.getContext();
2059   // Build struct kmp_task_t_with_privates {
2060   //         kmp_task_t task_data;
2061   //         .kmp_privates_t. privates;
2062   //       };
2063   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2064   RD->startDefinition();
2065   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2066   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
2067     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2068   }
2069   RD->completeDefinition();
2070   return RD;
2071 }
2072 
2073 /// \brief Emit a proxy function which accepts kmp_task_t as the second
2074 /// argument.
2075 /// \code
2076 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2077 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
2078 ///   tt->shareds);
2079 ///   return 0;
2080 /// }
2081 /// \endcode
2082 static llvm::Value *
emitProxyTaskFunction(CodeGenModule & CGM,SourceLocation Loc,QualType KmpInt32Ty,QualType KmpTaskTWithPrivatesPtrQTy,QualType KmpTaskTWithPrivatesQTy,QualType KmpTaskTQTy,QualType SharedsPtrTy,llvm::Value * TaskFunction,llvm::Value * TaskPrivatesMap)2083 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2084                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
2085                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2086                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
2087                       llvm::Value *TaskPrivatesMap) {
2088   auto &C = CGM.getContext();
2089   FunctionArgList Args;
2090   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2091   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2092                                 /*Id=*/nullptr,
2093                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2094   Args.push_back(&GtidArg);
2095   Args.push_back(&TaskTypeArg);
2096   FunctionType::ExtInfo Info;
2097   auto &TaskEntryFnInfo =
2098       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2099                                                     /*isVariadic=*/false);
2100   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2101   auto *TaskEntry =
2102       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
2103                              ".omp_task_entry.", &CGM.getModule());
2104   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
2105   CodeGenFunction CGF(CGM);
2106   CGF.disableDebugInfo();
2107   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
2108 
2109   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
2110   // tt->task_data.shareds);
2111   auto *GtidParam = CGF.EmitLoadOfScalar(
2112       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
2113   LValue TDBase = emitLoadOfPointerLValue(
2114       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2115   auto *KmpTaskTWithPrivatesQTyRD =
2116       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2117   LValue Base =
2118       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
2119   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2120   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
2121   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
2122   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
2123 
2124   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
2125   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
2126   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2127       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
2128       CGF.ConvertTypeForMem(SharedsPtrTy));
2129 
2130   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
2131   llvm::Value *PrivatesParam;
2132   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
2133     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
2134     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2135         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
2136   } else {
2137     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2138   }
2139 
2140   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
2141                              TaskPrivatesMap, SharedsParam};
2142   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
2143   CGF.EmitStoreThroughLValue(
2144       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
2145       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
2146   CGF.FinishFunction();
2147   return TaskEntry;
2148 }
2149 
emitDestructorsFunction(CodeGenModule & CGM,SourceLocation Loc,QualType KmpInt32Ty,QualType KmpTaskTWithPrivatesPtrQTy,QualType KmpTaskTWithPrivatesQTy)2150 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
2151                                             SourceLocation Loc,
2152                                             QualType KmpInt32Ty,
2153                                             QualType KmpTaskTWithPrivatesPtrQTy,
2154                                             QualType KmpTaskTWithPrivatesQTy) {
2155   auto &C = CGM.getContext();
2156   FunctionArgList Args;
2157   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2158   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2159                                 /*Id=*/nullptr,
2160                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2161   Args.push_back(&GtidArg);
2162   Args.push_back(&TaskTypeArg);
2163   FunctionType::ExtInfo Info;
2164   auto &DestructorFnInfo =
2165       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2166                                                     /*isVariadic=*/false);
2167   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
2168   auto *DestructorFn =
2169       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
2170                              ".omp_task_destructor.", &CGM.getModule());
2171   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
2172                                     DestructorFnInfo);
2173   CodeGenFunction CGF(CGM);
2174   CGF.disableDebugInfo();
2175   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
2176                     Args);
2177 
2178   LValue Base = emitLoadOfPointerLValue(
2179       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2180   auto *KmpTaskTWithPrivatesQTyRD =
2181       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2182   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2183   Base = CGF.EmitLValueForField(Base, *FI);
2184   for (auto *Field :
2185        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
2186     if (auto DtorKind = Field->getType().isDestructedType()) {
2187       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
2188       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
2189     }
2190   }
2191   CGF.FinishFunction();
2192   return DestructorFn;
2193 }
2194 
2195 /// \brief Emit a privates mapping function for correct handling of private and
2196 /// firstprivate variables.
2197 /// \code
2198 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
2199 /// **noalias priv1,...,  <tyn> **noalias privn) {
2200 ///   *priv1 = &.privates.priv1;
2201 ///   ...;
2202 ///   *privn = &.privates.privn;
2203 /// }
2204 /// \endcode
2205 static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule & CGM,SourceLocation Loc,ArrayRef<const Expr * > PrivateVars,ArrayRef<const Expr * > FirstprivateVars,QualType PrivatesQTy,ArrayRef<PrivateDataTy> Privates)2206 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
2207                                ArrayRef<const Expr *> PrivateVars,
2208                                ArrayRef<const Expr *> FirstprivateVars,
2209                                QualType PrivatesQTy,
2210                                ArrayRef<PrivateDataTy> Privates) {
2211   auto &C = CGM.getContext();
2212   FunctionArgList Args;
2213   ImplicitParamDecl TaskPrivatesArg(
2214       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2215       C.getPointerType(PrivatesQTy).withConst().withRestrict());
2216   Args.push_back(&TaskPrivatesArg);
2217   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
2218   unsigned Counter = 1;
2219   for (auto *E: PrivateVars) {
2220     Args.push_back(ImplicitParamDecl::Create(
2221         C, /*DC=*/nullptr, Loc,
2222         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2223                             .withConst()
2224                             .withRestrict()));
2225     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2226     PrivateVarsPos[VD] = Counter;
2227     ++Counter;
2228   }
2229   for (auto *E : FirstprivateVars) {
2230     Args.push_back(ImplicitParamDecl::Create(
2231         C, /*DC=*/nullptr, Loc,
2232         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2233                             .withConst()
2234                             .withRestrict()));
2235     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2236     PrivateVarsPos[VD] = Counter;
2237     ++Counter;
2238   }
2239   FunctionType::ExtInfo Info;
2240   auto &TaskPrivatesMapFnInfo =
2241       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
2242                                                     /*isVariadic=*/false);
2243   auto *TaskPrivatesMapTy =
2244       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
2245   auto *TaskPrivatesMap = llvm::Function::Create(
2246       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
2247       ".omp_task_privates_map.", &CGM.getModule());
2248   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
2249                                     TaskPrivatesMapFnInfo);
2250   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
2251   CodeGenFunction CGF(CGM);
2252   CGF.disableDebugInfo();
2253   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
2254                     TaskPrivatesMapFnInfo, Args);
2255 
2256   // *privi = &.privates.privi;
2257   LValue Base = emitLoadOfPointerLValue(
2258       CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
2259   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
2260   Counter = 0;
2261   for (auto *Field : PrivatesQTyRD->fields()) {
2262     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
2263     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
2264     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
2265     auto RefLoadLVal =
2266         emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
2267     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
2268     ++Counter;
2269   }
2270   CGF.FinishFunction();
2271   return TaskPrivatesMap;
2272 }
2273 
array_pod_sort_comparator(const PrivateDataTy * P1,const PrivateDataTy * P2)2274 static int array_pod_sort_comparator(const PrivateDataTy *P1,
2275                                      const PrivateDataTy *P2) {
2276   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
2277 }
2278 
emitTaskCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,bool Tied,llvm::PointerIntPair<llvm::Value *,1,bool> Final,llvm::Value * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,ArrayRef<const Expr * > PrivateVars,ArrayRef<const Expr * > PrivateCopies,ArrayRef<const Expr * > FirstprivateVars,ArrayRef<const Expr * > FirstprivateCopies,ArrayRef<const Expr * > FirstprivateInits,ArrayRef<std::pair<OpenMPDependClauseKind,const Expr * >> Dependences)2279 void CGOpenMPRuntime::emitTaskCall(
2280     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
2281     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
2282     llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
2283     const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
2284     ArrayRef<const Expr *> PrivateCopies,
2285     ArrayRef<const Expr *> FirstprivateVars,
2286     ArrayRef<const Expr *> FirstprivateCopies,
2287     ArrayRef<const Expr *> FirstprivateInits,
2288     ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
2289   if (!CGF.HaveInsertPoint())
2290     return;
2291   auto &C = CGM.getContext();
2292   llvm::SmallVector<PrivateDataTy, 8> Privates;
2293   // Aggregate privates and sort them by the alignment.
2294   auto I = PrivateCopies.begin();
2295   for (auto *E : PrivateVars) {
2296     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2297     Privates.push_back(std::make_pair(
2298         C.getDeclAlign(VD),
2299         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2300                          /*PrivateElemInit=*/nullptr)));
2301     ++I;
2302   }
2303   I = FirstprivateCopies.begin();
2304   auto IElemInitRef = FirstprivateInits.begin();
2305   for (auto *E : FirstprivateVars) {
2306     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2307     Privates.push_back(std::make_pair(
2308         C.getDeclAlign(VD),
2309         PrivateHelpersTy(
2310             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2311             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
2312     ++I, ++IElemInitRef;
2313   }
2314   llvm::array_pod_sort(Privates.begin(), Privates.end(),
2315                        array_pod_sort_comparator);
2316   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2317   // Build type kmp_routine_entry_t (if not built yet).
2318   emitKmpRoutineEntryT(KmpInt32Ty);
2319   // Build type kmp_task_t (if not built yet).
2320   if (KmpTaskTQTy.isNull()) {
2321     KmpTaskTQTy = C.getRecordType(
2322         createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
2323   }
2324   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2325   // Build particular struct kmp_task_t for the given task.
2326   auto *KmpTaskTWithPrivatesQTyRD =
2327       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
2328   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
2329   QualType KmpTaskTWithPrivatesPtrQTy =
2330       C.getPointerType(KmpTaskTWithPrivatesQTy);
2331   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
2332   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
2333   auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy);
2334   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
2335 
2336   // Emit initial values for private copies (if any).
2337   llvm::Value *TaskPrivatesMap = nullptr;
2338   auto *TaskPrivatesMapTy =
2339       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
2340                 3)
2341           ->getType();
2342   if (!Privates.empty()) {
2343     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2344     TaskPrivatesMap = emitTaskPrivateMappingFunction(
2345         CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
2346     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2347         TaskPrivatesMap, TaskPrivatesMapTy);
2348   } else {
2349     TaskPrivatesMap = llvm::ConstantPointerNull::get(
2350         cast<llvm::PointerType>(TaskPrivatesMapTy));
2351   }
2352   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
2353   // kmp_task_t *tt);
2354   auto *TaskEntry = emitProxyTaskFunction(
2355       CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
2356       KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
2357 
2358   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2359   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2360   // kmp_routine_entry_t *task_entry);
2361   // Task flags. Format is taken from
2362   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
2363   // description of kmp_tasking_flags struct.
2364   const unsigned TiedFlag = 0x1;
2365   const unsigned FinalFlag = 0x2;
2366   unsigned Flags = Tied ? TiedFlag : 0;
2367   auto *TaskFlags =
2368       Final.getPointer()
2369           ? CGF.Builder.CreateSelect(Final.getPointer(),
2370                                      CGF.Builder.getInt32(FinalFlag),
2371                                      CGF.Builder.getInt32(/*C=*/0))
2372           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
2373   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
2374   auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
2375   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
2376                               getThreadID(CGF, Loc), TaskFlags,
2377                               KmpTaskTWithPrivatesTySize, SharedsSize,
2378                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2379                                   TaskEntry, KmpRoutineEntryPtrTy)};
2380   auto *NewTask = CGF.EmitRuntimeCall(
2381       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
2382   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2383       NewTask, KmpTaskTWithPrivatesPtrTy);
2384   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
2385                                                KmpTaskTWithPrivatesQTy);
2386   LValue TDBase =
2387       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
2388   // Fill the data in the resulting kmp_task_t record.
2389   // Copy shareds if there are any.
2390   Address KmpTaskSharedsPtr = Address::invalid();
2391   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
2392     KmpTaskSharedsPtr =
2393         Address(CGF.EmitLoadOfScalar(
2394                     CGF.EmitLValueForField(
2395                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
2396                                            KmpTaskTShareds)),
2397                     Loc),
2398                 CGF.getNaturalTypeAlignment(SharedsTy));
2399     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
2400   }
2401   // Emit initial values for private copies (if any).
2402   bool NeedsCleanup = false;
2403   if (!Privates.empty()) {
2404     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2405     auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
2406     FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
2407     LValue SharedsBase;
2408     if (!FirstprivateVars.empty()) {
2409       SharedsBase = CGF.MakeAddrLValue(
2410           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2411               KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
2412           SharedsTy);
2413     }
2414     CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
2415         cast<CapturedStmt>(*D.getAssociatedStmt()));
2416     for (auto &&Pair : Privates) {
2417       auto *VD = Pair.second.PrivateCopy;
2418       auto *Init = VD->getAnyInitializer();
2419       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
2420       if (Init) {
2421         if (auto *Elem = Pair.second.PrivateElemInit) {
2422           auto *OriginalVD = Pair.second.Original;
2423           auto *SharedField = CapturesInfo.lookup(OriginalVD);
2424           auto SharedRefLValue =
2425               CGF.EmitLValueForField(SharedsBase, SharedField);
2426           SharedRefLValue = CGF.MakeAddrLValue(
2427               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
2428               SharedRefLValue.getType(), AlignmentSource::Decl);
2429           QualType Type = OriginalVD->getType();
2430           if (Type->isArrayType()) {
2431             // Initialize firstprivate array.
2432             if (!isa<CXXConstructExpr>(Init) ||
2433                 CGF.isTrivialInitializer(Init)) {
2434               // Perform simple memcpy.
2435               CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
2436                                       SharedRefLValue.getAddress(), Type);
2437             } else {
2438               // Initialize firstprivate array using element-by-element
2439               // intialization.
2440               CGF.EmitOMPAggregateAssign(
2441                   PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
2442                   Type, [&CGF, Elem, Init, &CapturesInfo](
2443                             Address DestElement, Address SrcElement) {
2444                     // Clean up any temporaries needed by the initialization.
2445                     CodeGenFunction::OMPPrivateScope InitScope(CGF);
2446                     InitScope.addPrivate(Elem, [SrcElement]() -> Address {
2447                       return SrcElement;
2448                     });
2449                     (void)InitScope.Privatize();
2450                     // Emit initialization for single element.
2451                     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
2452                         CGF, &CapturesInfo);
2453                     CGF.EmitAnyExprToMem(Init, DestElement,
2454                                          Init->getType().getQualifiers(),
2455                                          /*IsInitializer=*/false);
2456                   });
2457             }
2458           } else {
2459             CodeGenFunction::OMPPrivateScope InitScope(CGF);
2460             InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
2461               return SharedRefLValue.getAddress();
2462             });
2463             (void)InitScope.Privatize();
2464             CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
2465             CGF.EmitExprAsInit(Init, VD, PrivateLValue,
2466                                /*capturedByInit=*/false);
2467           }
2468         } else {
2469           CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
2470         }
2471       }
2472       NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
2473       ++FI;
2474     }
2475   }
2476   // Provide pointer to function with destructors for privates.
2477   llvm::Value *DestructorFn =
2478       NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
2479                                              KmpTaskTWithPrivatesPtrQTy,
2480                                              KmpTaskTWithPrivatesQTy)
2481                    : llvm::ConstantPointerNull::get(
2482                          cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
2483   LValue Destructor = CGF.EmitLValueForField(
2484       TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
2485   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2486                             DestructorFn, KmpRoutineEntryPtrTy),
2487                         Destructor);
2488 
2489   // Process list of dependences.
2490   Address DependenciesArray = Address::invalid();
2491   unsigned NumDependencies = Dependences.size();
2492   if (NumDependencies) {
2493     // Dependence kind for RTL.
2494     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
2495     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
2496     RecordDecl *KmpDependInfoRD;
2497     QualType FlagsTy =
2498         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
2499     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
2500     if (KmpDependInfoTy.isNull()) {
2501       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
2502       KmpDependInfoRD->startDefinition();
2503       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
2504       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
2505       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
2506       KmpDependInfoRD->completeDefinition();
2507       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
2508     } else {
2509       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
2510     }
2511     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
2512     // Define type kmp_depend_info[<Dependences.size()>];
2513     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
2514         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
2515         ArrayType::Normal, /*IndexTypeQuals=*/0);
2516     // kmp_depend_info[<Dependences.size()>] deps;
2517     DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
2518     for (unsigned i = 0; i < NumDependencies; ++i) {
2519       const Expr *E = Dependences[i].second;
2520       auto Addr = CGF.EmitLValue(E);
2521       llvm::Value *Size;
2522       QualType Ty = E->getType();
2523       if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
2524         LValue UpAddrLVal =
2525             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
2526         llvm::Value *UpAddr =
2527             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
2528         llvm::Value *LowIntPtr =
2529             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
2530         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
2531         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
2532       } else
2533         Size = getTypeSize(CGF, Ty);
2534       auto Base = CGF.MakeAddrLValue(
2535           CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
2536           KmpDependInfoTy);
2537       // deps[i].base_addr = &<Dependences[i].second>;
2538       auto BaseAddrLVal = CGF.EmitLValueForField(
2539           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
2540       CGF.EmitStoreOfScalar(
2541           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
2542           BaseAddrLVal);
2543       // deps[i].len = sizeof(<Dependences[i].second>);
2544       auto LenLVal = CGF.EmitLValueForField(
2545           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
2546       CGF.EmitStoreOfScalar(Size, LenLVal);
2547       // deps[i].flags = <Dependences[i].first>;
2548       RTLDependenceKindTy DepKind;
2549       switch (Dependences[i].first) {
2550       case OMPC_DEPEND_in:
2551         DepKind = DepIn;
2552         break;
2553       // Out and InOut dependencies must use the same code.
2554       case OMPC_DEPEND_out:
2555       case OMPC_DEPEND_inout:
2556         DepKind = DepInOut;
2557         break;
2558       case OMPC_DEPEND_source:
2559       case OMPC_DEPEND_unknown:
2560         llvm_unreachable("Unknown task dependence type");
2561       }
2562       auto FlagsLVal = CGF.EmitLValueForField(
2563           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
2564       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
2565                             FlagsLVal);
2566     }
2567     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2568         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
2569         CGF.VoidPtrTy);
2570   }
2571 
2572   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
2573   // libcall.
2574   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2575   // *new_task);
2576   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2577   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2578   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
2579   // list is not empty
2580   auto *ThreadID = getThreadID(CGF, Loc);
2581   auto *UpLoc = emitUpdateLocation(CGF, Loc);
2582   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
2583   llvm::Value *DepTaskArgs[7];
2584   if (NumDependencies) {
2585     DepTaskArgs[0] = UpLoc;
2586     DepTaskArgs[1] = ThreadID;
2587     DepTaskArgs[2] = NewTask;
2588     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
2589     DepTaskArgs[4] = DependenciesArray.getPointer();
2590     DepTaskArgs[5] = CGF.Builder.getInt32(0);
2591     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2592   }
2593   auto &&ThenCodeGen = [this, NumDependencies,
2594                         &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
2595     // TODO: add check for untied tasks.
2596     if (NumDependencies) {
2597       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
2598                           DepTaskArgs);
2599     } else {
2600       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
2601                           TaskArgs);
2602     }
2603   };
2604   typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
2605       IfCallEndCleanup;
2606 
2607   llvm::Value *DepWaitTaskArgs[6];
2608   if (NumDependencies) {
2609     DepWaitTaskArgs[0] = UpLoc;
2610     DepWaitTaskArgs[1] = ThreadID;
2611     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
2612     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
2613     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
2614     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2615   }
2616   auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
2617                         NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
2618     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
2619     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2620     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
2621     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
2622     // is specified.
2623     if (NumDependencies)
2624       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
2625                           DepWaitTaskArgs);
2626     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
2627     // kmp_task_t *new_task);
2628     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
2629                         TaskArgs);
2630     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
2631     // kmp_task_t *new_task);
2632     CGF.EHStack.pushCleanup<IfCallEndCleanup>(
2633         NormalAndEHCleanup,
2634         createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
2635         llvm::makeArrayRef(TaskArgs));
2636 
2637     // Call proxy_task_entry(gtid, new_task);
2638     llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
2639     CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
2640   };
2641 
2642   if (IfCond) {
2643     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
2644   } else {
2645     CodeGenFunction::RunCleanupsScope Scope(CGF);
2646     ThenCodeGen(CGF);
2647   }
2648 }
2649 
2650 /// \brief Emit reduction operation for each element of array (required for
2651 /// array sections) LHS op = RHS.
2652 /// \param Type Type of array.
2653 /// \param LHSVar Variable on the left side of the reduction operation
2654 /// (references element of array in original variable).
2655 /// \param RHSVar Variable on the right side of the reduction operation
2656 /// (references element of array in original variable).
2657 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
2658 /// RHSVar.
EmitOMPAggregateReduction(CodeGenFunction & CGF,QualType Type,const VarDecl * LHSVar,const VarDecl * RHSVar,const llvm::function_ref<void (CodeGenFunction & CGF,const Expr *,const Expr *,const Expr *)> & RedOpGen,const Expr * XExpr=nullptr,const Expr * EExpr=nullptr,const Expr * UpExpr=nullptr)2659 static void EmitOMPAggregateReduction(
2660     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
2661     const VarDecl *RHSVar,
2662     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
2663                                   const Expr *, const Expr *)> &RedOpGen,
2664     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
2665     const Expr *UpExpr = nullptr) {
2666   // Perform element-by-element initialization.
2667   QualType ElementTy;
2668   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
2669   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
2670 
2671   // Drill down to the base element type on both arrays.
2672   auto ArrayTy = Type->getAsArrayTypeUnsafe();
2673   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
2674 
2675   auto RHSBegin = RHSAddr.getPointer();
2676   auto LHSBegin = LHSAddr.getPointer();
2677   // Cast from pointer to array type to pointer to single element.
2678   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
2679   // The basic structure here is a while-do loop.
2680   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
2681   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
2682   auto IsEmpty =
2683       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
2684   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
2685 
2686   // Enter the loop body, making that address the current address.
2687   auto EntryBB = CGF.Builder.GetInsertBlock();
2688   CGF.EmitBlock(BodyBB);
2689 
2690   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
2691 
2692   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
2693       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
2694   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
2695   Address RHSElementCurrent =
2696       Address(RHSElementPHI,
2697               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
2698 
2699   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
2700       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
2701   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
2702   Address LHSElementCurrent =
2703       Address(LHSElementPHI,
2704               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
2705 
2706   // Emit copy.
2707   CodeGenFunction::OMPPrivateScope Scope(CGF);
2708   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
2709   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
2710   Scope.Privatize();
2711   RedOpGen(CGF, XExpr, EExpr, UpExpr);
2712   Scope.ForceCleanup();
2713 
2714   // Shift the address forward by one element.
2715   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
2716       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
2717   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
2718       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
2719   // Check whether we've reached the end.
2720   auto Done =
2721       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
2722   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
2723   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
2724   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
2725 
2726   // Done.
2727   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
2728 }
2729 
emitReductionFunction(CodeGenModule & CGM,llvm::Type * ArgsType,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,ArrayRef<const Expr * > ReductionOps)2730 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
2731                                           llvm::Type *ArgsType,
2732                                           ArrayRef<const Expr *> Privates,
2733                                           ArrayRef<const Expr *> LHSExprs,
2734                                           ArrayRef<const Expr *> RHSExprs,
2735                                           ArrayRef<const Expr *> ReductionOps) {
2736   auto &C = CGM.getContext();
2737 
2738   // void reduction_func(void *LHSArg, void *RHSArg);
2739   FunctionArgList Args;
2740   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2741                            C.VoidPtrTy);
2742   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2743                            C.VoidPtrTy);
2744   Args.push_back(&LHSArg);
2745   Args.push_back(&RHSArg);
2746   FunctionType::ExtInfo EI;
2747   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
2748       C.VoidTy, Args, EI, /*isVariadic=*/false);
2749   auto *Fn = llvm::Function::Create(
2750       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2751       ".omp.reduction.reduction_func", &CGM.getModule());
2752   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2753   CodeGenFunction CGF(CGM);
2754   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2755 
2756   // Dst = (void*[n])(LHSArg);
2757   // Src = (void*[n])(RHSArg);
2758   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2759       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2760       ArgsType), CGF.getPointerAlign());
2761   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2762       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2763       ArgsType), CGF.getPointerAlign());
2764 
2765   //  ...
2766   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
2767   //  ...
2768   CodeGenFunction::OMPPrivateScope Scope(CGF);
2769   auto IPriv = Privates.begin();
2770   unsigned Idx = 0;
2771   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
2772     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
2773     Scope.addPrivate(RHSVar, [&]() -> Address {
2774       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
2775     });
2776     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
2777     Scope.addPrivate(LHSVar, [&]() -> Address {
2778       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
2779     });
2780     QualType PrivTy = (*IPriv)->getType();
2781     if (PrivTy->isArrayType()) {
2782       // Get array size and emit VLA type.
2783       ++Idx;
2784       Address Elem =
2785           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
2786       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
2787       CodeGenFunction::OpaqueValueMapping OpaqueMap(
2788           CGF,
2789           cast<OpaqueValueExpr>(
2790               CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()),
2791           RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
2792       CGF.EmitVariablyModifiedType(PrivTy);
2793     }
2794   }
2795   Scope.Privatize();
2796   IPriv = Privates.begin();
2797   auto ILHS = LHSExprs.begin();
2798   auto IRHS = RHSExprs.begin();
2799   for (auto *E : ReductionOps) {
2800     if ((*IPriv)->getType()->isArrayType()) {
2801       // Emit reduction for array section.
2802       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
2803       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
2804       EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
2805                                 [=](CodeGenFunction &CGF, const Expr *,
2806                                     const Expr *,
2807                                     const Expr *) { CGF.EmitIgnoredExpr(E); });
2808     } else
2809       // Emit reduction for array subscript or single variable.
2810       CGF.EmitIgnoredExpr(E);
2811     ++IPriv, ++ILHS, ++IRHS;
2812   }
2813   Scope.ForceCleanup();
2814   CGF.FinishFunction();
2815   return Fn;
2816 }
2817 
emitReduction(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,ArrayRef<const Expr * > ReductionOps,bool WithNowait,bool SimpleReduction)2818 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
2819                                     ArrayRef<const Expr *> Privates,
2820                                     ArrayRef<const Expr *> LHSExprs,
2821                                     ArrayRef<const Expr *> RHSExprs,
2822                                     ArrayRef<const Expr *> ReductionOps,
2823                                     bool WithNowait, bool SimpleReduction) {
2824   if (!CGF.HaveInsertPoint())
2825     return;
2826   // Next code should be emitted for reduction:
2827   //
2828   // static kmp_critical_name lock = { 0 };
2829   //
2830   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
2831   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
2832   //  ...
2833   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
2834   //  *(Type<n>-1*)rhs[<n>-1]);
2835   // }
2836   //
2837   // ...
2838   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
2839   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2840   // RedList, reduce_func, &<lock>)) {
2841   // case 1:
2842   //  ...
2843   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2844   //  ...
2845   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2846   // break;
2847   // case 2:
2848   //  ...
2849   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2850   //  ...
2851   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
2852   // break;
2853   // default:;
2854   // }
2855   //
2856   // if SimpleReduction is true, only the next code is generated:
2857   //  ...
2858   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2859   //  ...
2860 
2861   auto &C = CGM.getContext();
2862 
2863   if (SimpleReduction) {
2864     CodeGenFunction::RunCleanupsScope Scope(CGF);
2865     auto IPriv = Privates.begin();
2866     auto ILHS = LHSExprs.begin();
2867     auto IRHS = RHSExprs.begin();
2868     for (auto *E : ReductionOps) {
2869       if ((*IPriv)->getType()->isArrayType()) {
2870         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
2871         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
2872         EmitOMPAggregateReduction(
2873             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
2874             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
2875                 const Expr *) { CGF.EmitIgnoredExpr(E); });
2876       } else
2877         CGF.EmitIgnoredExpr(E);
2878       ++IPriv, ++ILHS, ++IRHS;
2879     }
2880     return;
2881   }
2882 
2883   // 1. Build a list of reduction variables.
2884   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
2885   auto Size = RHSExprs.size();
2886   for (auto *E : Privates) {
2887     if (E->getType()->isArrayType())
2888       // Reserve place for array size.
2889       ++Size;
2890   }
2891   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
2892   QualType ReductionArrayTy =
2893       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2894                              /*IndexTypeQuals=*/0);
2895   Address ReductionList =
2896       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
2897   auto IPriv = Privates.begin();
2898   unsigned Idx = 0;
2899   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
2900     Address Elem =
2901       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
2902     CGF.Builder.CreateStore(
2903         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2904             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
2905         Elem);
2906     if ((*IPriv)->getType()->isArrayType()) {
2907       // Store array size.
2908       ++Idx;
2909       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
2910                                              CGF.getPointerSize());
2911       CGF.Builder.CreateStore(
2912           CGF.Builder.CreateIntToPtr(
2913               CGF.Builder.CreateIntCast(
2914                   CGF.getVLASize(CGF.getContext().getAsVariableArrayType(
2915                                      (*IPriv)->getType()))
2916                       .first,
2917                   CGF.SizeTy, /*isSigned=*/false),
2918               CGF.VoidPtrTy),
2919           Elem);
2920     }
2921   }
2922 
2923   // 2. Emit reduce_func().
2924   auto *ReductionFn = emitReductionFunction(
2925       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
2926       LHSExprs, RHSExprs, ReductionOps);
2927 
2928   // 3. Create static kmp_critical_name lock = { 0 };
2929   auto *Lock = getCriticalRegionLock(".reduction");
2930 
2931   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2932   // RedList, reduce_func, &<lock>);
2933   auto *IdentTLoc = emitUpdateLocation(
2934       CGF, Loc,
2935       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
2936   auto *ThreadId = getThreadID(CGF, Loc);
2937   auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy);
2938   auto *RL =
2939     CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
2940                                                     CGF.VoidPtrTy);
2941   llvm::Value *Args[] = {
2942       IdentTLoc,                             // ident_t *<loc>
2943       ThreadId,                              // i32 <gtid>
2944       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
2945       ReductionArrayTySize,                  // size_type sizeof(RedList)
2946       RL,                                    // void *RedList
2947       ReductionFn, // void (*) (void *, void *) <reduce_func>
2948       Lock         // kmp_critical_name *&<lock>
2949   };
2950   auto Res = CGF.EmitRuntimeCall(
2951       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
2952                                        : OMPRTL__kmpc_reduce),
2953       Args);
2954 
2955   // 5. Build switch(res)
2956   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
2957   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
2958 
2959   // 6. Build case 1:
2960   //  ...
2961   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2962   //  ...
2963   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2964   // break;
2965   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
2966   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
2967   CGF.EmitBlock(Case1BB);
2968 
2969   {
2970     CodeGenFunction::RunCleanupsScope Scope(CGF);
2971     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2972     llvm::Value *EndArgs[] = {
2973         IdentTLoc, // ident_t *<loc>
2974         ThreadId,  // i32 <gtid>
2975         Lock       // kmp_critical_name *&<lock>
2976     };
2977     CGF.EHStack
2978         .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2979             NormalAndEHCleanup,
2980             createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
2981                                              : OMPRTL__kmpc_end_reduce),
2982             llvm::makeArrayRef(EndArgs));
2983     auto IPriv = Privates.begin();
2984     auto ILHS = LHSExprs.begin();
2985     auto IRHS = RHSExprs.begin();
2986     for (auto *E : ReductionOps) {
2987       if ((*IPriv)->getType()->isArrayType()) {
2988         // Emit reduction for array section.
2989         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
2990         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
2991         EmitOMPAggregateReduction(
2992             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
2993             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
2994                 const Expr *) { CGF.EmitIgnoredExpr(E); });
2995       } else
2996         // Emit reduction for array subscript or single variable.
2997         CGF.EmitIgnoredExpr(E);
2998       ++IPriv, ++ILHS, ++IRHS;
2999     }
3000   }
3001 
3002   CGF.EmitBranch(DefaultBB);
3003 
3004   // 7. Build case 2:
3005   //  ...
3006   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
3007   //  ...
3008   // break;
3009   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
3010   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
3011   CGF.EmitBlock(Case2BB);
3012 
3013   {
3014     CodeGenFunction::RunCleanupsScope Scope(CGF);
3015     if (!WithNowait) {
3016       // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
3017       llvm::Value *EndArgs[] = {
3018           IdentTLoc, // ident_t *<loc>
3019           ThreadId,  // i32 <gtid>
3020           Lock       // kmp_critical_name *&<lock>
3021       };
3022       CGF.EHStack
3023           .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
3024               NormalAndEHCleanup,
3025               createRuntimeFunction(OMPRTL__kmpc_end_reduce),
3026               llvm::makeArrayRef(EndArgs));
3027     }
3028     auto ILHS = LHSExprs.begin();
3029     auto IRHS = RHSExprs.begin();
3030     auto IPriv = Privates.begin();
3031     for (auto *E : ReductionOps) {
3032         const Expr *XExpr = nullptr;
3033         const Expr *EExpr = nullptr;
3034         const Expr *UpExpr = nullptr;
3035         BinaryOperatorKind BO = BO_Comma;
3036         if (auto *BO = dyn_cast<BinaryOperator>(E)) {
3037           if (BO->getOpcode() == BO_Assign) {
3038             XExpr = BO->getLHS();
3039             UpExpr = BO->getRHS();
3040           }
3041         }
3042         // Try to emit update expression as a simple atomic.
3043         auto *RHSExpr = UpExpr;
3044         if (RHSExpr) {
3045           // Analyze RHS part of the whole expression.
3046           if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
3047                   RHSExpr->IgnoreParenImpCasts())) {
3048             // If this is a conditional operator, analyze its condition for
3049             // min/max reduction operator.
3050             RHSExpr = ACO->getCond();
3051           }
3052           if (auto *BORHS =
3053                   dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
3054             EExpr = BORHS->getRHS();
3055             BO = BORHS->getOpcode();
3056           }
3057         }
3058         if (XExpr) {
3059           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3060           auto &&AtomicRedGen = [this, BO, VD, IPriv,
3061                                  Loc](CodeGenFunction &CGF, const Expr *XExpr,
3062                                       const Expr *EExpr, const Expr *UpExpr) {
3063             LValue X = CGF.EmitLValue(XExpr);
3064             RValue E;
3065             if (EExpr)
3066               E = CGF.EmitAnyExpr(EExpr);
3067             CGF.EmitOMPAtomicSimpleUpdateExpr(
3068                 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
3069                 [&CGF, UpExpr, VD, IPriv](RValue XRValue) {
3070                   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3071                   PrivateScope.addPrivate(VD, [&CGF, VD, XRValue]() -> Address {
3072                     Address LHSTemp = CGF.CreateMemTemp(VD->getType());
3073                     CGF.EmitStoreThroughLValue(
3074                         XRValue, CGF.MakeAddrLValue(LHSTemp, VD->getType()));
3075                     return LHSTemp;
3076                   });
3077                   (void)PrivateScope.Privatize();
3078                   return CGF.EmitAnyExpr(UpExpr);
3079                 });
3080           };
3081           if ((*IPriv)->getType()->isArrayType()) {
3082             // Emit atomic reduction for array section.
3083             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3084             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
3085                                       AtomicRedGen, XExpr, EExpr, UpExpr);
3086           } else
3087             // Emit atomic reduction for array subscript or single variable.
3088             AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
3089         } else {
3090           // Emit as a critical region.
3091           auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *,
3092                                              const Expr *, const Expr *) {
3093             emitCriticalRegion(
3094                 CGF, ".atomic_reduction",
3095                 [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc);
3096           };
3097           if ((*IPriv)->getType()->isArrayType()) {
3098             auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3099             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3100             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3101                                       CritRedGen);
3102           } else
3103             CritRedGen(CGF, nullptr, nullptr, nullptr);
3104         }
3105       ++ILHS, ++IRHS, ++IPriv;
3106     }
3107   }
3108 
3109   CGF.EmitBranch(DefaultBB);
3110   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
3111 }
3112 
emitTaskwaitCall(CodeGenFunction & CGF,SourceLocation Loc)3113 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
3114                                        SourceLocation Loc) {
3115   if (!CGF.HaveInsertPoint())
3116     return;
3117   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
3118   // global_tid);
3119   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3120   // Ignore return result until untied tasks are supported.
3121   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
3122 }
3123 
emitInlinedDirective(CodeGenFunction & CGF,OpenMPDirectiveKind InnerKind,const RegionCodeGenTy & CodeGen,bool HasCancel)3124 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
3125                                            OpenMPDirectiveKind InnerKind,
3126                                            const RegionCodeGenTy &CodeGen,
3127                                            bool HasCancel) {
3128   if (!CGF.HaveInsertPoint())
3129     return;
3130   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
3131   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
3132 }
3133 
namespace {
/// \brief Cancellation kinds. The numeric value of an enumerator is what is
/// passed as the 'cncl_kind' argument of __kmpc_cancellationpoint.
enum RTCancelKind {
  /// Initial value used by getCancellationKind before a region is matched.
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4,
};
} // anonymous namespace
3143 
getCancellationKind(OpenMPDirectiveKind CancelRegion)3144 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
3145   RTCancelKind CancelKind = CancelNoreq;
3146   if (CancelRegion == OMPD_parallel)
3147     CancelKind = CancelParallel;
3148   else if (CancelRegion == OMPD_for)
3149     CancelKind = CancelLoop;
3150   else if (CancelRegion == OMPD_sections)
3151     CancelKind = CancelSections;
3152   else {
3153     assert(CancelRegion == OMPD_taskgroup);
3154     CancelKind = CancelTaskgroup;
3155   }
3156   return CancelKind;
3157 }
3158 
emitCancellationPointCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind CancelRegion)3159 void CGOpenMPRuntime::emitCancellationPointCall(
3160     CodeGenFunction &CGF, SourceLocation Loc,
3161     OpenMPDirectiveKind CancelRegion) {
3162   if (!CGF.HaveInsertPoint())
3163     return;
3164   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
3165   // global_tid, kmp_int32 cncl_kind);
3166   if (auto *OMPRegionInfo =
3167           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3168     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
3169       return;
3170     if (OMPRegionInfo->hasCancel()) {
3171       llvm::Value *Args[] = {
3172           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3173           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
3174       // Ignore return result until untied tasks are supported.
3175       auto *Result = CGF.EmitRuntimeCall(
3176           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
3177       // if (__kmpc_cancellationpoint()) {
3178       //  __kmpc_cancel_barrier();
3179       //   exit from construct;
3180       // }
3181       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3182       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3183       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3184       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3185       CGF.EmitBlock(ExitBB);
3186       // __kmpc_cancel_barrier();
3187       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
3188       // exit from construct;
3189       auto CancelDest =
3190           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3191       CGF.EmitBranchThroughCleanup(CancelDest);
3192       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3193     }
3194   }
3195 }
3196 
emitCancelCall(CodeGenFunction & CGF,SourceLocation Loc,const Expr * IfCond,OpenMPDirectiveKind CancelRegion)3197 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
3198                                      const Expr *IfCond,
3199                                      OpenMPDirectiveKind CancelRegion) {
3200   if (!CGF.HaveInsertPoint())
3201     return;
3202   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
3203   // kmp_int32 cncl_kind);
3204   if (auto *OMPRegionInfo =
3205           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3206     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
3207       return;
3208     auto &&ThenGen = [this, Loc, CancelRegion,
3209                       OMPRegionInfo](CodeGenFunction &CGF) {
3210       llvm::Value *Args[] = {
3211           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3212           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
3213       // Ignore return result until untied tasks are supported.
3214       auto *Result =
3215           CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
3216       // if (__kmpc_cancel()) {
3217       //  __kmpc_cancel_barrier();
3218       //   exit from construct;
3219       // }
3220       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3221       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3222       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3223       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3224       CGF.EmitBlock(ExitBB);
3225       // __kmpc_cancel_barrier();
3226       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
3227       // exit from construct;
3228       auto CancelDest =
3229           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3230       CGF.EmitBranchThroughCleanup(CancelDest);
3231       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3232     };
3233     if (IfCond)
3234       emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
3235     else
3236       ThenGen(CGF);
3237   }
3238 }
3239 
3240 llvm::Value *
emitTargetOutlinedFunction(const OMPExecutableDirective & D,const RegionCodeGenTy & CodeGen)3241 CGOpenMPRuntime::emitTargetOutlinedFunction(const OMPExecutableDirective &D,
3242                                             const RegionCodeGenTy &CodeGen) {
3243   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3244 
3245   CodeGenFunction CGF(CGM, true);
3246   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen);
3247   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
3248   return CGF.GenerateOpenMPCapturedStmtFunction(CS);
3249 }
3250 
emitTargetCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::Value * OutlinedFn,const Expr * IfCond,const Expr * Device,ArrayRef<llvm::Value * > CapturedVars)3251 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
3252                                      const OMPExecutableDirective &D,
3253                                      llvm::Value *OutlinedFn,
3254                                      const Expr *IfCond, const Expr *Device,
3255                                      ArrayRef<llvm::Value *> CapturedVars) {
3256   if (!CGF.HaveInsertPoint())
3257     return;
3258   /// \brief Values for bit flags used to specify the mapping type for
3259   /// offloading.
3260   enum OpenMPOffloadMappingFlags {
3261     /// \brief Allocate memory on the device and move data from host to device.
3262     OMP_MAP_TO = 0x01,
3263     /// \brief Allocate memory on the device and move data from device to host.
3264     OMP_MAP_FROM = 0x02,
3265     /// \brief The element passed to the device is a pointer.
3266     OMP_MAP_PTR = 0x20,
3267     /// \brief Pass the element to the device by value.
3268     OMP_MAP_BYCOPY = 0x80,
3269   };
3270 
3271   enum OpenMPOffloadingReservedDeviceIDs {
3272     /// \brief Device ID if the device was not defined, runtime should get it
3273     /// from environment variables in the spec.
3274     OMP_DEVICEID_UNDEF = -1,
3275   };
3276 
3277   auto &Ctx = CGF.getContext();
3278 
3279   // Fill up the arrays with the all the captured variables.
3280   SmallVector<llvm::Value *, 16> BasePointers;
3281   SmallVector<llvm::Value *, 16> Pointers;
3282   SmallVector<llvm::Value *, 16> Sizes;
3283   SmallVector<unsigned, 16> MapTypes;
3284 
3285   bool hasVLACaptures = false;
3286 
3287   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3288   auto RI = CS.getCapturedRecordDecl()->field_begin();
3289   // auto II = CS.capture_init_begin();
3290   auto CV = CapturedVars.begin();
3291   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
3292                                             CE = CS.capture_end();
3293        CI != CE; ++CI, ++RI, ++CV) {
3294     StringRef Name;
3295     QualType Ty;
3296     llvm::Value *BasePointer;
3297     llvm::Value *Pointer;
3298     llvm::Value *Size;
3299     unsigned MapType;
3300 
3301     // VLA sizes are passed to the outlined region by copy.
3302     if (CI->capturesVariableArrayType()) {
3303       BasePointer = Pointer = *CV;
3304       Size = getTypeSize(CGF, RI->getType());
3305       // Copy to the device as an argument. No need to retrieve it.
3306       MapType = OMP_MAP_BYCOPY;
3307       hasVLACaptures = true;
3308     } else if (CI->capturesThis()) {
3309       BasePointer = Pointer = *CV;
3310       const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr());
3311       Size = getTypeSize(CGF, PtrTy->getPointeeType());
3312       // Default map type.
3313       MapType = OMP_MAP_TO | OMP_MAP_FROM;
3314     } else if (CI->capturesVariableByCopy()) {
3315       MapType = OMP_MAP_BYCOPY;
3316       if (!RI->getType()->isAnyPointerType()) {
3317         // If the field is not a pointer, we need to save the actual value and
3318         // load it as a void pointer.
3319         auto DstAddr = CGF.CreateMemTemp(
3320             Ctx.getUIntPtrType(),
3321             Twine(CI->getCapturedVar()->getName()) + ".casted");
3322         LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
3323 
3324         auto *SrcAddrVal = CGF.EmitScalarConversion(
3325             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
3326             Ctx.getPointerType(RI->getType()), SourceLocation());
3327         LValue SrcLV =
3328             CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType());
3329 
3330         // Store the value using the source type pointer.
3331         CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV);
3332 
3333         // Load the value using the destination type pointer.
3334         BasePointer = Pointer =
3335             CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
3336       } else {
3337         MapType |= OMP_MAP_PTR;
3338         BasePointer = Pointer = *CV;
3339       }
3340       Size = getTypeSize(CGF, RI->getType());
3341     } else {
3342       assert(CI->capturesVariable() && "Expected captured reference.");
3343       BasePointer = Pointer = *CV;
3344 
3345       const ReferenceType *PtrTy =
3346           cast<ReferenceType>(RI->getType().getTypePtr());
3347       QualType ElementType = PtrTy->getPointeeType();
3348       Size = getTypeSize(CGF, ElementType);
3349       // The default map type for a scalar/complex type is 'to' because by
3350       // default the value doesn't have to be retrieved. For an aggregate type,
3351       // the default is 'tofrom'.
3352       MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
3353                                                : OMP_MAP_TO;
3354       if (ElementType->isAnyPointerType())
3355         MapType |= OMP_MAP_PTR;
3356     }
3357 
3358     BasePointers.push_back(BasePointer);
3359     Pointers.push_back(Pointer);
3360     Sizes.push_back(Size);
3361     MapTypes.push_back(MapType);
3362   }
3363 
3364   // Keep track on whether the host function has to be executed.
3365   auto OffloadErrorQType =
3366       Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
3367   auto OffloadError = CGF.MakeAddrLValue(
3368       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
3369       OffloadErrorQType);
3370   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
3371                         OffloadError);
3372 
3373   // Fill up the pointer arrays and transfer execution to the device.
3374   auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
3375                     hasVLACaptures, Device, OffloadError,
3376                     OffloadErrorQType](CodeGenFunction &CGF) {
3377     unsigned PointerNumVal = BasePointers.size();
3378     llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
3379     llvm::Value *BasePointersArray;
3380     llvm::Value *PointersArray;
3381     llvm::Value *SizesArray;
3382     llvm::Value *MapTypesArray;
3383 
3384     if (PointerNumVal) {
3385       llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
3386       QualType PointerArrayType = Ctx.getConstantArrayType(
3387           Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
3388           /*IndexTypeQuals=*/0);
3389 
3390       BasePointersArray =
3391           CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
3392       PointersArray =
3393           CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
3394 
3395       // If we don't have any VLA types, we can use a constant array for the map
3396       // sizes, otherwise we need to fill up the arrays as we do for the
3397       // pointers.
3398       if (hasVLACaptures) {
3399         QualType SizeArrayType = Ctx.getConstantArrayType(
3400             Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
3401             /*IndexTypeQuals=*/0);
3402         SizesArray =
3403             CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
3404       } else {
3405         // We expect all the sizes to be constant, so we collect them to create
3406         // a constant array.
3407         SmallVector<llvm::Constant *, 16> ConstSizes;
3408         for (auto S : Sizes)
3409           ConstSizes.push_back(cast<llvm::Constant>(S));
3410 
3411         auto *SizesArrayInit = llvm::ConstantArray::get(
3412             llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
3413         auto *SizesArrayGbl = new llvm::GlobalVariable(
3414             CGM.getModule(), SizesArrayInit->getType(),
3415             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
3416             SizesArrayInit, ".offload_sizes");
3417         SizesArrayGbl->setUnnamedAddr(true);
3418         SizesArray = SizesArrayGbl;
3419       }
3420 
3421       // The map types are always constant so we don't need to generate code to
3422       // fill arrays. Instead, we create an array constant.
3423       llvm::Constant *MapTypesArrayInit =
3424           llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
3425       auto *MapTypesArrayGbl = new llvm::GlobalVariable(
3426           CGM.getModule(), MapTypesArrayInit->getType(),
3427           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
3428           MapTypesArrayInit, ".offload_maptypes");
3429       MapTypesArrayGbl->setUnnamedAddr(true);
3430       MapTypesArray = MapTypesArrayGbl;
3431 
3432       for (unsigned i = 0; i < PointerNumVal; ++i) {
3433 
3434         llvm::Value *BPVal = BasePointers[i];
3435         if (BPVal->getType()->isPointerTy())
3436           BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
3437         else {
3438           assert(BPVal->getType()->isIntegerTy() &&
3439                  "If not a pointer, the value type must be an integer.");
3440           BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
3441         }
3442         llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
3443             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal),
3444             BasePointersArray, 0, i);
3445         Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
3446         CGF.Builder.CreateStore(BPVal, BPAddr);
3447 
3448         llvm::Value *PVal = Pointers[i];
3449         if (PVal->getType()->isPointerTy())
3450           PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
3451         else {
3452           assert(PVal->getType()->isIntegerTy() &&
3453                  "If not a pointer, the value type must be an integer.");
3454           PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
3455         }
3456         llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
3457             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
3458             0, i);
3459         Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
3460         CGF.Builder.CreateStore(PVal, PAddr);
3461 
3462         if (hasVLACaptures) {
3463           llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
3464               llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
3465               /*Idx0=*/0,
3466               /*Idx1=*/i);
3467           Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
3468           CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
3469                                       Sizes[i], CGM.SizeTy, /*isSigned=*/true),
3470                                   SAddr);
3471         }
3472       }
3473 
3474       BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3475           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
3476           /*Idx0=*/0, /*Idx1=*/0);
3477       PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3478           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
3479           /*Idx0=*/0,
3480           /*Idx1=*/0);
3481       SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3482           llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
3483           /*Idx0=*/0, /*Idx1=*/0);
3484       MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3485           llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray,
3486           /*Idx0=*/0,
3487           /*Idx1=*/0);
3488 
3489     } else {
3490       BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
3491       PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
3492       SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
3493       MapTypesArray =
3494           llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
3495     }
3496 
3497     // On top of the arrays that were filled up, the target offloading call
3498     // takes as arguments the device id as well as the host pointer. The host
3499     // pointer is used by the runtime library to identify the current target
3500     // region, so it only has to be unique and not necessarily point to
3501     // anything. It could be the pointer to the outlined function that
3502     // implements the target region, but we aren't using that so that the
3503     // compiler doesn't need to keep that, and could therefore inline the host
3504     // function if proven worthwhile during optimization.
3505 
3506     llvm::Value *HostPtr = new llvm::GlobalVariable(
3507         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
3508         llvm::GlobalValue::PrivateLinkage,
3509         llvm::Constant::getNullValue(CGM.Int8Ty), ".offload_hstptr");
3510 
3511     // Emit device ID if any.
3512     llvm::Value *DeviceID;
3513     if (Device)
3514       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3515                                            CGM.Int32Ty, /*isSigned=*/true);
3516     else
3517       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
3518 
3519     llvm::Value *OffloadingArgs[] = {
3520         DeviceID,      HostPtr,    PointerNum,   BasePointersArray,
3521         PointersArray, SizesArray, MapTypesArray};
3522     auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
3523                                       OffloadingArgs);
3524 
3525     CGF.EmitStoreOfScalar(Return, OffloadError);
3526   };
3527 
3528   if (IfCond) {
3529     // Notify that the host version must be executed.
3530     auto &&ElseGen = [this, OffloadError,
3531                       OffloadErrorQType](CodeGenFunction &CGF) {
3532       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u),
3533                             OffloadError);
3534     };
3535     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
3536   } else {
3537     CodeGenFunction::RunCleanupsScope Scope(CGF);
3538     ThenGen(CGF);
3539   }
3540 
3541   // Check the error code and execute the host version if required.
3542   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
3543   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
3544   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
3545   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
3546   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
3547 
3548   CGF.EmitBlock(OffloadFailedBlock);
3549   CGF.Builder.CreateCall(OutlinedFn, BasePointers);
3550   CGF.EmitBranch(OffloadContBlock);
3551 
3552   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
3553   return;
3554 }
3555