1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CGCleanup.h"
17 #include "clang/AST/Decl.h"
18 #include "clang/AST/StmtOpenMP.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cassert>
26 
27 using namespace clang;
28 using namespace CodeGen;
29 
30 namespace {
31 /// \brief Base class for handling code generation inside OpenMP regions.
32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
33 public:
34   /// \brief Kinds of OpenMP regions used in codegen.
35   enum CGOpenMPRegionKind {
36     /// \brief Region with outlined function for standalone 'parallel'
37     /// directive.
38     ParallelOutlinedRegion,
39     /// \brief Region with outlined function for standalone 'task' directive.
40     TaskOutlinedRegion,
41     /// \brief Region for constructs that do not require function outlining,
42     /// like 'for', 'sections', 'atomic' etc. directives.
43     InlinedRegion,
44   };
45 
CGOpenMPRegionInfo(const CapturedStmt & CS,const CGOpenMPRegionKind RegionKind,const RegionCodeGenTy & CodeGen)46   CGOpenMPRegionInfo(const CapturedStmt &CS,
47                      const CGOpenMPRegionKind RegionKind,
48                      const RegionCodeGenTy &CodeGen)
49       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
50         CodeGen(CodeGen) {}
51 
CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,const RegionCodeGenTy & CodeGen)52   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
53                      const RegionCodeGenTy &CodeGen)
54       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind),
55         CodeGen(CodeGen) {}
56 
57   /// \brief Get a variable or parameter for storing global thread id
58   /// inside OpenMP construct.
59   virtual const VarDecl *getThreadIDVariable() const = 0;
60 
61   /// \brief Emit the captured statement body.
62   virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
63 
64   /// \brief Get an LValue for the current ThreadID variable.
65   /// \return LValue for thread id variable. This LValue always has type int32*.
66   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
67 
getRegionKind() const68   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
69 
classof(const CGCapturedStmtInfo * Info)70   static bool classof(const CGCapturedStmtInfo *Info) {
71     return Info->getKind() == CR_OpenMP;
72   }
73 
74 protected:
75   CGOpenMPRegionKind RegionKind;
76   const RegionCodeGenTy &CodeGen;
77 };
78 
79 /// \brief API for captured statement code generation in OpenMP constructs.
80 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
81 public:
CGOpenMPOutlinedRegionInfo(const CapturedStmt & CS,const VarDecl * ThreadIDVar,const RegionCodeGenTy & CodeGen)82   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
83                              const RegionCodeGenTy &CodeGen)
84       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen),
85         ThreadIDVar(ThreadIDVar) {
86     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
87   }
88   /// \brief Get a variable or parameter for storing global thread id
89   /// inside OpenMP construct.
getThreadIDVariable() const90   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
91 
92   /// \brief Get the name of the capture helper.
getHelperName() const93   StringRef getHelperName() const override { return ".omp_outlined."; }
94 
classof(const CGCapturedStmtInfo * Info)95   static bool classof(const CGCapturedStmtInfo *Info) {
96     return CGOpenMPRegionInfo::classof(Info) &&
97            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
98                ParallelOutlinedRegion;
99   }
100 
101 private:
102   /// \brief A variable or parameter storing global thread id for OpenMP
103   /// constructs.
104   const VarDecl *ThreadIDVar;
105 };
106 
107 /// \brief API for captured statement code generation in OpenMP constructs.
108 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
109 public:
CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt & CS,const VarDecl * ThreadIDVar,const RegionCodeGenTy & CodeGen)110   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
111                                  const VarDecl *ThreadIDVar,
112                                  const RegionCodeGenTy &CodeGen)
113       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen),
114         ThreadIDVar(ThreadIDVar) {
115     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
116   }
117   /// \brief Get a variable or parameter for storing global thread id
118   /// inside OpenMP construct.
getThreadIDVariable() const119   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
120 
121   /// \brief Get an LValue for the current ThreadID variable.
122   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
123 
124   /// \brief Get the name of the capture helper.
getHelperName() const125   StringRef getHelperName() const override { return ".omp_outlined."; }
126 
classof(const CGCapturedStmtInfo * Info)127   static bool classof(const CGCapturedStmtInfo *Info) {
128     return CGOpenMPRegionInfo::classof(Info) &&
129            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
130                TaskOutlinedRegion;
131   }
132 
133 private:
134   /// \brief A variable or parameter storing global thread id for OpenMP
135   /// constructs.
136   const VarDecl *ThreadIDVar;
137 };
138 
139 /// \brief API for inlined captured statement code generation in OpenMP
140 /// constructs.
141 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
142 public:
CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo * OldCSI,const RegionCodeGenTy & CodeGen)143   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
144                             const RegionCodeGenTy &CodeGen)
145       : CGOpenMPRegionInfo(InlinedRegion, CodeGen), OldCSI(OldCSI),
146         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
147   // \brief Retrieve the value of the context parameter.
getContextValue() const148   llvm::Value *getContextValue() const override {
149     if (OuterRegionInfo)
150       return OuterRegionInfo->getContextValue();
151     llvm_unreachable("No context value for inlined OpenMP region");
152   }
setContextValue(llvm::Value * V)153   virtual void setContextValue(llvm::Value *V) override {
154     if (OuterRegionInfo) {
155       OuterRegionInfo->setContextValue(V);
156       return;
157     }
158     llvm_unreachable("No context value for inlined OpenMP region");
159   }
160   /// \brief Lookup the captured field decl for a variable.
lookup(const VarDecl * VD) const161   const FieldDecl *lookup(const VarDecl *VD) const override {
162     if (OuterRegionInfo)
163       return OuterRegionInfo->lookup(VD);
164     // If there is no outer outlined region,no need to lookup in a list of
165     // captured variables, we can use the original one.
166     return nullptr;
167   }
getThisFieldDecl() const168   FieldDecl *getThisFieldDecl() const override {
169     if (OuterRegionInfo)
170       return OuterRegionInfo->getThisFieldDecl();
171     return nullptr;
172   }
173   /// \brief Get a variable or parameter for storing global thread id
174   /// inside OpenMP construct.
getThreadIDVariable() const175   const VarDecl *getThreadIDVariable() const override {
176     if (OuterRegionInfo)
177       return OuterRegionInfo->getThreadIDVariable();
178     return nullptr;
179   }
180 
181   /// \brief Get the name of the capture helper.
getHelperName() const182   StringRef getHelperName() const override {
183     if (auto *OuterRegionInfo = getOldCSI())
184       return OuterRegionInfo->getHelperName();
185     llvm_unreachable("No helper name for inlined OpenMP construct");
186   }
187 
getOldCSI() const188   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
189 
classof(const CGCapturedStmtInfo * Info)190   static bool classof(const CGCapturedStmtInfo *Info) {
191     return CGOpenMPRegionInfo::classof(Info) &&
192            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
193   }
194 
195 private:
196   /// \brief CodeGen info about outer OpenMP region.
197   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
198   CGOpenMPRegionInfo *OuterRegionInfo;
199 };
200 
201 /// \brief RAII for emitting code of OpenMP constructs.
202 class InlinedOpenMPRegionRAII {
203   CodeGenFunction &CGF;
204 
205 public:
206   /// \brief Constructs region for combined constructs.
207   /// \param CodeGen Code generation sequence for combined directives. Includes
208   /// a list of functions used for code generation of implicitly inlined
209   /// regions.
InlinedOpenMPRegionRAII(CodeGenFunction & CGF,const RegionCodeGenTy & CodeGen)210   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen)
211       : CGF(CGF) {
212     // Start emission for the construct.
213     CGF.CapturedStmtInfo =
214         new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen);
215   }
~InlinedOpenMPRegionRAII()216   ~InlinedOpenMPRegionRAII() {
217     // Restore original CapturedStmtInfo only if we're done with code emission.
218     auto *OldCSI =
219         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
220     delete CGF.CapturedStmtInfo;
221     CGF.CapturedStmtInfo = OldCSI;
222   }
223 };
224 
225 } // namespace
226 
getThreadIDVariableLValue(CodeGenFunction & CGF)227 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
228   return CGF.MakeNaturalAlignAddrLValue(
229       CGF.Builder.CreateAlignedLoad(
230           CGF.GetAddrOfLocalVar(getThreadIDVariable()),
231           CGF.PointerAlignInBytes),
232       getThreadIDVariable()
233           ->getType()
234           ->castAs<PointerType>()
235           ->getPointeeType());
236 }
237 
EmitBody(CodeGenFunction & CGF,const Stmt *)238 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
239   // 1.2.2 OpenMP Language Terminology
240   // Structured block - An executable statement with a single entry at the
241   // top and a single exit at the bottom.
242   // The point of exit cannot be a branch out of the structured block.
243   // longjmp() and throw() must not violate the entry/exit criteria.
244   CGF.EHStack.pushTerminate();
245   {
246     CodeGenFunction::RunCleanupsScope Scope(CGF);
247     CodeGen(CGF);
248   }
249   CGF.EHStack.popTerminate();
250 }
251 
getThreadIDVariableLValue(CodeGenFunction & CGF)252 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
253     CodeGenFunction &CGF) {
254   return CGF.MakeNaturalAlignAddrLValue(
255       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
256       getThreadIDVariable()->getType());
257 }
258 
CGOpenMPRuntime(CodeGenModule & CGM)259 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
260     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
261   IdentTy = llvm::StructType::create(
262       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
263       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
264       CGM.Int8PtrTy /* psource */, nullptr);
265   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
266   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
267                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
268   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
269   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
270 }
271 
clear()272 void CGOpenMPRuntime::clear() {
273   InternalVars.clear();
274 }
275 
276 llvm::Value *
emitParallelOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,const RegionCodeGenTy & CodeGen)277 CGOpenMPRuntime::emitParallelOutlinedFunction(const OMPExecutableDirective &D,
278                                               const VarDecl *ThreadIDVar,
279                                               const RegionCodeGenTy &CodeGen) {
280   assert(ThreadIDVar->getType()->isPointerType() &&
281          "thread id variable must be of type kmp_int32 *");
282   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
283   CodeGenFunction CGF(CGM, true);
284   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
285   CGF.CapturedStmtInfo = &CGInfo;
286   return CGF.GenerateCapturedStmtFunction(*CS);
287 }
288 
289 llvm::Value *
emitTaskOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,const RegionCodeGenTy & CodeGen)290 CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D,
291                                           const VarDecl *ThreadIDVar,
292                                           const RegionCodeGenTy &CodeGen) {
293   assert(!ThreadIDVar->getType()->isPointerType() &&
294          "thread id variable must be of type kmp_int32 for tasks");
295   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
296   CodeGenFunction CGF(CGM, true);
297   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
298   CGF.CapturedStmtInfo = &CGInfo;
299   return CGF.GenerateCapturedStmtFunction(*CS);
300 }
301 
302 llvm::Value *
getOrCreateDefaultLocation(OpenMPLocationFlags Flags)303 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
304   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
305   if (!Entry) {
306     if (!DefaultOpenMPPSource) {
307       // Initialize default location for psource field of ident_t structure of
308       // all ident_t objects. Format is ";file;function;line;column;;".
309       // Taken from
310       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
311       DefaultOpenMPPSource =
312           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
313       DefaultOpenMPPSource =
314           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
315     }
316     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
317         CGM.getModule(), IdentTy, /*isConstant*/ true,
318         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
319     DefaultOpenMPLocation->setUnnamedAddr(true);
320 
321     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
322     llvm::Constant *Values[] = {Zero,
323                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
324                                 Zero, Zero, DefaultOpenMPPSource};
325     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
326     DefaultOpenMPLocation->setInitializer(Init);
327     OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
328     return DefaultOpenMPLocation;
329   }
330   return Entry;
331 }
332 
emitUpdateLocation(CodeGenFunction & CGF,SourceLocation Loc,OpenMPLocationFlags Flags)333 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
334                                                  SourceLocation Loc,
335                                                  OpenMPLocationFlags Flags) {
336   // If no debug info is generated - return global default location.
337   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
338       Loc.isInvalid())
339     return getOrCreateDefaultLocation(Flags);
340 
341   assert(CGF.CurFn && "No function in current CodeGenFunction.");
342 
343   llvm::Value *LocValue = nullptr;
344   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
345   if (I != OpenMPLocThreadIDMap.end())
346     LocValue = I->second.DebugLoc;
347   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
348   // GetOpenMPThreadID was called before this routine.
349   if (LocValue == nullptr) {
350     // Generate "ident_t .kmpc_loc.addr;"
351     llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
352     AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
353     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
354     Elem.second.DebugLoc = AI;
355     LocValue = AI;
356 
357     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
358     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
359     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
360                              llvm::ConstantExpr::getSizeOf(IdentTy),
361                              CGM.PointerAlignInBytes);
362   }
363 
364   // char **psource = &.kmpc_loc_<flags>.addr.psource;
365   auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0,
366                                                          IdentField_PSource);
367 
368   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
369   if (OMPDebugLoc == nullptr) {
370     SmallString<128> Buffer2;
371     llvm::raw_svector_ostream OS2(Buffer2);
372     // Build debug location
373     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
374     OS2 << ";" << PLoc.getFilename() << ";";
375     if (const FunctionDecl *FD =
376             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
377       OS2 << FD->getQualifiedNameAsString();
378     }
379     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
380     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
381     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
382   }
383   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
384   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
385 
386   return LocValue;
387 }
388 
getThreadID(CodeGenFunction & CGF,SourceLocation Loc)389 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
390                                           SourceLocation Loc) {
391   assert(CGF.CurFn && "No function in current CodeGenFunction.");
392 
393   llvm::Value *ThreadID = nullptr;
394   // Check whether we've already cached a load of the thread id in this
395   // function.
396   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
397   if (I != OpenMPLocThreadIDMap.end()) {
398     ThreadID = I->second.ThreadID;
399     if (ThreadID != nullptr)
400       return ThreadID;
401   }
402   if (auto OMPRegionInfo =
403           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
404     if (OMPRegionInfo->getThreadIDVariable()) {
405       // Check if this an outlined function with thread id passed as argument.
406       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
407       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
408       // If value loaded in entry block, cache it and use it everywhere in
409       // function.
410       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
411         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
412         Elem.second.ThreadID = ThreadID;
413       }
414       return ThreadID;
415     }
416   }
417 
418   // This is not an outlined function region - need to call __kmpc_int32
419   // kmpc_global_thread_num(ident_t *loc).
420   // Generate thread id value and cache this value for use across the
421   // function.
422   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
423   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
424   ThreadID =
425       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
426                           emitUpdateLocation(CGF, Loc));
427   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
428   Elem.second.ThreadID = ThreadID;
429   return ThreadID;
430 }
431 
functionFinished(CodeGenFunction & CGF)432 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
433   assert(CGF.CurFn && "No function in current CodeGenFunction.");
434   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
435     OpenMPLocThreadIDMap.erase(CGF.CurFn);
436 }
437 
getIdentTyPointerTy()438 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
439   return llvm::PointerType::getUnqual(IdentTy);
440 }
441 
getKmpc_MicroPointerTy()442 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
443   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
444 }
445 
446 llvm::Constant *
createRuntimeFunction(OpenMPRTLFunction Function)447 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
448   llvm::Constant *RTLFn = nullptr;
449   switch (Function) {
450   case OMPRTL__kmpc_fork_call: {
451     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
452     // microtask, ...);
453     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
454                                 getKmpc_MicroPointerTy()};
455     llvm::FunctionType *FnTy =
456         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
457     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
458     break;
459   }
460   case OMPRTL__kmpc_global_thread_num: {
461     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
462     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
463     llvm::FunctionType *FnTy =
464         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
465     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
466     break;
467   }
468   case OMPRTL__kmpc_threadprivate_cached: {
469     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
470     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
471     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
472                                 CGM.VoidPtrTy, CGM.SizeTy,
473                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
474     llvm::FunctionType *FnTy =
475         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
476     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
477     break;
478   }
479   case OMPRTL__kmpc_critical: {
480     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
481     // kmp_critical_name *crit);
482     llvm::Type *TypeParams[] = {
483         getIdentTyPointerTy(), CGM.Int32Ty,
484         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
485     llvm::FunctionType *FnTy =
486         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
487     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
488     break;
489   }
490   case OMPRTL__kmpc_threadprivate_register: {
491     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
492     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
493     // typedef void *(*kmpc_ctor)(void *);
494     auto KmpcCtorTy =
495         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
496                                 /*isVarArg*/ false)->getPointerTo();
497     // typedef void *(*kmpc_cctor)(void *, void *);
498     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
499     auto KmpcCopyCtorTy =
500         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
501                                 /*isVarArg*/ false)->getPointerTo();
502     // typedef void (*kmpc_dtor)(void *);
503     auto KmpcDtorTy =
504         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
505             ->getPointerTo();
506     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
507                               KmpcCopyCtorTy, KmpcDtorTy};
508     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
509                                         /*isVarArg*/ false);
510     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
511     break;
512   }
513   case OMPRTL__kmpc_end_critical: {
514     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
515     // kmp_critical_name *crit);
516     llvm::Type *TypeParams[] = {
517         getIdentTyPointerTy(), CGM.Int32Ty,
518         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
519     llvm::FunctionType *FnTy =
520         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
521     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
522     break;
523   }
524   case OMPRTL__kmpc_cancel_barrier: {
525     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
526     // global_tid);
527     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
528     llvm::FunctionType *FnTy =
529         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
530     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
531     break;
532   }
533   case OMPRTL__kmpc_for_static_fini: {
534     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
535     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
536     llvm::FunctionType *FnTy =
537         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
538     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
539     break;
540   }
541   case OMPRTL__kmpc_push_num_threads: {
542     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
543     // kmp_int32 num_threads)
544     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
545                                 CGM.Int32Ty};
546     llvm::FunctionType *FnTy =
547         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
548     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
549     break;
550   }
551   case OMPRTL__kmpc_serialized_parallel: {
552     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
553     // global_tid);
554     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
555     llvm::FunctionType *FnTy =
556         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
557     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
558     break;
559   }
560   case OMPRTL__kmpc_end_serialized_parallel: {
561     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
562     // global_tid);
563     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
564     llvm::FunctionType *FnTy =
565         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
566     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
567     break;
568   }
569   case OMPRTL__kmpc_flush: {
570     // Build void __kmpc_flush(ident_t *loc);
571     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
572     llvm::FunctionType *FnTy =
573         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
574     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
575     break;
576   }
577   case OMPRTL__kmpc_master: {
578     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
579     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
580     llvm::FunctionType *FnTy =
581         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
582     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
583     break;
584   }
585   case OMPRTL__kmpc_end_master: {
586     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
587     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
588     llvm::FunctionType *FnTy =
589         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
590     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
591     break;
592   }
593   case OMPRTL__kmpc_omp_taskyield: {
594     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
595     // int end_part);
596     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
597     llvm::FunctionType *FnTy =
598         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
599     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
600     break;
601   }
602   case OMPRTL__kmpc_single: {
603     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
604     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
605     llvm::FunctionType *FnTy =
606         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
607     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
608     break;
609   }
610   case OMPRTL__kmpc_end_single: {
611     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
612     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
613     llvm::FunctionType *FnTy =
614         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
615     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
616     break;
617   }
618   case OMPRTL__kmpc_omp_task_alloc: {
619     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
620     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
621     // kmp_routine_entry_t *task_entry);
622     assert(KmpRoutineEntryPtrTy != nullptr &&
623            "Type kmp_routine_entry_t must be created.");
624     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
625                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
626     // Return void * and then cast to particular kmp_task_t type.
627     llvm::FunctionType *FnTy =
628         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
629     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
630     break;
631   }
632   case OMPRTL__kmpc_omp_task: {
633     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
634     // *new_task);
635     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
636                                 CGM.VoidPtrTy};
637     llvm::FunctionType *FnTy =
638         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
639     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
640     break;
641   }
642   case OMPRTL__kmpc_copyprivate: {
643     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
644     // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
645     // kmp_int32 didit);
646     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
647     auto *CpyFnTy =
648         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
649     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
650                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
651                                 CGM.Int32Ty};
652     llvm::FunctionType *FnTy =
653         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
654     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
655     break;
656   }
657   case OMPRTL__kmpc_reduce: {
658     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
659     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
660     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
661     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
662     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
663                                                /*isVarArg=*/false);
664     llvm::Type *TypeParams[] = {
665         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
666         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
667         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
668     llvm::FunctionType *FnTy =
669         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
670     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
671     break;
672   }
673   case OMPRTL__kmpc_reduce_nowait: {
674     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
675     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
676     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
677     // *lck);
678     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
679     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
680                                                /*isVarArg=*/false);
681     llvm::Type *TypeParams[] = {
682         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
683         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
684         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
685     llvm::FunctionType *FnTy =
686         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
687     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
688     break;
689   }
690   case OMPRTL__kmpc_end_reduce: {
691     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
692     // kmp_critical_name *lck);
693     llvm::Type *TypeParams[] = {
694         getIdentTyPointerTy(), CGM.Int32Ty,
695         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
696     llvm::FunctionType *FnTy =
697         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
698     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
699     break;
700   }
701   case OMPRTL__kmpc_end_reduce_nowait: {
702     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
703     // kmp_critical_name *lck);
704     llvm::Type *TypeParams[] = {
705         getIdentTyPointerTy(), CGM.Int32Ty,
706         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
707     llvm::FunctionType *FnTy =
708         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
709     RTLFn =
710         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
711     break;
712   }
713   }
714   return RTLFn;
715 }
716 
createForStaticInitFunction(unsigned IVSize,bool IVSigned)717 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
718                                                              bool IVSigned) {
719   assert((IVSize == 32 || IVSize == 64) &&
720          "IV size is not compatible with the omp runtime");
721   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
722                                        : "__kmpc_for_static_init_4u")
723                            : (IVSigned ? "__kmpc_for_static_init_8"
724                                        : "__kmpc_for_static_init_8u");
725   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
726   auto PtrTy = llvm::PointerType::getUnqual(ITy);
727   llvm::Type *TypeParams[] = {
728     getIdentTyPointerTy(),                     // loc
729     CGM.Int32Ty,                               // tid
730     CGM.Int32Ty,                               // schedtype
731     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
732     PtrTy,                                     // p_lower
733     PtrTy,                                     // p_upper
734     PtrTy,                                     // p_stride
735     ITy,                                       // incr
736     ITy                                        // chunk
737   };
738   llvm::FunctionType *FnTy =
739       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
740   return CGM.CreateRuntimeFunction(FnTy, Name);
741 }
742 
createDispatchInitFunction(unsigned IVSize,bool IVSigned)743 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
744                                                             bool IVSigned) {
745   assert((IVSize == 32 || IVSize == 64) &&
746          "IV size is not compatible with the omp runtime");
747   auto Name =
748       IVSize == 32
749           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
750           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
751   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
752   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
753                                CGM.Int32Ty,           // tid
754                                CGM.Int32Ty,           // schedtype
755                                ITy,                   // lower
756                                ITy,                   // upper
757                                ITy,                   // stride
758                                ITy                    // chunk
759   };
760   llvm::FunctionType *FnTy =
761       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
762   return CGM.CreateRuntimeFunction(FnTy, Name);
763 }
764 
createDispatchNextFunction(unsigned IVSize,bool IVSigned)765 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
766                                                             bool IVSigned) {
767   assert((IVSize == 32 || IVSize == 64) &&
768          "IV size is not compatible with the omp runtime");
769   auto Name =
770       IVSize == 32
771           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
772           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
773   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
774   auto PtrTy = llvm::PointerType::getUnqual(ITy);
775   llvm::Type *TypeParams[] = {
776     getIdentTyPointerTy(),                     // loc
777     CGM.Int32Ty,                               // tid
778     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
779     PtrTy,                                     // p_lower
780     PtrTy,                                     // p_upper
781     PtrTy                                      // p_stride
782   };
783   llvm::FunctionType *FnTy =
784       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
785   return CGM.CreateRuntimeFunction(FnTy, Name);
786 }
787 
788 llvm::Constant *
getOrCreateThreadPrivateCache(const VarDecl * VD)789 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
790   // Lookup the entry, lazily creating it if necessary.
791   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
792                                      Twine(CGM.getMangledName(VD)) + ".cache.");
793 }
794 
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,llvm::Value * VDAddr,SourceLocation Loc)795 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
796                                                      const VarDecl *VD,
797                                                      llvm::Value *VDAddr,
798                                                      SourceLocation Loc) {
799   auto VarTy = VDAddr->getType()->getPointerElementType();
800   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
801                          CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
802                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
803                          getOrCreateThreadPrivateCache(VD)};
804   return CGF.EmitRuntimeCall(
805       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
806 }
807 
emitThreadPrivateVarInit(CodeGenFunction & CGF,llvm::Value * VDAddr,llvm::Value * Ctor,llvm::Value * CopyCtor,llvm::Value * Dtor,SourceLocation Loc)808 void CGOpenMPRuntime::emitThreadPrivateVarInit(
809     CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
810     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
811   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
812   // library.
813   auto OMPLoc = emitUpdateLocation(CGF, Loc);
814   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
815                       OMPLoc);
816   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
817   // to register constructor/destructor for variable.
818   llvm::Value *Args[] = {OMPLoc,
819                          CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
820                          Ctor, CopyCtor, Dtor};
821   CGF.EmitRuntimeCall(
822       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
823 }
824 
/// \brief Emits the runtime registration for the threadprivate variable \a VD
/// the first time its definition is processed.
///
/// Builds the ".__kmpc_global_ctor_." helper (C++ with \a PerformInit only)
/// and the ".__kmpc_global_dtor_." helper (destructible types only) and hands
/// them to emitThreadPrivateVarInit(). Nothing is registered when neither
/// helper is needed.
///
/// \param VD Variable declaration; its definition is looked up first.
/// \param VDAddr Address of the original variable.
/// \param Loc Location used for the generated functions and runtime calls.
/// \param PerformInit Whether a constructor helper re-running the initializer
/// should be generated (only honoured for C++).
/// \param CGF If non-null, the registration calls are emitted into this
/// function; otherwise a new ".__omp_threadprivate_init_." function is built.
/// \return The new ".__omp_threadprivate_init_." function when \a CGF is null
/// and registration was emitted; nullptr in every other case.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  VD = VD->getDefinition(CGM.getContext());
  // Skip variables without a definition and definitions already handled.
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // Signature: void *ctor(void *Dst); the helper returns Dst.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      // Load the destination pointer and view it as a pointer to VD's type.
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.PointerAlignInBytes,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      auto Arg = CtorCGF.Builder.CreatePointerCast(
          ArgVal,
          CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the incoming pointer and store it as the function's result.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.PointerAlignInBytes,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      // Signature: void dtor(void *Dst).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", Loc);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            SourceLocation());
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.PointerAlignInBytes,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(ArgVal, ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever helper was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration in a fresh
      // "void()" function and return it to the caller.
      auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, ".__omp_threadprivate_init_.");
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
936 
emitParallelCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::Value * OutlinedFn,llvm::Value * CapturedStruct)937 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
938                                        llvm::Value *OutlinedFn,
939                                        llvm::Value *CapturedStruct) {
940   // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
941   llvm::Value *Args[] = {
942       emitUpdateLocation(CGF, Loc),
943       CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument
944       // (there is only one additional argument - 'context')
945       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
946       CGF.EmitCastToVoidPtr(CapturedStruct)};
947   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
948   CGF.EmitRuntimeCall(RTLFn, Args);
949 }
950 
emitSerialCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::Value * OutlinedFn,llvm::Value * CapturedStruct)951 void CGOpenMPRuntime::emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc,
952                                      llvm::Value *OutlinedFn,
953                                      llvm::Value *CapturedStruct) {
954   auto ThreadID = getThreadID(CGF, Loc);
955   // Build calls:
956   // __kmpc_serialized_parallel(&Loc, GTid);
957   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), ThreadID};
958   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
959                       Args);
960 
961   // OutlinedFn(&GTid, &zero, CapturedStruct);
962   auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
963   auto Int32Ty =
964       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
965   auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
966   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
967   llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
968   CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
969 
970   // __kmpc_end_serialized_parallel(&Loc, GTid);
971   llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
972   CGF.EmitRuntimeCall(
973       createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
974 }
975 
976 // If we're inside an (outlined) parallel region, use the region info's
977 // thread-ID variable (it is passed in a first argument of the outlined function
978 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
979 // regular serial code region, get thread ID by calling kmp_int32
980 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
981 // return the address of that temp.
emitThreadIDAddress(CodeGenFunction & CGF,SourceLocation Loc)982 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
983                                                   SourceLocation Loc) {
984   if (auto OMPRegionInfo =
985           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
986     if (OMPRegionInfo->getThreadIDVariable())
987       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
988 
989   auto ThreadID = getThreadID(CGF, Loc);
990   auto Int32Ty =
991       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
992   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
993   CGF.EmitStoreOfScalar(ThreadID,
994                         CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
995 
996   return ThreadIDTemp;
997 }
998 
999 llvm::Constant *
getOrCreateInternalVariable(llvm::Type * Ty,const llvm::Twine & Name)1000 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1001                                              const llvm::Twine &Name) {
1002   SmallString<256> Buffer;
1003   llvm::raw_svector_ostream Out(Buffer);
1004   Out << Name;
1005   auto RuntimeName = Out.str();
1006   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1007   if (Elem.second) {
1008     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1009            "OMP internal variable has different type than requested");
1010     return &*Elem.second;
1011   }
1012 
1013   return Elem.second = new llvm::GlobalVariable(
1014              CGM.getModule(), Ty, /*IsConstant*/ false,
1015              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1016              Elem.first());
1017 }
1018 
getCriticalRegionLock(StringRef CriticalName)1019 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1020   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1021   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1022 }
1023 
1024 namespace {
1025 class CallEndCleanup : public EHScopeStack::Cleanup {
1026 public:
1027   typedef ArrayRef<llvm::Value *> CleanupValuesTy;
1028 private:
1029   llvm::Value *Callee;
1030   llvm::SmallVector<llvm::Value *, 8> Args;
1031 
1032 public:
CallEndCleanup(llvm::Value * Callee,CleanupValuesTy Args)1033   CallEndCleanup(llvm::Value *Callee, CleanupValuesTy Args)
1034       : Callee(Callee), Args(Args.begin(), Args.end()) {}
Emit(CodeGenFunction & CGF,Flags)1035   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1036     CGF.EmitRuntimeCall(Callee, Args);
1037   }
1038 };
1039 } // namespace
1040 
emitCriticalRegion(CodeGenFunction & CGF,StringRef CriticalName,const RegionCodeGenTy & CriticalOpGen,SourceLocation Loc)1041 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1042                                          StringRef CriticalName,
1043                                          const RegionCodeGenTy &CriticalOpGen,
1044                                          SourceLocation Loc) {
1045   // __kmpc_critical(ident_t *, gtid, Lock);
1046   // CriticalOpGen();
1047   // __kmpc_end_critical(ident_t *, gtid, Lock);
1048   // Prepare arguments and build a call to __kmpc_critical
1049   {
1050     CodeGenFunction::RunCleanupsScope Scope(CGF);
1051     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1052                            getCriticalRegionLock(CriticalName)};
1053     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1054     // Build a call to __kmpc_end_critical
1055     CGF.EHStack.pushCleanup<CallEndCleanup>(
1056         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1057         llvm::makeArrayRef(Args));
1058     emitInlinedDirective(CGF, CriticalOpGen);
1059   }
1060 }
1061 
emitIfStmt(CodeGenFunction & CGF,llvm::Value * IfCond,const RegionCodeGenTy & BodyOpGen)1062 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1063                        const RegionCodeGenTy &BodyOpGen) {
1064   llvm::Value *CallBool = CGF.EmitScalarConversion(
1065       IfCond,
1066       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1067       CGF.getContext().BoolTy);
1068 
1069   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1070   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1071   // Generate the branch (If-stmt)
1072   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1073   CGF.EmitBlock(ThenBlock);
1074   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, BodyOpGen);
1075   // Emit the rest of bblocks/branches
1076   CGF.EmitBranch(ContBlock);
1077   CGF.EmitBlock(ContBlock, true);
1078 }
1079 
emitMasterRegion(CodeGenFunction & CGF,const RegionCodeGenTy & MasterOpGen,SourceLocation Loc)1080 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1081                                        const RegionCodeGenTy &MasterOpGen,
1082                                        SourceLocation Loc) {
1083   // if(__kmpc_master(ident_t *, gtid)) {
1084   //   MasterOpGen();
1085   //   __kmpc_end_master(ident_t *, gtid);
1086   // }
1087   // Prepare arguments and build a call to __kmpc_master
1088   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1089   auto *IsMaster =
1090       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1091   emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void {
1092     CodeGenFunction::RunCleanupsScope Scope(CGF);
1093     CGF.EHStack.pushCleanup<CallEndCleanup>(
1094         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1095         llvm::makeArrayRef(Args));
1096     MasterOpGen(CGF);
1097   });
1098 }
1099 
emitTaskyieldCall(CodeGenFunction & CGF,SourceLocation Loc)1100 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1101                                         SourceLocation Loc) {
1102   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1103   llvm::Value *Args[] = {
1104       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1105       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1106   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1107 }
1108 
emitCopyprivateCopyFunction(CodeGenModule & CGM,llvm::Type * ArgsType,ArrayRef<const Expr * > CopyprivateVars,ArrayRef<const Expr * > DestExprs,ArrayRef<const Expr * > SrcExprs,ArrayRef<const Expr * > AssignmentOps)1109 static llvm::Value *emitCopyprivateCopyFunction(
1110     CodeGenModule &CGM, llvm::Type *ArgsType,
1111     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1112     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1113   auto &C = CGM.getContext();
1114   // void copy_func(void *LHSArg, void *RHSArg);
1115   FunctionArgList Args;
1116   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1117                            C.VoidPtrTy);
1118   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1119                            C.VoidPtrTy);
1120   Args.push_back(&LHSArg);
1121   Args.push_back(&RHSArg);
1122   FunctionType::ExtInfo EI;
1123   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1124       C.VoidTy, Args, EI, /*isVariadic=*/false);
1125   auto *Fn = llvm::Function::Create(
1126       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1127       ".omp.copyprivate.copy_func", &CGM.getModule());
1128   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
1129   CodeGenFunction CGF(CGM);
1130   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1131   // Dest = (void*[n])(LHSArg);
1132   // Src = (void*[n])(RHSArg);
1133   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1134       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
1135                                     CGF.PointerAlignInBytes),
1136       ArgsType);
1137   auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1138       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
1139                                     CGF.PointerAlignInBytes),
1140       ArgsType);
1141   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1142   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1143   // ...
1144   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1145   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1146     auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1147         CGF.Builder.CreateAlignedLoad(
1148             CGF.Builder.CreateStructGEP(nullptr, LHS, I),
1149             CGM.PointerAlignInBytes),
1150         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1151     auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1152         CGF.Builder.CreateAlignedLoad(
1153             CGF.Builder.CreateStructGEP(nullptr, RHS, I),
1154             CGM.PointerAlignInBytes),
1155         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1156     CGF.EmitOMPCopy(CGF, CopyprivateVars[I]->getType(), DestAddr, SrcAddr,
1157                     cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()),
1158                     cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
1159                     AssignmentOps[I]);
1160   }
1161   CGF.FinishFunction();
1162   return Fn;
1163 }
1164 
emitSingleRegion(CodeGenFunction & CGF,const RegionCodeGenTy & SingleOpGen,SourceLocation Loc,ArrayRef<const Expr * > CopyprivateVars,ArrayRef<const Expr * > SrcExprs,ArrayRef<const Expr * > DstExprs,ArrayRef<const Expr * > AssignmentOps)1165 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1166                                        const RegionCodeGenTy &SingleOpGen,
1167                                        SourceLocation Loc,
1168                                        ArrayRef<const Expr *> CopyprivateVars,
1169                                        ArrayRef<const Expr *> SrcExprs,
1170                                        ArrayRef<const Expr *> DstExprs,
1171                                        ArrayRef<const Expr *> AssignmentOps) {
1172   assert(CopyprivateVars.size() == SrcExprs.size() &&
1173          CopyprivateVars.size() == DstExprs.size() &&
1174          CopyprivateVars.size() == AssignmentOps.size());
1175   auto &C = CGM.getContext();
1176   // int32 did_it = 0;
1177   // if(__kmpc_single(ident_t *, gtid)) {
1178   //   SingleOpGen();
1179   //   __kmpc_end_single(ident_t *, gtid);
1180   //   did_it = 1;
1181   // }
1182   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1183   // <copy_func>, did_it);
1184 
1185   llvm::AllocaInst *DidIt = nullptr;
1186   if (!CopyprivateVars.empty()) {
1187     // int32 did_it = 0;
1188     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1189     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1190     CGF.InitTempAlloca(DidIt, CGF.Builder.getInt32(0));
1191   }
1192   // Prepare arguments and build a call to __kmpc_single
1193   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1194   auto *IsSingle =
1195       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1196   emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void {
1197     CodeGenFunction::RunCleanupsScope Scope(CGF);
1198     CGF.EHStack.pushCleanup<CallEndCleanup>(
1199         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1200         llvm::makeArrayRef(Args));
1201     SingleOpGen(CGF);
1202     if (DidIt) {
1203       // did_it = 1;
1204       CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
1205                                      DidIt->getAlignment());
1206     }
1207   });
1208   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1209   // <copy_func>, did_it);
1210   if (DidIt) {
1211     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1212     auto CopyprivateArrayTy =
1213         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1214                                /*IndexTypeQuals=*/0);
1215     // Create a list of all private variables for copyprivate.
1216     auto *CopyprivateList =
1217         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1218     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1219       auto *Elem = CGF.Builder.CreateStructGEP(
1220           CopyprivateList->getAllocatedType(), CopyprivateList, I);
1221       CGF.Builder.CreateAlignedStore(
1222           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1223               CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
1224           Elem, CGM.PointerAlignInBytes);
1225     }
1226     // Build function that copies private values from single region to all other
1227     // threads in the corresponding parallel region.
1228     auto *CpyFn = emitCopyprivateCopyFunction(
1229         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1230         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1231     auto *BufSize = CGF.Builder.getInt32(
1232         C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
1233     auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1234                                                                CGF.VoidPtrTy);
1235     auto *DidItVal =
1236         CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes);
1237     llvm::Value *Args[] = {
1238         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1239         getThreadID(CGF, Loc),        // i32 <gtid>
1240         BufSize,                      // i32 <buf_size>
1241         CL,                           // void *<copyprivate list>
1242         CpyFn,                        // void (*) (void *, void *) <copy_func>
1243         DidItVal                      // i32 did_it
1244     };
1245     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1246   }
1247 }
1248 
emitBarrierCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind Kind)1249 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1250                                       OpenMPDirectiveKind Kind) {
1251   // Build call __kmpc_cancel_barrier(loc, thread_id);
1252   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1253   if (Kind == OMPD_for) {
1254     Flags =
1255         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1256   } else if (Kind == OMPD_sections) {
1257     Flags = static_cast<OpenMPLocationFlags>(Flags |
1258                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1259   } else if (Kind == OMPD_single) {
1260     Flags =
1261         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1262   } else if (Kind == OMPD_barrier) {
1263     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1264   } else {
1265     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1266   }
1267   // Build call __kmpc_cancel_barrier(loc, thread_id);
1268   // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
1269   // one provides the same functionality and adds initial support for
1270   // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier()
1271   // is provided default by the runtime library so it safe to make such
1272   // replacement.
1273   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1274                          getThreadID(CGF, Loc)};
1275   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1276 }
1277 
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// The numeric values are passed to the runtime as the 'schedule' argument of
/// the loop init calls, so they must stay in sync with the runtime header.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  /// \brief 'static' schedule with a chunk expression.
  OMP_sch_static_chunked = 33,
  /// \brief 'static' schedule without a chunk expression; also used when no
  /// schedule kind is known.
  OMP_sch_static = 34,
  /// \brief 'dynamic' schedule.
  OMP_sch_dynamic_chunked = 35,
  /// \brief 'guided' schedule.
  OMP_sch_guided_chunked = 36,
  /// \brief 'runtime' schedule.
  OMP_sch_runtime = 37,
  /// \brief 'auto' schedule.
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  /// \brief Lower bound for 'nomerge' versions.
  OMP_nm_lower = 160,
};
1294 
1295 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,bool Chunked)1296 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1297                                           bool Chunked) {
1298   switch (ScheduleKind) {
1299   case OMPC_SCHEDULE_static:
1300     return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
1301   case OMPC_SCHEDULE_dynamic:
1302     return OMP_sch_dynamic_chunked;
1303   case OMPC_SCHEDULE_guided:
1304     return OMP_sch_guided_chunked;
1305   case OMPC_SCHEDULE_auto:
1306     return OMP_sch_auto;
1307   case OMPC_SCHEDULE_runtime:
1308     return OMP_sch_runtime;
1309   case OMPC_SCHEDULE_unknown:
1310     assert(!Chunked && "chunk was specified but schedule kind not known");
1311     return OMP_sch_static;
1312   }
1313   llvm_unreachable("Unexpected runtime schedule");
1314 }
1315 
isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const1316 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1317                                          bool Chunked) const {
1318   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
1319   return Schedule == OMP_sch_static;
1320 }
1321 
isDynamic(OpenMPScheduleClauseKind ScheduleKind) const1322 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1323   auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false);
1324   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1325   return Schedule != OMP_sch_static;
1326 }
1327 
emitForInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPScheduleClauseKind ScheduleKind,unsigned IVSize,bool IVSigned,llvm::Value * IL,llvm::Value * LB,llvm::Value * UB,llvm::Value * ST,llvm::Value * Chunk)1328 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
1329                                   OpenMPScheduleClauseKind ScheduleKind,
1330                                   unsigned IVSize, bool IVSigned,
1331                                   llvm::Value *IL, llvm::Value *LB,
1332                                   llvm::Value *UB, llvm::Value *ST,
1333                                   llvm::Value *Chunk) {
1334   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
1335   if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) {
1336     // Call __kmpc_dispatch_init(
1337     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1338     //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1339     //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1340 
1341     // If the Chunk was not specified in the clause - use default value 1.
1342     if (Chunk == nullptr)
1343       Chunk = CGF.Builder.getIntN(IVSize, 1);
1344     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1345                             getThreadID(CGF, Loc),
1346                             CGF.Builder.getInt32(Schedule), // Schedule type
1347                             CGF.Builder.getIntN(IVSize, 0), // Lower
1348                             UB,                             // Upper
1349                             CGF.Builder.getIntN(IVSize, 1), // Stride
1350                             Chunk                           // Chunk
1351     };
1352     CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1353   } else {
1354     // Call __kmpc_for_static_init(
1355     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1356     //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1357     //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1358     //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1359     if (Chunk == nullptr) {
1360       assert(Schedule == OMP_sch_static &&
1361              "expected static non-chunked schedule");
1362       // If the Chunk was not specified in the clause - use default value 1.
1363       Chunk = CGF.Builder.getIntN(IVSize, 1);
1364     } else
1365       assert(Schedule == OMP_sch_static_chunked &&
1366              "expected static chunked schedule");
1367     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1368                             getThreadID(CGF, Loc),
1369                             CGF.Builder.getInt32(Schedule), // Schedule type
1370                             IL,                             // &isLastIter
1371                             LB,                             // &LB
1372                             UB,                             // &UB
1373                             ST,                             // &Stride
1374                             CGF.Builder.getIntN(IVSize, 1), // Incr
1375                             Chunk                           // Chunk
1376     };
1377     CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1378   }
1379 }
1380 
emitForFinish(CodeGenFunction & CGF,SourceLocation Loc,OpenMPScheduleClauseKind ScheduleKind)1381 void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
1382                                     OpenMPScheduleClauseKind ScheduleKind) {
1383   assert((ScheduleKind == OMPC_SCHEDULE_static ||
1384           ScheduleKind == OMPC_SCHEDULE_unknown) &&
1385          "Non-static schedule kinds are not yet implemented");
1386   (void)ScheduleKind;
1387   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1388   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1389                          getThreadID(CGF, Loc)};
1390   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1391                       Args);
1392 }
1393 
emitForNext(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned,llvm::Value * IL,llvm::Value * LB,llvm::Value * UB,llvm::Value * ST)1394 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1395                                           SourceLocation Loc, unsigned IVSize,
1396                                           bool IVSigned, llvm::Value *IL,
1397                                           llvm::Value *LB, llvm::Value *UB,
1398                                           llvm::Value *ST) {
1399   // Call __kmpc_dispatch_next(
1400   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1401   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1402   //          kmp_int[32|64] *p_stride);
1403   llvm::Value *Args[] = {
1404       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1405       IL, // &isLastIter
1406       LB, // &Lower
1407       UB, // &Upper
1408       ST  // &Stride
1409   };
1410   llvm::Value *Call =
1411       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1412   return CGF.EmitScalarConversion(
1413       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1414       CGF.getContext().BoolTy);
1415 }
1416 
emitNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)1417 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1418                                            llvm::Value *NumThreads,
1419                                            SourceLocation Loc) {
1420   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1421   llvm::Value *Args[] = {
1422       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1423       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1424   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1425                       Args);
1426 }
1427 
emitFlush(CodeGenFunction & CGF,ArrayRef<const Expr * >,SourceLocation Loc)1428 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1429                                 SourceLocation Loc) {
1430   // Build call void __kmpc_flush(ident_t *loc)
1431   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1432                       emitUpdateLocation(CGF, Loc));
1433 }
1434 
namespace {
/// \brief Indexes of fields for type kmp_task_t.
///
/// The values are used as struct GEP indexes, so they are spelled out
/// explicitly to make the expected field order visible.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds = 0,
  /// \brief Task routine.
  KmpTaskTRoutine = 1,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors = 3,
};
} // namespace
1448 
emitKmpRoutineEntryT(QualType KmpInt32Ty)1449 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1450   if (!KmpRoutineEntryPtrTy) {
1451     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
1452     auto &C = CGM.getContext();
1453     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1454     FunctionProtoType::ExtProtoInfo EPI;
1455     KmpRoutineEntryPtrQTy = C.getPointerType(
1456         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1457     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
1458   }
1459 }
1460 
addFieldToRecordDecl(ASTContext & C,DeclContext * DC,QualType FieldTy)1461 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1462                                  QualType FieldTy) {
1463   auto *Field = FieldDecl::Create(
1464       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1465       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1466       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1467   Field->setAccess(AS_public);
1468   DC->addDecl(Field);
1469 }
1470 
createKmpTaskTRecordDecl(CodeGenModule & CGM,QualType KmpInt32Ty,QualType KmpRoutineEntryPointerQTy)1471 static QualType createKmpTaskTRecordDecl(CodeGenModule &CGM,
1472                                          QualType KmpInt32Ty,
1473                                          QualType KmpRoutineEntryPointerQTy) {
1474   auto &C = CGM.getContext();
1475   // Build struct kmp_task_t {
1476   //         void *              shareds;
1477   //         kmp_routine_entry_t routine;
1478   //         kmp_int32           part_id;
1479   //         kmp_routine_entry_t destructors;
1480   //         /*  private vars  */
1481   //       };
1482   auto *RD = C.buildImplicitRecord("kmp_task_t");
1483   RD->startDefinition();
1484   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1485   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1486   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1487   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1488   // TODO: add private fields.
1489   RD->completeDefinition();
1490   return C.getRecordType(RD);
1491 }
1492 
1493 /// \brief Emit a proxy function which accepts kmp_task_t as the second
1494 /// argument.
1495 /// \code
1496 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
1497 ///   TaskFunction(gtid, tt->part_id, tt->shareds);
1498 ///   return 0;
1499 /// }
1500 /// \endcode
1501 static llvm::Value *
emitProxyTaskFunction(CodeGenModule & CGM,SourceLocation Loc,QualType KmpInt32Ty,QualType KmpTaskTPtrQTy,QualType SharedsPtrTy,llvm::Value * TaskFunction,llvm::Type * KmpTaskTTy)1502 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
1503                       QualType KmpInt32Ty, QualType KmpTaskTPtrQTy,
1504                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
1505                       llvm::Type *KmpTaskTTy) {
1506   auto &C = CGM.getContext();
1507   FunctionArgList Args;
1508   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1509   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1510                                 /*Id=*/nullptr, KmpTaskTPtrQTy);
1511   Args.push_back(&GtidArg);
1512   Args.push_back(&TaskTypeArg);
1513   FunctionType::ExtInfo Info;
1514   auto &TaskEntryFnInfo =
1515       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1516                                                     /*isVariadic=*/false);
1517   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
1518   auto *TaskEntry =
1519       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
1520                              ".omp_task_entry.", &CGM.getModule());
1521   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
1522   CodeGenFunction CGF(CGM);
1523   CGF.disableDebugInfo();
1524   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
1525 
1526   // TaskFunction(gtid, tt->part_id, tt->shareds);
1527   auto *GtidParam = CGF.EmitLoadOfScalar(
1528       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
1529       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
1530   auto TaskTypeArgAddr = CGF.EmitLoadOfScalar(
1531       CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false,
1532       CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc);
1533   auto *PartidPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr,
1534                                                 /*Idx=*/KmpTaskTPartId);
1535   auto *PartidParam = CGF.EmitLoadOfScalar(
1536       PartidPtr, /*Volatile=*/false,
1537       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
1538   auto *SharedsPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr,
1539                                                  /*Idx=*/KmpTaskTShareds);
1540   auto *SharedsParam =
1541       CGF.EmitLoadOfScalar(SharedsPtr, /*Volatile=*/false,
1542                            CGM.PointerAlignInBytes, C.VoidPtrTy, Loc);
1543   llvm::Value *CallArgs[] = {
1544       GtidParam, PartidParam,
1545       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1546           SharedsParam, CGF.ConvertTypeForMem(SharedsPtrTy))};
1547   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
1548   CGF.EmitStoreThroughLValue(
1549       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
1550       CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
1551   CGF.FinishFunction();
1552   return TaskEntry;
1553 }
1554 
/// \brief Emit the code that allocates a task with __kmpc_omp_task_alloc,
/// fills in its kmp_task_t record and schedules it with __kmpc_omp_task.
///
/// \param Tied true to set the 'tied' bit in the task flags.
/// \param Final Either a value (pointer part) that selects the 'final' bit
/// with an emitted select, or, if the pointer part is null, a constant (int
/// part) that decides the bit directly.
/// \param TaskFunction Outlined task function called through the proxy entry.
/// \param SharedsTy Record type of the shared variables.
/// \param Shareds Source of the shareds copy when \a SharedsTy has fields.
void CGOpenMPRuntime::emitTaskCall(
    CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
    llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
    llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) {
  auto &C = CGM.getContext();
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build particular struct kmp_task_t for the given task.
  auto KmpTaskQTy =
      createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy);
  QualType KmpTaskTPtrQTy = C.getPointerType(KmpTaskQTy);
  auto *KmpTaskTTy = CGF.ConvertType(KmpTaskQTy);
  auto *KmpTaskTPtrTy = KmpTaskTTy->getPointerTo();
  auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy));
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry =
      emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy, SharedsPtrTy,
                            TaskFunction, KmpTaskTTy);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  const unsigned TiedFlag = 0x1;
  const unsigned FinalFlag = 0x2;
  // The 'tied' bit is known at compile time.
  unsigned Flags = Tied ? TiedFlag : 0;
  // The 'final' bit is chosen by an emitted select when a condition value is
  // available, otherwise it is folded into a constant.
  auto *TaskFlags =
      Final.getPointer()
          ? CGF.Builder.CreateSelect(Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags, KmpTaskTySize,
                              CGM.getSize(SharedsSize),
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  // View the returned task through a kmp_task_t pointer to address its fields.
  auto *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NewTask, KmpTaskTPtrTy);
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  // NOTE(review): getAsStructureType() is dereferenced without a null check,
  // so SharedsTy is presumably always a structure type here - confirm with
  // callers.
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty())
    CGF.EmitAggregateCopy(
        CGF.EmitLoadOfScalar(
            CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy,
                                        /*Idx=*/KmpTaskTShareds),
            /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, Loc),
        Shareds, SharedsTy);
  // TODO: generate function with destructors for privates.
  // Provide pointer to function with destructors for privates.
  // For now a null pointer is stored into the destructors field.
  CGF.Builder.CreateAlignedStore(
      llvm::ConstantPointerNull::get(
          cast<llvm::PointerType>(KmpRoutineEntryPtrTy)),
      CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy,
                                  /*Idx=*/KmpTaskTDestructors),
      CGM.PointerAlignInBytes);

  // NOTE: routine and part_id fields are initialized by
  // __kmpc_omp_task_alloc() libcall.
  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
  // *new_task);
  llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc),
                             getThreadID(CGF, Loc), NewTask};
  // TODO: add check for untied tasks.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
}
1631 
/// \brief Emit the internal function
/// 'void .omp.reduction.reduction_func(void *LHSArg, void *RHSArg)' that
/// applies every reduction operation to the elements of the two argument
/// lists.
///
/// \param ArgsType Pointer type both void * arguments are cast to before
/// their elements are accessed.
/// \param LHSExprs References to the variables that are remapped to the
/// elements loaded from the first argument.
/// \param RHSExprs References to the variables that are remapped to the
/// elements loaded from the second argument.
/// \param ReductionOps Expressions emitted once the remapping is in place.
/// \return The emitted function.
static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
                                          llvm::Type *ArgsType,
                                          ArrayRef<const Expr *> LHSExprs,
                                          ArrayRef<const Expr *> RHSExprs,
                                          ArrayRef<const Expr *> ReductionOps) {
  auto &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  FunctionType::ExtInfo EI;
  auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
      C.VoidTy, Args, EI, /*isVariadic=*/false);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.reduction.reduction_func", &CGM.getModule());
  CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
                                    CGF.PointerAlignInBytes),
      ArgsType);
  auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
                                    CGF.PointerAlignInBytes),
      ArgsType);

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap the variables referenced by RHSExprs[I] / LHSExprs[I] to the I-th
  // pointers loaded from the argument lists, so that the reduction operations
  // emitted below work on those elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
    Scope.addPrivate(
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()),
        [&]() -> llvm::Value *{
          return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.Builder.CreateAlignedLoad(
                  CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I),
                  CGM.PointerAlignInBytes),
              CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType())));
        });
    Scope.addPrivate(
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()),
        [&]() -> llvm::Value *{
          return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.Builder.CreateAlignedLoad(
                  CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I),
                  CGM.PointerAlignInBytes),
              CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType())));
        });
  }
  Scope.Privatize();
  // Emit each reduction operation for its side effects; the value is ignored.
  for (auto *E : ReductionOps) {
    CGF.EmitIgnoredExpr(E);
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1700 
emitReduction(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,ArrayRef<const Expr * > ReductionOps,bool WithNowait)1701 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
1702                                     ArrayRef<const Expr *> LHSExprs,
1703                                     ArrayRef<const Expr *> RHSExprs,
1704                                     ArrayRef<const Expr *> ReductionOps,
1705                                     bool WithNowait) {
1706   // Next code should be emitted for reduction:
1707   //
1708   // static kmp_critical_name lock = { 0 };
1709   //
1710   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
1711   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
1712   //  ...
1713   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
1714   //  *(Type<n>-1*)rhs[<n>-1]);
1715   // }
1716   //
1717   // ...
1718   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
1719   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
1720   // RedList, reduce_func, &<lock>)) {
1721   // case 1:
1722   //  ...
1723   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
1724   //  ...
1725   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
1726   // break;
1727   // case 2:
1728   //  ...
1729   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
1730   //  ...
1731   // break;
1732   // default:;
1733   // }
1734 
1735   auto &C = CGM.getContext();
1736 
1737   // 1. Build a list of reduction variables.
1738   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
1739   llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size());
1740   QualType ReductionArrayTy =
1741       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1742                              /*IndexTypeQuals=*/0);
1743   auto *ReductionList =
1744       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
1745   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
1746     auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I);
1747     CGF.Builder.CreateAlignedStore(
1748         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1749             CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy),
1750         Elem, CGM.PointerAlignInBytes);
1751   }
1752 
1753   // 2. Emit reduce_func().
1754   auto *ReductionFn = emitReductionFunction(
1755       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs,
1756       RHSExprs, ReductionOps);
1757 
1758   // 3. Create static kmp_critical_name lock = { 0 };
1759   auto *Lock = getCriticalRegionLock(".reduction");
1760 
1761   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
1762   // RedList, reduce_func, &<lock>);
1763   auto *IdentTLoc = emitUpdateLocation(
1764       CGF, Loc,
1765       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
1766   auto *ThreadId = getThreadID(CGF, Loc);
1767   auto *ReductionArrayTySize = llvm::ConstantInt::get(
1768       CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
1769   auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList,
1770                                                              CGF.VoidPtrTy);
1771   llvm::Value *Args[] = {
1772       IdentTLoc,                             // ident_t *<loc>
1773       ThreadId,                              // i32 <gtid>
1774       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
1775       ReductionArrayTySize,                  // size_type sizeof(RedList)
1776       RL,                                    // void *RedList
1777       ReductionFn, // void (*) (void *, void *) <reduce_func>
1778       Lock         // kmp_critical_name *&<lock>
1779   };
1780   auto Res = CGF.EmitRuntimeCall(
1781       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
1782                                        : OMPRTL__kmpc_reduce),
1783       Args);
1784 
1785   // 5. Build switch(res)
1786   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
1787   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
1788 
1789   // 6. Build case 1:
1790   //  ...
1791   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
1792   //  ...
1793   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
1794   // break;
1795   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
1796   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
1797   CGF.EmitBlock(Case1BB);
1798 
1799   {
1800     CodeGenFunction::RunCleanupsScope Scope(CGF);
1801     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
1802     llvm::Value *EndArgs[] = {
1803         IdentTLoc, // ident_t *<loc>
1804         ThreadId,  // i32 <gtid>
1805         Lock       // kmp_critical_name *&<lock>
1806     };
1807     CGF.EHStack.pushCleanup<CallEndCleanup>(
1808         NormalAndEHCleanup,
1809         createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
1810                                          : OMPRTL__kmpc_end_reduce),
1811         llvm::makeArrayRef(EndArgs));
1812     for (auto *E : ReductionOps) {
1813       CGF.EmitIgnoredExpr(E);
1814     }
1815   }
1816 
1817   CGF.EmitBranch(DefaultBB);
1818 
1819   // 7. Build case 2:
1820   //  ...
1821   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
1822   //  ...
1823   // break;
1824   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
1825   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
1826   CGF.EmitBlock(Case2BB);
1827 
1828   {
1829     CodeGenFunction::RunCleanupsScope Scope(CGF);
1830     auto I = LHSExprs.begin();
1831     for (auto *E : ReductionOps) {
1832       const Expr *XExpr = nullptr;
1833       const Expr *EExpr = nullptr;
1834       const Expr *UpExpr = nullptr;
1835       BinaryOperatorKind BO = BO_Comma;
1836       // Try to emit update expression as a simple atomic.
1837       if (auto *ACO = dyn_cast<AbstractConditionalOperator>(E)) {
1838         // If this is a conditional operator, analyze it's condition for
1839         // min/max reduction operator.
1840         E = ACO->getCond();
1841       }
1842       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
1843         if (BO->getOpcode() == BO_Assign) {
1844           XExpr = BO->getLHS();
1845           UpExpr = BO->getRHS();
1846         }
1847       }
1848       // Analyze RHS part of the whole expression.
1849       if (UpExpr) {
1850         if (auto *BORHS =
1851                 dyn_cast<BinaryOperator>(UpExpr->IgnoreParenImpCasts())) {
1852           EExpr = BORHS->getRHS();
1853           BO = BORHS->getOpcode();
1854         }
1855       }
1856       if (XExpr) {
1857         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1858         LValue X = CGF.EmitLValue(XExpr);
1859         RValue E;
1860         if (EExpr)
1861           E = CGF.EmitAnyExpr(EExpr);
1862         CGF.EmitOMPAtomicSimpleUpdateExpr(
1863             X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
1864             [&CGF, UpExpr, VD](RValue XRValue) {
1865               CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
1866               PrivateScope.addPrivate(
1867                   VD, [&CGF, VD, XRValue]() -> llvm::Value *{
1868                     auto *LHSTemp = CGF.CreateMemTemp(VD->getType());
1869                     CGF.EmitStoreThroughLValue(
1870                         XRValue,
1871                         CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType()));
1872                     return LHSTemp;
1873                   });
1874               (void)PrivateScope.Privatize();
1875               return CGF.EmitAnyExpr(UpExpr);
1876             });
1877       } else {
1878         // Emit as a critical region.
1879         emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) {
1880           CGF.EmitIgnoredExpr(E);
1881         }, Loc);
1882       }
1883       ++I;
1884     }
1885   }
1886 
1887   CGF.EmitBranch(DefaultBB);
1888   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
1889 }
1890 
emitInlinedDirective(CodeGenFunction & CGF,const RegionCodeGenTy & CodeGen)1891 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
1892                                            const RegionCodeGenTy &CodeGen) {
1893   InlinedOpenMPRegionRAII Region(CGF, CodeGen);
1894   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
1895 }
1896 
1897