//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CGCleanup.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// \brief Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// \brief Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// \brief Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// \brief Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// \brief Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen) {}

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// \brief Emit the captured statement body.
  virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// \brief Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

protected:
  CGOpenMPRegionKind RegionKind;
  const RegionCodeGenTy &CodeGen;
};

/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen),
        ThreadIDVar(ThreadIDVar) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }
  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
};

/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen),
        ThreadIDVar(ThreadIDVar) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }
  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
};

/// \brief API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen), OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
  // \brief Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }
  virtual void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }
  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }
  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

private:
  /// \brief CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// \brief RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;

public:
  /// \brief Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo =
        new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen);
  }
  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
  }
};

} // namespace

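// In outlined parallel regions the thread id is passed as a kmp_int32 *
// parameter, so form the lvalue by loading that pointer and addressing the
// pointee.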
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.MakeNaturalAlignAddrLValue(
      CGF.Builder.CreateAlignedLoad(
          CGF.GetAddrOfLocalVar(getThreadIDVariable()),
          CGF.PointerAlignInBytes),
      getThreadIDVariable()
          ->getType()
          ->castAs<PointerType>()
          ->getPointeeType());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    CodeGen(CGF);
  }
  CGF.EHStack.popTerminate();
}

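// For task regions the thread id variable is a plain kmp_int32 local rather
// than a pointer parameter, so the lvalue is formed directly on the variable.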
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeNaturalAlignAddrLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType());
}

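// Pre-build the LLVM types shared by all libomp calls: the ident_t location
// struct, the kmpc_micro outlined-function type and the kmp_critical_name
// lock type (an array of 8 kmp_int32).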
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
  IdentTy = llvm::StructType::create(
      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
      CGM.Int8PtrTy /* psource */, nullptr);
  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...)
  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                               llvm::PointerType::getUnqual(CGM.Int32Ty)};
  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}

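// Outline the captured statement of a 'parallel' directive into a helper with
// a kmpc_micro-compatible signature, e.g. (illustrative only):
//   void .omp_outlined.(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                       <captured struct> *context);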
llvm::Value *
CGOpenMPRuntime::emitParallelOutlinedFunction(const OMPExecutableDirective &D,
                                              const VarDecl *ThreadIDVar,
                                              const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  CodeGenFunction CGF(CGM, true);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
  CGF.CapturedStmtInfo = &CGInfo;
  return CGF.GenerateCapturedStmtFunction(*CS);
}

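// Same idea for 'task' directives, except that the thread id is passed to the
// outlined helper by value (kmp_int32) rather than through a pointer.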
llvm::Value *
CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D,
                                          const VarDecl *ThreadIDVar,
                                          const RegionCodeGenTy &CodeGen) {
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
  CGF.CapturedStmtInfo = &CGInfo;
  return CGF.GenerateCapturedStmtFunction(*CS);
}

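// Return a module-level ident_t constant for the given flags, creating it on
// first use; its psource field points at the ";unknown;unknown;0;0;;" string.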
llvm::Value *
CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }
    auto DefaultOpenMPLocation = new llvm::GlobalVariable(
        CGM.getModule(), IdentTy, /*isConstant*/ true,
        llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
    DefaultOpenMPLocation->setUnnamedAddr(true);

    llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
    llvm::Constant *Values[] = {Zero,
                                llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                                Zero, Zero, DefaultOpenMPPSource};
    llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
    DefaultOpenMPLocation->setInitializer(Init);
    OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
    return DefaultOpenMPLocation;
  }
  return Entry;
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 OpenMPLocationFlags Flags) {
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags);

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *LocValue = nullptr;
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = I->second.DebugLoc;
  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (LocValue == nullptr) {
    // Generate "ident_t .kmpc_loc.addr;"
    llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
    AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI;
    LocValue = AI;

    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             llvm::ConstantExpr::getSizeOf(IdentTy),
                             CGM.PointerAlignInBytes);
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0,
                                                         IdentField_PSource);

  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const FunctionDecl *FD =
            dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
      OS2 << FD->getQualifiedNameAsString();
    }
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.Builder.CreateStore(OMPDebugLoc, PSource);

  return LocValue;
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  if (auto OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
      // If the value is loaded in the entry block, cache it and use it
      // everywhere in the function.
      if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
        auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
        Elem.second.ThreadID = ThreadID;
      }
      return ThreadID;
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate the thread id value and cache it for use across the function.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
  ThreadID =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                          emitUpdateLocation(CGF, Loc));
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  Elem.second.ThreadID = ThreadID;
  return ThreadID;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn))
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return llvm::PointerType::getUnqual(IdentTy);
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

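// Lazily declare the requested libomp entry point with a matching LLVM
// function type. Each case documents the C prototype it mirrors.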
llvm::Constant *
CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
  llvm::Constant *RTLFn = nullptr;
  switch (Function) {
  case OMPRTL__kmpc_fork_call: {
    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    break;
  }
  case OMPRTL__kmpc_global_thread_num: {
    // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
    break;
  }
  case OMPRTL__kmpc_threadprivate_cached: {
    // Build void *__kmpc_threadprivate_cached(ident_t *loc,
    // kmp_int32 global_tid, void *data, size_t size, void ***cache);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy, CGM.SizeTy,
                                CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
    break;
  }
  case OMPRTL__kmpc_critical: {
    // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
    break;
  }
  case OMPRTL__kmpc_threadprivate_register: {
    // Build void __kmpc_threadprivate_register(ident_t *, void *data,
    // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
    // typedef void *(*kmpc_ctor)(void *);
    auto KmpcCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                /*isVarArg*/ false)->getPointerTo();
    // typedef void *(*kmpc_cctor)(void *, void *);
    llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto KmpcCopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
                                /*isVarArg*/ false)->getPointerTo();
    // typedef void (*kmpc_dtor)(void *);
    auto KmpcDtorTy =
        llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
            ->getPointerTo();
    llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
                              KmpcCopyCtorTy, KmpcDtorTy};
    auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
                                        /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
    break;
  }
  case OMPRTL__kmpc_end_critical: {
    // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
    break;
  }
  case OMPRTL__kmpc_cancel_barrier: {
    // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
    break;
  }
  case OMPRTL__kmpc_for_static_fini: {
    // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
    break;
  }
  case OMPRTL__kmpc_push_num_threads: {
    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_threads)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
    break;
  }
  case OMPRTL__kmpc_serialized_parallel: {
    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_end_serialized_parallel: {
    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_flush: {
    // Build void __kmpc_flush(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
    break;
  }
  case OMPRTL__kmpc_master: {
    // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
    break;
  }
  case OMPRTL__kmpc_end_master: {
    // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
    break;
  }
  case OMPRTL__kmpc_omp_taskyield: {
    // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
    // int end_part);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
    break;
  }
  case OMPRTL__kmpc_single: {
    // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
    break;
  }
  case OMPRTL__kmpc_end_single: {
    // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
    break;
  }
  case OMPRTL__kmpc_omp_task_alloc: {
    // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
    // kmp_routine_entry_t *task_entry);
    assert(KmpRoutineEntryPtrTy != nullptr &&
           "Type kmp_routine_entry_t must be created.");
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
    // Return void * and then cast to particular kmp_task_t type.
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
    break;
  }
  case OMPRTL__kmpc_omp_task: {
    // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    // *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
    break;
  }
  case OMPRTL__kmpc_copyprivate: {
    // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
    // kmp_int32 didit);
    llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CpyFnTy =
        llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
    break;
  }
  case OMPRTL__kmpc_reduce: {
    // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
    // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
    break;
  }
  case OMPRTL__kmpc_reduce_nowait: {
    // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
    // *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_end_reduce: {
    // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
    break;
  }
  case OMPRTL__kmpc_end_reduce_nowait: {
    // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
    break;
  }
  }
  return RTLFn;
}

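// __kmpc_for_static_init has four variants selected by induction-variable
// width and signedness (_4, _4u, _8, _8u); the same pattern is used below for
// the dispatch_init and dispatch_next entry points.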
llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
                                                             bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                       : "__kmpc_for_static_init_4u")
                           : (IVSigned ? "__kmpc_for_static_init_8"
                                       : "__kmpc_for_static_init_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    CGM.Int32Ty,                               // schedtype
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy,                                     // p_stride
    ITy,                                       // incr
    ITy                                        // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(), // loc
    CGM.Int32Ty,           // tid
    CGM.Int32Ty,           // schedtype
    ITy,                   // lower
    ITy,                   // upper
    ITy,                   // stride
    ITy                    // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy                                      // p_stride
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  // Lookup the entry, lazily creating it if necessary.
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(CGM.getMangledName(VD)) + ".cache.");
}

llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                     const VarDecl *VD,
                                                     llvm::Value *VDAddr,
                                                     SourceLocation Loc) {
  auto VarTy = VDAddr->getType()->getPointerElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  auto OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {OMPLoc,
                         CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
                         Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.PointerAlignInBytes,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      auto Arg = CtorCGF.Builder.CreatePointerCast(
          ArgVal,
          CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.PointerAlignInBytes,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits a destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", Loc);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            SourceLocation());
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.PointerAlignInBytes,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(ArgVal, ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by the runtime, which currently requires that
    // this parameter always be NULL; otherwise it fires an assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, ".__omp_threadprivate_init_.");
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       llvm::Value *CapturedStruct) {
  // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument
      // (there is only one additional argument - 'context')
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
      CGF.EmitCastToVoidPtr(CapturedStruct)};
  auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     llvm::Value *OutlinedFn,
                                     llvm::Value *CapturedStruct) {
  auto ThreadID = getThreadID(CGF, Loc);
  // Build calls:
  // __kmpc_serialized_parallel(&Loc, GTid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), ThreadID};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
                      Args);

  // OutlinedFn(&GTid, &zero, CapturedStruct);
  auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
  auto Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
  llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
  CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);

  // __kmpc_end_serialized_parallel(&Loc, GTid);
  llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in a regular serial code region, get the thread ID by calling
// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
// temporary and return the address of that temporary.
llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                                  SourceLocation Loc) {
  if (auto OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  auto ThreadID = getThreadID(CGF, Loc);
  auto Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

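// Return a module-scope global with common linkage and a zero initializer for
// the given name, creating it on first use; on reuse, assert that the
// requested type matches the cached entry.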
llvm::Constant *
CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
                                             const llvm::Twine &Name) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  auto RuntimeName = Out.str();
  auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first());
}

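// The lock guarding a critical region is an internal kmp_critical_name global
// named ".gomp_critical_user_<critical name>.var".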
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  llvm::Twine Name(".gomp_critical_user_", CriticalName);
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
}

namespace {
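/// \brief Cleanup that emits a call to the given "end" runtime function with
/// the captured arguments; pushed as NormalAndEHCleanup so the end call is
/// also emitted on exceptional exits from the region.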
class CallEndCleanup : public EHScopeStack::Cleanup {
public:
  typedef ArrayRef<llvm::Value *> CleanupValuesTy;
private:
  llvm::Value *Callee;
  llvm::SmallVector<llvm::Value *, 8> Args;

public:
  CallEndCleanup(llvm::Value *Callee, CleanupValuesTy Args)
      : Callee(Callee), Args(Args.begin(), Args.end()) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    CGF.EmitRuntimeCall(Callee, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc) {
  // __kmpc_critical(ident_t *, gtid, Lock);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                           getCriticalRegionLock(CriticalName)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
    // Build a call to __kmpc_end_critical
    CGF.EHStack.pushCleanup<CallEndCleanup>(
        NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
        llvm::makeArrayRef(Args));
    emitInlinedDirective(CGF, CriticalOpGen);
  }
}

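// Emit "if (IfCond) { BodyOpGen(CGF); }" using the omp_if.then/omp_if.end
// blocks; IfCond is a kmp_int32 result from a runtime call such as
// __kmpc_master or __kmpc_single.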
static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
                       const RegionCodeGenTy &BodyOpGen) {
  llvm::Value *CallBool = CGF.EmitScalarConversion(
      IfCond,
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
      CGF.getContext().BoolTy);

  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
  auto *ContBlock = CGF.createBasicBlock("omp_if.end");
  // Generate the branch (If-stmt)
  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
  CGF.EmitBlock(ThenBlock);
  CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, BodyOpGen);
  // Emit the rest of the basic blocks/branches
  CGF.EmitBranch(ContBlock);
  CGF.EmitBlock(ContBlock, true);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  auto *IsMaster =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
  emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    CGF.EHStack.pushCleanup<CallEndCleanup>(
        NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
        llvm::makeArrayRef(Args));
    MasterOpGen(CGF);
  });
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
  auto &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  FunctionType::ExtInfo EI;
  auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
      C.VoidTy, Args, EI, /*isVariadic=*/false);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.copyprivate.copy_func", &CGM.getModule());
  CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
                                    CGF.PointerAlignInBytes),
      ArgsType);
  auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
                                    CGF.PointerAlignInBytes),
      ArgsType);
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateAlignedLoad(
            CGF.Builder.CreateStructGEP(nullptr, LHS, I),
            CGM.PointerAlignInBytes),
        CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
    auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateAlignedLoad(
            CGF.Builder.CreateStructGEP(nullptr, RHS, I),
            CGM.PointerAlignInBytes),
        CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
    CGF.EmitOMPCopy(CGF, CopyprivateVars[I]->getType(), DestAddr, SrcAddr,
                    cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()),
                    cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
                    AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  llvm::AllocaInst *DidIt = nullptr;
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.InitTempAlloca(DidIt, CGF.Builder.getInt32(0));
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  auto *IsSingle =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
  emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    CGF.EHStack.pushCleanup<CallEndCleanup>(
        NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
        llvm::makeArrayRef(Args));
    SingleOpGen(CGF);
    if (DidIt) {
      // did_it = 1;
      CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
                                     DidIt->getAlignment());
    }
  });
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    auto CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    auto *CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      auto *Elem = CGF.Builder.CreateStructGEP(
          CopyprivateList->getAllocatedType(), CopyprivateList, I);
      CGF.Builder.CreateAlignedStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
          Elem, CGM.PointerAlignInBytes);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    auto *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
    auto *BufSize = CGF.Builder.getInt32(
        C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
    auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                               CGF.VoidPtrTy);
    auto *DidItVal =
        CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // i32 <buf_size>
        CL,                           // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

emitBarrierCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind Kind)1249 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1250 OpenMPDirectiveKind Kind) {
1251 // Build call __kmpc_cancel_barrier(loc, thread_id);
1252 OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1253 if (Kind == OMPD_for) {
1254 Flags =
1255 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1256 } else if (Kind == OMPD_sections) {
1257 Flags = static_cast<OpenMPLocationFlags>(Flags |
1258 OMP_IDENT_BARRIER_IMPL_SECTIONS);
1259 } else if (Kind == OMPD_single) {
1260 Flags =
1261 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1262 } else if (Kind == OMPD_barrier) {
1263 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1264 } else {
1265 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1266 }
1267 // Build call __kmpc_cancel_barrier(loc, thread_id);
1268 // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
1269 // one provides the same functionality and adds initial support for
1270 // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier()
1271 // is provided default by the runtime library so it safe to make such
1272 // replacement.
1273 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1274 getThreadID(CGF, Loc)};
1275 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1276 }
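
// For example (sketch): a standalone '#pragma omp barrier' lowers to
//   __kmpc_cancel_barrier(&<loc with OMP_IDENT_BARRIER_EXPL>, <gtid>);
// while the implicit barriers at the end of worksharing constructs use the
// corresponding OMP_IDENT_BARRIER_IMPL_* flag in the ident_t location.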
1277
1278 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from
1279 /// the enum sched_type in kmp.h).
1280 enum OpenMPSchedType {
1281 /// \brief Lower bound for default (unordered) versions.
1282 OMP_sch_lower = 32,
1283 OMP_sch_static_chunked = 33,
1284 OMP_sch_static = 34,
1285 OMP_sch_dynamic_chunked = 35,
1286 OMP_sch_guided_chunked = 36,
1287 OMP_sch_runtime = 37,
1288 OMP_sch_auto = 38,
1289 /// \brief Lower bound for 'ordered' versions.
1290 OMP_ord_lower = 64,
1291 /// \brief Lower bound for 'nomerge' versions.
1292 OMP_nm_lower = 160,
1293 };
1294
1295 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1296 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1297 bool Chunked) {
1298 switch (ScheduleKind) {
1299 case OMPC_SCHEDULE_static:
1300 return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
1301 case OMPC_SCHEDULE_dynamic:
1302 return OMP_sch_dynamic_chunked;
1303 case OMPC_SCHEDULE_guided:
1304 return OMP_sch_guided_chunked;
1305 case OMPC_SCHEDULE_auto:
1306 return OMP_sch_auto;
1307 case OMPC_SCHEDULE_runtime:
1308 return OMP_sch_runtime;
1309 case OMPC_SCHEDULE_unknown:
1310 assert(!Chunked && "chunk was specified but schedule kind not known");
1311 return OMP_sch_static;
1312 }
1313 llvm_unreachable("Unexpected runtime schedule");
1314 }
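
// For example, 'schedule(static)' maps to OMP_sch_static, 'schedule(static, 4)'
// to OMP_sch_static_chunked, and 'schedule(dynamic[, n])' to
// OMP_sch_dynamic_chunked; an absent schedule clause defaults to OMP_sch_static.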
1315
1316 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1317 bool Chunked) const {
1318 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
1319 return Schedule == OMP_sch_static;
1320 }
1321
1322 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1323 auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false);
1324 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1325 return Schedule != OMP_sch_static;
1326 }
1327
1328 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
1329 OpenMPScheduleClauseKind ScheduleKind,
1330 unsigned IVSize, bool IVSigned,
1331 llvm::Value *IL, llvm::Value *LB,
1332 llvm::Value *UB, llvm::Value *ST,
1333 llvm::Value *Chunk) {
1334 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
1335 if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) {
1336 // Call __kmpc_dispatch_init(
1337 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1338 // kmp_int[32|64] lower, kmp_int[32|64] upper,
1339 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
1340
1341 // If the chunk was not specified in the clause, use the default value 1.
1342 if (Chunk == nullptr)
1343 Chunk = CGF.Builder.getIntN(IVSize, 1);
1344 llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1345 getThreadID(CGF, Loc),
1346 CGF.Builder.getInt32(Schedule), // Schedule type
1347 CGF.Builder.getIntN(IVSize, 0), // Lower
1348 UB, // Upper
1349 CGF.Builder.getIntN(IVSize, 1), // Stride
1350 Chunk // Chunk
1351 };
1352 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1353 } else {
1354 // Call __kmpc_for_static_init(
1355 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1356 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1357 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1358 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
1359 if (Chunk == nullptr) {
1360 assert(Schedule == OMP_sch_static &&
1361 "expected static non-chunked schedule");
1362 // If the chunk was not specified in the clause, use the default value 1.
1363 Chunk = CGF.Builder.getIntN(IVSize, 1);
1364 } else
1365 assert(Schedule == OMP_sch_static_chunked &&
1366 "expected static chunked schedule");
1367 llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1368 getThreadID(CGF, Loc),
1369 CGF.Builder.getInt32(Schedule), // Schedule type
1370 IL, // &isLastIter
1371 LB, // &LB
1372 UB, // &UB
1373 ST, // &Stride
1374 CGF.Builder.getIntN(IVSize, 1), // Incr
1375 Chunk // Chunk
1376 };
1377 CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1378 }
1379 }
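
// Illustrative sketch: for '#pragma omp for schedule(static)' with a 32-bit IV
// the emitted initialization is roughly
//   __kmpc_for_static_init_4(&<loc>, <gtid>, OMP_sch_static, &isLast, &LB, &UB,
//                            &Stride, /*Incr=*/1, /*Chunk=*/1);
// whereas 'schedule(dynamic, 4)' produces roughly
//   __kmpc_dispatch_init_4(&<loc>, <gtid>, OMP_sch_dynamic_chunked, /*LB=*/0,
//                          UB, /*Stride=*/1, /*Chunk=*/4);
// The _4/_4u/_8/_8u suffix is assumed to be chosen by
// createForStaticInitFunction/createDispatchInitFunction from IVSize and
// IVSigned.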
1380
1381 void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
1382 OpenMPScheduleClauseKind ScheduleKind) {
1383 assert((ScheduleKind == OMPC_SCHEDULE_static ||
1384 ScheduleKind == OMPC_SCHEDULE_unknown) &&
1385 "Non-static schedule kinds are not yet implemented");
1386 (void)ScheduleKind;
1387 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1388 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1389 getThreadID(CGF, Loc)};
1390 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1391 Args);
1392 }
1393
1394 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1395 SourceLocation Loc, unsigned IVSize,
1396 bool IVSigned, llvm::Value *IL,
1397 llvm::Value *LB, llvm::Value *UB,
1398 llvm::Value *ST) {
1399 // Call __kmpc_dispatch_next(
1400 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1401 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1402 // kmp_int[32|64] *p_stride);
1403 llvm::Value *Args[] = {
1404 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1405 IL, // &isLastIter
1406 LB, // &Lower
1407 UB, // &Upper
1408 ST // &Stride
1409 };
1410 llvm::Value *Call =
1411 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1412 return CGF.EmitScalarConversion(
1413 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1414 CGF.getContext().BoolTy);
1415 }
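
// Sketch of how the caller typically uses the result (assumed usage, not
// emitted here): a dynamically scheduled loop keeps requesting chunks with
//   while (__kmpc_dispatch_next_4(&<loc>, <gtid>, &isLast, &LB, &UB, &ST)) {
//     for (IV = LB; IV <= UB; ++IV)
//       <loop body>;
//   }
// The kmp_int32 return value is converted to a bool above so the enclosing
// loop condition can test it directly.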
1416
1417 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1418 llvm::Value *NumThreads,
1419 SourceLocation Loc) {
1420 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1421 llvm::Value *Args[] = {
1422 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1423 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1424 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1425 Args);
1426 }
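
// For example (sketch): '#pragma omp parallel num_threads(N)' emits
//   __kmpc_push_num_threads(&<loc>, <gtid>, N);
// before the fork call for the parallel region.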
1427
1428 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1429 SourceLocation Loc) {
1430 // Build call void __kmpc_flush(ident_t *loc)
1431 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1432 emitUpdateLocation(CGF, Loc));
1433 }
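
// For example (sketch): '#pragma omp flush', with or without a variable list,
// emits
//   __kmpc_flush(&<loc>);
// The flushed-variable list is currently ignored; only the location is passed.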
1434
1435 namespace {
1436 /// \brief Indexes of fields for type kmp_task_t.
1437 enum KmpTaskTFields {
1438 /// \brief List of shared variables.
1439 KmpTaskTShareds,
1440 /// \brief Task routine.
1441 KmpTaskTRoutine,
1442 /// \brief Partition id for the untied tasks.
1443 KmpTaskTPartId,
1444 /// \brief Function with call of destructors for private variables.
1445 KmpTaskTDestructors,
1446 };
1447 } // namespace
1448
1449 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1450 if (!KmpRoutineEntryPtrTy) {
1451 // Build the typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *) type.
1452 auto &C = CGM.getContext();
1453 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1454 FunctionProtoType::ExtProtoInfo EPI;
1455 KmpRoutineEntryPtrQTy = C.getPointerType(
1456 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1457 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
1458 }
1459 }
1460
1461 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1462 QualType FieldTy) {
1463 auto *Field = FieldDecl::Create(
1464 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1465 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1466 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1467 Field->setAccess(AS_public);
1468 DC->addDecl(Field);
1469 }
1470
1471 static QualType createKmpTaskTRecordDecl(CodeGenModule &CGM,
1472 QualType KmpInt32Ty,
1473 QualType KmpRoutineEntryPointerQTy) {
1474 auto &C = CGM.getContext();
1475 // Build struct kmp_task_t {
1476 // void * shareds;
1477 // kmp_routine_entry_t routine;
1478 // kmp_int32 part_id;
1479 // kmp_routine_entry_t destructors;
1480 // /* private vars */
1481 // };
1482 auto *RD = C.buildImplicitRecord("kmp_task_t");
1483 RD->startDefinition();
1484 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1485 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1486 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1487 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1488 // TODO: add private fields.
1489 RD->completeDefinition();
1490 return C.getRecordType(RD);
1491 }
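
// As a rough sketch (assuming the usual lowering of these field types), the
// corresponding LLVM type looks like
//   %struct.kmp_task_t = type { i8*, i32 (i32, i8*)*, i32, i32 (i32, i8*)* }
// i.e. shareds, routine, part_id and destructors, in that order.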
1492
1493 /// \brief Emit a proxy function which accepts kmp_task_t as the second
1494 /// argument.
1495 /// \code
1496 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
1497 /// TaskFunction(gtid, tt->part_id, tt->shareds);
1498 /// return 0;
1499 /// }
1500 /// \endcode
1501 static llvm::Value *
1502 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
1503 QualType KmpInt32Ty, QualType KmpTaskTPtrQTy,
1504 QualType SharedsPtrTy, llvm::Value *TaskFunction,
1505 llvm::Type *KmpTaskTTy) {
1506 auto &C = CGM.getContext();
1507 FunctionArgList Args;
1508 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1509 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1510 /*Id=*/nullptr, KmpTaskTPtrQTy);
1511 Args.push_back(&GtidArg);
1512 Args.push_back(&TaskTypeArg);
1513 FunctionType::ExtInfo Info;
1514 auto &TaskEntryFnInfo =
1515 CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1516 /*isVariadic=*/false);
1517 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
1518 auto *TaskEntry =
1519 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
1520 ".omp_task_entry.", &CGM.getModule());
1521 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
1522 CodeGenFunction CGF(CGM);
1523 CGF.disableDebugInfo();
1524 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
1525
1526 // TaskFunction(gtid, tt->part_id, tt->shareds);
1527 auto *GtidParam = CGF.EmitLoadOfScalar(
1528 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
1529 C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
1530 auto TaskTypeArgAddr = CGF.EmitLoadOfScalar(
1531 CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false,
1532 CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc);
1533 auto *PartidPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr,
1534 /*Idx=*/KmpTaskTPartId);
1535 auto *PartidParam = CGF.EmitLoadOfScalar(
1536 PartidPtr, /*Volatile=*/false,
1537 C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
1538 auto *SharedsPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr,
1539 /*Idx=*/KmpTaskTShareds);
1540 auto *SharedsParam =
1541 CGF.EmitLoadOfScalar(SharedsPtr, /*Volatile=*/false,
1542 CGM.PointerAlignInBytes, C.VoidPtrTy, Loc);
1543 llvm::Value *CallArgs[] = {
1544 GtidParam, PartidParam,
1545 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1546 SharedsParam, CGF.ConvertTypeForMem(SharedsPtrTy))};
1547 CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
1548 CGF.EmitStoreThroughLValue(
1549 RValue::get(CGF.Builder.getInt32(/*C=*/0)),
1550 CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
1551 CGF.FinishFunction();
1552 return TaskEntry;
1553 }
1554
1555 void CGOpenMPRuntime::emitTaskCall(
1556 CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
1557 llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
1558 llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) {
1559 auto &C = CGM.getContext();
1560 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1561 // Build type kmp_routine_entry_t (if not built yet).
1562 emitKmpRoutineEntryT(KmpInt32Ty);
1563 // Build particular struct kmp_task_t for the given task.
1564 auto KmpTaskQTy =
1565 createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy);
1566 QualType KmpTaskTPtrQTy = C.getPointerType(KmpTaskQTy);
1567 auto *KmpTaskTTy = CGF.ConvertType(KmpTaskQTy);
1568 auto *KmpTaskTPtrTy = KmpTaskTTy->getPointerTo();
1569 auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy));
1570 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
1571
1572 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
1573 // kmp_task_t *tt);
1574 auto *TaskEntry =
1575 emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy, SharedsPtrTy,
1576 TaskFunction, KmpTaskTTy);
1577
1578 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1579 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1580 // kmp_routine_entry_t *task_entry);
1581 // Task flags. Format is taken from
1582 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
1583 // description of kmp_tasking_flags struct.
1584 const unsigned TiedFlag = 0x1;
1585 const unsigned FinalFlag = 0x2;
1586 unsigned Flags = Tied ? TiedFlag : 0;
1587 auto *TaskFlags =
1588 Final.getPointer()
1589 ? CGF.Builder.CreateSelect(Final.getPointer(),
1590 CGF.Builder.getInt32(FinalFlag),
1591 CGF.Builder.getInt32(/*C=*/0))
1592 : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
1593 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
1594 auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
1595 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
1596 getThreadID(CGF, Loc), TaskFlags, KmpTaskTySize,
1597 CGM.getSize(SharedsSize),
1598 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1599 TaskEntry, KmpRoutineEntryPtrTy)};
1600 auto *NewTask = CGF.EmitRuntimeCall(
1601 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
1602 auto *NewTaskNewTaskTTy =
1603 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NewTask, KmpTaskTPtrTy);
1604 // Fill the data in the resulting kmp_task_t record.
1605 // Copy shareds if there are any.
1606 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty())
1607 CGF.EmitAggregateCopy(
1608 CGF.EmitLoadOfScalar(
1609 CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy,
1610 /*Idx=*/KmpTaskTShareds),
1611 /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, Loc),
1612 Shareds, SharedsTy);
1613 // TODO: generate function with destructors for privates.
1614 // Provide pointer to function with destructors for privates.
1615 CGF.Builder.CreateAlignedStore(
1616 llvm::ConstantPointerNull::get(
1617 cast<llvm::PointerType>(KmpRoutineEntryPtrTy)),
1618 CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy,
1619 /*Idx=*/KmpTaskTDestructors),
1620 CGM.PointerAlignInBytes);
1621
1622 // NOTE: the routine and part_id fields are initialized by the
1623 // __kmpc_omp_task_alloc() libcall.
1624 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1625 // *new_task);
1626 llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc),
1627 getThreadID(CGF, Loc), NewTask};
1628 // TODO: add check for untied tasks.
1629 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1630 }
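
// Illustrative sketch: '#pragma omp task' with captured shareds lowers to
// roughly
//   kmp_task_t *t = __kmpc_omp_task_alloc(&<loc>, <gtid>, <flags>,
//                                         sizeof(kmp_task_t),
//                                         sizeof(<shareds>), .omp_task_entry.);
//   memcpy(t->shareds, &<captured shareds>, sizeof(<shareds>));
//   t->destructors = nullptr;
//   __kmpc_omp_task(&<loc>, <gtid>, t);
// where <flags> carries the 'tied' bit and, for a 'final' clause, the 'final'
// bit (possibly selected at run time).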
1631
1632 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
1633 llvm::Type *ArgsType,
1634 ArrayRef<const Expr *> LHSExprs,
1635 ArrayRef<const Expr *> RHSExprs,
1636 ArrayRef<const Expr *> ReductionOps) {
1637 auto &C = CGM.getContext();
1638
1639 // void reduction_func(void *LHSArg, void *RHSArg);
1640 FunctionArgList Args;
1641 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1642 C.VoidPtrTy);
1643 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1644 C.VoidPtrTy);
1645 Args.push_back(&LHSArg);
1646 Args.push_back(&RHSArg);
1647 FunctionType::ExtInfo EI;
1648 auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1649 C.VoidTy, Args, EI, /*isVariadic=*/false);
1650 auto *Fn = llvm::Function::Create(
1651 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1652 ".omp.reduction.reduction_func", &CGM.getModule());
1653 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
1654 CodeGenFunction CGF(CGM);
1655 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1656
1657 // Dst = (void*[n])(LHSArg);
1658 // Src = (void*[n])(RHSArg);
1659 auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1660 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
1661 CGF.PointerAlignInBytes),
1662 ArgsType);
1663 auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1664 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
1665 CGF.PointerAlignInBytes),
1666 ArgsType);
1667
1668 // ...
1669 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
1670 // ...
1671 CodeGenFunction::OMPPrivateScope Scope(CGF);
1672 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
1673 Scope.addPrivate(
1674 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()),
1675 [&]() -> llvm::Value *{
1676 return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1677 CGF.Builder.CreateAlignedLoad(
1678 CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I),
1679 CGM.PointerAlignInBytes),
1680 CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType())));
1681 });
1682 Scope.addPrivate(
1683 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()),
1684 [&]() -> llvm::Value *{
1685 return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1686 CGF.Builder.CreateAlignedLoad(
1687 CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I),
1688 CGM.PointerAlignInBytes),
1689 CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType())));
1690 });
1691 }
1692 Scope.Privatize();
1693 for (auto *E : ReductionOps) {
1694 CGF.EmitIgnoredExpr(E);
1695 }
1696 Scope.ForceCleanup();
1697 CGF.FinishFunction();
1698 return Fn;
1699 }
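
// For example (sketch, assuming a single 'int' reduction variable combined
// with '+'), the generated function is conceptually
//   void .omp.reduction.reduction_func(void *lhs, void *rhs) {
//     *(int *)((void **)lhs)[0] += *(int *)((void **)rhs)[0];
//   }
// i.e. each ReductionOp is replayed with the LHS/RHS variables privatized to
// point into the two argument arrays.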
1700
1701 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
1702 ArrayRef<const Expr *> LHSExprs,
1703 ArrayRef<const Expr *> RHSExprs,
1704 ArrayRef<const Expr *> ReductionOps,
1705 bool WithNowait) {
1706 // The following code should be emitted for the reduction:
1707 //
1708 // static kmp_critical_name lock = { 0 };
1709 //
1710 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
1711 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
1712 // ...
1713 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
1714 // *(Type<n>-1*)rhs[<n>-1]);
1715 // }
1716 //
1717 // ...
1718 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
1719 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
1720 // RedList, reduce_func, &<lock>)) {
1721 // case 1:
1722 // ...
1723 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
1724 // ...
1725 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
1726 // break;
1727 // case 2:
1728 // ...
1729 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
1730 // ...
1731 // break;
1732 // default:;
1733 // }
1734
1735 auto &C = CGM.getContext();
1736
1737 // 1. Build a list of reduction variables.
1738 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
1739 llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size());
1740 QualType ReductionArrayTy =
1741 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1742 /*IndexTypeQuals=*/0);
1743 auto *ReductionList =
1744 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
1745 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
1746 auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I);
1747 CGF.Builder.CreateAlignedStore(
1748 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1749 CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy),
1750 Elem, CGM.PointerAlignInBytes);
1751 }
1752
1753 // 2. Emit reduce_func().
1754 auto *ReductionFn = emitReductionFunction(
1755 CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs,
1756 RHSExprs, ReductionOps);
1757
1758 // 3. Create static kmp_critical_name lock = { 0 };
1759 auto *Lock = getCriticalRegionLock(".reduction");
1760
1761 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
1762 // RedList, reduce_func, &<lock>);
1763 auto *IdentTLoc = emitUpdateLocation(
1764 CGF, Loc,
1765 static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
1766 auto *ThreadId = getThreadID(CGF, Loc);
1767 auto *ReductionArrayTySize = llvm::ConstantInt::get(
1768 CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
1769 auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList,
1770 CGF.VoidPtrTy);
1771 llvm::Value *Args[] = {
1772 IdentTLoc, // ident_t *<loc>
1773 ThreadId, // i32 <gtid>
1774 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
1775 ReductionArrayTySize, // size_type sizeof(RedList)
1776 RL, // void *RedList
1777 ReductionFn, // void (*) (void *, void *) <reduce_func>
1778 Lock // kmp_critical_name *&<lock>
1779 };
1780 auto Res = CGF.EmitRuntimeCall(
1781 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
1782 : OMPRTL__kmpc_reduce),
1783 Args);
1784
1785 // 5. Build switch(res)
1786 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
1787 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
1788
1789 // 6. Build case 1:
1790 // ...
1791 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
1792 // ...
1793 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
1794 // break;
1795 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
1796 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
1797 CGF.EmitBlock(Case1BB);
1798
1799 {
1800 CodeGenFunction::RunCleanupsScope Scope(CGF);
1801 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
1802 llvm::Value *EndArgs[] = {
1803 IdentTLoc, // ident_t *<loc>
1804 ThreadId, // i32 <gtid>
1805 Lock // kmp_critical_name *&<lock>
1806 };
1807 CGF.EHStack.pushCleanup<CallEndCleanup>(
1808 NormalAndEHCleanup,
1809 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
1810 : OMPRTL__kmpc_end_reduce),
1811 llvm::makeArrayRef(EndArgs));
1812 for (auto *E : ReductionOps) {
1813 CGF.EmitIgnoredExpr(E);
1814 }
1815 }
1816
1817 CGF.EmitBranch(DefaultBB);
1818
1819 // 7. Build case 2:
1820 // ...
1821 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
1822 // ...
1823 // break;
1824 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
1825 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
1826 CGF.EmitBlock(Case2BB);
1827
1828 {
1829 CodeGenFunction::RunCleanupsScope Scope(CGF);
1830 auto I = LHSExprs.begin();
1831 for (auto *E : ReductionOps) {
1832 const Expr *XExpr = nullptr;
1833 const Expr *EExpr = nullptr;
1834 const Expr *UpExpr = nullptr;
1835 BinaryOperatorKind BO = BO_Comma;
1836 // Try to emit update expression as a simple atomic.
1837 if (auto *ACO = dyn_cast<AbstractConditionalOperator>(E)) {
1838 // If this is a conditional operator, analyze its condition for a
1839 // min/max reduction operator.
1840 E = ACO->getCond();
1841 }
1842 if (auto *BO = dyn_cast<BinaryOperator>(E)) {
1843 if (BO->getOpcode() == BO_Assign) {
1844 XExpr = BO->getLHS();
1845 UpExpr = BO->getRHS();
1846 }
1847 }
1848 // Analyze RHS part of the whole expression.
1849 if (UpExpr) {
1850 if (auto *BORHS =
1851 dyn_cast<BinaryOperator>(UpExpr->IgnoreParenImpCasts())) {
1852 EExpr = BORHS->getRHS();
1853 BO = BORHS->getOpcode();
1854 }
1855 }
1856 if (XExpr) {
1857 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1858 LValue X = CGF.EmitLValue(XExpr);
1859 RValue E;
1860 if (EExpr)
1861 E = CGF.EmitAnyExpr(EExpr);
1862 CGF.EmitOMPAtomicSimpleUpdateExpr(
1863 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
1864 [&CGF, UpExpr, VD](RValue XRValue) {
1865 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
1866 PrivateScope.addPrivate(
1867 VD, [&CGF, VD, XRValue]() -> llvm::Value *{
1868 auto *LHSTemp = CGF.CreateMemTemp(VD->getType());
1869 CGF.EmitStoreThroughLValue(
1870 XRValue,
1871 CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType()));
1872 return LHSTemp;
1873 });
1874 (void)PrivateScope.Privatize();
1875 return CGF.EmitAnyExpr(UpExpr);
1876 });
1877 } else {
1878 // Emit as a critical region.
1879 emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) {
1880 CGF.EmitIgnoredExpr(E);
1881 }, Loc);
1882 }
1883 ++I;
1884 }
1885 }
1886
1887 CGF.EmitBranch(DefaultBB);
1888 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
1889 }
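
// Illustrative sketch for '#pragma omp parallel for reduction(+:s)', where
// 's_priv' stands for the thread-private copy created by the reduction clause
// codegen (a rough outline, not the exact emitted IR):
//   void *RedList[1] = { &s_priv };
//   switch (__kmpc_reduce_nowait(&<loc>, <gtid>, 1, sizeof(RedList), RedList,
//                                reduce_func, &<lock>)) {
//   case 1:
//     s = s + s_priv;
//     __kmpc_end_reduce_nowait(&<loc>, <gtid>, &<lock>);
//     break;
//   case 2:
//     <atomic update of 's' with 's_priv'>;
//     break;
//   default:
//     break;
//   }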
1890
1891 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
1892 const RegionCodeGenTy &CodeGen) {
1893 InlinedOpenMPRegionRAII Region(CGF, CodeGen);
1894 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
1895 }
1896
1897