1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit OpenMP nodes as LLVM code.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "CGCleanup.h"
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/DeclOpenMP.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/Stmt.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/PrettyStackTrace.h"
27 #include "llvm/Frontend/OpenMP/OMPConstants.h"
28 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/Support/AtomicOrdering.h"
32 using namespace clang;
33 using namespace CodeGen;
34 using namespace llvm::omp;
35
36 static const VarDecl *getBaseDecl(const Expr *Ref);
37
38 namespace {
39 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
40 /// for captured expressions.
41 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
emitPreInitStmt(CodeGenFunction & CGF,const OMPExecutableDirective & S)42 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
43 for (const auto *C : S.clauses()) {
44 if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
45 if (const auto *PreInit =
46 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
47 for (const auto *I : PreInit->decls()) {
48 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
49 CGF.EmitVarDecl(cast<VarDecl>(*I));
50 } else {
51 CodeGenFunction::AutoVarEmission Emission =
52 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
53 CGF.EmitAutoVarCleanups(Emission);
54 }
55 }
56 }
57 }
58 }
59 }
60 CodeGenFunction::OMPPrivateScope InlinedShareds;
61
isCapturedVar(CodeGenFunction & CGF,const VarDecl * VD)62 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
63 return CGF.LambdaCaptureFields.lookup(VD) ||
64 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
65 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
66 cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
67 }
68
69 public:
OMPLexicalScope(CodeGenFunction & CGF,const OMPExecutableDirective & S,const llvm::Optional<OpenMPDirectiveKind> CapturedRegion=llvm::None,const bool EmitPreInitStmt=true)70 OMPLexicalScope(
71 CodeGenFunction &CGF, const OMPExecutableDirective &S,
72 const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
73 const bool EmitPreInitStmt = true)
74 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
75 InlinedShareds(CGF) {
76 if (EmitPreInitStmt)
77 emitPreInitStmt(CGF, S);
78 if (!CapturedRegion.hasValue())
79 return;
80 assert(S.hasAssociatedStmt() &&
81 "Expected associated statement for inlined directive.");
82 const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
83 for (const auto &C : CS->captures()) {
84 if (C.capturesVariable() || C.capturesVariableByCopy()) {
85 auto *VD = C.getCapturedVar();
86 assert(VD == VD->getCanonicalDecl() &&
87 "Canonical decl must be captured.");
88 DeclRefExpr DRE(
89 CGF.getContext(), const_cast<VarDecl *>(VD),
90 isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
91 InlinedShareds.isGlobalVarCaptured(VD)),
92 VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
93 InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
94 return CGF.EmitLValue(&DRE).getAddress(CGF);
95 });
96 }
97 }
98 (void)InlinedShareds.Privatize();
99 }
100 };
101
102 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
103 /// for captured expressions.
104 class OMPParallelScope final : public OMPLexicalScope {
EmitPreInitStmt(const OMPExecutableDirective & S)105 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
106 OpenMPDirectiveKind Kind = S.getDirectiveKind();
107 return !(isOpenMPTargetExecutionDirective(Kind) ||
108 isOpenMPLoopBoundSharingDirective(Kind)) &&
109 isOpenMPParallelDirective(Kind);
110 }
111
112 public:
OMPParallelScope(CodeGenFunction & CGF,const OMPExecutableDirective & S)113 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
114 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
115 EmitPreInitStmt(S)) {}
116 };
117
118 /// Lexical scope for OpenMP teams construct, that handles correct codegen
119 /// for captured expressions.
120 class OMPTeamsScope final : public OMPLexicalScope {
EmitPreInitStmt(const OMPExecutableDirective & S)121 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
122 OpenMPDirectiveKind Kind = S.getDirectiveKind();
123 return !isOpenMPTargetExecutionDirective(Kind) &&
124 isOpenMPTeamsDirective(Kind);
125 }
126
127 public:
OMPTeamsScope(CodeGenFunction & CGF,const OMPExecutableDirective & S)128 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
129 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
130 EmitPreInitStmt(S)) {}
131 };
132
133 /// Private scope for OpenMP loop-based directives, that supports capturing
134 /// of used expression from loop statement.
135 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
emitPreInitStmt(CodeGenFunction & CGF,const OMPLoopDirective & S)136 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
137 CodeGenFunction::OMPMapVars PreCondVars;
138 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
139 for (const auto *E : S.counters()) {
140 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
141 EmittedAsPrivate.insert(VD->getCanonicalDecl());
142 (void)PreCondVars.setVarAddr(
143 CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
144 }
145 // Mark private vars as undefs.
146 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
147 for (const Expr *IRef : C->varlists()) {
148 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
149 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
150 (void)PreCondVars.setVarAddr(
151 CGF, OrigVD,
152 Address(llvm::UndefValue::get(
153 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
154 OrigVD->getType().getNonReferenceType()))),
155 CGF.getContext().getDeclAlign(OrigVD)));
156 }
157 }
158 }
159 (void)PreCondVars.apply(CGF);
160 // Emit init, __range and __end variables for C++ range loops.
161 const Stmt *Body =
162 S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
163 for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) {
164 Body = OMPLoopDirective::tryToFindNextInnerLoop(
165 Body, /*TryImperfectlyNestedLoops=*/true);
166 if (auto *For = dyn_cast<ForStmt>(Body)) {
167 Body = For->getBody();
168 } else {
169 assert(isa<CXXForRangeStmt>(Body) &&
170 "Expected canonical for loop or range-based for loop.");
171 auto *CXXFor = cast<CXXForRangeStmt>(Body);
172 if (const Stmt *Init = CXXFor->getInit())
173 CGF.EmitStmt(Init);
174 CGF.EmitStmt(CXXFor->getRangeStmt());
175 CGF.EmitStmt(CXXFor->getEndStmt());
176 Body = CXXFor->getBody();
177 }
178 }
179 if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
180 for (const auto *I : PreInits->decls())
181 CGF.EmitVarDecl(cast<VarDecl>(*I));
182 }
183 PreCondVars.restore(CGF);
184 }
185
186 public:
OMPLoopScope(CodeGenFunction & CGF,const OMPLoopDirective & S)187 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
188 : CodeGenFunction::RunCleanupsScope(CGF) {
189 emitPreInitStmt(CGF, S);
190 }
191 };
192
193 class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
194 CodeGenFunction::OMPPrivateScope InlinedShareds;
195
isCapturedVar(CodeGenFunction & CGF,const VarDecl * VD)196 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
197 return CGF.LambdaCaptureFields.lookup(VD) ||
198 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
199 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
200 cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
201 }
202
203 public:
OMPSimdLexicalScope(CodeGenFunction & CGF,const OMPExecutableDirective & S)204 OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
205 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
206 InlinedShareds(CGF) {
207 for (const auto *C : S.clauses()) {
208 if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
209 if (const auto *PreInit =
210 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
211 for (const auto *I : PreInit->decls()) {
212 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
213 CGF.EmitVarDecl(cast<VarDecl>(*I));
214 } else {
215 CodeGenFunction::AutoVarEmission Emission =
216 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
217 CGF.EmitAutoVarCleanups(Emission);
218 }
219 }
220 }
221 } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
222 for (const Expr *E : UDP->varlists()) {
223 const Decl *D = cast<DeclRefExpr>(E)->getDecl();
224 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
225 CGF.EmitVarDecl(*OED);
226 }
227 } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
228 for (const Expr *E : UDP->varlists()) {
229 const Decl *D = getBaseDecl(E);
230 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
231 CGF.EmitVarDecl(*OED);
232 }
233 }
234 }
235 if (!isOpenMPSimdDirective(S.getDirectiveKind()))
236 CGF.EmitOMPPrivateClause(S, InlinedShareds);
237 if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
238 if (const Expr *E = TG->getReductionRef())
239 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
240 }
241 const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
242 while (CS) {
243 for (auto &C : CS->captures()) {
244 if (C.capturesVariable() || C.capturesVariableByCopy()) {
245 auto *VD = C.getCapturedVar();
246 assert(VD == VD->getCanonicalDecl() &&
247 "Canonical decl must be captured.");
248 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
249 isCapturedVar(CGF, VD) ||
250 (CGF.CapturedStmtInfo &&
251 InlinedShareds.isGlobalVarCaptured(VD)),
252 VD->getType().getNonReferenceType(), VK_LValue,
253 C.getLocation());
254 InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
255 return CGF.EmitLValue(&DRE).getAddress(CGF);
256 });
257 }
258 }
259 CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
260 }
261 (void)InlinedShareds.Privatize();
262 }
263 };
264
265 } // namespace
266
267 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
268 const OMPExecutableDirective &S,
269 const RegionCodeGenTy &CodeGen);
270
EmitOMPSharedLValue(const Expr * E)271 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
272 if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
273 if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
274 OrigVD = OrigVD->getCanonicalDecl();
275 bool IsCaptured =
276 LambdaCaptureFields.lookup(OrigVD) ||
277 (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
278 (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
279 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
280 OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
281 return EmitLValue(&DRE);
282 }
283 }
284 return EmitLValue(E);
285 }
286
getTypeSize(QualType Ty)287 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
288 ASTContext &C = getContext();
289 llvm::Value *Size = nullptr;
290 auto SizeInChars = C.getTypeSizeInChars(Ty);
291 if (SizeInChars.isZero()) {
292 // getTypeSizeInChars() returns 0 for a VLA.
293 while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
294 VlaSizePair VlaSize = getVLASize(VAT);
295 Ty = VlaSize.Type;
296 Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
297 : VlaSize.NumElts;
298 }
299 SizeInChars = C.getTypeSizeInChars(Ty);
300 if (SizeInChars.isZero())
301 return llvm::ConstantInt::get(SizeTy, /*V=*/0);
302 return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
303 }
304 return CGM.getSize(SizeInChars);
305 }
306
GenerateOpenMPCapturedVars(const CapturedStmt & S,SmallVectorImpl<llvm::Value * > & CapturedVars)307 void CodeGenFunction::GenerateOpenMPCapturedVars(
308 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
309 const RecordDecl *RD = S.getCapturedRecordDecl();
310 auto CurField = RD->field_begin();
311 auto CurCap = S.captures().begin();
312 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
313 E = S.capture_init_end();
314 I != E; ++I, ++CurField, ++CurCap) {
315 if (CurField->hasCapturedVLAType()) {
316 const VariableArrayType *VAT = CurField->getCapturedVLAType();
317 llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
318 CapturedVars.push_back(Val);
319 } else if (CurCap->capturesThis()) {
320 CapturedVars.push_back(CXXThisValue);
321 } else if (CurCap->capturesVariableByCopy()) {
322 llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
323
324 // If the field is not a pointer, we need to save the actual value
325 // and load it as a void pointer.
326 if (!CurField->getType()->isAnyPointerType()) {
327 ASTContext &Ctx = getContext();
328 Address DstAddr = CreateMemTemp(
329 Ctx.getUIntPtrType(),
330 Twine(CurCap->getCapturedVar()->getName(), ".casted"));
331 LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
332
333 llvm::Value *SrcAddrVal = EmitScalarConversion(
334 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
335 Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
336 LValue SrcLV =
337 MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
338
339 // Store the value using the source type pointer.
340 EmitStoreThroughLValue(RValue::get(CV), SrcLV);
341
342 // Load the value using the destination type pointer.
343 CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
344 }
345 CapturedVars.push_back(CV);
346 } else {
347 assert(CurCap->capturesVariable() && "Expected capture by reference.");
348 CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
349 }
350 }
351 }
352
castValueFromUintptr(CodeGenFunction & CGF,SourceLocation Loc,QualType DstType,StringRef Name,LValue AddrLV)353 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
354 QualType DstType, StringRef Name,
355 LValue AddrLV) {
356 ASTContext &Ctx = CGF.getContext();
357
358 llvm::Value *CastedPtr = CGF.EmitScalarConversion(
359 AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
360 Ctx.getPointerType(DstType), Loc);
361 Address TmpAddr =
362 CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
363 .getAddress(CGF);
364 return TmpAddr;
365 }
366
getCanonicalParamType(ASTContext & C,QualType T)367 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
368 if (T->isLValueReferenceType())
369 return C.getLValueReferenceType(
370 getCanonicalParamType(C, T.getNonReferenceType()),
371 /*SpelledAsLValue=*/false);
372 if (T->isPointerType())
373 return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
374 if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
375 if (const auto *VLA = dyn_cast<VariableArrayType>(A))
376 return getCanonicalParamType(C, VLA->getElementType());
377 if (!A->isVariablyModifiedType())
378 return C.getCanonicalType(T);
379 }
380 return C.getCanonicalParamType(T);
381 }
382
383 namespace {
384 /// Contains required data for proper outlined function codegen.
385 struct FunctionOptions {
386 /// Captured statement for which the function is generated.
387 const CapturedStmt *S = nullptr;
388 /// true if cast to/from UIntPtr is required for variables captured by
389 /// value.
390 const bool UIntPtrCastRequired = true;
391 /// true if only casted arguments must be registered as local args or VLA
392 /// sizes.
393 const bool RegisterCastedArgsOnly = false;
394 /// Name of the generated function.
395 const StringRef FunctionName;
396 /// Location of the non-debug version of the outlined function.
397 SourceLocation Loc;
FunctionOptions__anone1cc1baf0411::FunctionOptions398 explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
399 bool RegisterCastedArgsOnly, StringRef FunctionName,
400 SourceLocation Loc)
401 : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
402 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
403 FunctionName(FunctionName), Loc(Loc) {}
404 };
405 } // namespace
406
emitOutlinedFunctionPrologue(CodeGenFunction & CGF,FunctionArgList & Args,llvm::MapVector<const Decl *,std::pair<const VarDecl *,Address>> & LocalAddrs,llvm::DenseMap<const Decl *,std::pair<const Expr *,llvm::Value * >> & VLASizes,llvm::Value * & CXXThisValue,const FunctionOptions & FO)407 static llvm::Function *emitOutlinedFunctionPrologue(
408 CodeGenFunction &CGF, FunctionArgList &Args,
409 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
410 &LocalAddrs,
411 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
412 &VLASizes,
413 llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
414 const CapturedDecl *CD = FO.S->getCapturedDecl();
415 const RecordDecl *RD = FO.S->getCapturedRecordDecl();
416 assert(CD->hasBody() && "missing CapturedDecl body");
417
418 CXXThisValue = nullptr;
419 // Build the argument list.
420 CodeGenModule &CGM = CGF.CGM;
421 ASTContext &Ctx = CGM.getContext();
422 FunctionArgList TargetArgs;
423 Args.append(CD->param_begin(),
424 std::next(CD->param_begin(), CD->getContextParamPosition()));
425 TargetArgs.append(
426 CD->param_begin(),
427 std::next(CD->param_begin(), CD->getContextParamPosition()));
428 auto I = FO.S->captures().begin();
429 FunctionDecl *DebugFunctionDecl = nullptr;
430 if (!FO.UIntPtrCastRequired) {
431 FunctionProtoType::ExtProtoInfo EPI;
432 QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
433 DebugFunctionDecl = FunctionDecl::Create(
434 Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
435 SourceLocation(), DeclarationName(), FunctionTy,
436 Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
437 /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
438 }
439 for (const FieldDecl *FD : RD->fields()) {
440 QualType ArgType = FD->getType();
441 IdentifierInfo *II = nullptr;
442 VarDecl *CapVar = nullptr;
443
444 // If this is a capture by copy and the type is not a pointer, the outlined
445 // function argument type should be uintptr and the value properly casted to
446 // uintptr. This is necessary given that the runtime library is only able to
447 // deal with pointers. We can pass in the same way the VLA type sizes to the
448 // outlined function.
449 if (FO.UIntPtrCastRequired &&
450 ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
451 I->capturesVariableArrayType()))
452 ArgType = Ctx.getUIntPtrType();
453
454 if (I->capturesVariable() || I->capturesVariableByCopy()) {
455 CapVar = I->getCapturedVar();
456 II = CapVar->getIdentifier();
457 } else if (I->capturesThis()) {
458 II = &Ctx.Idents.get("this");
459 } else {
460 assert(I->capturesVariableArrayType());
461 II = &Ctx.Idents.get("vla");
462 }
463 if (ArgType->isVariablyModifiedType())
464 ArgType = getCanonicalParamType(Ctx, ArgType);
465 VarDecl *Arg;
466 if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
467 Arg = ParmVarDecl::Create(
468 Ctx, DebugFunctionDecl,
469 CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
470 CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
471 /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
472 } else {
473 Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
474 II, ArgType, ImplicitParamDecl::Other);
475 }
476 Args.emplace_back(Arg);
477 // Do not cast arguments if we emit function with non-original types.
478 TargetArgs.emplace_back(
479 FO.UIntPtrCastRequired
480 ? Arg
481 : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
482 ++I;
483 }
484 Args.append(
485 std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
486 CD->param_end());
487 TargetArgs.append(
488 std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
489 CD->param_end());
490
491 // Create the function declaration.
492 const CGFunctionInfo &FuncInfo =
493 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
494 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
495
496 auto *F =
497 llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
498 FO.FunctionName, &CGM.getModule());
499 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
500 if (CD->isNothrow())
501 F->setDoesNotThrow();
502 F->setDoesNotRecurse();
503
504 // Generate the function.
505 CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
506 FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
507 FO.UIntPtrCastRequired ? FO.Loc
508 : CD->getBody()->getBeginLoc());
509 unsigned Cnt = CD->getContextParamPosition();
510 I = FO.S->captures().begin();
511 for (const FieldDecl *FD : RD->fields()) {
512 // Do not map arguments if we emit function with non-original types.
513 Address LocalAddr(Address::invalid());
514 if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
515 LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
516 TargetArgs[Cnt]);
517 } else {
518 LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
519 }
520 // If we are capturing a pointer by copy we don't need to do anything, just
521 // use the value that we get from the arguments.
522 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
523 const VarDecl *CurVD = I->getCapturedVar();
524 if (!FO.RegisterCastedArgsOnly)
525 LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
526 ++Cnt;
527 ++I;
528 continue;
529 }
530
531 LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
532 AlignmentSource::Decl);
533 if (FD->hasCapturedVLAType()) {
534 if (FO.UIntPtrCastRequired) {
535 ArgLVal = CGF.MakeAddrLValue(
536 castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
537 Args[Cnt]->getName(), ArgLVal),
538 FD->getType(), AlignmentSource::Decl);
539 }
540 llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
541 const VariableArrayType *VAT = FD->getCapturedVLAType();
542 VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
543 } else if (I->capturesVariable()) {
544 const VarDecl *Var = I->getCapturedVar();
545 QualType VarTy = Var->getType();
546 Address ArgAddr = ArgLVal.getAddress(CGF);
547 if (ArgLVal.getType()->isLValueReferenceType()) {
548 ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
549 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
550 assert(ArgLVal.getType()->isPointerType());
551 ArgAddr = CGF.EmitLoadOfPointer(
552 ArgAddr, ArgLVal.getType()->castAs<PointerType>());
553 }
554 if (!FO.RegisterCastedArgsOnly) {
555 LocalAddrs.insert(
556 {Args[Cnt],
557 {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
558 }
559 } else if (I->capturesVariableByCopy()) {
560 assert(!FD->getType()->isAnyPointerType() &&
561 "Not expecting a captured pointer.");
562 const VarDecl *Var = I->getCapturedVar();
563 LocalAddrs.insert({Args[Cnt],
564 {Var, FO.UIntPtrCastRequired
565 ? castValueFromUintptr(
566 CGF, I->getLocation(), FD->getType(),
567 Args[Cnt]->getName(), ArgLVal)
568 : ArgLVal.getAddress(CGF)}});
569 } else {
570 // If 'this' is captured, load it into CXXThisValue.
571 assert(I->capturesThis());
572 CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
573 LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
574 }
575 ++Cnt;
576 ++I;
577 }
578
579 return F;
580 }
581
582 llvm::Function *
GenerateOpenMPCapturedStmtFunction(const CapturedStmt & S,SourceLocation Loc)583 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
584 SourceLocation Loc) {
585 assert(
586 CapturedStmtInfo &&
587 "CapturedStmtInfo should be set when generating the captured function");
588 const CapturedDecl *CD = S.getCapturedDecl();
589 // Build the argument list.
590 bool NeedWrapperFunction =
591 getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
592 FunctionArgList Args;
593 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
594 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
595 SmallString<256> Buffer;
596 llvm::raw_svector_ostream Out(Buffer);
597 Out << CapturedStmtInfo->getHelperName();
598 if (NeedWrapperFunction)
599 Out << "_debug__";
600 FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
601 Out.str(), Loc);
602 llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
603 VLASizes, CXXThisValue, FO);
604 CodeGenFunction::OMPPrivateScope LocalScope(*this);
605 for (const auto &LocalAddrPair : LocalAddrs) {
606 if (LocalAddrPair.second.first) {
607 LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
608 return LocalAddrPair.second.second;
609 });
610 }
611 }
612 (void)LocalScope.Privatize();
613 for (const auto &VLASizePair : VLASizes)
614 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
615 PGO.assignRegionCounters(GlobalDecl(CD), F);
616 CapturedStmtInfo->EmitBody(*this, CD->getBody());
617 (void)LocalScope.ForceCleanup();
618 FinishFunction(CD->getBodyRBrace());
619 if (!NeedWrapperFunction)
620 return F;
621
622 FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
623 /*RegisterCastedArgsOnly=*/true,
624 CapturedStmtInfo->getHelperName(), Loc);
625 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
626 WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
627 Args.clear();
628 LocalAddrs.clear();
629 VLASizes.clear();
630 llvm::Function *WrapperF =
631 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
632 WrapperCGF.CXXThisValue, WrapperFO);
633 llvm::SmallVector<llvm::Value *, 4> CallArgs;
634 for (const auto *Arg : Args) {
635 llvm::Value *CallArg;
636 auto I = LocalAddrs.find(Arg);
637 if (I != LocalAddrs.end()) {
638 LValue LV = WrapperCGF.MakeAddrLValue(
639 I->second.second,
640 I->second.first ? I->second.first->getType() : Arg->getType(),
641 AlignmentSource::Decl);
642 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
643 } else {
644 auto EI = VLASizes.find(Arg);
645 if (EI != VLASizes.end()) {
646 CallArg = EI->second.second;
647 } else {
648 LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
649 Arg->getType(),
650 AlignmentSource::Decl);
651 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
652 }
653 }
654 CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
655 }
656 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
657 WrapperCGF.FinishFunction();
658 return WrapperF;
659 }
660
661 //===----------------------------------------------------------------------===//
662 // OpenMP Directive Emission
663 //===----------------------------------------------------------------------===//
EmitOMPAggregateAssign(Address DestAddr,Address SrcAddr,QualType OriginalType,const llvm::function_ref<void (Address,Address)> CopyGen)664 void CodeGenFunction::EmitOMPAggregateAssign(
665 Address DestAddr, Address SrcAddr, QualType OriginalType,
666 const llvm::function_ref<void(Address, Address)> CopyGen) {
667 // Perform element-by-element initialization.
668 QualType ElementTy;
669
670 // Drill down to the base element type on both arrays.
671 const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
672 llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
673 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
674
675 llvm::Value *SrcBegin = SrcAddr.getPointer();
676 llvm::Value *DestBegin = DestAddr.getPointer();
677 // Cast from pointer to array type to pointer to single element.
678 llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
679 // The basic structure here is a while-do loop.
680 llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
681 llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
682 llvm::Value *IsEmpty =
683 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
684 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
685
686 // Enter the loop body, making that address the current address.
687 llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
688 EmitBlock(BodyBB);
689
690 CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
691
692 llvm::PHINode *SrcElementPHI =
693 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
694 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
695 Address SrcElementCurrent =
696 Address(SrcElementPHI,
697 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
698
699 llvm::PHINode *DestElementPHI =
700 Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
701 DestElementPHI->addIncoming(DestBegin, EntryBB);
702 Address DestElementCurrent =
703 Address(DestElementPHI,
704 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
705
706 // Emit copy.
707 CopyGen(DestElementCurrent, SrcElementCurrent);
708
709 // Shift the address forward by one element.
710 llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
711 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
712 llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
713 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
714 // Check whether we've reached the end.
715 llvm::Value *Done =
716 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
717 Builder.CreateCondBr(Done, DoneBB, BodyBB);
718 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
719 SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
720
721 // Done.
722 EmitBlock(DoneBB, /*IsFinished=*/true);
723 }
724
EmitOMPCopy(QualType OriginalType,Address DestAddr,Address SrcAddr,const VarDecl * DestVD,const VarDecl * SrcVD,const Expr * Copy)725 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
726 Address SrcAddr, const VarDecl *DestVD,
727 const VarDecl *SrcVD, const Expr *Copy) {
728 if (OriginalType->isArrayType()) {
729 const auto *BO = dyn_cast<BinaryOperator>(Copy);
730 if (BO && BO->getOpcode() == BO_Assign) {
731 // Perform simple memcpy for simple copying.
732 LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
733 LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
734 EmitAggregateAssign(Dest, Src, OriginalType);
735 } else {
736 // For arrays with complex element types perform element by element
737 // copying.
738 EmitOMPAggregateAssign(
739 DestAddr, SrcAddr, OriginalType,
740 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
741 // Working with the single array element, so have to remap
742 // destination and source variables to corresponding array
743 // elements.
744 CodeGenFunction::OMPPrivateScope Remap(*this);
745 Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
746 Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
747 (void)Remap.Privatize();
748 EmitIgnoredExpr(Copy);
749 });
750 }
751 } else {
752 // Remap pseudo source variable to private copy.
753 CodeGenFunction::OMPPrivateScope Remap(*this);
754 Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
755 Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
756 (void)Remap.Privatize();
757 // Emit copying of the whole variable.
758 EmitIgnoredExpr(Copy);
759 }
760 }
761
EmitOMPFirstprivateClause(const OMPExecutableDirective & D,OMPPrivateScope & PrivateScope)762 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
763 OMPPrivateScope &PrivateScope) {
764 if (!HaveInsertPoint())
765 return false;
766 bool DeviceConstTarget =
767 getLangOpts().OpenMPIsDevice &&
768 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
769 bool FirstprivateIsLastprivate = false;
770 llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
771 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
772 for (const auto *D : C->varlists())
773 Lastprivates.try_emplace(
774 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
775 C->getKind());
776 }
777 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
778 llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
779 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
780 // Force emission of the firstprivate copy if the directive does not emit
781 // outlined function, like omp for, omp simd, omp distribute etc.
782 bool MustEmitFirstprivateCopy =
783 CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
784 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
785 const auto *IRef = C->varlist_begin();
786 const auto *InitsRef = C->inits().begin();
787 for (const Expr *IInit : C->private_copies()) {
788 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
789 bool ThisFirstprivateIsLastprivate =
790 Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
791 const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
792 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
793 if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
794 !FD->getType()->isReferenceType() &&
795 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
796 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
797 ++IRef;
798 ++InitsRef;
799 continue;
800 }
801 // Do not emit copy for firstprivate constant variables in target regions,
802 // captured by reference.
803 if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
804 FD && FD->getType()->isReferenceType() &&
805 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
806 (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
807 OrigVD);
808 ++IRef;
809 ++InitsRef;
810 continue;
811 }
812 FirstprivateIsLastprivate =
813 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
814 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
815 const auto *VDInit =
816 cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
817 bool IsRegistered;
818 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
819 /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
820 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
821 LValue OriginalLVal;
822 if (!FD) {
823 // Check if the firstprivate variable is just a constant value.
824 ConstantEmission CE = tryEmitAsConstant(&DRE);
825 if (CE && !CE.isReference()) {
826 // Constant value, no need to create a copy.
827 ++IRef;
828 ++InitsRef;
829 continue;
830 }
831 if (CE && CE.isReference()) {
832 OriginalLVal = CE.getReferenceLValue(*this, &DRE);
833 } else {
834 assert(!CE && "Expected non-constant firstprivate.");
835 OriginalLVal = EmitLValue(&DRE);
836 }
837 } else {
838 OriginalLVal = EmitLValue(&DRE);
839 }
840 QualType Type = VD->getType();
841 if (Type->isArrayType()) {
842 // Emit VarDecl with copy init for arrays.
843 // Get the address of the original variable captured in current
844 // captured region.
845 IsRegistered = PrivateScope.addPrivate(
846 OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
847 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
848 const Expr *Init = VD->getInit();
849 if (!isa<CXXConstructExpr>(Init) ||
850 isTrivialInitializer(Init)) {
851 // Perform simple memcpy.
852 LValue Dest =
853 MakeAddrLValue(Emission.getAllocatedAddress(), Type);
854 EmitAggregateAssign(Dest, OriginalLVal, Type);
855 } else {
856 EmitOMPAggregateAssign(
857 Emission.getAllocatedAddress(),
858 OriginalLVal.getAddress(*this), Type,
859 [this, VDInit, Init](Address DestElement,
860 Address SrcElement) {
861 // Clean up any temporaries needed by the
862 // initialization.
863 RunCleanupsScope InitScope(*this);
864 // Emit initialization for single element.
865 setAddrOfLocalVar(VDInit, SrcElement);
866 EmitAnyExprToMem(Init, DestElement,
867 Init->getType().getQualifiers(),
868 /*IsInitializer*/ false);
869 LocalDeclMap.erase(VDInit);
870 });
871 }
872 EmitAutoVarCleanups(Emission);
873 return Emission.getAllocatedAddress();
874 });
875 } else {
876 Address OriginalAddr = OriginalLVal.getAddress(*this);
877 IsRegistered =
878 PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
879 ThisFirstprivateIsLastprivate,
880 OrigVD, &Lastprivates, IRef]() {
881 // Emit private VarDecl with copy init.
882 // Remap temp VDInit variable to the address of the original
883 // variable (for proper handling of captured global variables).
884 setAddrOfLocalVar(VDInit, OriginalAddr);
885 EmitDecl(*VD);
886 LocalDeclMap.erase(VDInit);
887 if (ThisFirstprivateIsLastprivate &&
888 Lastprivates[OrigVD->getCanonicalDecl()] ==
889 OMPC_LASTPRIVATE_conditional) {
890 // Create/init special variable for lastprivate conditionals.
891 Address VDAddr =
892 CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
893 *this, OrigVD);
894 llvm::Value *V = EmitLoadOfScalar(
895 MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
896 AlignmentSource::Decl),
897 (*IRef)->getExprLoc());
898 EmitStoreOfScalar(V,
899 MakeAddrLValue(VDAddr, (*IRef)->getType(),
900 AlignmentSource::Decl));
901 LocalDeclMap.erase(VD);
902 setAddrOfLocalVar(VD, VDAddr);
903 return VDAddr;
904 }
905 return GetAddrOfLocalVar(VD);
906 });
907 }
908 assert(IsRegistered &&
909 "firstprivate var already registered as private");
910 // Silence the warning about unused variable.
911 (void)IsRegistered;
912 }
913 ++IRef;
914 ++InitsRef;
915 }
916 }
917 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
918 }
919
EmitOMPPrivateClause(const OMPExecutableDirective & D,CodeGenFunction::OMPPrivateScope & PrivateScope)920 void CodeGenFunction::EmitOMPPrivateClause(
921 const OMPExecutableDirective &D,
922 CodeGenFunction::OMPPrivateScope &PrivateScope) {
923 if (!HaveInsertPoint())
924 return;
925 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
926 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
927 auto IRef = C->varlist_begin();
928 for (const Expr *IInit : C->private_copies()) {
929 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
930 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
931 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
932 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
933 // Emit private VarDecl with copy init.
934 EmitDecl(*VD);
935 return GetAddrOfLocalVar(VD);
936 });
937 assert(IsRegistered && "private var already registered as private");
938 // Silence the warning about unused variable.
939 (void)IsRegistered;
940 }
941 ++IRef;
942 }
943 }
944 }
945
EmitOMPCopyinClause(const OMPExecutableDirective & D)946 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
947 if (!HaveInsertPoint())
948 return false;
949 // threadprivate_var1 = master_threadprivate_var1;
950 // operator=(threadprivate_var2, master_threadprivate_var2);
951 // ...
952 // __kmpc_barrier(&loc, global_tid);
953 llvm::DenseSet<const VarDecl *> CopiedVars;
954 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
955 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
956 auto IRef = C->varlist_begin();
957 auto ISrcRef = C->source_exprs().begin();
958 auto IDestRef = C->destination_exprs().begin();
959 for (const Expr *AssignOp : C->assignment_ops()) {
960 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
961 QualType Type = VD->getType();
962 if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
963 // Get the address of the master variable. If we are emitting code with
964 // TLS support, the address is passed from the master as field in the
965 // captured declaration.
966 Address MasterAddr = Address::invalid();
967 if (getLangOpts().OpenMPUseTLS &&
968 getContext().getTargetInfo().isTLSSupported()) {
969 assert(CapturedStmtInfo->lookup(VD) &&
970 "Copyin threadprivates should have been captured!");
971 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
972 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
973 MasterAddr = EmitLValue(&DRE).getAddress(*this);
974 LocalDeclMap.erase(VD);
975 } else {
976 MasterAddr =
977 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
978 : CGM.GetAddrOfGlobal(VD),
979 getContext().getDeclAlign(VD));
980 }
981 // Get the address of the threadprivate variable.
982 Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
983 if (CopiedVars.size() == 1) {
984 // At first check if current thread is a master thread. If it is, no
985 // need to copy data.
986 CopyBegin = createBasicBlock("copyin.not.master");
987 CopyEnd = createBasicBlock("copyin.not.master.end");
988 Builder.CreateCondBr(
989 Builder.CreateICmpNE(
990 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
991 Builder.CreatePtrToInt(PrivateAddr.getPointer(),
992 CGM.IntPtrTy)),
993 CopyBegin, CopyEnd);
994 EmitBlock(CopyBegin);
995 }
996 const auto *SrcVD =
997 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
998 const auto *DestVD =
999 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1000 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
1001 }
1002 ++IRef;
1003 ++ISrcRef;
1004 ++IDestRef;
1005 }
1006 }
1007 if (CopyEnd) {
1008 // Exit out of copying procedure for non-master thread.
1009 EmitBlock(CopyEnd, /*IsFinished=*/true);
1010 return true;
1011 }
1012 return false;
1013 }
1014
EmitOMPLastprivateClauseInit(const OMPExecutableDirective & D,OMPPrivateScope & PrivateScope)1015 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
1016 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
1017 if (!HaveInsertPoint())
1018 return false;
1019 bool HasAtLeastOneLastprivate = false;
1020 llvm::DenseSet<const VarDecl *> SIMDLCVs;
1021 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1022 const auto *LoopDirective = cast<OMPLoopDirective>(&D);
1023 for (const Expr *C : LoopDirective->counters()) {
1024 SIMDLCVs.insert(
1025 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1026 }
1027 }
1028 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1029 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1030 HasAtLeastOneLastprivate = true;
1031 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
1032 !getLangOpts().OpenMPSimd)
1033 break;
1034 const auto *IRef = C->varlist_begin();
1035 const auto *IDestRef = C->destination_exprs().begin();
1036 for (const Expr *IInit : C->private_copies()) {
1037 // Keep the address of the original variable for future update at the end
1038 // of the loop.
1039 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1040 // Taskloops do not require additional initialization, it is done in
1041 // runtime support library.
1042 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
1043 const auto *DestVD =
1044 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1045 PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
1046 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1047 /*RefersToEnclosingVariableOrCapture=*/
1048 CapturedStmtInfo->lookup(OrigVD) != nullptr,
1049 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1050 return EmitLValue(&DRE).getAddress(*this);
1051 });
1052 // Check if the variable is also a firstprivate: in this case IInit is
1053 // not generated. Initialization of this variable will happen in codegen
1054 // for 'firstprivate' clause.
1055 if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
1056 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
1057 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
1058 OrigVD]() {
1059 if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
1060 Address VDAddr =
1061 CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this,
1062 OrigVD);
1063 setAddrOfLocalVar(VD, VDAddr);
1064 return VDAddr;
1065 }
1066 // Emit private VarDecl with copy init.
1067 EmitDecl(*VD);
1068 return GetAddrOfLocalVar(VD);
1069 });
1070 assert(IsRegistered &&
1071 "lastprivate var already registered as private");
1072 (void)IsRegistered;
1073 }
1074 }
1075 ++IRef;
1076 ++IDestRef;
1077 }
1078 }
1079 return HasAtLeastOneLastprivate;
1080 }
1081
EmitOMPLastprivateClauseFinal(const OMPExecutableDirective & D,bool NoFinals,llvm::Value * IsLastIterCond)1082 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
1083 const OMPExecutableDirective &D, bool NoFinals,
1084 llvm::Value *IsLastIterCond) {
1085 if (!HaveInsertPoint())
1086 return;
1087 // Emit following code:
1088 // if (<IsLastIterCond>) {
1089 // orig_var1 = private_orig_var1;
1090 // ...
1091 // orig_varn = private_orig_varn;
1092 // }
1093 llvm::BasicBlock *ThenBB = nullptr;
1094 llvm::BasicBlock *DoneBB = nullptr;
1095 if (IsLastIterCond) {
1096 // Emit implicit barrier if at least one lastprivate conditional is found
1097 // and this is not a simd mode.
1098 if (!getLangOpts().OpenMPSimd &&
1099 llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
1100 [](const OMPLastprivateClause *C) {
1101 return C->getKind() == OMPC_LASTPRIVATE_conditional;
1102 })) {
1103 CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
1104 OMPD_unknown,
1105 /*EmitChecks=*/false,
1106 /*ForceSimpleCall=*/true);
1107 }
1108 ThenBB = createBasicBlock(".omp.lastprivate.then");
1109 DoneBB = createBasicBlock(".omp.lastprivate.done");
1110 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
1111 EmitBlock(ThenBB);
1112 }
1113 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1114 llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1115 if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1116 auto IC = LoopDirective->counters().begin();
1117 for (const Expr *F : LoopDirective->finals()) {
1118 const auto *D =
1119 cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1120 if (NoFinals)
1121 AlreadyEmittedVars.insert(D);
1122 else
1123 LoopCountersAndUpdates[D] = F;
1124 ++IC;
1125 }
1126 }
1127 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1128 auto IRef = C->varlist_begin();
1129 auto ISrcRef = C->source_exprs().begin();
1130 auto IDestRef = C->destination_exprs().begin();
1131 for (const Expr *AssignOp : C->assignment_ops()) {
1132 const auto *PrivateVD =
1133 cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1134 QualType Type = PrivateVD->getType();
1135 const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1136 if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1137 // If lastprivate variable is a loop control variable for loop-based
1138 // directive, update its value before copyin back to original
1139 // variable.
1140 if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1141 EmitIgnoredExpr(FinalExpr);
1142 const auto *SrcVD =
1143 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1144 const auto *DestVD =
1145 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1146 // Get the address of the private variable.
1147 Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
1148 if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1149 PrivateAddr =
1150 Address(Builder.CreateLoad(PrivateAddr),
1151 CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
1152 // Store the last value to the private copy in the last iteration.
1153 if (C->getKind() == OMPC_LASTPRIVATE_conditional)
1154 CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1155 *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
1156 (*IRef)->getExprLoc());
1157 // Get the address of the original variable.
1158 Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1159 EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1160 }
1161 ++IRef;
1162 ++ISrcRef;
1163 ++IDestRef;
1164 }
1165 if (const Expr *PostUpdate = C->getPostUpdateExpr())
1166 EmitIgnoredExpr(PostUpdate);
1167 }
1168 if (IsLastIterCond)
1169 EmitBlock(DoneBB, /*IsFinished=*/true);
1170 }
1171
EmitOMPReductionClauseInit(const OMPExecutableDirective & D,CodeGenFunction::OMPPrivateScope & PrivateScope,bool ForInscan)1172 void CodeGenFunction::EmitOMPReductionClauseInit(
1173 const OMPExecutableDirective &D,
1174 CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1175 if (!HaveInsertPoint())
1176 return;
1177 SmallVector<const Expr *, 4> Shareds;
1178 SmallVector<const Expr *, 4> Privates;
1179 SmallVector<const Expr *, 4> ReductionOps;
1180 SmallVector<const Expr *, 4> LHSs;
1181 SmallVector<const Expr *, 4> RHSs;
1182 OMPTaskDataTy Data;
1183 SmallVector<const Expr *, 4> TaskLHSs;
1184 SmallVector<const Expr *, 4> TaskRHSs;
1185 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1186 if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1187 continue;
1188 Shareds.append(C->varlist_begin(), C->varlist_end());
1189 Privates.append(C->privates().begin(), C->privates().end());
1190 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1191 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1192 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1193 if (C->getModifier() == OMPC_REDUCTION_task) {
1194 Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1195 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1196 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1197 Data.ReductionOps.append(C->reduction_ops().begin(),
1198 C->reduction_ops().end());
1199 TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1200 TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1201 }
1202 }
1203 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1204 unsigned Count = 0;
1205 auto *ILHS = LHSs.begin();
1206 auto *IRHS = RHSs.begin();
1207 auto *IPriv = Privates.begin();
1208 for (const Expr *IRef : Shareds) {
1209 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1210 // Emit private VarDecl with reduction init.
1211 RedCG.emitSharedOrigLValue(*this, Count);
1212 RedCG.emitAggregateType(*this, Count);
1213 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1214 RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1215 RedCG.getSharedLValue(Count),
1216 [&Emission](CodeGenFunction &CGF) {
1217 CGF.EmitAutoVarInit(Emission);
1218 return true;
1219 });
1220 EmitAutoVarCleanups(Emission);
1221 Address BaseAddr = RedCG.adjustPrivateAddress(
1222 *this, Count, Emission.getAllocatedAddress());
1223 bool IsRegistered = PrivateScope.addPrivate(
1224 RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
1225 assert(IsRegistered && "private var already registered as private");
1226 // Silence the warning about unused variable.
1227 (void)IsRegistered;
1228
1229 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1230 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1231 QualType Type = PrivateVD->getType();
1232 bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1233 if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1234 // Store the address of the original variable associated with the LHS
1235 // implicit variable.
1236 PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1237 return RedCG.getSharedLValue(Count).getAddress(*this);
1238 });
1239 PrivateScope.addPrivate(
1240 RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
1241 } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1242 isa<ArraySubscriptExpr>(IRef)) {
1243 // Store the address of the original variable associated with the LHS
1244 // implicit variable.
1245 PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1246 return RedCG.getSharedLValue(Count).getAddress(*this);
1247 });
1248 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
1249 return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1250 ConvertTypeForMem(RHSVD->getType()),
1251 "rhs.begin");
1252 });
1253 } else {
1254 QualType Type = PrivateVD->getType();
1255 bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1256 Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
1257 // Store the address of the original variable associated with the LHS
1258 // implicit variable.
1259 if (IsArray) {
1260 OriginalAddr = Builder.CreateElementBitCast(
1261 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1262 }
1263 PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
1264 PrivateScope.addPrivate(
1265 RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
1266 return IsArray
1267 ? Builder.CreateElementBitCast(
1268 GetAddrOfLocalVar(PrivateVD),
1269 ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1270 : GetAddrOfLocalVar(PrivateVD);
1271 });
1272 }
1273 ++ILHS;
1274 ++IRHS;
1275 ++IPriv;
1276 ++Count;
1277 }
1278 if (!Data.ReductionVars.empty()) {
1279 Data.IsReductionWithTaskMod = true;
1280 Data.IsWorksharingReduction =
1281 isOpenMPWorksharingDirective(D.getDirectiveKind());
1282 llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1283 *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1284 const Expr *TaskRedRef = nullptr;
1285 switch (D.getDirectiveKind()) {
1286 case OMPD_parallel:
1287 TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1288 break;
1289 case OMPD_for:
1290 TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1291 break;
1292 case OMPD_sections:
1293 TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1294 break;
1295 case OMPD_parallel_for:
1296 TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1297 break;
1298 case OMPD_parallel_master:
1299 TaskRedRef =
1300 cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1301 break;
1302 case OMPD_parallel_sections:
1303 TaskRedRef =
1304 cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1305 break;
1306 case OMPD_target_parallel:
1307 TaskRedRef =
1308 cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1309 break;
1310 case OMPD_target_parallel_for:
1311 TaskRedRef =
1312 cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1313 break;
1314 case OMPD_distribute_parallel_for:
1315 TaskRedRef =
1316 cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1317 break;
1318 case OMPD_teams_distribute_parallel_for:
1319 TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
1320 .getTaskReductionRefExpr();
1321 break;
1322 case OMPD_target_teams_distribute_parallel_for:
1323 TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
1324 .getTaskReductionRefExpr();
1325 break;
1326 case OMPD_simd:
1327 case OMPD_for_simd:
1328 case OMPD_section:
1329 case OMPD_single:
1330 case OMPD_master:
1331 case OMPD_critical:
1332 case OMPD_parallel_for_simd:
1333 case OMPD_task:
1334 case OMPD_taskyield:
1335 case OMPD_barrier:
1336 case OMPD_taskwait:
1337 case OMPD_taskgroup:
1338 case OMPD_flush:
1339 case OMPD_depobj:
1340 case OMPD_scan:
1341 case OMPD_ordered:
1342 case OMPD_atomic:
1343 case OMPD_teams:
1344 case OMPD_target:
1345 case OMPD_cancellation_point:
1346 case OMPD_cancel:
1347 case OMPD_target_data:
1348 case OMPD_target_enter_data:
1349 case OMPD_target_exit_data:
1350 case OMPD_taskloop:
1351 case OMPD_taskloop_simd:
1352 case OMPD_master_taskloop:
1353 case OMPD_master_taskloop_simd:
1354 case OMPD_parallel_master_taskloop:
1355 case OMPD_parallel_master_taskloop_simd:
1356 case OMPD_distribute:
1357 case OMPD_target_update:
1358 case OMPD_distribute_parallel_for_simd:
1359 case OMPD_distribute_simd:
1360 case OMPD_target_parallel_for_simd:
1361 case OMPD_target_simd:
1362 case OMPD_teams_distribute:
1363 case OMPD_teams_distribute_simd:
1364 case OMPD_teams_distribute_parallel_for_simd:
1365 case OMPD_target_teams:
1366 case OMPD_target_teams_distribute:
1367 case OMPD_target_teams_distribute_parallel_for_simd:
1368 case OMPD_target_teams_distribute_simd:
1369 case OMPD_declare_target:
1370 case OMPD_end_declare_target:
1371 case OMPD_threadprivate:
1372 case OMPD_allocate:
1373 case OMPD_declare_reduction:
1374 case OMPD_declare_mapper:
1375 case OMPD_declare_simd:
1376 case OMPD_requires:
1377 case OMPD_declare_variant:
1378 case OMPD_begin_declare_variant:
1379 case OMPD_end_declare_variant:
1380 case OMPD_unknown:
1381 default:
1382 llvm_unreachable("Enexpected directive with task reductions.");
1383 }
1384
1385 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1386 EmitVarDecl(*VD);
1387 EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1388 /*Volatile=*/false, TaskRedRef->getType());
1389 }
1390 }
1391
EmitOMPReductionClauseFinal(const OMPExecutableDirective & D,const OpenMPDirectiveKind ReductionKind)1392 void CodeGenFunction::EmitOMPReductionClauseFinal(
1393 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1394 if (!HaveInsertPoint())
1395 return;
1396 llvm::SmallVector<const Expr *, 8> Privates;
1397 llvm::SmallVector<const Expr *, 8> LHSExprs;
1398 llvm::SmallVector<const Expr *, 8> RHSExprs;
1399 llvm::SmallVector<const Expr *, 8> ReductionOps;
1400 bool HasAtLeastOneReduction = false;
1401 bool IsReductionWithTaskMod = false;
1402 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1403 // Do not emit for inscan reductions.
1404 if (C->getModifier() == OMPC_REDUCTION_inscan)
1405 continue;
1406 HasAtLeastOneReduction = true;
1407 Privates.append(C->privates().begin(), C->privates().end());
1408 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1409 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1410 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1411 IsReductionWithTaskMod =
1412 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1413 }
1414 if (HasAtLeastOneReduction) {
1415 if (IsReductionWithTaskMod) {
1416 CGM.getOpenMPRuntime().emitTaskReductionFini(
1417 *this, D.getBeginLoc(),
1418 isOpenMPWorksharingDirective(D.getDirectiveKind()));
1419 }
1420 bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1421 isOpenMPParallelDirective(D.getDirectiveKind()) ||
1422 ReductionKind == OMPD_simd;
1423 bool SimpleReduction = ReductionKind == OMPD_simd;
1424 // Emit nowait reduction if nowait clause is present or directive is a
1425 // parallel directive (it always has implicit barrier).
1426 CGM.getOpenMPRuntime().emitReduction(
1427 *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1428 {WithNowait, SimpleReduction, ReductionKind});
1429 }
1430 }
1431
emitPostUpdateForReductionClause(CodeGenFunction & CGF,const OMPExecutableDirective & D,const llvm::function_ref<llvm::Value * (CodeGenFunction &)> CondGen)1432 static void emitPostUpdateForReductionClause(
1433 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1434 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1435 if (!CGF.HaveInsertPoint())
1436 return;
1437 llvm::BasicBlock *DoneBB = nullptr;
1438 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1439 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1440 if (!DoneBB) {
1441 if (llvm::Value *Cond = CondGen(CGF)) {
1442 // If the first post-update expression is found, emit conditional
1443 // block if it was requested.
1444 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1445 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1446 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1447 CGF.EmitBlock(ThenBB);
1448 }
1449 }
1450 CGF.EmitIgnoredExpr(PostUpdate);
1451 }
1452 }
1453 if (DoneBB)
1454 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1455 }
1456
1457 namespace {
1458 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1459 /// parallel function. This is necessary for combined constructs such as
1460 /// 'distribute parallel for'
1461 typedef llvm::function_ref<void(CodeGenFunction &,
1462 const OMPExecutableDirective &,
1463 llvm::SmallVectorImpl<llvm::Value *> &)>
1464 CodeGenBoundParametersTy;
1465 } // anonymous namespace
1466
1467 static void
checkForLastprivateConditionalUpdate(CodeGenFunction & CGF,const OMPExecutableDirective & S)1468 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1469 const OMPExecutableDirective &S) {
1470 if (CGF.getLangOpts().OpenMP < 50)
1471 return;
1472 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1473 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1474 for (const Expr *Ref : C->varlists()) {
1475 if (!Ref->getType()->isScalarType())
1476 continue;
1477 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1478 if (!DRE)
1479 continue;
1480 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1481 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1482 }
1483 }
1484 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1485 for (const Expr *Ref : C->varlists()) {
1486 if (!Ref->getType()->isScalarType())
1487 continue;
1488 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1489 if (!DRE)
1490 continue;
1491 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1492 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1493 }
1494 }
1495 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1496 for (const Expr *Ref : C->varlists()) {
1497 if (!Ref->getType()->isScalarType())
1498 continue;
1499 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1500 if (!DRE)
1501 continue;
1502 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1503 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1504 }
1505 }
1506 // Privates should ne analyzed since they are not captured at all.
1507 // Task reductions may be skipped - tasks are ignored.
1508 // Firstprivates do not return value but may be passed by reference - no need
1509 // to check for updated lastprivate conditional.
1510 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1511 for (const Expr *Ref : C->varlists()) {
1512 if (!Ref->getType()->isScalarType())
1513 continue;
1514 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1515 if (!DRE)
1516 continue;
1517 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1518 }
1519 }
1520 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1521 CGF, S, PrivateDecls);
1522 }
1523
emitCommonOMPParallelDirective(CodeGenFunction & CGF,const OMPExecutableDirective & S,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen,const CodeGenBoundParametersTy & CodeGenBoundParameters)1524 static void emitCommonOMPParallelDirective(
1525 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1526 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1527 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1528 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1529 llvm::Function *OutlinedFn =
1530 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1531 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1532 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1533 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1534 llvm::Value *NumThreads =
1535 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1536 /*IgnoreResultAssign=*/true);
1537 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1538 CGF, NumThreads, NumThreadsClause->getBeginLoc());
1539 }
1540 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1541 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1542 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1543 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1544 }
1545 const Expr *IfCond = nullptr;
1546 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1547 if (C->getNameModifier() == OMPD_unknown ||
1548 C->getNameModifier() == OMPD_parallel) {
1549 IfCond = C->getCondition();
1550 break;
1551 }
1552 }
1553
1554 OMPParallelScope Scope(CGF, S);
1555 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1556 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1557 // lower and upper bounds with the pragma 'for' chunking mechanism.
1558 // The following lambda takes care of appending the lower and upper bound
1559 // parameters when necessary
1560 CodeGenBoundParameters(CGF, S, CapturedVars);
1561 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1562 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1563 CapturedVars, IfCond);
1564 }
1565
isAllocatableDecl(const VarDecl * VD)1566 static bool isAllocatableDecl(const VarDecl *VD) {
1567 const VarDecl *CVD = VD->getCanonicalDecl();
1568 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1569 return false;
1570 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1571 // Use the default allocation.
1572 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1573 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1574 !AA->getAllocator());
1575 }
1576
emitEmptyBoundParameters(CodeGenFunction &,const OMPExecutableDirective &,llvm::SmallVectorImpl<llvm::Value * > &)1577 static void emitEmptyBoundParameters(CodeGenFunction &,
1578 const OMPExecutableDirective &,
1579 llvm::SmallVectorImpl<llvm::Value *> &) {}
1580
getAddressOfLocalVariable(CodeGenFunction & CGF,const VarDecl * VD)1581 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1582 CodeGenFunction &CGF, const VarDecl *VD) {
1583 CodeGenModule &CGM = CGF.CGM;
1584 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1585
1586 if (!VD)
1587 return Address::invalid();
1588 const VarDecl *CVD = VD->getCanonicalDecl();
1589 if (!isAllocatableDecl(CVD))
1590 return Address::invalid();
1591 llvm::Value *Size;
1592 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1593 if (CVD->getType()->isVariablyModifiedType()) {
1594 Size = CGF.getTypeSize(CVD->getType());
1595 // Align the size: ((size + align - 1) / align) * align
1596 Size = CGF.Builder.CreateNUWAdd(
1597 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1598 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1599 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1600 } else {
1601 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1602 Size = CGM.getSize(Sz.alignTo(Align));
1603 }
1604
1605 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1606 assert(AA->getAllocator() &&
1607 "Expected allocator expression for non-default allocator.");
1608 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1609 // According to the standard, the original allocator type is a enum (integer).
1610 // Convert to pointer type, if required.
1611 if (Allocator->getType()->isIntegerTy())
1612 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1613 else if (Allocator->getType()->isPointerTy())
1614 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1615 CGM.VoidPtrTy);
1616
1617 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1618 CGF.Builder, Size, Allocator,
1619 getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1620 llvm::CallInst *FreeCI =
1621 OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1622
1623 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1624 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1625 Addr,
1626 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1627 getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1628 return Address(Addr, Align);
1629 }
1630
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)1631 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1632 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1633 SourceLocation Loc) {
1634 CodeGenModule &CGM = CGF.CGM;
1635 if (CGM.getLangOpts().OpenMPUseTLS &&
1636 CGM.getContext().getTargetInfo().isTLSSupported())
1637 return VDAddr;
1638
1639 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1640
1641 llvm::Type *VarTy = VDAddr.getElementType();
1642 llvm::Value *Data =
1643 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1644 llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1645 std::string Suffix = getNameWithSeparators({"cache", ""});
1646 llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1647
1648 llvm::CallInst *ThreadPrivateCacheCall =
1649 OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1650
1651 return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
1652 }
1653
getNameWithSeparators(ArrayRef<StringRef> Parts,StringRef FirstSeparator,StringRef Separator)1654 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1655 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1656 SmallString<128> Buffer;
1657 llvm::raw_svector_ostream OS(Buffer);
1658 StringRef Sep = FirstSeparator;
1659 for (StringRef Part : Parts) {
1660 OS << Sep << Part;
1661 Sep = Separator;
1662 }
1663 return OS.str().str();
1664 }
EmitOMPParallelDirective(const OMPParallelDirective & S)1665 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1666 if (CGM.getLangOpts().OpenMPIRBuilder) {
1667 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1668 // Check if we have any if clause associated with the directive.
1669 llvm::Value *IfCond = nullptr;
1670 if (const auto *C = S.getSingleClause<OMPIfClause>())
1671 IfCond = EmitScalarExpr(C->getCondition(),
1672 /*IgnoreResultAssign=*/true);
1673
1674 llvm::Value *NumThreads = nullptr;
1675 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1676 NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
1677 /*IgnoreResultAssign=*/true);
1678
1679 ProcBindKind ProcBind = OMP_PROC_BIND_default;
1680 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1681 ProcBind = ProcBindClause->getProcBindKind();
1682
1683 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1684
1685 // The cleanup callback that finalizes all variabels at the given location,
1686 // thus calls destructors etc.
1687 auto FiniCB = [this](InsertPointTy IP) {
1688 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
1689 };
1690
1691 // Privatization callback that performs appropriate action for
1692 // shared/private/firstprivate/lastprivate/copyin/... variables.
1693 //
1694 // TODO: This defaults to shared right now.
1695 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1696 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1697 // The next line is appropriate only for variables (Val) with the
1698 // data-sharing attribute "shared".
1699 ReplVal = &Val;
1700
1701 return CodeGenIP;
1702 };
1703
1704 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1705 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1706
1707 auto BodyGenCB = [ParallelRegionBodyStmt,
1708 this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1709 llvm::BasicBlock &ContinuationBB) {
1710 OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
1711 ContinuationBB);
1712 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
1713 CodeGenIP, ContinuationBB);
1714 };
1715
1716 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1717 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1718 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1719 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1720 Builder.restoreIP(
1721 OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1722 IfCond, NumThreads, ProcBind, S.hasCancel()));
1723 return;
1724 }
1725
1726 // Emit parallel region as a standalone region.
1727 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1728 Action.Enter(CGF);
1729 OMPPrivateScope PrivateScope(CGF);
1730 bool Copyins = CGF.EmitOMPCopyinClause(S);
1731 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1732 if (Copyins) {
1733 // Emit implicit barrier to synchronize threads and avoid data races on
1734 // propagation master's thread values of threadprivate variables to local
1735 // instances of that variables of all other implicit threads.
1736 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1737 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1738 /*ForceSimpleCall=*/true);
1739 }
1740 CGF.EmitOMPPrivateClause(S, PrivateScope);
1741 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1742 (void)PrivateScope.Privatize();
1743 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1744 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1745 };
1746 {
1747 auto LPCRegion =
1748 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1749 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1750 emitEmptyBoundParameters);
1751 emitPostUpdateForReductionClause(*this, S,
1752 [](CodeGenFunction &) { return nullptr; });
1753 }
1754 // Check for outer lastprivate conditional update.
1755 checkForLastprivateConditionalUpdate(*this, S);
1756 }
1757
emitBody(CodeGenFunction & CGF,const Stmt * S,const Stmt * NextLoop,int MaxLevel,int Level=0)1758 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1759 int MaxLevel, int Level = 0) {
1760 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1761 const Stmt *SimplifiedS = S->IgnoreContainers();
1762 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1763 PrettyStackTraceLoc CrashInfo(
1764 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1765 "LLVM IR generation of compound statement ('{}')");
1766
1767 // Keep track of the current cleanup stack depth, including debug scopes.
1768 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1769 for (const Stmt *CurStmt : CS->body())
1770 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1771 return;
1772 }
1773 if (SimplifiedS == NextLoop) {
1774 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1775 S = For->getBody();
1776 } else {
1777 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1778 "Expected canonical for loop or range-based for loop.");
1779 const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1780 CGF.EmitStmt(CXXFor->getLoopVarStmt());
1781 S = CXXFor->getBody();
1782 }
1783 if (Level + 1 < MaxLevel) {
1784 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1785 S, /*TryImperfectlyNestedLoops=*/true);
1786 emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1787 return;
1788 }
1789 }
1790 CGF.EmitStmt(S);
1791 }
1792
EmitOMPLoopBody(const OMPLoopDirective & D,JumpDest LoopExit)1793 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1794 JumpDest LoopExit) {
1795 RunCleanupsScope BodyScope(*this);
1796 // Update counters values on current iteration.
1797 for (const Expr *UE : D.updates())
1798 EmitIgnoredExpr(UE);
1799 // Update the linear variables.
1800 // In distribute directives only loop counters may be marked as linear, no
1801 // need to generate the code for them.
1802 if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1803 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1804 for (const Expr *UE : C->updates())
1805 EmitIgnoredExpr(UE);
1806 }
1807 }
1808
1809 // On a continue in the body, jump to the end.
1810 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1811 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1812 for (const Expr *E : D.finals_conditions()) {
1813 if (!E)
1814 continue;
1815 // Check that loop counter in non-rectangular nest fits into the iteration
1816 // space.
1817 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1818 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1819 getProfileCount(D.getBody()));
1820 EmitBlock(NextBB);
1821 }
1822
1823 OMPPrivateScope InscanScope(*this);
1824 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1825 bool IsInscanRegion = InscanScope.Privatize();
1826 if (IsInscanRegion) {
1827 // Need to remember the block before and after scan directive
1828 // to dispatch them correctly depending on the clause used in
1829 // this directive, inclusive or exclusive. For inclusive scan the natural
1830 // order of the blocks is used, for exclusive clause the blocks must be
1831 // executed in reverse order.
1832 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1833 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1834 // No need to allocate inscan exit block, in simd mode it is selected in the
1835 // codegen for the scan directive.
1836 if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
1837 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1838 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1839 EmitBranch(OMPScanDispatch);
1840 EmitBlock(OMPBeforeScanBlock);
1841 }
1842
1843 // Emit loop variables for C++ range loops.
1844 const Stmt *Body =
1845 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1846 // Emit loop body.
1847 emitBody(*this, Body,
1848 OMPLoopDirective::tryToFindNextInnerLoop(
1849 Body, /*TryImperfectlyNestedLoops=*/true),
1850 D.getCollapsedNumber());
1851
1852 // Jump to the dispatcher at the end of the loop body.
1853 if (IsInscanRegion)
1854 EmitBranch(OMPScanExitBlock);
1855
1856 // The end (updates/cleanups).
1857 EmitBlock(Continue.getBlock());
1858 BreakContinueStack.pop_back();
1859 }
1860
EmitOMPInnerLoop(const OMPExecutableDirective & S,bool RequiresCleanup,const Expr * LoopCond,const Expr * IncExpr,const llvm::function_ref<void (CodeGenFunction &)> BodyGen,const llvm::function_ref<void (CodeGenFunction &)> PostIncGen)1861 void CodeGenFunction::EmitOMPInnerLoop(
1862 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
1863 const Expr *IncExpr,
1864 const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
1865 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
1866 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1867
1868 // Start the loop with a block that tests the condition.
1869 auto CondBlock = createBasicBlock("omp.inner.for.cond");
1870 EmitBlock(CondBlock);
1871 const SourceRange R = S.getSourceRange();
1872
1873 // If attributes are attached, push to the basic block with them.
1874 const auto &OMPED = cast<OMPExecutableDirective>(S);
1875 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
1876 const Stmt *SS = ICS->getCapturedStmt();
1877 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
1878 if (AS)
1879 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
1880 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
1881 SourceLocToDebugLoc(R.getEnd()));
1882 else
1883 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1884 SourceLocToDebugLoc(R.getEnd()));
1885
1886 // If there are any cleanups between here and the loop-exit scope,
1887 // create a block to stage a loop exit along.
1888 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
1889 if (RequiresCleanup)
1890 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1891
1892 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
1893
1894 // Emit condition.
1895 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1896 if (ExitBlock != LoopExit.getBlock()) {
1897 EmitBlock(ExitBlock);
1898 EmitBranchThroughCleanup(LoopExit);
1899 }
1900
1901 EmitBlock(LoopBody);
1902 incrementProfileCounter(&S);
1903
1904 // Create a block for the increment.
1905 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1906 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1907
1908 BodyGen(*this);
1909
1910 // Emit "IV = IV + 1" and a back-edge to the condition block.
1911 EmitBlock(Continue.getBlock());
1912 EmitIgnoredExpr(IncExpr);
1913 PostIncGen(*this);
1914 BreakContinueStack.pop_back();
1915 EmitBranch(CondBlock);
1916 LoopStack.pop();
1917 // Emit the fall-through block.
1918 EmitBlock(LoopExit.getBlock());
1919 }
1920
EmitOMPLinearClauseInit(const OMPLoopDirective & D)1921 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1922 if (!HaveInsertPoint())
1923 return false;
1924 // Emit inits for the linear variables.
1925 bool HasLinears = false;
1926 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1927 for (const Expr *Init : C->inits()) {
1928 HasLinears = true;
1929 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1930 if (const auto *Ref =
1931 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1932 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1933 const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1934 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1935 CapturedStmtInfo->lookup(OrigVD) != nullptr,
1936 VD->getInit()->getType(), VK_LValue,
1937 VD->getInit()->getExprLoc());
1938 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1939 VD->getType()),
1940 /*capturedByInit=*/false);
1941 EmitAutoVarCleanups(Emission);
1942 } else {
1943 EmitVarDecl(*VD);
1944 }
1945 }
1946 // Emit the linear steps for the linear clauses.
1947 // If a step is not constant, it is pre-calculated before the loop.
1948 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1949 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1950 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1951 // Emit calculation of the linear step.
1952 EmitIgnoredExpr(CS);
1953 }
1954 }
1955 return HasLinears;
1956 }
1957
EmitOMPLinearClauseFinal(const OMPLoopDirective & D,const llvm::function_ref<llvm::Value * (CodeGenFunction &)> CondGen)1958 void CodeGenFunction::EmitOMPLinearClauseFinal(
1959 const OMPLoopDirective &D,
1960 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1961 if (!HaveInsertPoint())
1962 return;
1963 llvm::BasicBlock *DoneBB = nullptr;
1964 // Emit the final values of the linear variables.
1965 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1966 auto IC = C->varlist_begin();
1967 for (const Expr *F : C->finals()) {
1968 if (!DoneBB) {
1969 if (llvm::Value *Cond = CondGen(*this)) {
1970 // If the first post-update expression is found, emit conditional
1971 // block if it was requested.
1972 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
1973 DoneBB = createBasicBlock(".omp.linear.pu.done");
1974 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1975 EmitBlock(ThenBB);
1976 }
1977 }
1978 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1979 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1980 CapturedStmtInfo->lookup(OrigVD) != nullptr,
1981 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1982 Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
1983 CodeGenFunction::OMPPrivateScope VarScope(*this);
1984 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
1985 (void)VarScope.Privatize();
1986 EmitIgnoredExpr(F);
1987 ++IC;
1988 }
1989 if (const Expr *PostUpdate = C->getPostUpdateExpr())
1990 EmitIgnoredExpr(PostUpdate);
1991 }
1992 if (DoneBB)
1993 EmitBlock(DoneBB, /*IsFinished=*/true);
1994 }
1995
emitAlignedClause(CodeGenFunction & CGF,const OMPExecutableDirective & D)1996 static void emitAlignedClause(CodeGenFunction &CGF,
1997 const OMPExecutableDirective &D) {
1998 if (!CGF.HaveInsertPoint())
1999 return;
2000 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2001 llvm::APInt ClauseAlignment(64, 0);
2002 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2003 auto *AlignmentCI =
2004 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2005 ClauseAlignment = AlignmentCI->getValue();
2006 }
2007 for (const Expr *E : Clause->varlists()) {
2008 llvm::APInt Alignment(ClauseAlignment);
2009 if (Alignment == 0) {
2010 // OpenMP [2.8.1, Description]
2011 // If no optional parameter is specified, implementation-defined default
2012 // alignments for SIMD instructions on the target platforms are assumed.
2013 Alignment =
2014 CGF.getContext()
2015 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2016 E->getType()->getPointeeType()))
2017 .getQuantity();
2018 }
2019 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2020 "alignment is not power of 2");
2021 if (Alignment != 0) {
2022 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2023 CGF.emitAlignmentAssumption(
2024 PtrValue, E, /*No second loc needed*/ SourceLocation(),
2025 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2026 }
2027 }
2028 }
2029 }
2030
EmitOMPPrivateLoopCounters(const OMPLoopDirective & S,CodeGenFunction::OMPPrivateScope & LoopScope)2031 void CodeGenFunction::EmitOMPPrivateLoopCounters(
2032 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2033 if (!HaveInsertPoint())
2034 return;
2035 auto I = S.private_counters().begin();
2036 for (const Expr *E : S.counters()) {
2037 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2038 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2039 // Emit var without initialization.
2040 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
2041 EmitAutoVarCleanups(VarEmission);
2042 LocalDeclMap.erase(PrivateVD);
2043 (void)LoopScope.addPrivate(VD, [&VarEmission]() {
2044 return VarEmission.getAllocatedAddress();
2045 });
2046 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
2047 VD->hasGlobalStorage()) {
2048 (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
2049 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2050 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
2051 E->getType(), VK_LValue, E->getExprLoc());
2052 return EmitLValue(&DRE).getAddress(*this);
2053 });
2054 } else {
2055 (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
2056 return VarEmission.getAllocatedAddress();
2057 });
2058 }
2059 ++I;
2060 }
2061 // Privatize extra loop counters used in loops for ordered(n) clauses.
2062 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2063 if (!C->getNumForLoops())
2064 continue;
2065 for (unsigned I = S.getCollapsedNumber(),
2066 E = C->getLoopNumIterations().size();
2067 I < E; ++I) {
2068 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
2069 const auto *VD = cast<VarDecl>(DRE->getDecl());
2070 // Override only those variables that can be captured to avoid re-emission
2071 // of the variables declared within the loops.
2072 if (DRE->refersToEnclosingVariableOrCapture()) {
2073 (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
2074 return CreateMemTemp(DRE->getType(), VD->getName());
2075 });
2076 }
2077 }
2078 }
2079 }
2080
emitPreCond(CodeGenFunction & CGF,const OMPLoopDirective & S,const Expr * Cond,llvm::BasicBlock * TrueBlock,llvm::BasicBlock * FalseBlock,uint64_t TrueCount)2081 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2082 const Expr *Cond, llvm::BasicBlock *TrueBlock,
2083 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2084 if (!CGF.HaveInsertPoint())
2085 return;
2086 {
2087 CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2088 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
2089 (void)PreCondScope.Privatize();
2090 // Get initial values of real counters.
2091 for (const Expr *I : S.inits()) {
2092 CGF.EmitIgnoredExpr(I);
2093 }
2094 }
2095 // Create temp loop control variables with their init values to support
2096 // non-rectangular loops.
2097 CodeGenFunction::OMPMapVars PreCondVars;
2098 for (const Expr * E: S.dependent_counters()) {
2099 if (!E)
2100 continue;
2101 assert(!E->getType().getNonReferenceType()->isRecordType() &&
2102 "dependent counter must not be an iterator.");
2103 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2104 Address CounterAddr =
2105 CGF.CreateMemTemp(VD->getType().getNonReferenceType());
2106 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
2107 }
2108 (void)PreCondVars.apply(CGF);
2109 for (const Expr *E : S.dependent_inits()) {
2110 if (!E)
2111 continue;
2112 CGF.EmitIgnoredExpr(E);
2113 }
2114 // Check that loop is executed at least one time.
2115 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2116 PreCondVars.restore(CGF);
2117 }
2118
EmitOMPLinearClause(const OMPLoopDirective & D,CodeGenFunction::OMPPrivateScope & PrivateScope)2119 void CodeGenFunction::EmitOMPLinearClause(
2120 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2121 if (!HaveInsertPoint())
2122 return;
2123 llvm::DenseSet<const VarDecl *> SIMDLCVs;
2124 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2125 const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2126 for (const Expr *C : LoopDirective->counters()) {
2127 SIMDLCVs.insert(
2128 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2129 }
2130 }
2131 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2132 auto CurPrivate = C->privates().begin();
2133 for (const Expr *E : C->varlists()) {
2134 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2135 const auto *PrivateVD =
2136 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2137 if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2138 bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
2139 // Emit private VarDecl with copy init.
2140 EmitVarDecl(*PrivateVD);
2141 return GetAddrOfLocalVar(PrivateVD);
2142 });
2143 assert(IsRegistered && "linear var already registered as private");
2144 // Silence the warning about unused variable.
2145 (void)IsRegistered;
2146 } else {
2147 EmitVarDecl(*PrivateVD);
2148 }
2149 ++CurPrivate;
2150 }
2151 }
2152 }
2153
emitSimdlenSafelenClause(CodeGenFunction & CGF,const OMPExecutableDirective & D,bool IsMonotonic)2154 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2155 const OMPExecutableDirective &D,
2156 bool IsMonotonic) {
2157 if (!CGF.HaveInsertPoint())
2158 return;
2159 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2160 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2161 /*ignoreResult=*/true);
2162 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2163 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2164 // In presence of finite 'safelen', it may be unsafe to mark all
2165 // the memory instructions parallel, because loop-carried
2166 // dependences of 'safelen' iterations are possible.
2167 if (!IsMonotonic)
2168 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2169 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2170 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2171 /*ignoreResult=*/true);
2172 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2173 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2174 // In presence of finite 'safelen', it may be unsafe to mark all
2175 // the memory instructions parallel, because loop-carried
2176 // dependences of 'safelen' iterations are possible.
2177 CGF.LoopStack.setParallel(/*Enable=*/false);
2178 }
2179 }
2180
EmitOMPSimdInit(const OMPLoopDirective & D,bool IsMonotonic)2181 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
2182 bool IsMonotonic) {
2183 // Walk clauses and process safelen/lastprivate.
2184 LoopStack.setParallel(!IsMonotonic);
2185 LoopStack.setVectorizeEnable();
2186 emitSimdlenSafelenClause(*this, D, IsMonotonic);
2187 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2188 if (C->getKind() == OMPC_ORDER_concurrent)
2189 LoopStack.setParallel(/*Enable=*/true);
2190 if ((D.getDirectiveKind() == OMPD_simd ||
2191 (getLangOpts().OpenMPSimd &&
2192 isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2193 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2194 [](const OMPReductionClause *C) {
2195 return C->getModifier() == OMPC_REDUCTION_inscan;
2196 }))
2197 // Disable parallel access in case of prefix sum.
2198 LoopStack.setParallel(/*Enable=*/false);
2199 }
2200
EmitOMPSimdFinal(const OMPLoopDirective & D,const llvm::function_ref<llvm::Value * (CodeGenFunction &)> CondGen)2201 void CodeGenFunction::EmitOMPSimdFinal(
2202 const OMPLoopDirective &D,
2203 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2204 if (!HaveInsertPoint())
2205 return;
2206 llvm::BasicBlock *DoneBB = nullptr;
2207 auto IC = D.counters().begin();
2208 auto IPC = D.private_counters().begin();
2209 for (const Expr *F : D.finals()) {
2210 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2211 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2212 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2213 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2214 OrigVD->hasGlobalStorage() || CED) {
2215 if (!DoneBB) {
2216 if (llvm::Value *Cond = CondGen(*this)) {
2217 // If the first post-update expression is found, emit conditional
2218 // block if it was requested.
2219 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
2220 DoneBB = createBasicBlock(".omp.final.done");
2221 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2222 EmitBlock(ThenBB);
2223 }
2224 }
2225 Address OrigAddr = Address::invalid();
2226 if (CED) {
2227 OrigAddr =
2228 EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
2229 } else {
2230 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2231 /*RefersToEnclosingVariableOrCapture=*/false,
2232 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2233 OrigAddr = EmitLValue(&DRE).getAddress(*this);
2234 }
2235 OMPPrivateScope VarScope(*this);
2236 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2237 (void)VarScope.Privatize();
2238 EmitIgnoredExpr(F);
2239 }
2240 ++IC;
2241 ++IPC;
2242 }
2243 if (DoneBB)
2244 EmitBlock(DoneBB, /*IsFinished=*/true);
2245 }
2246
emitOMPLoopBodyWithStopPoint(CodeGenFunction & CGF,const OMPLoopDirective & S,CodeGenFunction::JumpDest LoopExit)2247 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2248 const OMPLoopDirective &S,
2249 CodeGenFunction::JumpDest LoopExit) {
2250 CGF.EmitOMPLoopBody(S, LoopExit);
2251 CGF.EmitStopPoint(&S);
2252 }
2253
2254 /// Emit a helper variable and return corresponding lvalue.
EmitOMPHelperVar(CodeGenFunction & CGF,const DeclRefExpr * Helper)2255 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2256 const DeclRefExpr *Helper) {
2257 auto VDecl = cast<VarDecl>(Helper->getDecl());
2258 CGF.EmitVarDecl(*VDecl);
2259 return CGF.EmitLValue(Helper);
2260 }
2261
emitCommonSimdLoop(CodeGenFunction & CGF,const OMPLoopDirective & S,const RegionCodeGenTy & SimdInitGen,const RegionCodeGenTy & BodyCodeGen)2262 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2263 const RegionCodeGenTy &SimdInitGen,
2264 const RegionCodeGenTy &BodyCodeGen) {
2265 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2266 PrePostActionTy &) {
2267 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2268 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2269 SimdInitGen(CGF);
2270
2271 BodyCodeGen(CGF);
2272 };
2273 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2274 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2275 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2276
2277 BodyCodeGen(CGF);
2278 };
2279 const Expr *IfCond = nullptr;
2280 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2281 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2282 if (CGF.getLangOpts().OpenMP >= 50 &&
2283 (C->getNameModifier() == OMPD_unknown ||
2284 C->getNameModifier() == OMPD_simd)) {
2285 IfCond = C->getCondition();
2286 break;
2287 }
2288 }
2289 }
2290 if (IfCond) {
2291 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2292 } else {
2293 RegionCodeGenTy ThenRCG(ThenGen);
2294 ThenRCG(CGF);
2295 }
2296 }
2297
emitOMPSimdRegion(CodeGenFunction & CGF,const OMPLoopDirective & S,PrePostActionTy & Action)2298 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2299 PrePostActionTy &Action) {
2300 Action.Enter(CGF);
2301 assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2302 "Expected simd directive");
2303 OMPLoopScope PreInitScope(CGF, S);
2304 // if (PreCond) {
2305 // for (IV in 0..LastIteration) BODY;
2306 // <Final counter/linear vars updates>;
2307 // }
2308 //
2309 if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
2310 isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
2311 isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
2312 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2313 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2314 }
2315
2316 // Emit: if (PreCond) - begin.
2317 // If the condition constant folds and can be elided, avoid emitting the
2318 // whole loop.
2319 bool CondConstant;
2320 llvm::BasicBlock *ContBlock = nullptr;
2321 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2322 if (!CondConstant)
2323 return;
2324 } else {
2325 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
2326 ContBlock = CGF.createBasicBlock("simd.if.end");
2327 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
2328 CGF.getProfileCount(&S));
2329 CGF.EmitBlock(ThenBlock);
2330 CGF.incrementProfileCounter(&S);
2331 }
2332
2333 // Emit the loop iteration variable.
2334 const Expr *IVExpr = S.getIterationVariable();
2335 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
2336 CGF.EmitVarDecl(*IVDecl);
2337 CGF.EmitIgnoredExpr(S.getInit());
2338
2339 // Emit the iterations count variable.
2340 // If it is not a variable, Sema decided to calculate iterations count on
2341 // each iteration (e.g., it is foldable into a constant).
2342 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2343 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2344 // Emit calculation of the iterations count.
2345 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
2346 }
2347
2348 emitAlignedClause(CGF, S);
2349 (void)CGF.EmitOMPLinearClauseInit(S);
2350 {
2351 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2352 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2353 CGF.EmitOMPLinearClause(S, LoopScope);
2354 CGF.EmitOMPPrivateClause(S, LoopScope);
2355 CGF.EmitOMPReductionClauseInit(S, LoopScope);
2356 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2357 CGF, S, CGF.EmitLValue(S.getIterationVariable()));
2358 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2359 (void)LoopScope.Privatize();
2360 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2361 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2362
2363 emitCommonSimdLoop(
2364 CGF, S,
2365 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2366 CGF.EmitOMPSimdInit(S);
2367 },
2368 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2369 CGF.EmitOMPInnerLoop(
2370 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2371 [&S](CodeGenFunction &CGF) {
2372 emitOMPLoopBodyWithStopPoint(CGF, S,
2373 CodeGenFunction::JumpDest());
2374 },
2375 [](CodeGenFunction &) {});
2376 });
2377 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2378 // Emit final copy of the lastprivate variables at the end of loops.
2379 if (HasLastprivateClause)
2380 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2381 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2382 emitPostUpdateForReductionClause(CGF, S,
2383 [](CodeGenFunction &) { return nullptr; });
2384 }
2385 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
2386 // Emit: if (PreCond) - end.
2387 if (ContBlock) {
2388 CGF.EmitBranch(ContBlock);
2389 CGF.EmitBlock(ContBlock, true);
2390 }
2391 }
2392
EmitOMPSimdDirective(const OMPSimdDirective & S)2393 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2394 ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2395 OMPFirstScanLoop = true;
2396 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2397 emitOMPSimdRegion(CGF, S, Action);
2398 };
2399 {
2400 auto LPCRegion =
2401 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2402 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2403 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2404 }
2405 // Check for outer lastprivate conditional update.
2406 checkForLastprivateConditionalUpdate(*this, S);
2407 }
2408
EmitOMPOuterLoop(bool DynamicOrOrdered,bool IsMonotonic,const OMPLoopDirective & S,CodeGenFunction::OMPPrivateScope & LoopScope,const CodeGenFunction::OMPLoopArguments & LoopArgs,const CodeGenFunction::CodeGenLoopTy & CodeGenLoop,const CodeGenFunction::CodeGenOrderedTy & CodeGenOrdered)2409 void CodeGenFunction::EmitOMPOuterLoop(
2410 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2411 CodeGenFunction::OMPPrivateScope &LoopScope,
2412 const CodeGenFunction::OMPLoopArguments &LoopArgs,
2413 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2414 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2415 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2416
2417 const Expr *IVExpr = S.getIterationVariable();
2418 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2419 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2420
2421 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
2422
2423 // Start the loop with a block that tests the condition.
2424 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2425 EmitBlock(CondBlock);
2426 const SourceRange R = S.getSourceRange();
2427 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2428 SourceLocToDebugLoc(R.getEnd()));
2429
2430 llvm::Value *BoolCondVal = nullptr;
2431 if (!DynamicOrOrdered) {
2432 // UB = min(UB, GlobalUB) or
2433 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2434 // 'distribute parallel for')
2435 EmitIgnoredExpr(LoopArgs.EUB);
2436 // IV = LB
2437 EmitIgnoredExpr(LoopArgs.Init);
2438 // IV < UB
2439 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
2440 } else {
2441 BoolCondVal =
2442 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
2443 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
2444 }
2445
2446 // If there are any cleanups between here and the loop-exit scope,
2447 // create a block to stage a loop exit along.
2448 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2449 if (LoopScope.requiresCleanups())
2450 ExitBlock = createBasicBlock("omp.dispatch.cleanup");
2451
2452 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
2453 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
2454 if (ExitBlock != LoopExit.getBlock()) {
2455 EmitBlock(ExitBlock);
2456 EmitBranchThroughCleanup(LoopExit);
2457 }
2458 EmitBlock(LoopBody);
2459
2460 // Emit "IV = LB" (in case of static schedule, we have already calculated new
2461 // LB for loop condition and emitted it above).
2462 if (DynamicOrOrdered)
2463 EmitIgnoredExpr(LoopArgs.Init);
2464
2465 // Create a block for the increment.
2466 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
2467 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2468
2469 emitCommonSimdLoop(
2470 *this, S,
2471 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2472 // Generate !llvm.loop.parallel metadata for loads and stores for loops
2473 // with dynamic/guided scheduling and without ordered clause.
2474 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2475 CGF.LoopStack.setParallel(!IsMonotonic);
2476 if (const auto *C = S.getSingleClause<OMPOrderClause>())
2477 if (C->getKind() == OMPC_ORDER_concurrent)
2478 CGF.LoopStack.setParallel(/*Enable=*/true);
2479 } else {
2480 CGF.EmitOMPSimdInit(S, IsMonotonic);
2481 }
2482 },
2483 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2484 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2485 SourceLocation Loc = S.getBeginLoc();
2486 // when 'distribute' is not combined with a 'for':
2487 // while (idx <= UB) { BODY; ++idx; }
2488 // when 'distribute' is combined with a 'for'
2489 // (e.g. 'distribute parallel for')
2490 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2491 CGF.EmitOMPInnerLoop(
2492 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
2493 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2494 CodeGenLoop(CGF, S, LoopExit);
2495 },
2496 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2497 CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2498 });
2499 });
2500
2501 EmitBlock(Continue.getBlock());
2502 BreakContinueStack.pop_back();
2503 if (!DynamicOrOrdered) {
2504 // Emit "LB = LB + Stride", "UB = UB + Stride".
2505 EmitIgnoredExpr(LoopArgs.NextLB);
2506 EmitIgnoredExpr(LoopArgs.NextUB);
2507 }
2508
2509 EmitBranch(CondBlock);
2510 LoopStack.pop();
2511 // Emit the fall-through block.
2512 EmitBlock(LoopExit.getBlock());
2513
2514 // Tell the runtime we are done.
2515 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
2516 if (!DynamicOrOrdered)
2517 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2518 S.getDirectiveKind());
2519 };
2520 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2521 }
2522
EmitOMPForOuterLoop(const OpenMPScheduleTy & ScheduleKind,bool IsMonotonic,const OMPLoopDirective & S,OMPPrivateScope & LoopScope,bool Ordered,const OMPLoopArguments & LoopArgs,const CodeGenDispatchBoundsTy & CGDispatchBounds)2523 void CodeGenFunction::EmitOMPForOuterLoop(
2524 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2525 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2526 const OMPLoopArguments &LoopArgs,
2527 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2528 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2529
2530 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2531 const bool DynamicOrOrdered =
2532 Ordered || RT.isDynamic(ScheduleKind.Schedule);
2533
2534 assert((Ordered ||
2535 !RT.isStaticNonchunked(ScheduleKind.Schedule,
2536 LoopArgs.Chunk != nullptr)) &&
2537 "static non-chunked schedule does not need outer loop");
2538
2539 // Emit outer loop.
2540 //
2541 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2542 // When schedule(dynamic,chunk_size) is specified, the iterations are
2543 // distributed to threads in the team in chunks as the threads request them.
2544 // Each thread executes a chunk of iterations, then requests another chunk,
2545 // until no chunks remain to be distributed. Each chunk contains chunk_size
2546 // iterations, except for the last chunk to be distributed, which may have
2547 // fewer iterations. When no chunk_size is specified, it defaults to 1.
2548 //
2549 // When schedule(guided,chunk_size) is specified, the iterations are assigned
2550 // to threads in the team in chunks as the executing threads request them.
2551 // Each thread executes a chunk of iterations, then requests another chunk,
2552 // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2553 // each chunk is proportional to the number of unassigned iterations divided
2554 // by the number of threads in the team, decreasing to 1. For a chunk_size
2555 // with value k (greater than 1), the size of each chunk is determined in the
2556 // same way, with the restriction that the chunks do not contain fewer than k
2557 // iterations (except for the last chunk to be assigned, which may have fewer
2558 // than k iterations).
2559 //
2560 // When schedule(auto) is specified, the decision regarding scheduling is
2561 // delegated to the compiler and/or runtime system. The programmer gives the
2562 // implementation the freedom to choose any possible mapping of iterations to
2563 // threads in the team.
2564 //
2565 // When schedule(runtime) is specified, the decision regarding scheduling is
2566 // deferred until run time, and the schedule and chunk size are taken from the
2567 // run-sched-var ICV. If the ICV is set to auto, the schedule is
2568 // implementation defined
2569 //
2570 // while(__kmpc_dispatch_next(&LB, &UB)) {
2571 // idx = LB;
2572 // while (idx <= UB) { BODY; ++idx;
2573 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2574 // } // inner loop
2575 // }
2576 //
2577 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2578 // When schedule(static, chunk_size) is specified, iterations are divided into
2579 // chunks of size chunk_size, and the chunks are assigned to the threads in
2580 // the team in a round-robin fashion in the order of the thread number.
2581 //
2582 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2583 // while (idx <= UB) { BODY; ++idx; } // inner loop
2584 // LB = LB + ST;
2585 // UB = UB + ST;
2586 // }
2587 //
2588
2589 const Expr *IVExpr = S.getIterationVariable();
2590 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2591 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2592
2593 if (DynamicOrOrdered) {
2594 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2595 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2596 llvm::Value *LBVal = DispatchBounds.first;
2597 llvm::Value *UBVal = DispatchBounds.second;
2598 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
2599 LoopArgs.Chunk};
2600 RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
2601 IVSigned, Ordered, DipatchRTInputValues);
2602 } else {
2603 CGOpenMPRuntime::StaticRTInput StaticInit(
2604 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
2605 LoopArgs.ST, LoopArgs.Chunk);
2606 RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
2607 ScheduleKind, StaticInit);
2608 }
2609
2610 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
2611 const unsigned IVSize,
2612 const bool IVSigned) {
2613 if (Ordered) {
2614 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
2615 IVSigned);
2616 }
2617 };
2618
2619 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
2620 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
2621 OuterLoopArgs.IncExpr = S.getInc();
2622 OuterLoopArgs.Init = S.getInit();
2623 OuterLoopArgs.Cond = S.getCond();
2624 OuterLoopArgs.NextLB = S.getNextLowerBound();
2625 OuterLoopArgs.NextUB = S.getNextUpperBound();
2626 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
2627 emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
2628 }
2629
emitEmptyOrdered(CodeGenFunction &,SourceLocation Loc,const unsigned IVSize,const bool IVSigned)2630 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
2631 const unsigned IVSize, const bool IVSigned) {}
2632
EmitOMPDistributeOuterLoop(OpenMPDistScheduleClauseKind ScheduleKind,const OMPLoopDirective & S,OMPPrivateScope & LoopScope,const OMPLoopArguments & LoopArgs,const CodeGenLoopTy & CodeGenLoopContent)2633 void CodeGenFunction::EmitOMPDistributeOuterLoop(
2634 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2635 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2636 const CodeGenLoopTy &CodeGenLoopContent) {
2637
2638 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2639
2640 // Emit outer loop.
2641 // Same behavior as a OMPForOuterLoop, except that schedule cannot be
2642 // dynamic
2643 //
2644
2645 const Expr *IVExpr = S.getIterationVariable();
2646 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2647 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2648
2649 CGOpenMPRuntime::StaticRTInput StaticInit(
2650 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2651 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2652 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2653
2654 // for combined 'distribute' and 'for' the increment expression of distribute
2655 // is stored in DistInc. For 'distribute' alone, it is in Inc.
2656 Expr *IncExpr;
2657 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2658 IncExpr = S.getDistInc();
2659 else
2660 IncExpr = S.getInc();
2661
2662 // this routine is shared by 'omp distribute parallel for' and
2663 // 'omp distribute': select the right EUB expression depending on the
2664 // directive
2665 OMPLoopArguments OuterLoopArgs;
2666 OuterLoopArgs.LB = LoopArgs.LB;
2667 OuterLoopArgs.UB = LoopArgs.UB;
2668 OuterLoopArgs.ST = LoopArgs.ST;
2669 OuterLoopArgs.IL = LoopArgs.IL;
2670 OuterLoopArgs.Chunk = LoopArgs.Chunk;
2671 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2672 ? S.getCombinedEnsureUpperBound()
2673 : S.getEnsureUpperBound();
2674 OuterLoopArgs.IncExpr = IncExpr;
2675 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2676 ? S.getCombinedInit()
2677 : S.getInit();
2678 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2679 ? S.getCombinedCond()
2680 : S.getCond();
2681 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2682 ? S.getCombinedNextLowerBound()
2683 : S.getNextLowerBound();
2684 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2685 ? S.getCombinedNextUpperBound()
2686 : S.getNextUpperBound();
2687
2688 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2689 LoopScope, OuterLoopArgs, CodeGenLoopContent,
2690 emitEmptyOrdered);
2691 }
2692
2693 static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S)2694 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2695 const OMPExecutableDirective &S) {
2696 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2697 LValue LB =
2698 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2699 LValue UB =
2700 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2701
2702 // When composing 'distribute' with 'for' (e.g. as in 'distribute
2703 // parallel for') we need to use the 'distribute'
2704 // chunk lower and upper bounds rather than the whole loop iteration
2705 // space. These are parameters to the outlined function for 'parallel'
2706 // and we copy the bounds of the previous schedule into the
2707 // the current ones.
2708 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2709 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2710 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2711 PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2712 PrevLBVal = CGF.EmitScalarConversion(
2713 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2714 LS.getIterationVariable()->getType(),
2715 LS.getPrevLowerBoundVariable()->getExprLoc());
2716 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2717 PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2718 PrevUBVal = CGF.EmitScalarConversion(
2719 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2720 LS.getIterationVariable()->getType(),
2721 LS.getPrevUpperBoundVariable()->getExprLoc());
2722
2723 CGF.EmitStoreOfScalar(PrevLBVal, LB);
2724 CGF.EmitStoreOfScalar(PrevUBVal, UB);
2725
2726 return {LB, UB};
2727 }
2728
2729 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2730 /// we need to use the LB and UB expressions generated by the worksharing
2731 /// code generation support, whereas in non combined situations we would
2732 /// just emit 0 and the LastIteration expression
2733 /// This function is necessary due to the difference of the LB and UB
2734 /// types for the RT emission routines for 'for_static_init' and
2735 /// 'for_dispatch_init'
2736 static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S,Address LB,Address UB)2737 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2738 const OMPExecutableDirective &S,
2739 Address LB, Address UB) {
2740 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2741 const Expr *IVExpr = LS.getIterationVariable();
2742 // when implementing a dynamic schedule for a 'for' combined with a
2743 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2744 // is not normalized as each team only executes its own assigned
2745 // distribute chunk
2746 QualType IteratorTy = IVExpr->getType();
2747 llvm::Value *LBVal =
2748 CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2749 llvm::Value *UBVal =
2750 CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2751 return {LBVal, UBVal};
2752 }
2753
emitDistributeParallelForDistributeInnerBoundParams(CodeGenFunction & CGF,const OMPExecutableDirective & S,llvm::SmallVectorImpl<llvm::Value * > & CapturedVars)2754 static void emitDistributeParallelForDistributeInnerBoundParams(
2755 CodeGenFunction &CGF, const OMPExecutableDirective &S,
2756 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2757 const auto &Dir = cast<OMPLoopDirective>(S);
2758 LValue LB =
2759 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2760 llvm::Value *LBCast =
2761 CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
2762 CGF.SizeTy, /*isSigned=*/false);
2763 CapturedVars.push_back(LBCast);
2764 LValue UB =
2765 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2766
2767 llvm::Value *UBCast =
2768 CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
2769 CGF.SizeTy, /*isSigned=*/false);
2770 CapturedVars.push_back(UBCast);
2771 }
2772
2773 static void
emitInnerParallelForWhenCombined(CodeGenFunction & CGF,const OMPLoopDirective & S,CodeGenFunction::JumpDest LoopExit)2774 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2775 const OMPLoopDirective &S,
2776 CodeGenFunction::JumpDest LoopExit) {
2777 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2778 PrePostActionTy &Action) {
2779 Action.Enter(CGF);
2780 bool HasCancel = false;
2781 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2782 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2783 HasCancel = D->hasCancel();
2784 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2785 HasCancel = D->hasCancel();
2786 else if (const auto *D =
2787 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2788 HasCancel = D->hasCancel();
2789 }
2790 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2791 HasCancel);
2792 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2793 emitDistributeParallelForInnerBounds,
2794 emitDistributeParallelForDispatchBounds);
2795 };
2796
2797 emitCommonOMPParallelDirective(
2798 CGF, S,
2799 isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2800 CGInlinedWorksharingLoop,
2801 emitDistributeParallelForDistributeInnerBoundParams);
2802 }
2803
EmitOMPDistributeParallelForDirective(const OMPDistributeParallelForDirective & S)2804 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2805 const OMPDistributeParallelForDirective &S) {
2806 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2807 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2808 S.getDistInc());
2809 };
2810 OMPLexicalScope Scope(*this, S, OMPD_parallel);
2811 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2812 }
2813
EmitOMPDistributeParallelForSimdDirective(const OMPDistributeParallelForSimdDirective & S)2814 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2815 const OMPDistributeParallelForSimdDirective &S) {
2816 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2817 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2818 S.getDistInc());
2819 };
2820 OMPLexicalScope Scope(*this, S, OMPD_parallel);
2821 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2822 }
2823
EmitOMPDistributeSimdDirective(const OMPDistributeSimdDirective & S)2824 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2825 const OMPDistributeSimdDirective &S) {
2826 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2827 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2828 };
2829 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2830 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2831 }
2832
EmitOMPTargetSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetSimdDirective & S)2833 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2834 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2835 // Emit SPMD target parallel for region as a standalone region.
2836 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2837 emitOMPSimdRegion(CGF, S, Action);
2838 };
2839 llvm::Function *Fn;
2840 llvm::Constant *Addr;
2841 // Emit target region as a standalone region.
2842 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2843 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2844 assert(Fn && Addr && "Target device function emission failed.");
2845 }
2846
EmitOMPTargetSimdDirective(const OMPTargetSimdDirective & S)2847 void CodeGenFunction::EmitOMPTargetSimdDirective(
2848 const OMPTargetSimdDirective &S) {
2849 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2850 emitOMPSimdRegion(CGF, S, Action);
2851 };
2852 emitCommonOMPTargetDirective(*this, S, CodeGen);
2853 }
2854
2855 namespace {
2856 struct ScheduleKindModifiersTy {
2857 OpenMPScheduleClauseKind Kind;
2858 OpenMPScheduleClauseModifier M1;
2859 OpenMPScheduleClauseModifier M2;
ScheduleKindModifiersTy__anone1cc1baf3e11::ScheduleKindModifiersTy2860 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2861 OpenMPScheduleClauseModifier M1,
2862 OpenMPScheduleClauseModifier M2)
2863 : Kind(Kind), M1(M1), M2(M2) {}
2864 };
2865 } // namespace
2866
EmitOMPWorksharingLoop(const OMPLoopDirective & S,Expr * EUB,const CodeGenLoopBoundsTy & CodeGenLoopBounds,const CodeGenDispatchBoundsTy & CGDispatchBounds)2867 bool CodeGenFunction::EmitOMPWorksharingLoop(
2868 const OMPLoopDirective &S, Expr *EUB,
2869 const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2870 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2871 // Emit the loop iteration variable.
2872 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2873 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
2874 EmitVarDecl(*IVDecl);
2875
2876 // Emit the iterations count variable.
2877 // If it is not a variable, Sema decided to calculate iterations count on each
2878 // iteration (e.g., it is foldable into a constant).
2879 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2880 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2881 // Emit calculation of the iterations count.
2882 EmitIgnoredExpr(S.getCalcLastIteration());
2883 }
2884
2885 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2886
2887 bool HasLastprivateClause;
2888 // Check pre-condition.
2889 {
2890 OMPLoopScope PreInitScope(*this, S);
2891 // Skip the entire loop if we don't meet the precondition.
2892 // If the condition constant folds and can be elided, avoid emitting the
2893 // whole loop.
2894 bool CondConstant;
2895 llvm::BasicBlock *ContBlock = nullptr;
2896 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2897 if (!CondConstant)
2898 return false;
2899 } else {
2900 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
2901 ContBlock = createBasicBlock("omp.precond.end");
2902 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2903 getProfileCount(&S));
2904 EmitBlock(ThenBlock);
2905 incrementProfileCounter(&S);
2906 }
2907
2908 RunCleanupsScope DoacrossCleanupScope(*this);
2909 bool Ordered = false;
2910 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2911 if (OrderedClause->getNumForLoops())
2912 RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
2913 else
2914 Ordered = true;
2915 }
2916
2917 llvm::DenseSet<const Expr *> EmittedFinals;
2918 emitAlignedClause(*this, S);
2919 bool HasLinears = EmitOMPLinearClauseInit(S);
2920 // Emit helper vars inits.
2921
2922 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2923 LValue LB = Bounds.first;
2924 LValue UB = Bounds.second;
2925 LValue ST =
2926 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2927 LValue IL =
2928 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2929
2930 // Emit 'then' code.
2931 {
2932 OMPPrivateScope LoopScope(*this);
2933 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2934 // Emit implicit barrier to synchronize threads and avoid data races on
2935 // initialization of firstprivate variables and post-update of
2936 // lastprivate variables.
2937 CGM.getOpenMPRuntime().emitBarrierCall(
2938 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
2939 /*ForceSimpleCall=*/true);
2940 }
2941 EmitOMPPrivateClause(S, LoopScope);
2942 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2943 *this, S, EmitLValue(S.getIterationVariable()));
2944 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2945 EmitOMPReductionClauseInit(S, LoopScope);
2946 EmitOMPPrivateLoopCounters(S, LoopScope);
2947 EmitOMPLinearClause(S, LoopScope);
2948 (void)LoopScope.Privatize();
2949 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2950 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
2951
2952 // Detect the loop schedule kind and chunk.
2953 const Expr *ChunkExpr = nullptr;
2954 OpenMPScheduleTy ScheduleKind;
2955 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
2956 ScheduleKind.Schedule = C->getScheduleKind();
2957 ScheduleKind.M1 = C->getFirstScheduleModifier();
2958 ScheduleKind.M2 = C->getSecondScheduleModifier();
2959 ChunkExpr = C->getChunkSize();
2960 } else {
2961 // Default behaviour for schedule clause.
2962 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
2963 *this, S, ScheduleKind.Schedule, ChunkExpr);
2964 }
2965 bool HasChunkSizeOne = false;
2966 llvm::Value *Chunk = nullptr;
2967 if (ChunkExpr) {
2968 Chunk = EmitScalarExpr(ChunkExpr);
2969 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
2970 S.getIterationVariable()->getType(),
2971 S.getBeginLoc());
2972 Expr::EvalResult Result;
2973 if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
2974 llvm::APSInt EvaluatedChunk = Result.Val.getInt();
2975 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
2976 }
2977 }
2978 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2979 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2980 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2981 // If the static schedule kind is specified or if the ordered clause is
2982 // specified, and if no monotonic modifier is specified, the effect will
2983 // be as if the monotonic modifier was specified.
2984 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
2985 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
2986 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
2987 bool IsMonotonic =
2988 Ordered ||
2989 ((ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2990 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) &&
2991 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
2992 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
2993 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2994 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2995 if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
2996 /* Chunked */ Chunk != nullptr) ||
2997 StaticChunkedOne) &&
2998 !Ordered) {
2999 JumpDest LoopExit =
3000 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3001 emitCommonSimdLoop(
3002 *this, S,
3003 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
3004 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3005 CGF.EmitOMPSimdInit(S, IsMonotonic);
3006 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3007 if (C->getKind() == OMPC_ORDER_concurrent)
3008 CGF.LoopStack.setParallel(/*Enable=*/true);
3009 }
3010 },
3011 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3012 &S, ScheduleKind, LoopExit,
3013 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3014 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3015 // When no chunk_size is specified, the iteration space is divided
3016 // into chunks that are approximately equal in size, and at most
3017 // one chunk is distributed to each thread. Note that the size of
3018 // the chunks is unspecified in this case.
3019 CGOpenMPRuntime::StaticRTInput StaticInit(
3020 IVSize, IVSigned, Ordered, IL.getAddress(CGF),
3021 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
3022 StaticChunkedOne ? Chunk : nullptr);
3023 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3024 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
3025 StaticInit);
3026 // UB = min(UB, GlobalUB);
3027 if (!StaticChunkedOne)
3028 CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
3029 // IV = LB;
3030 CGF.EmitIgnoredExpr(S.getInit());
3031 // For unchunked static schedule generate:
3032 //
3033 // while (idx <= UB) {
3034 // BODY;
3035 // ++idx;
3036 // }
3037 //
3038 // For static schedule with chunk one:
3039 //
3040 // while (IV <= PrevUB) {
3041 // BODY;
3042 // IV += ST;
3043 // }
3044 CGF.EmitOMPInnerLoop(
3045 S, LoopScope.requiresCleanups(),
3046 StaticChunkedOne ? S.getCombinedParForInDistCond()
3047 : S.getCond(),
3048 StaticChunkedOne ? S.getDistInc() : S.getInc(),
3049 [&S, LoopExit](CodeGenFunction &CGF) {
3050 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3051 },
3052 [](CodeGenFunction &) {});
3053 });
3054 EmitBlock(LoopExit.getBlock());
3055 // Tell the runtime we are done.
3056 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3057 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3058 S.getDirectiveKind());
3059 };
3060 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
3061 } else {
3062 // Emit the outer loop, which requests its work chunk [LB..UB] from
3063 // runtime and runs the inner loop to process it.
3064 const OMPLoopArguments LoopArguments(
3065 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
3066 IL.getAddress(*this), Chunk, EUB);
3067 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3068 LoopArguments, CGDispatchBounds);
3069 }
3070 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3071 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3072 return CGF.Builder.CreateIsNotNull(
3073 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3074 });
3075 }
3076 EmitOMPReductionClauseFinal(
3077 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
3078 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3079 : /*Parallel only*/ OMPD_parallel);
3080 // Emit post-update of the reduction variables if IsLastIter != 0.
3081 emitPostUpdateForReductionClause(
3082 *this, S, [IL, &S](CodeGenFunction &CGF) {
3083 return CGF.Builder.CreateIsNotNull(
3084 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3085 });
3086 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3087 if (HasLastprivateClause)
3088 EmitOMPLastprivateClauseFinal(
3089 S, isOpenMPSimdDirective(S.getDirectiveKind()),
3090 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3091 }
3092 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
3093 return CGF.Builder.CreateIsNotNull(
3094 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3095 });
3096 DoacrossCleanupScope.ForceCleanup();
3097 // We're now done with the loop, so jump to the continuation block.
3098 if (ContBlock) {
3099 EmitBranch(ContBlock);
3100 EmitBlock(ContBlock, /*IsFinished=*/true);
3101 }
3102 }
3103 return HasLastprivateClause;
3104 }
3105
3106 /// The following two functions generate expressions for the loop lower
3107 /// and upper bounds in case of static and dynamic (dispatch) schedule
3108 /// of the associated 'for' or 'distribute' loop.
3109 static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S)3110 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3111 const auto &LS = cast<OMPLoopDirective>(S);
3112 LValue LB =
3113 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3114 LValue UB =
3115 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3116 return {LB, UB};
3117 }
3118
3119 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3120 /// consider the lower and upper bound expressions generated by the
3121 /// worksharing loop support, but we use 0 and the iteration space size as
3122 /// constants
3123 static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S,Address LB,Address UB)3124 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3125 Address LB, Address UB) {
3126 const auto &LS = cast<OMPLoopDirective>(S);
3127 const Expr *IVExpr = LS.getIterationVariable();
3128 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3129 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3130 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3131 return {LBVal, UBVal};
3132 }
3133
3134 /// Emits the code for the directive with inscan reductions.
3135 /// The code is the following:
3136 /// \code
3137 /// size num_iters = <num_iters>;
3138 /// <type> buffer[num_iters];
3139 /// #pragma omp ...
3140 /// for (i: 0..<num_iters>) {
3141 /// <input phase>;
3142 /// buffer[i] = red;
3143 /// }
3144 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3145 /// for (size cnt = last_iter; cnt >= pow(2, k); --k)
3146 /// buffer[i] op= buffer[i-pow(2,k)];
3147 /// #pragma omp ...
3148 /// for (0..<num_iters>) {
3149 /// red = InclusiveScan ? buffer[i] : buffer[i-1];
3150 /// <scan phase>;
3151 /// }
3152 /// \endcode
emitScanBasedDirective(CodeGenFunction & CGF,const OMPLoopDirective & S,llvm::function_ref<llvm::Value * (CodeGenFunction &)> NumIteratorsGen,llvm::function_ref<void (CodeGenFunction &)> FirstGen,llvm::function_ref<void (CodeGenFunction &)> SecondGen)3153 static void emitScanBasedDirective(
3154 CodeGenFunction &CGF, const OMPLoopDirective &S,
3155 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3156 llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3157 llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3158 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3159 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3160 SmallVector<const Expr *, 4> Shareds;
3161 SmallVector<const Expr *, 4> Privates;
3162 SmallVector<const Expr *, 4> ReductionOps;
3163 SmallVector<const Expr *, 4> LHSs;
3164 SmallVector<const Expr *, 4> RHSs;
3165 SmallVector<const Expr *, 4> CopyOps;
3166 SmallVector<const Expr *, 4> CopyArrayTemps;
3167 SmallVector<const Expr *, 4> CopyArrayElems;
3168 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3169 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3170 "Only inscan reductions are expected.");
3171 Shareds.append(C->varlist_begin(), C->varlist_end());
3172 Privates.append(C->privates().begin(), C->privates().end());
3173 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3174 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3175 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3176 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
3177 CopyArrayTemps.append(C->copy_array_temps().begin(),
3178 C->copy_array_temps().end());
3179 CopyArrayElems.append(C->copy_array_elems().begin(),
3180 C->copy_array_elems().end());
3181 }
3182 {
3183 // Emit buffers for each reduction variables.
3184 // ReductionCodeGen is required to emit correctly the code for array
3185 // reductions.
3186 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3187 unsigned Count = 0;
3188 auto *ITA = CopyArrayTemps.begin();
3189 for (const Expr *IRef : Privates) {
3190 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3191 // Emit variably modified arrays, used for arrays/array sections
3192 // reductions.
3193 if (PrivateVD->getType()->isVariablyModifiedType()) {
3194 RedCG.emitSharedOrigLValue(CGF, Count);
3195 RedCG.emitAggregateType(CGF, Count);
3196 }
3197 CodeGenFunction::OpaqueValueMapping DimMapping(
3198 CGF,
3199 cast<OpaqueValueExpr>(
3200 cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3201 ->getSizeExpr()),
3202 RValue::get(OMPScanNumIterations));
3203 // Emit temp buffer.
3204 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3205 ++ITA;
3206 ++Count;
3207 }
3208 }
3209 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3210 {
3211 // Emit loop with input phase:
3212 // #pragma omp ...
3213 // for (i: 0..<num_iters>) {
3214 // <input phase>;
3215 // buffer[i] = red;
3216 // }
3217 CGF.OMPFirstScanLoop = true;
3218 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3219 FirstGen(CGF);
3220 }
3221 // Emit prefix reduction:
3222 // for (int k = 0; k <= ceil(log2(n)); ++k)
3223 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3224 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3225 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
3226 llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3227 llvm::Value *Arg =
3228 CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3229 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3230 F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3231 LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3232 LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
3233 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3234 OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3235 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
3236 CGF.EmitBlock(LoopBB);
3237 auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3238 // size pow2k = 1;
3239 auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3240 Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3241 Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3242 // for (size i = n - 1; i >= 2 ^ k; --i)
3243 // tmp[i] op= tmp[i-pow2k];
3244 llvm::BasicBlock *InnerLoopBB =
3245 CGF.createBasicBlock("omp.inner.log.scan.body");
3246 llvm::BasicBlock *InnerExitBB =
3247 CGF.createBasicBlock("omp.inner.log.scan.exit");
3248 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3249 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3250 CGF.EmitBlock(InnerLoopBB);
3251 auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3252 IVal->addIncoming(NMin1, LoopBB);
3253 {
3254 CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3255 auto *ILHS = LHSs.begin();
3256 auto *IRHS = RHSs.begin();
3257 for (const Expr *CopyArrayElem : CopyArrayElems) {
3258 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3259 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3260 Address LHSAddr = Address::invalid();
3261 {
3262 CodeGenFunction::OpaqueValueMapping IdxMapping(
3263 CGF,
3264 cast<OpaqueValueExpr>(
3265 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3266 RValue::get(IVal));
3267 LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3268 }
3269 PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
3270 Address RHSAddr = Address::invalid();
3271 {
3272 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
3273 CodeGenFunction::OpaqueValueMapping IdxMapping(
3274 CGF,
3275 cast<OpaqueValueExpr>(
3276 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3277 RValue::get(OffsetIVal));
3278 RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3279 }
3280 PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
3281 ++ILHS;
3282 ++IRHS;
3283 }
3284 PrivScope.Privatize();
3285 CGF.CGM.getOpenMPRuntime().emitReduction(
3286 CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3287 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
3288 }
3289 llvm::Value *NextIVal =
3290 CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
3291 IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
3292 CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
3293 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3294 CGF.EmitBlock(InnerExitBB);
3295 llvm::Value *Next =
3296 CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
3297 Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
3298 // pow2k <<= 1;
3299 llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
3300 Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
3301 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
3302 CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
3303 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
3304 CGF.EmitBlock(ExitBB);
3305
3306 CGF.OMPFirstScanLoop = false;
3307 SecondGen(CGF);
3308 }
3309
emitWorksharingDirective(CodeGenFunction & CGF,const OMPLoopDirective & S,bool HasCancel)3310 static bool emitWorksharingDirective(CodeGenFunction &CGF,
3311 const OMPLoopDirective &S,
3312 bool HasCancel) {
3313 bool HasLastprivates;
3314 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3315 [](const OMPReductionClause *C) {
3316 return C->getModifier() == OMPC_REDUCTION_inscan;
3317 })) {
3318 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3319 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3320 OMPLoopScope LoopScope(CGF, S);
3321 return CGF.EmitScalarExpr(S.getNumIterations());
3322 };
3323 const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3324 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3325 CGF, S.getDirectiveKind(), HasCancel);
3326 (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3327 emitForLoopBounds,
3328 emitDispatchForLoopBounds);
3329 // Emit an implicit barrier at the end.
3330 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3331 OMPD_for);
3332 };
3333 const auto &&SecondGen = [&S, HasCancel,
3334 &HasLastprivates](CodeGenFunction &CGF) {
3335 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3336 CGF, S.getDirectiveKind(), HasCancel);
3337 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3338 emitForLoopBounds,
3339 emitDispatchForLoopBounds);
3340 };
3341 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3342 } else {
3343 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3344 HasCancel);
3345 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3346 emitForLoopBounds,
3347 emitDispatchForLoopBounds);
3348 }
3349 return HasLastprivates;
3350 }
3351
EmitOMPForDirective(const OMPForDirective & S)3352 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3353 bool HasLastprivates = false;
3354 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3355 PrePostActionTy &) {
3356 HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
3357 };
3358 {
3359 auto LPCRegion =
3360 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3361 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3362 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
3363 S.hasCancel());
3364 }
3365
3366 // Emit an implicit barrier at the end.
3367 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3368 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3369 // Check for outer lastprivate conditional update.
3370 checkForLastprivateConditionalUpdate(*this, S);
3371 }
3372
EmitOMPForSimdDirective(const OMPForSimdDirective & S)3373 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3374 bool HasLastprivates = false;
3375 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3376 PrePostActionTy &) {
3377 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3378 };
3379 {
3380 auto LPCRegion =
3381 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3382 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3383 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3384 }
3385
3386 // Emit an implicit barrier at the end.
3387 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3388 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3389 // Check for outer lastprivate conditional update.
3390 checkForLastprivateConditionalUpdate(*this, S);
3391 }
3392
createSectionLVal(CodeGenFunction & CGF,QualType Ty,const Twine & Name,llvm::Value * Init=nullptr)3393 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3394 const Twine &Name,
3395 llvm::Value *Init = nullptr) {
3396 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
3397 if (Init)
3398 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
3399 return LVal;
3400 }
3401
/// Emits the body of a sections-style directive \p S as a statically
/// scheduled loop over section indices whose body is a switch on the loop
/// counter, one case per section.
///
/// When the captured statement is a CompoundStmt, each of its children
/// becomes one case; otherwise the captured statement itself is the single
/// case 0. After the inlined region, an extra barrier is emitted only when
/// lastprivates were found and \p S has a 'nowait' clause.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, CapturedStmt, CS,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    const ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound: index of the last section (0 when the captured
    // statement is not a compound statement).
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(CS->size() - 1)
                                         : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Opaque expressions standing in for IV and UB, so that the loop
    // condition and increment below can be built as AST nodes.
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator *Cond = BinaryOperator::Create(
        C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary,
        S.getBeginLoc(), FPOptionsOverride());
    // Increment for loop counter.
    UnaryOperator *Inc = UnaryOperator::Create(
        C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
        S.getBeginLoc(), true, FPOptionsOverride());
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        // One case per child of the compound statement, numbered from 0.
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // The captured statement is not a compound statement: emit it as the
        // only case.
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
        LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  // Only 'sections' and 'parallel sections' directives are queried for
  // cancellation; for any other directive kind HasCancel stays false.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // the final copy of lastprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}
3547
EmitOMPSectionsDirective(const OMPSectionsDirective & S)3548 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
3549 {
3550 auto LPCRegion =
3551 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3552 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3553 EmitSections(S);
3554 }
3555 // Emit an implicit barrier at the end.
3556 if (!S.getSingleClause<OMPNowaitClause>()) {
3557 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3558 OMPD_sections);
3559 }
3560 // Check for outer lastprivate conditional update.
3561 checkForLastprivateConditionalUpdate(*this, S);
3562 }
3563
EmitOMPSectionDirective(const OMPSectionDirective & S)3564 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
3565 LexicalScope Scope(*this, S.getSourceRange());
3566 EmitStopPoint(&S);
3567 EmitStmt(S.getAssociatedStmt());
3568 }
3569
EmitOMPSingleDirective(const OMPSingleDirective & S)3570 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
3571 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
3572 llvm::SmallVector<const Expr *, 8> DestExprs;
3573 llvm::SmallVector<const Expr *, 8> SrcExprs;
3574 llvm::SmallVector<const Expr *, 8> AssignmentOps;
3575 // Check if there are any 'copyprivate' clauses associated with this
3576 // 'single' construct.
3577 // Build a list of copyprivate variables along with helper expressions
3578 // (<source>, <destination>, <destination>=<source> expressions)
3579 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
3580 CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
3581 DestExprs.append(C->destination_exprs().begin(),
3582 C->destination_exprs().end());
3583 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
3584 AssignmentOps.append(C->assignment_ops().begin(),
3585 C->assignment_ops().end());
3586 }
3587 // Emit code for 'single' region along with 'copyprivate' clauses
3588 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3589 Action.Enter(CGF);
3590 OMPPrivateScope SingleScope(CGF);
3591 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
3592 CGF.EmitOMPPrivateClause(S, SingleScope);
3593 (void)SingleScope.Privatize();
3594 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3595 };
3596 {
3597 auto LPCRegion =
3598 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3599 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3600 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
3601 CopyprivateVars, DestExprs,
3602 SrcExprs, AssignmentOps);
3603 }
3604 // Emit an implicit barrier at the end (to avoid data race on firstprivate
3605 // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
3606 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
3607 CGM.getOpenMPRuntime().emitBarrierCall(
3608 *this, S.getBeginLoc(),
3609 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
3610 }
3611 // Check for outer lastprivate conditional update.
3612 checkForLastprivateConditionalUpdate(*this, S);
3613 }
3614
emitMaster(CodeGenFunction & CGF,const OMPExecutableDirective & S)3615 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3616 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3617 Action.Enter(CGF);
3618 CGF.EmitStmt(S.getRawStmt());
3619 };
3620 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3621 }
3622
EmitOMPMasterDirective(const OMPMasterDirective & S)3623 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
3624 if (CGM.getLangOpts().OpenMPIRBuilder) {
3625 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3626 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3627
3628 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
3629
3630 auto FiniCB = [this](InsertPointTy IP) {
3631 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3632 };
3633
3634 auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
3635 InsertPointTy CodeGenIP,
3636 llvm::BasicBlock &FiniBB) {
3637 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3638 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
3639 CodeGenIP, FiniBB);
3640 };
3641
3642 LexicalScope Scope(*this, S.getSourceRange());
3643 EmitStopPoint(&S);
3644 Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
3645
3646 return;
3647 }
3648 LexicalScope Scope(*this, S.getSourceRange());
3649 EmitStopPoint(&S);
3650 emitMaster(*this, S);
3651 }
3652
EmitOMPCriticalDirective(const OMPCriticalDirective & S)3653 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
3654 if (CGM.getLangOpts().OpenMPIRBuilder) {
3655 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3656 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3657
3658 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
3659 const Expr *Hint = nullptr;
3660 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
3661 Hint = HintClause->getHint();
3662
3663 // TODO: This is slightly different from what's currently being done in
3664 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
3665 // about typing is final.
3666 llvm::Value *HintInst = nullptr;
3667 if (Hint)
3668 HintInst =
3669 Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
3670
3671 auto FiniCB = [this](InsertPointTy IP) {
3672 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3673 };
3674
3675 auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
3676 InsertPointTy CodeGenIP,
3677 llvm::BasicBlock &FiniBB) {
3678 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3679 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
3680 CodeGenIP, FiniBB);
3681 };
3682
3683 LexicalScope Scope(*this, S.getSourceRange());
3684 EmitStopPoint(&S);
3685 Builder.restoreIP(OMPBuilder.createCritical(
3686 Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
3687 HintInst));
3688
3689 return;
3690 }
3691
3692 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3693 Action.Enter(CGF);
3694 CGF.EmitStmt(S.getAssociatedStmt());
3695 };
3696 const Expr *Hint = nullptr;
3697 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
3698 Hint = HintClause->getHint();
3699 LexicalScope Scope(*this, S.getSourceRange());
3700 EmitStopPoint(&S);
3701 CGM.getOpenMPRuntime().emitCriticalRegion(*this,
3702 S.getDirectiveName().getAsString(),
3703 CodeGen, S.getBeginLoc(), Hint);
3704 }
3705
EmitOMPParallelForDirective(const OMPParallelForDirective & S)3706 void CodeGenFunction::EmitOMPParallelForDirective(
3707 const OMPParallelForDirective &S) {
3708 // Emit directive as a combined directive that consists of two implicit
3709 // directives: 'parallel' with 'for' directive.
3710 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3711 Action.Enter(CGF);
3712 (void)emitWorksharingDirective(CGF, S, S.hasCancel());
3713 };
3714 {
3715 auto LPCRegion =
3716 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3717 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
3718 emitEmptyBoundParameters);
3719 }
3720 // Check for outer lastprivate conditional update.
3721 checkForLastprivateConditionalUpdate(*this, S);
3722 }
3723
EmitOMPParallelForSimdDirective(const OMPParallelForSimdDirective & S)3724 void CodeGenFunction::EmitOMPParallelForSimdDirective(
3725 const OMPParallelForSimdDirective &S) {
3726 // Emit directive as a combined directive that consists of two implicit
3727 // directives: 'parallel' with 'for' directive.
3728 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3729 Action.Enter(CGF);
3730 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3731 };
3732 {
3733 auto LPCRegion =
3734 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3735 emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
3736 emitEmptyBoundParameters);
3737 }
3738 // Check for outer lastprivate conditional update.
3739 checkForLastprivateConditionalUpdate(*this, S);
3740 }
3741
EmitOMPParallelMasterDirective(const OMPParallelMasterDirective & S)3742 void CodeGenFunction::EmitOMPParallelMasterDirective(
3743 const OMPParallelMasterDirective &S) {
3744 // Emit directive as a combined directive that consists of two implicit
3745 // directives: 'parallel' with 'master' directive.
3746 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3747 Action.Enter(CGF);
3748 OMPPrivateScope PrivateScope(CGF);
3749 bool Copyins = CGF.EmitOMPCopyinClause(S);
3750 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3751 if (Copyins) {
3752 // Emit implicit barrier to synchronize threads and avoid data races on
3753 // propagation master's thread values of threadprivate variables to local
3754 // instances of that variables of all other implicit threads.
3755 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3756 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3757 /*ForceSimpleCall=*/true);
3758 }
3759 CGF.EmitOMPPrivateClause(S, PrivateScope);
3760 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3761 (void)PrivateScope.Privatize();
3762 emitMaster(CGF, S);
3763 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3764 };
3765 {
3766 auto LPCRegion =
3767 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3768 emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
3769 emitEmptyBoundParameters);
3770 emitPostUpdateForReductionClause(*this, S,
3771 [](CodeGenFunction &) { return nullptr; });
3772 }
3773 // Check for outer lastprivate conditional update.
3774 checkForLastprivateConditionalUpdate(*this, S);
3775 }
3776
EmitOMPParallelSectionsDirective(const OMPParallelSectionsDirective & S)3777 void CodeGenFunction::EmitOMPParallelSectionsDirective(
3778 const OMPParallelSectionsDirective &S) {
3779 // Emit directive as a combined directive that consists of two implicit
3780 // directives: 'parallel' with 'sections' directive.
3781 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3782 Action.Enter(CGF);
3783 CGF.EmitSections(S);
3784 };
3785 {
3786 auto LPCRegion =
3787 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3788 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
3789 emitEmptyBoundParameters);
3790 }
3791 // Check for outer lastprivate conditional update.
3792 checkForLastprivateConditionalUpdate(*this, S);
3793 }
3794
3795 namespace {
3796 /// Get the list of variables declared in the context of the untied tasks.
3797 class CheckVarsEscapingUntiedTaskDeclContext final
3798 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
3799 llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
3800
3801 public:
3802 explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
3803 virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
VisitDeclStmt(const DeclStmt * S)3804 void VisitDeclStmt(const DeclStmt *S) {
3805 if (!S)
3806 return;
3807 // Need to privatize only local vars, static locals can be processed as is.
3808 for (const Decl *D : S->decls()) {
3809 if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
3810 if (VD->hasLocalStorage())
3811 PrivateDecls.push_back(VD);
3812 }
3813 }
VisitOMPExecutableDirective(const OMPExecutableDirective *)3814 void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; }
VisitCapturedStmt(const CapturedStmt *)3815 void VisitCapturedStmt(const CapturedStmt *) { return; }
VisitLambdaExpr(const LambdaExpr *)3816 void VisitLambdaExpr(const LambdaExpr *) { return; }
VisitBlockExpr(const BlockExpr *)3817 void VisitBlockExpr(const BlockExpr *) { return; }
VisitStmt(const Stmt * S)3818 void VisitStmt(const Stmt *S) {
3819 if (!S)
3820 return;
3821 for (const Stmt *Child : S->children())
3822 if (Child)
3823 Visit(Child);
3824 }
3825
3826 /// Swaps list of vars with the provided one.
getPrivateDecls() const3827 ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
3828 };
3829 } // anonymous namespace
3830
EmitOMPTaskBasedDirective(const OMPExecutableDirective & S,const OpenMPDirectiveKind CapturedRegion,const RegionCodeGenTy & BodyGen,const TaskGenTy & TaskGen,OMPTaskDataTy & Data)3831 void CodeGenFunction::EmitOMPTaskBasedDirective(
3832 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
3833 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
3834 OMPTaskDataTy &Data) {
3835 // Emit outlined function for task construct.
3836 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
3837 auto I = CS->getCapturedDecl()->param_begin();
3838 auto PartId = std::next(I);
3839 auto TaskT = std::next(I, 4);
3840 // Check if the task is final
3841 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
3842 // If the condition constant folds and can be elided, try to avoid emitting
3843 // the condition and the dead arm of the if/else.
3844 const Expr *Cond = Clause->getCondition();
3845 bool CondConstant;
3846 if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
3847 Data.Final.setInt(CondConstant);
3848 else
3849 Data.Final.setPointer(EvaluateExprAsBool(Cond));
3850 } else {
3851 // By default the task is not final.
3852 Data.Final.setInt(/*IntVal=*/false);
3853 }
3854 // Check if the task has 'priority' clause.
3855 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
3856 const Expr *Prio = Clause->getPriority();
3857 Data.Priority.setInt(/*IntVal=*/true);
3858 Data.Priority.setPointer(EmitScalarConversion(
3859 EmitScalarExpr(Prio), Prio->getType(),
3860 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
3861 Prio->getExprLoc()));
3862 }
3863 // The first function argument for tasks is a thread id, the second one is a
3864 // part id (0 for tied tasks, >=0 for untied task).
3865 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
3866 // Get list of private variables.
3867 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
3868 auto IRef = C->varlist_begin();
3869 for (const Expr *IInit : C->private_copies()) {
3870 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
3871 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
3872 Data.PrivateVars.push_back(*IRef);
3873 Data.PrivateCopies.push_back(IInit);
3874 }
3875 ++IRef;
3876 }
3877 }
3878 EmittedAsPrivate.clear();
3879 // Get list of firstprivate variables.
3880 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
3881 auto IRef = C->varlist_begin();
3882 auto IElemInitRef = C->inits().begin();
3883 for (const Expr *IInit : C->private_copies()) {
3884 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
3885 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
3886 Data.FirstprivateVars.push_back(*IRef);
3887 Data.FirstprivateCopies.push_back(IInit);
3888 Data.FirstprivateInits.push_back(*IElemInitRef);
3889 }
3890 ++IRef;
3891 ++IElemInitRef;
3892 }
3893 }
3894 // Get list of lastprivate variables (for taskloops).
3895 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
3896 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
3897 auto IRef = C->varlist_begin();
3898 auto ID = C->destination_exprs().begin();
3899 for (const Expr *IInit : C->private_copies()) {
3900 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
3901 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
3902 Data.LastprivateVars.push_back(*IRef);
3903 Data.LastprivateCopies.push_back(IInit);
3904 }
3905 LastprivateDstsOrigs.insert(
3906 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
3907 cast<DeclRefExpr>(*IRef)});
3908 ++IRef;
3909 ++ID;
3910 }
3911 }
3912 SmallVector<const Expr *, 4> LHSs;
3913 SmallVector<const Expr *, 4> RHSs;
3914 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3915 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
3916 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
3917 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
3918 Data.ReductionOps.append(C->reduction_ops().begin(),
3919 C->reduction_ops().end());
3920 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3921 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3922 }
3923 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
3924 *this, S.getBeginLoc(), LHSs, RHSs, Data);
3925 // Build list of dependences.
3926 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
3927 OMPTaskDataTy::DependData &DD =
3928 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
3929 DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
3930 }
3931 // Get list of local vars for untied tasks.
3932 if (!Data.Tied) {
3933 CheckVarsEscapingUntiedTaskDeclContext Checker;
3934 Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
3935 Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
3936 Checker.getPrivateDecls().end());
3937 }
3938 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
3939 CapturedRegion](CodeGenFunction &CGF,
3940 PrePostActionTy &Action) {
3941 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, std::pair<Address, Address>>
3942 UntiedLocalVars;
3943 // Set proper addresses for generated private copies.
3944 OMPPrivateScope Scope(CGF);
3945 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
3946 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
3947 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
3948 llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
3949 CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
3950 enum { PrivatesParam = 2, CopyFnParam = 3 };
3951 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
3952 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
3953 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
3954 CS->getCapturedDecl()->getParam(PrivatesParam)));
3955 // Map privates.
3956 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
3957 llvm::SmallVector<llvm::Value *, 16> CallArgs;
3958 CallArgs.push_back(PrivatesPtr);
3959 for (const Expr *E : Data.PrivateVars) {
3960 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3961 Address PrivatePtr = CGF.CreateMemTemp(
3962 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
3963 PrivatePtrs.emplace_back(VD, PrivatePtr);
3964 CallArgs.push_back(PrivatePtr.getPointer());
3965 }
3966 for (const Expr *E : Data.FirstprivateVars) {
3967 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3968 Address PrivatePtr =
3969 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3970 ".firstpriv.ptr.addr");
3971 PrivatePtrs.emplace_back(VD, PrivatePtr);
3972 FirstprivatePtrs.emplace_back(VD, PrivatePtr);
3973 CallArgs.push_back(PrivatePtr.getPointer());
3974 }
3975 for (const Expr *E : Data.LastprivateVars) {
3976 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3977 Address PrivatePtr =
3978 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3979 ".lastpriv.ptr.addr");
3980 PrivatePtrs.emplace_back(VD, PrivatePtr);
3981 CallArgs.push_back(PrivatePtr.getPointer());
3982 }
3983 for (const VarDecl *VD : Data.PrivateLocals) {
3984 QualType Ty = VD->getType().getNonReferenceType();
3985 if (VD->getType()->isLValueReferenceType())
3986 Ty = CGF.getContext().getPointerType(Ty);
3987 if (isAllocatableDecl(VD))
3988 Ty = CGF.getContext().getPointerType(Ty);
3989 Address PrivatePtr = CGF.CreateMemTemp(
3990 CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
3991 UntiedLocalVars.try_emplace(VD, PrivatePtr, Address::invalid());
3992 CallArgs.push_back(PrivatePtr.getPointer());
3993 }
3994 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
3995 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
3996 for (const auto &Pair : LastprivateDstsOrigs) {
3997 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
3998 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
3999 /*RefersToEnclosingVariableOrCapture=*/
4000 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4001 Pair.second->getType(), VK_LValue,
4002 Pair.second->getExprLoc());
4003 Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
4004 return CGF.EmitLValue(&DRE).getAddress(CGF);
4005 });
4006 }
4007 for (const auto &Pair : PrivatePtrs) {
4008 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4009 CGF.getContext().getDeclAlign(Pair.first));
4010 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4011 }
4012 // Adjust mapping for internal locals by mapping actual memory instead of
4013 // a pointer to this memory.
4014 for (auto &Pair : UntiedLocalVars) {
4015 if (isAllocatableDecl(Pair.first)) {
4016 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4017 Address Replacement(Ptr, CGF.getPointerAlign());
4018 Pair.getSecond().first = Replacement;
4019 Ptr = CGF.Builder.CreateLoad(Replacement);
4020 Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4021 Pair.getSecond().second = Replacement;
4022 } else {
4023 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4024 Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4025 Pair.getSecond().first = Replacement;
4026 }
4027 }
4028 }
4029 if (Data.Reductions) {
4030 OMPPrivateScope FirstprivateScope(CGF);
4031 for (const auto &Pair : FirstprivatePtrs) {
4032 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4033 CGF.getContext().getDeclAlign(Pair.first));
4034 FirstprivateScope.addPrivate(Pair.first,
4035 [Replacement]() { return Replacement; });
4036 }
4037 (void)FirstprivateScope.Privatize();
4038 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4039 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4040 Data.ReductionCopies, Data.ReductionOps);
4041 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4042 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4043 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
4044 RedCG.emitSharedOrigLValue(CGF, Cnt);
4045 RedCG.emitAggregateType(CGF, Cnt);
4046 // FIXME: This must removed once the runtime library is fixed.
4047 // Emit required threadprivate variables for
4048 // initializer/combiner/finalizer.
4049 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4050 RedCG, Cnt);
4051 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4052 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4053 Replacement =
4054 Address(CGF.EmitScalarConversion(
4055 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4056 CGF.getContext().getPointerType(
4057 Data.ReductionCopies[Cnt]->getType()),
4058 Data.ReductionCopies[Cnt]->getExprLoc()),
4059 Replacement.getAlignment());
4060 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4061 Scope.addPrivate(RedCG.getBaseDecl(Cnt),
4062 [Replacement]() { return Replacement; });
4063 }
4064 }
4065 // Privatize all private variables except for in_reduction items.
4066 (void)Scope.Privatize();
4067 SmallVector<const Expr *, 4> InRedVars;
4068 SmallVector<const Expr *, 4> InRedPrivs;
4069 SmallVector<const Expr *, 4> InRedOps;
4070 SmallVector<const Expr *, 4> TaskgroupDescriptors;
4071 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4072 auto IPriv = C->privates().begin();
4073 auto IRed = C->reduction_ops().begin();
4074 auto ITD = C->taskgroup_descriptors().begin();
4075 for (const Expr *Ref : C->varlists()) {
4076 InRedVars.emplace_back(Ref);
4077 InRedPrivs.emplace_back(*IPriv);
4078 InRedOps.emplace_back(*IRed);
4079 TaskgroupDescriptors.emplace_back(*ITD);
4080 std::advance(IPriv, 1);
4081 std::advance(IRed, 1);
4082 std::advance(ITD, 1);
4083 }
4084 }
4085 // Privatize in_reduction items here, because taskgroup descriptors must be
4086 // privatized earlier.
4087 OMPPrivateScope InRedScope(CGF);
4088 if (!InRedVars.empty()) {
4089 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4090 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4091 RedCG.emitSharedOrigLValue(CGF, Cnt);
4092 RedCG.emitAggregateType(CGF, Cnt);
4093 // The taskgroup descriptor variable is always implicit firstprivate and
4094 // privatized already during processing of the firstprivates.
4095 // FIXME: This must removed once the runtime library is fixed.
4096 // Emit required threadprivate variables for
4097 // initializer/combiner/finalizer.
4098 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4099 RedCG, Cnt);
4100 llvm::Value *ReductionsPtr;
4101 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
4102 ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
4103 TRExpr->getExprLoc());
4104 } else {
4105 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4106 }
4107 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4108 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4109 Replacement = Address(
4110 CGF.EmitScalarConversion(
4111 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4112 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
4113 InRedPrivs[Cnt]->getExprLoc()),
4114 Replacement.getAlignment());
4115 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4116 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
4117 [Replacement]() { return Replacement; });
4118 }
4119 }
4120 (void)InRedScope.Privatize();
4121
4122 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
4123 UntiedLocalVars);
4124 Action.Enter(CGF);
4125 BodyGen(CGF);
4126 };
4127 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4128 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
4129 Data.NumberOfParts);
4130 OMPLexicalScope Scope(*this, S, llvm::None,
4131 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4132 !isOpenMPSimdDirective(S.getDirectiveKind()));
4133 TaskGen(*this, OutlinedFn, Data);
4134 }
4135
4136 static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext & C,OMPTaskDataTy & Data,QualType Ty,CapturedDecl * CD,SourceLocation Loc)4137 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
4138 QualType Ty, CapturedDecl *CD,
4139 SourceLocation Loc) {
4140 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4141 ImplicitParamDecl::Other);
4142 auto *OrigRef = DeclRefExpr::Create(
4143 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
4144 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4145 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4146 ImplicitParamDecl::Other);
4147 auto *PrivateRef = DeclRefExpr::Create(
4148 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
4149 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4150 QualType ElemType = C.getBaseElementType(Ty);
4151 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
4152 ImplicitParamDecl::Other);
4153 auto *InitRef = DeclRefExpr::Create(
4154 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
4155 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
4156 PrivateVD->setInitStyle(VarDecl::CInit);
4157 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
4158 InitRef, /*BasePath=*/nullptr,
4159 VK_RValue, FPOptionsOverride()));
4160 Data.FirstprivateVars.emplace_back(OrigRef);
4161 Data.FirstprivateCopies.emplace_back(PrivateRef);
4162 Data.FirstprivateInits.emplace_back(InitRef);
4163 return OrigVD;
4164 }
4165
EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective & S,const RegionCodeGenTy & BodyGen,OMPTargetDataInfo & InputInfo)4166 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
4167 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
4168 OMPTargetDataInfo &InputInfo) {
4169 // Emit outlined function for task construct.
4170 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4171 Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4172 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4173 auto I = CS->getCapturedDecl()->param_begin();
4174 auto PartId = std::next(I);
4175 auto TaskT = std::next(I, 4);
4176 OMPTaskDataTy Data;
4177 // The task is not final.
4178 Data.Final.setInt(/*IntVal=*/false);
4179 // Get list of firstprivate variables.
4180 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4181 auto IRef = C->varlist_begin();
4182 auto IElemInitRef = C->inits().begin();
4183 for (auto *IInit : C->private_copies()) {
4184 Data.FirstprivateVars.push_back(*IRef);
4185 Data.FirstprivateCopies.push_back(IInit);
4186 Data.FirstprivateInits.push_back(*IElemInitRef);
4187 ++IRef;
4188 ++IElemInitRef;
4189 }
4190 }
4191 OMPPrivateScope TargetScope(*this);
4192 VarDecl *BPVD = nullptr;
4193 VarDecl *PVD = nullptr;
4194 VarDecl *SVD = nullptr;
4195 VarDecl *MVD = nullptr;
4196 if (InputInfo.NumberOfTargetItems > 0) {
4197 auto *CD = CapturedDecl::Create(
4198 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
4199 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
4200 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
4201 getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
4202 /*IndexTypeQuals=*/0);
4203 BPVD = createImplicitFirstprivateForType(
4204 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4205 PVD = createImplicitFirstprivateForType(
4206 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4207 QualType SizesType = getContext().getConstantArrayType(
4208 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
4209 ArrSize, nullptr, ArrayType::Normal,
4210 /*IndexTypeQuals=*/0);
4211 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
4212 S.getBeginLoc());
4213 TargetScope.addPrivate(
4214 BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
4215 TargetScope.addPrivate(PVD,
4216 [&InputInfo]() { return InputInfo.PointersArray; });
4217 TargetScope.addPrivate(SVD,
4218 [&InputInfo]() { return InputInfo.SizesArray; });
4219 // If there is no user-defined mapper, the mapper array will be nullptr. In
4220 // this case, we don't need to privatize it.
4221 if (!dyn_cast_or_null<llvm::ConstantPointerNull>(
4222 InputInfo.MappersArray.getPointer())) {
4223 MVD = createImplicitFirstprivateForType(
4224 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4225 TargetScope.addPrivate(MVD,
4226 [&InputInfo]() { return InputInfo.MappersArray; });
4227 }
4228 }
4229 (void)TargetScope.Privatize();
4230 // Build list of dependences.
4231 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4232 OMPTaskDataTy::DependData &DD =
4233 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4234 DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4235 }
4236 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
4237 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
4238 // Set proper addresses for generated private copies.
4239 OMPPrivateScope Scope(CGF);
4240 if (!Data.FirstprivateVars.empty()) {
4241 llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
4242 CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
4243 enum { PrivatesParam = 2, CopyFnParam = 3 };
4244 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4245 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4246 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4247 CS->getCapturedDecl()->getParam(PrivatesParam)));
4248 // Map privates.
4249 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4250 llvm::SmallVector<llvm::Value *, 16> CallArgs;
4251 CallArgs.push_back(PrivatesPtr);
4252 for (const Expr *E : Data.FirstprivateVars) {
4253 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4254 Address PrivatePtr =
4255 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4256 ".firstpriv.ptr.addr");
4257 PrivatePtrs.emplace_back(VD, PrivatePtr);
4258 CallArgs.push_back(PrivatePtr.getPointer());
4259 }
4260 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4261 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4262 for (const auto &Pair : PrivatePtrs) {
4263 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4264 CGF.getContext().getDeclAlign(Pair.first));
4265 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4266 }
4267 }
4268 // Privatize all private variables except for in_reduction items.
4269 (void)Scope.Privatize();
4270 if (InputInfo.NumberOfTargetItems > 0) {
4271 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
4272 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
4273 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
4274 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
4275 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
4276 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
4277 // If MVD is nullptr, the mapper array is not privatized
4278 if (MVD)
4279 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
4280 CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
4281 }
4282
4283 Action.Enter(CGF);
4284 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
4285 BodyGen(CGF);
4286 };
4287 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4288 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
4289 Data.NumberOfParts);
4290 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
4291 IntegerLiteral IfCond(getContext(), TrueOrFalse,
4292 getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
4293 SourceLocation());
4294
4295 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
4296 SharedsTy, CapturedStruct, &IfCond, Data);
4297 }
4298
EmitOMPTaskDirective(const OMPTaskDirective & S)4299 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
4300 // Emit outlined function for task construct.
4301 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4302 Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4303 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4304 const Expr *IfCond = nullptr;
4305 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4306 if (C->getNameModifier() == OMPD_unknown ||
4307 C->getNameModifier() == OMPD_task) {
4308 IfCond = C->getCondition();
4309 break;
4310 }
4311 }
4312
4313 OMPTaskDataTy Data;
4314 // Check if we should emit tied or untied task.
4315 Data.Tied = !S.getSingleClause<OMPUntiedClause>();
4316 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
4317 CGF.EmitStmt(CS->getCapturedStmt());
4318 };
4319 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4320 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
4321 const OMPTaskDataTy &Data) {
4322 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
4323 SharedsTy, CapturedStruct, IfCond,
4324 Data);
4325 };
4326 auto LPCRegion =
4327 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4328 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
4329 }
4330
EmitOMPTaskyieldDirective(const OMPTaskyieldDirective & S)4331 void CodeGenFunction::EmitOMPTaskyieldDirective(
4332 const OMPTaskyieldDirective &S) {
4333 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
4334 }
4335
EmitOMPBarrierDirective(const OMPBarrierDirective & S)4336 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
4337 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
4338 }
4339
EmitOMPTaskwaitDirective(const OMPTaskwaitDirective & S)4340 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
4341 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
4342 }
4343
EmitOMPTaskgroupDirective(const OMPTaskgroupDirective & S)4344 void CodeGenFunction::EmitOMPTaskgroupDirective(
4345 const OMPTaskgroupDirective &S) {
4346 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4347 Action.Enter(CGF);
4348 if (const Expr *E = S.getReductionRef()) {
4349 SmallVector<const Expr *, 4> LHSs;
4350 SmallVector<const Expr *, 4> RHSs;
4351 OMPTaskDataTy Data;
4352 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
4353 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4354 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4355 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4356 Data.ReductionOps.append(C->reduction_ops().begin(),
4357 C->reduction_ops().end());
4358 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4359 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4360 }
4361 llvm::Value *ReductionDesc =
4362 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
4363 LHSs, RHSs, Data);
4364 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4365 CGF.EmitVarDecl(*VD);
4366 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
4367 /*Volatile=*/false, E->getType());
4368 }
4369 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4370 };
4371 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4372 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
4373 }
4374
EmitOMPFlushDirective(const OMPFlushDirective & S)4375 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
4376 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
4377 ? llvm::AtomicOrdering::NotAtomic
4378 : llvm::AtomicOrdering::AcquireRelease;
4379 CGM.getOpenMPRuntime().emitFlush(
4380 *this,
4381 [&S]() -> ArrayRef<const Expr *> {
4382 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
4383 return llvm::makeArrayRef(FlushClause->varlist_begin(),
4384 FlushClause->varlist_end());
4385 return llvm::None;
4386 }(),
4387 S.getBeginLoc(), AO);
4388 }
4389
EmitOMPDepobjDirective(const OMPDepobjDirective & S)4390 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
4391 const auto *DO = S.getSingleClause<OMPDepobjClause>();
4392 LValue DOLVal = EmitLValue(DO->getDepobj());
4393 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
4394 OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
4395 DC->getModifier());
4396 Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
4397 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
4398 *this, Dependencies, DC->getBeginLoc());
4399 EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
4400 return;
4401 }
4402 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
4403 CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
4404 return;
4405 }
4406 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
4407 CGM.getOpenMPRuntime().emitUpdateClause(
4408 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
4409 return;
4410 }
4411 }
4412
/// Emit code for the scan directive \p S.
///
/// Nothing is emitted unless OMPParentLoopDirectiveForScan is set. The
/// expressions of every 'inscan' reduction clause on that parent directive are
/// gathered, then one of two lowerings is used:
///  - for a simd parent directive (or any simd-based parent directive when
///    OpenMPSimd is enabled in the language options) the reduction is emitted
///    in place, see the pseudo-code inside the branch;
///  - otherwise the copy-array elements are written when OMPFirstScanLoop is
///    set and read back when it is not.
void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
  if (!OMPParentLoopDirectiveForScan)
    return;
  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
  // Presence of an inclusive clause selects inclusive handling; everything
  // else below is treated as exclusive.
  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  // Collect the per-variable helper expressions; the vectors are filled in
  // lock-step, so index I refers to the same reduction item in each of them.
  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
    // Only reductions with the 'inscan' modifier take part.
    if (C->getModifier() != OMPC_REDUCTION_inscan)
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  if (ParentDir.getDirectiveKind() == OMPD_simd ||
      (getLangOpts().OpenMPSimd &&
       isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
    // For simd directive and simd-based directives in simd only mode, use the
    // following codegen:
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan inclusive(x)
    //   <second part>
    //  }
    // is transformed to:
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <first part>
    //   x = x_priv + x;
    //   x_priv = x;
    //   <second part>
    // }
    // and
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan exclusive(x)
    //   <second part>
    // }
    // to
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <second part>
    //   int temp = x;
    //   x = x_priv + x;
    //   x_priv = temp;
    //   <first part>
    // }
    llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
    // Code emitted so far branches to the reduction block for an inclusive
    // scan and to the loop's continue block for an exclusive scan.
    EmitBranch(IsInclusive
                   ? OMPScanReduce
                   : BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanDispatch);
    {
      // New scope for correct construction/destruction of temp variables for
      // exclusive scan.
      LexicalScope Scope(*this, S.getSourceRange());
      EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
      EmitBlock(OMPScanReduce);
      if (!IsInclusive) {
        // Create temp var and copy LHS value to this temp value.
        // TMP = LHS;
        for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
          const Expr *PrivateExpr = Privates[I];
          const Expr *TempExpr = CopyArrayTemps[I];
          EmitAutoVarDecl(
              *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
          LValue DestLVal = EmitLValue(TempExpr);
          LValue SrcLVal = EmitLValue(LHSs[I]);
          EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                      SrcLVal.getAddress(*this),
                      cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                      cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                      CopyOps[I]);
        }
      }
      // Emit the reduction itself (the 'x = x_priv + x' step of the
      // pseudo-code above).
      CGM.getOpenMPRuntime().emitReduction(
          *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
      // Refresh the RHS: from the LHS for inclusive scan, from the temp saved
      // above for exclusive scan.
      for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
        const Expr *PrivateExpr = Privates[I];
        LValue DestLVal;
        LValue SrcLVal;
        if (IsInclusive) {
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(LHSs[I]);
        } else {
          const Expr *TempExpr = CopyArrayTemps[I];
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(TempExpr);
        }
        EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                    SrcLVal.getAddress(*this),
                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                    CopyOps[I]);
      }
    }
    EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
    // NOTE(review): OMPScanExitBlock is consumed outside this function;
    // presumably by the enclosing loop body emission -- confirm at the use.
    OMPScanExitBlock = IsInclusive
                           ? BreakContinueStack.back().ContinueBlock.getBlock()
                           : OMPScanReduce;
    EmitBlock(OMPAfterScanBlock);
    return;
  }
  // Non-simd lowering: the copy-array elements carry the values between the
  // OMPFirstScanLoop emission and the emission without it.
  if (!IsInclusive) {
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanExitBlock);
  }
  if (OMPFirstScanLoop) {
    // Emit buffer[i] = red; at the end of the input phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      // Bind the opaque index of the copy-array subscript to the current
      // iteration value for the duration of this copy.
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue DestLVal = EmitLValue(CopyArrayElem);
      LValue SrcLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
  }
  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  if (IsInclusive) {
    EmitBlock(OMPScanExitBlock);
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  }
  EmitBlock(OMPScanDispatch);
  if (!OMPFirstScanLoop) {
    // Emit red = buffer[i]; at the entrance to the scan phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    llvm::BasicBlock *ExclusiveExitBB = nullptr;
    if (!IsInclusive) {
      llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
      ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
      // When the index is zero there is no 'idx - 1' element; skip the copy
      // by branching straight to the exit block.
      llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
      Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
      EmitBlock(ContBB);
      // Use idx - 1 iteration for exclusive scan.
      IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
    }
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      // Same index binding as in the input phase, but the copy direction is
      // reversed: copy-array element -> original variable.
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue SrcLVal = EmitLValue(CopyArrayElem);
      LValue DestLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
    if (!IsInclusive) {
      EmitBlock(ExclusiveExitBB);
    }
  }
  // Continue with the 'before scan' block when OMPFirstScanLoop and
  // IsInclusive agree, otherwise with the 'after scan' block.
  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
                                               : OMPAfterScanBlock);
  EmitBlock(OMPAfterScanBlock);
}
4615
/// Emit the loop for the distribute-based loop directive \p S.
///
/// \param S Loop directive to emit.
/// \param CodeGenLoop Callback that emits the loop body; it is invoked as
/// CodeGenLoop(CGF, S, LoopExit) for the static schedules and forwarded to
/// EmitOMPDistributeOuterLoop otherwise.
/// \param IncExpr Increment expression handed to EmitOMPInnerLoop.
///
/// Nothing past the iteration variable (and iteration count, if any) is
/// emitted when the loop precondition constant-folds to false.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the precondition folded to a constant; in that case no
    // continuation block has to be emitted at the end.
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.

      // Loop-bound-sharing directives use the combined lower/upper bound
      // variables instead of the plain ones.
      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      // Reductions are initialized here only for simd directives that are
      // neither parallel nor teams directives; the matching finalization
      // below uses the same condition.
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          // Convert the chunk size to the type of the iteration variable.
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour for dist_schedule clause.
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      // A static chunked schedule takes the static path below only for
      // loop-bound-sharing directives.
      bool StaticChunked = RT.isStaticChunked(
                               ScheduleKind, /* Chunked */ Chunk != nullptr) &&
                           isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        // The static chunked schedule uses the combined distribute condition
        // instead.
        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunk one schedule generate:
        //
        // while (IV <= GlobalUB) {
        //   <CodeGen rest of pragma>(LB, UB);
        //   LB += ST;
        //   UB += ST;
        //   UB = min(UB, GlobalUB);
        //   IV = LB;
        // }
        //
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
            },
            [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(), Cond, IncExpr,
                  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  [&S, StaticChunked](CodeGenFunction &CGF) {
                    // Advance to the next chunk: LB, UB, clamp UB, reset IV
                    // (see the pseudo-code above).
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      // Simd finalization, guarded by IsLastIter != 0.
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
4844
EmitOMPDistributeDirective(const OMPDistributeDirective & S)4845 void CodeGenFunction::EmitOMPDistributeDirective(
4846 const OMPDistributeDirective &S) {
4847 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4848 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4849 };
4850 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4851 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
4852 }
4853
emitOutlinedOrderedFunction(CodeGenModule & CGM,const CapturedStmt * S,SourceLocation Loc)4854 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
4855 const CapturedStmt *S,
4856 SourceLocation Loc) {
4857 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
4858 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
4859 CGF.CapturedStmtInfo = &CapStmtInfo;
4860 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
4861 Fn->setDoesNotRecurse();
4862 return Fn;
4863 }
4864
EmitOMPOrderedDirective(const OMPOrderedDirective & S)4865 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
4866 if (S.hasClausesOfKind<OMPDependClause>()) {
4867 assert(!S.hasAssociatedStmt() &&
4868 "No associated statement must be in ordered depend construct.");
4869 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
4870 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
4871 return;
4872 }
4873 const auto *C = S.getSingleClause<OMPSIMDClause>();
4874 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
4875 PrePostActionTy &Action) {
4876 const CapturedStmt *CS = S.getInnermostCapturedStmt();
4877 if (C) {
4878 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
4879 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
4880 llvm::Function *OutlinedFn =
4881 emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
4882 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
4883 OutlinedFn, CapturedVars);
4884 } else {
4885 Action.Enter(CGF);
4886 CGF.EmitStmt(CS->getCapturedStmt());
4887 }
4888 };
4889 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4890 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
4891 }
4892
convertToScalarValue(CodeGenFunction & CGF,RValue Val,QualType SrcType,QualType DestType,SourceLocation Loc)4893 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
4894 QualType SrcType, QualType DestType,
4895 SourceLocation Loc) {
4896 assert(CGF.hasScalarEvaluationKind(DestType) &&
4897 "DestType must have scalar evaluation kind.");
4898 assert(!Val.isAggregate() && "Must be a scalar or complex.");
4899 return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
4900 DestType, Loc)
4901 : CGF.EmitComplexToScalarConversion(
4902 Val.getComplexVal(), SrcType, DestType, Loc);
4903 }
4904
4905 static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction & CGF,RValue Val,QualType SrcType,QualType DestType,SourceLocation Loc)4906 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
4907 QualType DestType, SourceLocation Loc) {
4908 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
4909 "DestType must have complex evaluation kind.");
4910 CodeGenFunction::ComplexPairTy ComplexVal;
4911 if (Val.isScalar()) {
4912 // Convert the input element to the element type of the complex.
4913 QualType DestElementType =
4914 DestType->castAs<ComplexType>()->getElementType();
4915 llvm::Value *ScalarVal = CGF.EmitScalarConversion(
4916 Val.getScalarVal(), SrcType, DestElementType, Loc);
4917 ComplexVal = CodeGenFunction::ComplexPairTy(
4918 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
4919 } else {
4920 assert(Val.isComplex() && "Must be a scalar or complex.");
4921 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
4922 QualType DestElementType =
4923 DestType->castAs<ComplexType>()->getElementType();
4924 ComplexVal.first = CGF.EmitScalarConversion(
4925 Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
4926 ComplexVal.second = CGF.EmitScalarConversion(
4927 Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
4928 }
4929 return ComplexVal;
4930 }
4931
emitSimpleAtomicStore(CodeGenFunction & CGF,llvm::AtomicOrdering AO,LValue LVal,RValue RVal)4932 static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
4933 LValue LVal, RValue RVal) {
4934 if (LVal.isGlobalReg())
4935 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
4936 else
4937 CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
4938 }
4939
emitSimpleAtomicLoad(CodeGenFunction & CGF,llvm::AtomicOrdering AO,LValue LVal,SourceLocation Loc)4940 static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
4941 llvm::AtomicOrdering AO, LValue LVal,
4942 SourceLocation Loc) {
4943 if (LVal.isGlobalReg())
4944 return CGF.EmitLoadOfLValue(LVal, Loc);
4945 return CGF.EmitAtomicLoad(
4946 LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
4947 LVal.isVolatile());
4948 }
4949
emitOMPSimpleStore(LValue LVal,RValue RVal,QualType RValTy,SourceLocation Loc)4950 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
4951 QualType RValTy, SourceLocation Loc) {
4952 switch (getEvaluationKind(LVal.getType())) {
4953 case TEK_Scalar:
4954 EmitStoreThroughLValue(RValue::get(convertToScalarValue(
4955 *this, RVal, RValTy, LVal.getType(), Loc)),
4956 LVal);
4957 break;
4958 case TEK_Complex:
4959 EmitStoreOfComplex(
4960 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
4961 /*isInit=*/false);
4962 break;
4963 case TEK_Aggregate:
4964 llvm_unreachable("Must be a scalar or complex.");
4965 }
4966 }
4967
emitOMPAtomicReadExpr(CodeGenFunction & CGF,llvm::AtomicOrdering AO,const Expr * X,const Expr * V,SourceLocation Loc)4968 static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
4969 const Expr *X, const Expr *V,
4970 SourceLocation Loc) {
4971 // v = x;
4972 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
4973 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
4974 LValue XLValue = CGF.EmitLValue(X);
4975 LValue VLValue = CGF.EmitLValue(V);
4976 RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
4977 // OpenMP, 2.17.7, atomic Construct
4978 // If the read or capture clause is specified and the acquire, acq_rel, or
4979 // seq_cst clause is specified then the strong flush on exit from the atomic
4980 // operation is also an acquire flush.
4981 switch (AO) {
4982 case llvm::AtomicOrdering::Acquire:
4983 case llvm::AtomicOrdering::AcquireRelease:
4984 case llvm::AtomicOrdering::SequentiallyConsistent:
4985 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
4986 llvm::AtomicOrdering::Acquire);
4987 break;
4988 case llvm::AtomicOrdering::Monotonic:
4989 case llvm::AtomicOrdering::Release:
4990 break;
4991 case llvm::AtomicOrdering::NotAtomic:
4992 case llvm::AtomicOrdering::Unordered:
4993 llvm_unreachable("Unexpected ordering.");
4994 }
4995 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
4996 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
4997 }
4998
emitOMPAtomicWriteExpr(CodeGenFunction & CGF,llvm::AtomicOrdering AO,const Expr * X,const Expr * E,SourceLocation Loc)4999 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
5000 llvm::AtomicOrdering AO, const Expr *X,
5001 const Expr *E, SourceLocation Loc) {
5002 // x = expr;
5003 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
5004 emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
5005 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5006 // OpenMP, 2.17.7, atomic Construct
5007 // If the write, update, or capture clause is specified and the release,
5008 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5009 // the atomic operation is also a release flush.
5010 switch (AO) {
5011 case llvm::AtomicOrdering::Release:
5012 case llvm::AtomicOrdering::AcquireRelease:
5013 case llvm::AtomicOrdering::SequentiallyConsistent:
5014 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5015 llvm::AtomicOrdering::Release);
5016 break;
5017 case llvm::AtomicOrdering::Acquire:
5018 case llvm::AtomicOrdering::Monotonic:
5019 break;
5020 case llvm::AtomicOrdering::NotAtomic:
5021 case llvm::AtomicOrdering::Unordered:
5022 llvm_unreachable("Unexpected ordering.");
5023 }
5024 }
5025
emitOMPAtomicRMW(CodeGenFunction & CGF,LValue X,RValue Update,BinaryOperatorKind BO,llvm::AtomicOrdering AO,bool IsXLHSInRHSPart)5026 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
5027 RValue Update,
5028 BinaryOperatorKind BO,
5029 llvm::AtomicOrdering AO,
5030 bool IsXLHSInRHSPart) {
5031 ASTContext &Context = CGF.getContext();
5032 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
5033 // expression is simple and atomic is allowed for the given type for the
5034 // target platform.
5035 if (BO == BO_Comma || !Update.isScalar() ||
5036 !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
5037 (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
5038 (Update.getScalarVal()->getType() !=
5039 X.getAddress(CGF).getElementType())) ||
5040 !X.getAddress(CGF).getElementType()->isIntegerTy() ||
5041 !Context.getTargetInfo().hasBuiltinAtomic(
5042 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
5043 return std::make_pair(false, RValue::get(nullptr));
5044
5045 llvm::AtomicRMWInst::BinOp RMWOp;
5046 switch (BO) {
5047 case BO_Add:
5048 RMWOp = llvm::AtomicRMWInst::Add;
5049 break;
5050 case BO_Sub:
5051 if (!IsXLHSInRHSPart)
5052 return std::make_pair(false, RValue::get(nullptr));
5053 RMWOp = llvm::AtomicRMWInst::Sub;
5054 break;
5055 case BO_And:
5056 RMWOp = llvm::AtomicRMWInst::And;
5057 break;
5058 case BO_Or:
5059 RMWOp = llvm::AtomicRMWInst::Or;
5060 break;
5061 case BO_Xor:
5062 RMWOp = llvm::AtomicRMWInst::Xor;
5063 break;
5064 case BO_LT:
5065 RMWOp = X.getType()->hasSignedIntegerRepresentation()
5066 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
5067 : llvm::AtomicRMWInst::Max)
5068 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
5069 : llvm::AtomicRMWInst::UMax);
5070 break;
5071 case BO_GT:
5072 RMWOp = X.getType()->hasSignedIntegerRepresentation()
5073 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
5074 : llvm::AtomicRMWInst::Min)
5075 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
5076 : llvm::AtomicRMWInst::UMin);
5077 break;
5078 case BO_Assign:
5079 RMWOp = llvm::AtomicRMWInst::Xchg;
5080 break;
5081 case BO_Mul:
5082 case BO_Div:
5083 case BO_Rem:
5084 case BO_Shl:
5085 case BO_Shr:
5086 case BO_LAnd:
5087 case BO_LOr:
5088 return std::make_pair(false, RValue::get(nullptr));
5089 case BO_PtrMemD:
5090 case BO_PtrMemI:
5091 case BO_LE:
5092 case BO_GE:
5093 case BO_EQ:
5094 case BO_NE:
5095 case BO_Cmp:
5096 case BO_AddAssign:
5097 case BO_SubAssign:
5098 case BO_AndAssign:
5099 case BO_OrAssign:
5100 case BO_XorAssign:
5101 case BO_MulAssign:
5102 case BO_DivAssign:
5103 case BO_RemAssign:
5104 case BO_ShlAssign:
5105 case BO_ShrAssign:
5106 case BO_Comma:
5107 llvm_unreachable("Unsupported atomic update operation");
5108 }
5109 llvm::Value *UpdateVal = Update.getScalarVal();
5110 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
5111 UpdateVal = CGF.Builder.CreateIntCast(
5112 IC, X.getAddress(CGF).getElementType(),
5113 X.getType()->hasSignedIntegerRepresentation());
5114 }
5115 llvm::Value *Res =
5116 CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
5117 return std::make_pair(true, RValue::get(Res));
5118 }
5119
EmitOMPAtomicSimpleUpdateExpr(LValue X,RValue E,BinaryOperatorKind BO,bool IsXLHSInRHSPart,llvm::AtomicOrdering AO,SourceLocation Loc,const llvm::function_ref<RValue (RValue)> CommonGen)5120 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
5121 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
5122 llvm::AtomicOrdering AO, SourceLocation Loc,
5123 const llvm::function_ref<RValue(RValue)> CommonGen) {
5124 // Update expressions are allowed to have the following forms:
5125 // x binop= expr; -> xrval + expr;
5126 // x++, ++x -> xrval + 1;
5127 // x--, --x -> xrval - 1;
5128 // x = x binop expr; -> xrval binop expr
5129 // x = expr Op x; - > expr binop xrval;
5130 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
5131 if (!Res.first) {
5132 if (X.isGlobalReg()) {
5133 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
5134 // 'xrval'.
5135 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
5136 } else {
5137 // Perform compare-and-swap procedure.
5138 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
5139 }
5140 }
5141 return Res;
5142 }
5143
emitOMPAtomicUpdateExpr(CodeGenFunction & CGF,llvm::AtomicOrdering AO,const Expr * X,const Expr * E,const Expr * UE,bool IsXLHSInRHSPart,SourceLocation Loc)5144 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
5145 llvm::AtomicOrdering AO, const Expr *X,
5146 const Expr *E, const Expr *UE,
5147 bool IsXLHSInRHSPart, SourceLocation Loc) {
5148 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
5149 "Update expr in 'atomic update' must be a binary operator.");
5150 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
5151 // Update expressions are allowed to have the following forms:
5152 // x binop= expr; -> xrval + expr;
5153 // x++, ++x -> xrval + 1;
5154 // x--, --x -> xrval - 1;
5155 // x = x binop expr; -> xrval binop expr
5156 // x = expr Op x; - > expr binop xrval;
5157 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
5158 LValue XLValue = CGF.EmitLValue(X);
5159 RValue ExprRValue = CGF.EmitAnyExpr(E);
5160 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
5161 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
5162 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
5163 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
5164 auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
5165 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5166 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
5167 return CGF.EmitAnyExpr(UE);
5168 };
5169 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
5170 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
5171 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5172 // OpenMP, 2.17.7, atomic Construct
5173 // If the write, update, or capture clause is specified and the release,
5174 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5175 // the atomic operation is also a release flush.
5176 switch (AO) {
5177 case llvm::AtomicOrdering::Release:
5178 case llvm::AtomicOrdering::AcquireRelease:
5179 case llvm::AtomicOrdering::SequentiallyConsistent:
5180 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5181 llvm::AtomicOrdering::Release);
5182 break;
5183 case llvm::AtomicOrdering::Acquire:
5184 case llvm::AtomicOrdering::Monotonic:
5185 break;
5186 case llvm::AtomicOrdering::NotAtomic:
5187 case llvm::AtomicOrdering::Unordered:
5188 llvm_unreachable("Unexpected ordering.");
5189 }
5190 }
5191
convertToType(CodeGenFunction & CGF,RValue Value,QualType SourceType,QualType ResType,SourceLocation Loc)5192 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
5193 QualType SourceType, QualType ResType,
5194 SourceLocation Loc) {
5195 switch (CGF.getEvaluationKind(ResType)) {
5196 case TEK_Scalar:
5197 return RValue::get(
5198 convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
5199 case TEK_Complex: {
5200 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
5201 return RValue::getComplex(Res.first, Res.second);
5202 }
5203 case TEK_Aggregate:
5204 break;
5205 }
5206 llvm_unreachable("Must be a scalar or complex.");
5207 }
5208
emitOMPAtomicCaptureExpr(CodeGenFunction & CGF,llvm::AtomicOrdering AO,bool IsPostfixUpdate,const Expr * V,const Expr * X,const Expr * E,const Expr * UE,bool IsXLHSInRHSPart,SourceLocation Loc)5209 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
5210 llvm::AtomicOrdering AO,
5211 bool IsPostfixUpdate, const Expr *V,
5212 const Expr *X, const Expr *E,
5213 const Expr *UE, bool IsXLHSInRHSPart,
5214 SourceLocation Loc) {
5215 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
5216 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
5217 RValue NewVVal;
5218 LValue VLValue = CGF.EmitLValue(V);
5219 LValue XLValue = CGF.EmitLValue(X);
5220 RValue ExprRValue = CGF.EmitAnyExpr(E);
5221 QualType NewVValType;
5222 if (UE) {
5223 // 'x' is updated with some additional value.
5224 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
5225 "Update expr in 'atomic capture' must be a binary operator.");
5226 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
5227 // Update expressions are allowed to have the following forms:
5228 // x binop= expr; -> xrval + expr;
5229 // x++, ++x -> xrval + 1;
5230 // x--, --x -> xrval - 1;
5231 // x = x binop expr; -> xrval binop expr
5232 // x = expr Op x; - > expr binop xrval;
5233 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
5234 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
5235 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
5236 NewVValType = XRValExpr->getType();
5237 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
5238 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
5239 IsPostfixUpdate](RValue XRValue) {
5240 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5241 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
5242 RValue Res = CGF.EmitAnyExpr(UE);
5243 NewVVal = IsPostfixUpdate ? XRValue : Res;
5244 return Res;
5245 };
5246 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
5247 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
5248 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5249 if (Res.first) {
5250 // 'atomicrmw' instruction was generated.
5251 if (IsPostfixUpdate) {
5252 // Use old value from 'atomicrmw'.
5253 NewVVal = Res.second;
5254 } else {
5255 // 'atomicrmw' does not provide new value, so evaluate it using old
5256 // value of 'x'.
5257 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5258 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
5259 NewVVal = CGF.EmitAnyExpr(UE);
5260 }
5261 }
5262 } else {
5263 // 'x' is simply rewritten with some 'expr'.
5264 NewVValType = X->getType().getNonReferenceType();
5265 ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
5266 X->getType().getNonReferenceType(), Loc);
5267 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
5268 NewVVal = XRValue;
5269 return ExprRValue;
5270 };
5271 // Try to perform atomicrmw xchg, otherwise simple exchange.
5272 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
5273 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
5274 Loc, Gen);
5275 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5276 if (Res.first) {
5277 // 'atomicrmw' instruction was generated.
5278 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
5279 }
5280 }
5281 // Emit post-update store to 'v' of old/new 'x' value.
5282 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
5283 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
5284 // OpenMP, 2.17.7, atomic Construct
5285 // If the write, update, or capture clause is specified and the release,
5286 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5287 // the atomic operation is also a release flush.
5288 // If the read or capture clause is specified and the acquire, acq_rel, or
5289 // seq_cst clause is specified then the strong flush on exit from the atomic
5290 // operation is also an acquire flush.
5291 switch (AO) {
5292 case llvm::AtomicOrdering::Release:
5293 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5294 llvm::AtomicOrdering::Release);
5295 break;
5296 case llvm::AtomicOrdering::Acquire:
5297 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5298 llvm::AtomicOrdering::Acquire);
5299 break;
5300 case llvm::AtomicOrdering::AcquireRelease:
5301 case llvm::AtomicOrdering::SequentiallyConsistent:
5302 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5303 llvm::AtomicOrdering::AcquireRelease);
5304 break;
5305 case llvm::AtomicOrdering::Monotonic:
5306 break;
5307 case llvm::AtomicOrdering::NotAtomic:
5308 case llvm::AtomicOrdering::Unordered:
5309 llvm_unreachable("Unexpected ordering.");
5310 }
5311 }
5312
emitOMPAtomicExpr(CodeGenFunction & CGF,OpenMPClauseKind Kind,llvm::AtomicOrdering AO,bool IsPostfixUpdate,const Expr * X,const Expr * V,const Expr * E,const Expr * UE,bool IsXLHSInRHSPart,SourceLocation Loc)5313 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
5314 llvm::AtomicOrdering AO, bool IsPostfixUpdate,
5315 const Expr *X, const Expr *V, const Expr *E,
5316 const Expr *UE, bool IsXLHSInRHSPart,
5317 SourceLocation Loc) {
5318 switch (Kind) {
5319 case OMPC_read:
5320 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
5321 break;
5322 case OMPC_write:
5323 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
5324 break;
5325 case OMPC_unknown:
5326 case OMPC_update:
5327 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
5328 break;
5329 case OMPC_capture:
5330 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
5331 IsXLHSInRHSPart, Loc);
5332 break;
5333 case OMPC_if:
5334 case OMPC_final:
5335 case OMPC_num_threads:
5336 case OMPC_private:
5337 case OMPC_firstprivate:
5338 case OMPC_lastprivate:
5339 case OMPC_reduction:
5340 case OMPC_task_reduction:
5341 case OMPC_in_reduction:
5342 case OMPC_safelen:
5343 case OMPC_simdlen:
5344 case OMPC_allocator:
5345 case OMPC_allocate:
5346 case OMPC_collapse:
5347 case OMPC_default:
5348 case OMPC_seq_cst:
5349 case OMPC_acq_rel:
5350 case OMPC_acquire:
5351 case OMPC_release:
5352 case OMPC_relaxed:
5353 case OMPC_shared:
5354 case OMPC_linear:
5355 case OMPC_aligned:
5356 case OMPC_copyin:
5357 case OMPC_copyprivate:
5358 case OMPC_flush:
5359 case OMPC_depobj:
5360 case OMPC_proc_bind:
5361 case OMPC_schedule:
5362 case OMPC_ordered:
5363 case OMPC_nowait:
5364 case OMPC_untied:
5365 case OMPC_threadprivate:
5366 case OMPC_depend:
5367 case OMPC_mergeable:
5368 case OMPC_device:
5369 case OMPC_threads:
5370 case OMPC_simd:
5371 case OMPC_map:
5372 case OMPC_num_teams:
5373 case OMPC_thread_limit:
5374 case OMPC_priority:
5375 case OMPC_grainsize:
5376 case OMPC_nogroup:
5377 case OMPC_num_tasks:
5378 case OMPC_hint:
5379 case OMPC_dist_schedule:
5380 case OMPC_defaultmap:
5381 case OMPC_uniform:
5382 case OMPC_to:
5383 case OMPC_from:
5384 case OMPC_use_device_ptr:
5385 case OMPC_use_device_addr:
5386 case OMPC_is_device_ptr:
5387 case OMPC_unified_address:
5388 case OMPC_unified_shared_memory:
5389 case OMPC_reverse_offload:
5390 case OMPC_dynamic_allocators:
5391 case OMPC_atomic_default_mem_order:
5392 case OMPC_device_type:
5393 case OMPC_match:
5394 case OMPC_nontemporal:
5395 case OMPC_order:
5396 case OMPC_destroy:
5397 case OMPC_detach:
5398 case OMPC_inclusive:
5399 case OMPC_exclusive:
5400 case OMPC_uses_allocators:
5401 case OMPC_affinity:
5402 default:
5403 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
5404 }
5405 }
5406
EmitOMPAtomicDirective(const OMPAtomicDirective & S)5407 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
5408 llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
5409 bool MemOrderingSpecified = false;
5410 if (S.getSingleClause<OMPSeqCstClause>()) {
5411 AO = llvm::AtomicOrdering::SequentiallyConsistent;
5412 MemOrderingSpecified = true;
5413 } else if (S.getSingleClause<OMPAcqRelClause>()) {
5414 AO = llvm::AtomicOrdering::AcquireRelease;
5415 MemOrderingSpecified = true;
5416 } else if (S.getSingleClause<OMPAcquireClause>()) {
5417 AO = llvm::AtomicOrdering::Acquire;
5418 MemOrderingSpecified = true;
5419 } else if (S.getSingleClause<OMPReleaseClause>()) {
5420 AO = llvm::AtomicOrdering::Release;
5421 MemOrderingSpecified = true;
5422 } else if (S.getSingleClause<OMPRelaxedClause>()) {
5423 AO = llvm::AtomicOrdering::Monotonic;
5424 MemOrderingSpecified = true;
5425 }
5426 OpenMPClauseKind Kind = OMPC_unknown;
5427 for (const OMPClause *C : S.clauses()) {
5428 // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
5429 // if it is first).
5430 if (C->getClauseKind() != OMPC_seq_cst &&
5431 C->getClauseKind() != OMPC_acq_rel &&
5432 C->getClauseKind() != OMPC_acquire &&
5433 C->getClauseKind() != OMPC_release &&
5434 C->getClauseKind() != OMPC_relaxed) {
5435 Kind = C->getClauseKind();
5436 break;
5437 }
5438 }
5439 if (!MemOrderingSpecified) {
5440 llvm::AtomicOrdering DefaultOrder =
5441 CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
5442 if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
5443 DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
5444 (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
5445 Kind == OMPC_capture)) {
5446 AO = DefaultOrder;
5447 } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
5448 if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
5449 AO = llvm::AtomicOrdering::Release;
5450 } else if (Kind == OMPC_read) {
5451 assert(Kind == OMPC_read && "Unexpected atomic kind.");
5452 AO = llvm::AtomicOrdering::Acquire;
5453 }
5454 }
5455 }
5456
5457 LexicalScope Scope(*this, S.getSourceRange());
5458 EmitStopPoint(S.getAssociatedStmt());
5459 emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
5460 S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
5461 S.getBeginLoc());
5462 }
5463
emitCommonOMPTargetDirective(CodeGenFunction & CGF,const OMPExecutableDirective & S,const RegionCodeGenTy & CodeGen)5464 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
5465 const OMPExecutableDirective &S,
5466 const RegionCodeGenTy &CodeGen) {
5467 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
5468 CodeGenModule &CGM = CGF.CGM;
5469
5470 // On device emit this construct as inlined code.
5471 if (CGM.getLangOpts().OpenMPIsDevice) {
5472 OMPLexicalScope Scope(CGF, S, OMPD_target);
5473 CGM.getOpenMPRuntime().emitInlinedDirective(
5474 CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5475 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5476 });
5477 return;
5478 }
5479
5480 auto LPCRegion =
5481 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
5482 llvm::Function *Fn = nullptr;
5483 llvm::Constant *FnID = nullptr;
5484
5485 const Expr *IfCond = nullptr;
5486 // Check for the at most one if clause associated with the target region.
5487 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5488 if (C->getNameModifier() == OMPD_unknown ||
5489 C->getNameModifier() == OMPD_target) {
5490 IfCond = C->getCondition();
5491 break;
5492 }
5493 }
5494
5495 // Check if we have any device clause associated with the directive.
5496 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
5497 nullptr, OMPC_DEVICE_unknown);
5498 if (auto *C = S.getSingleClause<OMPDeviceClause>())
5499 Device.setPointerAndInt(C->getDevice(), C->getModifier());
5500
5501 // Check if we have an if clause whose conditional always evaluates to false
5502 // or if we do not have any targets specified. If so the target region is not
5503 // an offload entry point.
5504 bool IsOffloadEntry = true;
5505 if (IfCond) {
5506 bool Val;
5507 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
5508 IsOffloadEntry = false;
5509 }
5510 if (CGM.getLangOpts().OMPTargetTriples.empty())
5511 IsOffloadEntry = false;
5512
5513 assert(CGF.CurFuncDecl && "No parent declaration for target region!");
5514 StringRef ParentName;
5515 // In case we have Ctors/Dtors we use the complete type variant to produce
5516 // the mangling of the device outlined kernel.
5517 if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
5518 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
5519 else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
5520 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
5521 else
5522 ParentName =
5523 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
5524
5525 // Emit target region as a standalone region.
5526 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
5527 IsOffloadEntry, CodeGen);
5528 OMPLexicalScope Scope(CGF, S, OMPD_task);
5529 auto &&SizeEmitter =
5530 [IsOffloadEntry](CodeGenFunction &CGF,
5531 const OMPLoopDirective &D) -> llvm::Value * {
5532 if (IsOffloadEntry) {
5533 OMPLoopScope(CGF, D);
5534 // Emit calculation of the iterations count.
5535 llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
5536 NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
5537 /*isSigned=*/false);
5538 return NumIterations;
5539 }
5540 return nullptr;
5541 };
5542 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
5543 SizeEmitter);
5544 }
5545
emitTargetRegion(CodeGenFunction & CGF,const OMPTargetDirective & S,PrePostActionTy & Action)5546 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
5547 PrePostActionTy &Action) {
5548 Action.Enter(CGF);
5549 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5550 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
5551 CGF.EmitOMPPrivateClause(S, PrivateScope);
5552 (void)PrivateScope.Privatize();
5553 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5554 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
5555
5556 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
5557 }
5558
EmitOMPTargetDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetDirective & S)5559 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
5560 StringRef ParentName,
5561 const OMPTargetDirective &S) {
5562 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5563 emitTargetRegion(CGF, S, Action);
5564 };
5565 llvm::Function *Fn;
5566 llvm::Constant *Addr;
5567 // Emit target region as a standalone region.
5568 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5569 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5570 assert(Fn && Addr && "Target device function emission failed.");
5571 }
5572
EmitOMPTargetDirective(const OMPTargetDirective & S)5573 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
5574 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5575 emitTargetRegion(CGF, S, Action);
5576 };
5577 emitCommonOMPTargetDirective(*this, S, CodeGen);
5578 }
5579
emitCommonOMPTeamsDirective(CodeGenFunction & CGF,const OMPExecutableDirective & S,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)5580 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
5581 const OMPExecutableDirective &S,
5582 OpenMPDirectiveKind InnermostKind,
5583 const RegionCodeGenTy &CodeGen) {
5584 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
5585 llvm::Function *OutlinedFn =
5586 CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
5587 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
5588
5589 const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
5590 const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
5591 if (NT || TL) {
5592 const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
5593 const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
5594
5595 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
5596 S.getBeginLoc());
5597 }
5598
5599 OMPTeamsScope Scope(CGF, S);
5600 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5601 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
5602 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
5603 CapturedVars);
5604 }
5605
EmitOMPTeamsDirective(const OMPTeamsDirective & S)5606 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
5607 // Emit teams region as a standalone region.
5608 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5609 Action.Enter(CGF);
5610 OMPPrivateScope PrivateScope(CGF);
5611 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
5612 CGF.EmitOMPPrivateClause(S, PrivateScope);
5613 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5614 (void)PrivateScope.Privatize();
5615 CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
5616 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5617 };
5618 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
5619 emitPostUpdateForReductionClause(*this, S,
5620 [](CodeGenFunction &) { return nullptr; });
5621 }
5622
emitTargetTeamsRegion(CodeGenFunction & CGF,PrePostActionTy & Action,const OMPTargetTeamsDirective & S)5623 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
5624 const OMPTargetTeamsDirective &S) {
5625 auto *CS = S.getCapturedStmt(OMPD_teams);
5626 Action.Enter(CGF);
5627 // Emit teams region as a standalone region.
5628 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
5629 Action.Enter(CGF);
5630 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5631 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
5632 CGF.EmitOMPPrivateClause(S, PrivateScope);
5633 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5634 (void)PrivateScope.Privatize();
5635 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5636 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
5637 CGF.EmitStmt(CS->getCapturedStmt());
5638 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5639 };
5640 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
5641 emitPostUpdateForReductionClause(CGF, S,
5642 [](CodeGenFunction &) { return nullptr; });
5643 }
5644
EmitOMPTargetTeamsDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDirective & S)5645 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
5646 CodeGenModule &CGM, StringRef ParentName,
5647 const OMPTargetTeamsDirective &S) {
5648 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5649 emitTargetTeamsRegion(CGF, Action, S);
5650 };
5651 llvm::Function *Fn;
5652 llvm::Constant *Addr;
5653 // Emit target region as a standalone region.
5654 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5655 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5656 assert(Fn && Addr && "Target device function emission failed.");
5657 }
5658
EmitOMPTargetTeamsDirective(const OMPTargetTeamsDirective & S)5659 void CodeGenFunction::EmitOMPTargetTeamsDirective(
5660 const OMPTargetTeamsDirective &S) {
5661 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5662 emitTargetTeamsRegion(CGF, Action, S);
5663 };
5664 emitCommonOMPTargetDirective(*this, S, CodeGen);
5665 }
5666
5667 static void
emitTargetTeamsDistributeRegion(CodeGenFunction & CGF,PrePostActionTy & Action,const OMPTargetTeamsDistributeDirective & S)5668 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
5669 const OMPTargetTeamsDistributeDirective &S) {
5670 Action.Enter(CGF);
5671 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5672 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5673 };
5674
5675 // Emit teams region as a standalone region.
5676 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5677 PrePostActionTy &Action) {
5678 Action.Enter(CGF);
5679 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5680 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5681 (void)PrivateScope.Privatize();
5682 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5683 CodeGenDistribute);
5684 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5685 };
5686 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
5687 emitPostUpdateForReductionClause(CGF, S,
5688 [](CodeGenFunction &) { return nullptr; });
5689 }
5690
EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeDirective & S)5691 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
5692 CodeGenModule &CGM, StringRef ParentName,
5693 const OMPTargetTeamsDistributeDirective &S) {
5694 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5695 emitTargetTeamsDistributeRegion(CGF, Action, S);
5696 };
5697 llvm::Function *Fn;
5698 llvm::Constant *Addr;
5699 // Emit target region as a standalone region.
5700 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5701 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5702 assert(Fn && Addr && "Target device function emission failed.");
5703 }
5704
EmitOMPTargetTeamsDistributeDirective(const OMPTargetTeamsDistributeDirective & S)5705 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
5706 const OMPTargetTeamsDistributeDirective &S) {
5707 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5708 emitTargetTeamsDistributeRegion(CGF, Action, S);
5709 };
5710 emitCommonOMPTargetDirective(*this, S, CodeGen);
5711 }
5712
emitTargetTeamsDistributeSimdRegion(CodeGenFunction & CGF,PrePostActionTy & Action,const OMPTargetTeamsDistributeSimdDirective & S)5713 static void emitTargetTeamsDistributeSimdRegion(
5714 CodeGenFunction &CGF, PrePostActionTy &Action,
5715 const OMPTargetTeamsDistributeSimdDirective &S) {
5716 Action.Enter(CGF);
5717 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5718 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5719 };
5720
5721 // Emit teams region as a standalone region.
5722 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5723 PrePostActionTy &Action) {
5724 Action.Enter(CGF);
5725 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5726 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5727 (void)PrivateScope.Privatize();
5728 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5729 CodeGenDistribute);
5730 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5731 };
5732 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
5733 emitPostUpdateForReductionClause(CGF, S,
5734 [](CodeGenFunction &) { return nullptr; });
5735 }
5736
EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeSimdDirective & S)5737 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
5738 CodeGenModule &CGM, StringRef ParentName,
5739 const OMPTargetTeamsDistributeSimdDirective &S) {
5740 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5741 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
5742 };
5743 llvm::Function *Fn;
5744 llvm::Constant *Addr;
5745 // Emit target region as a standalone region.
5746 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5747 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5748 assert(Fn && Addr && "Target device function emission failed.");
5749 }
5750
EmitOMPTargetTeamsDistributeSimdDirective(const OMPTargetTeamsDistributeSimdDirective & S)5751 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
5752 const OMPTargetTeamsDistributeSimdDirective &S) {
5753 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5754 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
5755 };
5756 emitCommonOMPTargetDirective(*this, S, CodeGen);
5757 }
5758
EmitOMPTeamsDistributeDirective(const OMPTeamsDistributeDirective & S)5759 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
5760 const OMPTeamsDistributeDirective &S) {
5761
5762 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5763 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5764 };
5765
5766 // Emit teams region as a standalone region.
5767 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5768 PrePostActionTy &Action) {
5769 Action.Enter(CGF);
5770 OMPPrivateScope PrivateScope(CGF);
5771 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5772 (void)PrivateScope.Privatize();
5773 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5774 CodeGenDistribute);
5775 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5776 };
5777 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
5778 emitPostUpdateForReductionClause(*this, S,
5779 [](CodeGenFunction &) { return nullptr; });
5780 }
5781
EmitOMPTeamsDistributeSimdDirective(const OMPTeamsDistributeSimdDirective & S)5782 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
5783 const OMPTeamsDistributeSimdDirective &S) {
5784 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5785 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5786 };
5787
5788 // Emit teams region as a standalone region.
5789 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5790 PrePostActionTy &Action) {
5791 Action.Enter(CGF);
5792 OMPPrivateScope PrivateScope(CGF);
5793 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5794 (void)PrivateScope.Privatize();
5795 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
5796 CodeGenDistribute);
5797 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5798 };
5799 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
5800 emitPostUpdateForReductionClause(*this, S,
5801 [](CodeGenFunction &) { return nullptr; });
5802 }
5803
EmitOMPTeamsDistributeParallelForDirective(const OMPTeamsDistributeParallelForDirective & S)5804 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
5805 const OMPTeamsDistributeParallelForDirective &S) {
5806 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5807 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5808 S.getDistInc());
5809 };
5810
5811 // Emit teams region as a standalone region.
5812 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5813 PrePostActionTy &Action) {
5814 Action.Enter(CGF);
5815 OMPPrivateScope PrivateScope(CGF);
5816 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5817 (void)PrivateScope.Privatize();
5818 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5819 CodeGenDistribute);
5820 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5821 };
5822 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
5823 emitPostUpdateForReductionClause(*this, S,
5824 [](CodeGenFunction &) { return nullptr; });
5825 }
5826
EmitOMPTeamsDistributeParallelForSimdDirective(const OMPTeamsDistributeParallelForSimdDirective & S)5827 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
5828 const OMPTeamsDistributeParallelForSimdDirective &S) {
5829 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5830 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5831 S.getDistInc());
5832 };
5833
5834 // Emit teams region as a standalone region.
5835 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5836 PrePostActionTy &Action) {
5837 Action.Enter(CGF);
5838 OMPPrivateScope PrivateScope(CGF);
5839 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5840 (void)PrivateScope.Privatize();
5841 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
5842 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
5843 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5844 };
5845 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
5846 CodeGen);
5847 emitPostUpdateForReductionClause(*this, S,
5848 [](CodeGenFunction &) { return nullptr; });
5849 }
5850
emitTargetTeamsDistributeParallelForRegion(CodeGenFunction & CGF,const OMPTargetTeamsDistributeParallelForDirective & S,PrePostActionTy & Action)5851 static void emitTargetTeamsDistributeParallelForRegion(
5852 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
5853 PrePostActionTy &Action) {
5854 Action.Enter(CGF);
5855 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5856 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5857 S.getDistInc());
5858 };
5859
5860 // Emit teams region as a standalone region.
5861 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5862 PrePostActionTy &Action) {
5863 Action.Enter(CGF);
5864 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5865 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5866 (void)PrivateScope.Privatize();
5867 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
5868 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
5869 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5870 };
5871
5872 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
5873 CodeGenTeams);
5874 emitPostUpdateForReductionClause(CGF, S,
5875 [](CodeGenFunction &) { return nullptr; });
5876 }
5877
EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeParallelForDirective & S)5878 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
5879 CodeGenModule &CGM, StringRef ParentName,
5880 const OMPTargetTeamsDistributeParallelForDirective &S) {
5881 // Emit SPMD target teams distribute parallel for region as a standalone
5882 // region.
5883 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5884 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
5885 };
5886 llvm::Function *Fn;
5887 llvm::Constant *Addr;
5888 // Emit target region as a standalone region.
5889 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5890 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5891 assert(Fn && Addr && "Target device function emission failed.");
5892 }
5893
EmitOMPTargetTeamsDistributeParallelForDirective(const OMPTargetTeamsDistributeParallelForDirective & S)5894 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
5895 const OMPTargetTeamsDistributeParallelForDirective &S) {
5896 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5897 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
5898 };
5899 emitCommonOMPTargetDirective(*this, S, CodeGen);
5900 }
5901
emitTargetTeamsDistributeParallelForSimdRegion(CodeGenFunction & CGF,const OMPTargetTeamsDistributeParallelForSimdDirective & S,PrePostActionTy & Action)5902 static void emitTargetTeamsDistributeParallelForSimdRegion(
5903 CodeGenFunction &CGF,
5904 const OMPTargetTeamsDistributeParallelForSimdDirective &S,
5905 PrePostActionTy &Action) {
5906 Action.Enter(CGF);
5907 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5908 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5909 S.getDistInc());
5910 };
5911
5912 // Emit teams region as a standalone region.
5913 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5914 PrePostActionTy &Action) {
5915 Action.Enter(CGF);
5916 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5917 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5918 (void)PrivateScope.Privatize();
5919 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
5920 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
5921 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5922 };
5923
5924 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
5925 CodeGenTeams);
5926 emitPostUpdateForReductionClause(CGF, S,
5927 [](CodeGenFunction &) { return nullptr; });
5928 }
5929
EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeParallelForSimdDirective & S)5930 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
5931 CodeGenModule &CGM, StringRef ParentName,
5932 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
5933 // Emit SPMD target teams distribute parallel for simd region as a standalone
5934 // region.
5935 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5936 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
5937 };
5938 llvm::Function *Fn;
5939 llvm::Constant *Addr;
5940 // Emit target region as a standalone region.
5941 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5942 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5943 assert(Fn && Addr && "Target device function emission failed.");
5944 }
5945
EmitOMPTargetTeamsDistributeParallelForSimdDirective(const OMPTargetTeamsDistributeParallelForSimdDirective & S)5946 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
5947 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
5948 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5949 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
5950 };
5951 emitCommonOMPTargetDirective(*this, S, CodeGen);
5952 }
5953
EmitOMPCancellationPointDirective(const OMPCancellationPointDirective & S)5954 void CodeGenFunction::EmitOMPCancellationPointDirective(
5955 const OMPCancellationPointDirective &S) {
5956 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
5957 S.getCancelRegion());
5958 }
5959
EmitOMPCancelDirective(const OMPCancelDirective & S)5960 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
5961 const Expr *IfCond = nullptr;
5962 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5963 if (C->getNameModifier() == OMPD_unknown ||
5964 C->getNameModifier() == OMPD_cancel) {
5965 IfCond = C->getCondition();
5966 break;
5967 }
5968 }
5969 if (CGM.getLangOpts().OpenMPIRBuilder) {
5970 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5971 // TODO: This check is necessary as we only generate `omp parallel` through
5972 // the OpenMPIRBuilder for now.
5973 if (S.getCancelRegion() == OMPD_parallel) {
5974 llvm::Value *IfCondition = nullptr;
5975 if (IfCond)
5976 IfCondition = EmitScalarExpr(IfCond,
5977 /*IgnoreResultAssign=*/true);
5978 return Builder.restoreIP(
5979 OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
5980 }
5981 }
5982
5983 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
5984 S.getCancelRegion());
5985 }
5986
5987 CodeGenFunction::JumpDest
getOMPCancelDestination(OpenMPDirectiveKind Kind)5988 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
5989 if (Kind == OMPD_parallel || Kind == OMPD_task ||
5990 Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
5991 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
5992 return ReturnBlock;
5993 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
5994 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
5995 Kind == OMPD_distribute_parallel_for ||
5996 Kind == OMPD_target_parallel_for ||
5997 Kind == OMPD_teams_distribute_parallel_for ||
5998 Kind == OMPD_target_teams_distribute_parallel_for);
5999 return OMPCancelStack.getExitBlock();
6000 }
6001
EmitOMPUseDevicePtrClause(const OMPUseDevicePtrClause & C,OMPPrivateScope & PrivateScope,const llvm::DenseMap<const ValueDecl *,Address> & CaptureDeviceAddrMap)6002 void CodeGenFunction::EmitOMPUseDevicePtrClause(
6003 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
6004 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
6005 auto OrigVarIt = C.varlist_begin();
6006 auto InitIt = C.inits().begin();
6007 for (const Expr *PvtVarIt : C.private_copies()) {
6008 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
6009 const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
6010 const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
6011
6012 // In order to identify the right initializer we need to match the
6013 // declaration used by the mapping logic. In some cases we may get
6014 // OMPCapturedExprDecl that refers to the original declaration.
6015 const ValueDecl *MatchingVD = OrigVD;
6016 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
6017 // OMPCapturedExprDecl are used to privative fields of the current
6018 // structure.
6019 const auto *ME = cast<MemberExpr>(OED->getInit());
6020 assert(isa<CXXThisExpr>(ME->getBase()) &&
6021 "Base should be the current struct!");
6022 MatchingVD = ME->getMemberDecl();
6023 }
6024
6025 // If we don't have information about the current list item, move on to
6026 // the next one.
6027 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
6028 if (InitAddrIt == CaptureDeviceAddrMap.end())
6029 continue;
6030
6031 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
6032 InitAddrIt, InitVD,
6033 PvtVD]() {
6034 // Initialize the temporary initialization variable with the address we
6035 // get from the runtime library. We have to cast the source address
6036 // because it is always a void *. References are materialized in the
6037 // privatization scope, so the initialization here disregards the fact
6038 // the original variable is a reference.
6039 QualType AddrQTy =
6040 getContext().getPointerType(OrigVD->getType().getNonReferenceType());
6041 llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
6042 Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
6043 setAddrOfLocalVar(InitVD, InitAddr);
6044
6045 // Emit private declaration, it will be initialized by the value we
6046 // declaration we just added to the local declarations map.
6047 EmitDecl(*PvtVD);
6048
6049 // The initialization variables reached its purpose in the emission
6050 // of the previous declaration, so we don't need it anymore.
6051 LocalDeclMap.erase(InitVD);
6052
6053 // Return the address of the private variable.
6054 return GetAddrOfLocalVar(PvtVD);
6055 });
6056 assert(IsRegistered && "firstprivate var already registered as private");
6057 // Silence the warning about unused variable.
6058 (void)IsRegistered;
6059
6060 ++OrigVarIt;
6061 ++InitIt;
6062 }
6063 }
6064
getBaseDecl(const Expr * Ref)6065 static const VarDecl *getBaseDecl(const Expr *Ref) {
6066 const Expr *Base = Ref->IgnoreParenImpCasts();
6067 while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
6068 Base = OASE->getBase()->IgnoreParenImpCasts();
6069 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
6070 Base = ASE->getBase()->IgnoreParenImpCasts();
6071 return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
6072 }
6073
EmitOMPUseDeviceAddrClause(const OMPUseDeviceAddrClause & C,OMPPrivateScope & PrivateScope,const llvm::DenseMap<const ValueDecl *,Address> & CaptureDeviceAddrMap)6074 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
6075 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
6076 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
6077 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
6078 for (const Expr *Ref : C.varlists()) {
6079 const VarDecl *OrigVD = getBaseDecl(Ref);
6080 if (!Processed.insert(OrigVD).second)
6081 continue;
6082 // In order to identify the right initializer we need to match the
6083 // declaration used by the mapping logic. In some cases we may get
6084 // OMPCapturedExprDecl that refers to the original declaration.
6085 const ValueDecl *MatchingVD = OrigVD;
6086 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
6087 // OMPCapturedExprDecl are used to privative fields of the current
6088 // structure.
6089 const auto *ME = cast<MemberExpr>(OED->getInit());
6090 assert(isa<CXXThisExpr>(ME->getBase()) &&
6091 "Base should be the current struct!");
6092 MatchingVD = ME->getMemberDecl();
6093 }
6094
6095 // If we don't have information about the current list item, move on to
6096 // the next one.
6097 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
6098 if (InitAddrIt == CaptureDeviceAddrMap.end())
6099 continue;
6100
6101 Address PrivAddr = InitAddrIt->getSecond();
6102 // For declrefs and variable length array need to load the pointer for
6103 // correct mapping, since the pointer to the data was passed to the runtime.
6104 if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
6105 MatchingVD->getType()->isArrayType())
6106 PrivAddr =
6107 EmitLoadOfPointer(PrivAddr, getContext()
6108 .getPointerType(OrigVD->getType())
6109 ->castAs<PointerType>());
6110 llvm::Type *RealTy =
6111 ConvertTypeForMem(OrigVD->getType().getNonReferenceType())
6112 ->getPointerTo();
6113 PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy);
6114
6115 (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
6116 }
6117 }
6118
6119 // Generate the instructions for '#pragma omp target data' directive.
EmitOMPTargetDataDirective(const OMPTargetDataDirective & S)6120 void CodeGenFunction::EmitOMPTargetDataDirective(
6121 const OMPTargetDataDirective &S) {
6122 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
6123 /*SeparateBeginEndCalls=*/true);
6124
6125 // Create a pre/post action to signal the privatization of the device pointer.
6126 // This action can be replaced by the OpenMP runtime code generation to
6127 // deactivate privatization.
6128 bool PrivatizeDevicePointers = false;
6129 class DevicePointerPrivActionTy : public PrePostActionTy {
6130 bool &PrivatizeDevicePointers;
6131
6132 public:
6133 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
6134 : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
6135 void Enter(CodeGenFunction &CGF) override {
6136 PrivatizeDevicePointers = true;
6137 }
6138 };
6139 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
6140
6141 auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
6142 CodeGenFunction &CGF, PrePostActionTy &Action) {
6143 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6144 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
6145 };
6146
6147 // Codegen that selects whether to generate the privatization code or not.
6148 auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
6149 &InnermostCodeGen](CodeGenFunction &CGF,
6150 PrePostActionTy &Action) {
6151 RegionCodeGenTy RCG(InnermostCodeGen);
6152 PrivatizeDevicePointers = false;
6153
6154 // Call the pre-action to change the status of PrivatizeDevicePointers if
6155 // needed.
6156 Action.Enter(CGF);
6157
6158 if (PrivatizeDevicePointers) {
6159 OMPPrivateScope PrivateScope(CGF);
6160 // Emit all instances of the use_device_ptr clause.
6161 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
6162 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
6163 Info.CaptureDeviceAddrMap);
6164 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
6165 CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
6166 Info.CaptureDeviceAddrMap);
6167 (void)PrivateScope.Privatize();
6168 RCG(CGF);
6169 } else {
6170 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
6171 RCG(CGF);
6172 }
6173 };
6174
6175 // Forward the provided action to the privatization codegen.
6176 RegionCodeGenTy PrivRCG(PrivCodeGen);
6177 PrivRCG.setAction(Action);
6178
6179 // Notwithstanding the body of the region is emitted as inlined directive,
6180 // we don't use an inline scope as changes in the references inside the
6181 // region are expected to be visible outside, so we do not privative them.
6182 OMPLexicalScope Scope(CGF, S);
6183 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
6184 PrivRCG);
6185 };
6186
6187 RegionCodeGenTy RCG(CodeGen);
6188
6189 // If we don't have target devices, don't bother emitting the data mapping
6190 // code.
6191 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
6192 RCG(*this);
6193 return;
6194 }
6195
6196 // Check if we have any if clause associated with the directive.
6197 const Expr *IfCond = nullptr;
6198 if (const auto *C = S.getSingleClause<OMPIfClause>())
6199 IfCond = C->getCondition();
6200
6201 // Check if we have any device clause associated with the directive.
6202 const Expr *Device = nullptr;
6203 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6204 Device = C->getDevice();
6205
6206 // Set the action to signal privatization of device pointers.
6207 RCG.setAction(PrivAction);
6208
6209 // Emit region code.
6210 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
6211 Info);
6212 }
6213
EmitOMPTargetEnterDataDirective(const OMPTargetEnterDataDirective & S)6214 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
6215 const OMPTargetEnterDataDirective &S) {
6216 // If we don't have target devices, don't bother emitting the data mapping
6217 // code.
6218 if (CGM.getLangOpts().OMPTargetTriples.empty())
6219 return;
6220
6221 // Check if we have any if clause associated with the directive.
6222 const Expr *IfCond = nullptr;
6223 if (const auto *C = S.getSingleClause<OMPIfClause>())
6224 IfCond = C->getCondition();
6225
6226 // Check if we have any device clause associated with the directive.
6227 const Expr *Device = nullptr;
6228 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6229 Device = C->getDevice();
6230
6231 OMPLexicalScope Scope(*this, S, OMPD_task);
6232 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
6233 }
6234
EmitOMPTargetExitDataDirective(const OMPTargetExitDataDirective & S)6235 void CodeGenFunction::EmitOMPTargetExitDataDirective(
6236 const OMPTargetExitDataDirective &S) {
6237 // If we don't have target devices, don't bother emitting the data mapping
6238 // code.
6239 if (CGM.getLangOpts().OMPTargetTriples.empty())
6240 return;
6241
6242 // Check if we have any if clause associated with the directive.
6243 const Expr *IfCond = nullptr;
6244 if (const auto *C = S.getSingleClause<OMPIfClause>())
6245 IfCond = C->getCondition();
6246
6247 // Check if we have any device clause associated with the directive.
6248 const Expr *Device = nullptr;
6249 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6250 Device = C->getDevice();
6251
6252 OMPLexicalScope Scope(*this, S, OMPD_task);
6253 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
6254 }
6255
emitTargetParallelRegion(CodeGenFunction & CGF,const OMPTargetParallelDirective & S,PrePostActionTy & Action)6256 static void emitTargetParallelRegion(CodeGenFunction &CGF,
6257 const OMPTargetParallelDirective &S,
6258 PrePostActionTy &Action) {
6259 // Get the captured statement associated with the 'parallel' region.
6260 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
6261 Action.Enter(CGF);
6262 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
6263 Action.Enter(CGF);
6264 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6265 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6266 CGF.EmitOMPPrivateClause(S, PrivateScope);
6267 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6268 (void)PrivateScope.Privatize();
6269 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6270 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6271 // TODO: Add support for clauses.
6272 CGF.EmitStmt(CS->getCapturedStmt());
6273 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
6274 };
6275 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
6276 emitEmptyBoundParameters);
6277 emitPostUpdateForReductionClause(CGF, S,
6278 [](CodeGenFunction &) { return nullptr; });
6279 }
6280
EmitOMPTargetParallelDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetParallelDirective & S)6281 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
6282 CodeGenModule &CGM, StringRef ParentName,
6283 const OMPTargetParallelDirective &S) {
6284 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6285 emitTargetParallelRegion(CGF, S, Action);
6286 };
6287 llvm::Function *Fn;
6288 llvm::Constant *Addr;
6289 // Emit target region as a standalone region.
6290 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6291 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6292 assert(Fn && Addr && "Target device function emission failed.");
6293 }
6294
EmitOMPTargetParallelDirective(const OMPTargetParallelDirective & S)6295 void CodeGenFunction::EmitOMPTargetParallelDirective(
6296 const OMPTargetParallelDirective &S) {
6297 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6298 emitTargetParallelRegion(CGF, S, Action);
6299 };
6300 emitCommonOMPTargetDirective(*this, S, CodeGen);
6301 }
6302
emitTargetParallelForRegion(CodeGenFunction & CGF,const OMPTargetParallelForDirective & S,PrePostActionTy & Action)6303 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
6304 const OMPTargetParallelForDirective &S,
6305 PrePostActionTy &Action) {
6306 Action.Enter(CGF);
6307 // Emit directive as a combined directive that consists of two implicit
6308 // directives: 'parallel' with 'for' directive.
6309 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6310 Action.Enter(CGF);
6311 CodeGenFunction::OMPCancelStackRAII CancelRegion(
6312 CGF, OMPD_target_parallel_for, S.hasCancel());
6313 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
6314 emitDispatchForLoopBounds);
6315 };
6316 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
6317 emitEmptyBoundParameters);
6318 }
6319
EmitOMPTargetParallelForDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetParallelForDirective & S)6320 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
6321 CodeGenModule &CGM, StringRef ParentName,
6322 const OMPTargetParallelForDirective &S) {
6323 // Emit SPMD target parallel for region as a standalone region.
6324 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6325 emitTargetParallelForRegion(CGF, S, Action);
6326 };
6327 llvm::Function *Fn;
6328 llvm::Constant *Addr;
6329 // Emit target region as a standalone region.
6330 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6331 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6332 assert(Fn && Addr && "Target device function emission failed.");
6333 }
6334
EmitOMPTargetParallelForDirective(const OMPTargetParallelForDirective & S)6335 void CodeGenFunction::EmitOMPTargetParallelForDirective(
6336 const OMPTargetParallelForDirective &S) {
6337 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6338 emitTargetParallelForRegion(CGF, S, Action);
6339 };
6340 emitCommonOMPTargetDirective(*this, S, CodeGen);
6341 }
6342
6343 static void
emitTargetParallelForSimdRegion(CodeGenFunction & CGF,const OMPTargetParallelForSimdDirective & S,PrePostActionTy & Action)6344 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
6345 const OMPTargetParallelForSimdDirective &S,
6346 PrePostActionTy &Action) {
6347 Action.Enter(CGF);
6348 // Emit directive as a combined directive that consists of two implicit
6349 // directives: 'parallel' with 'for' directive.
6350 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6351 Action.Enter(CGF);
6352 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
6353 emitDispatchForLoopBounds);
6354 };
6355 emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
6356 emitEmptyBoundParameters);
6357 }
6358
EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetParallelForSimdDirective & S)6359 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
6360 CodeGenModule &CGM, StringRef ParentName,
6361 const OMPTargetParallelForSimdDirective &S) {
6362 // Emit SPMD target parallel for region as a standalone region.
6363 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6364 emitTargetParallelForSimdRegion(CGF, S, Action);
6365 };
6366 llvm::Function *Fn;
6367 llvm::Constant *Addr;
6368 // Emit target region as a standalone region.
6369 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6370 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6371 assert(Fn && Addr && "Target device function emission failed.");
6372 }
6373
EmitOMPTargetParallelForSimdDirective(const OMPTargetParallelForSimdDirective & S)6374 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
6375 const OMPTargetParallelForSimdDirective &S) {
6376 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6377 emitTargetParallelForSimdRegion(CGF, S, Action);
6378 };
6379 emitCommonOMPTargetDirective(*this, S, CodeGen);
6380 }
6381
6382 /// Emit a helper variable and return corresponding lvalue.
mapParam(CodeGenFunction & CGF,const DeclRefExpr * Helper,const ImplicitParamDecl * PVD,CodeGenFunction::OMPPrivateScope & Privates)6383 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
6384 const ImplicitParamDecl *PVD,
6385 CodeGenFunction::OMPPrivateScope &Privates) {
6386 const auto *VDecl = cast<VarDecl>(Helper->getDecl());
6387 Privates.addPrivate(VDecl,
6388 [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
6389 }
6390
EmitOMPTaskLoopBasedDirective(const OMPLoopDirective & S)6391 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
6392 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
6393 // Emit outlined function for task construct.
6394 const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
6395 Address CapturedStruct = Address::invalid();
6396 {
6397 OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
6398 CapturedStruct = GenerateCapturedStmtArgument(*CS);
6399 }
6400 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
6401 const Expr *IfCond = nullptr;
6402 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6403 if (C->getNameModifier() == OMPD_unknown ||
6404 C->getNameModifier() == OMPD_taskloop) {
6405 IfCond = C->getCondition();
6406 break;
6407 }
6408 }
6409
6410 OMPTaskDataTy Data;
6411 // Check if taskloop must be emitted without taskgroup.
6412 Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
6413 // TODO: Check if we should emit tied or untied task.
6414 Data.Tied = true;
6415 // Set scheduling for taskloop
6416 if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
6417 // grainsize clause
6418 Data.Schedule.setInt(/*IntVal=*/false);
6419 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
6420 } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
6421 // num_tasks clause
6422 Data.Schedule.setInt(/*IntVal=*/true);
6423 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
6424 }
6425
6426 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
6427 // if (PreCond) {
6428 // for (IV in 0..LastIteration) BODY;
6429 // <Final counter/linear vars updates>;
6430 // }
6431 //
6432
6433 // Emit: if (PreCond) - begin.
6434 // If the condition constant folds and can be elided, avoid emitting the
6435 // whole loop.
6436 bool CondConstant;
6437 llvm::BasicBlock *ContBlock = nullptr;
6438 OMPLoopScope PreInitScope(CGF, S);
6439 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
6440 if (!CondConstant)
6441 return;
6442 } else {
6443 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
6444 ContBlock = CGF.createBasicBlock("taskloop.if.end");
6445 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
6446 CGF.getProfileCount(&S));
6447 CGF.EmitBlock(ThenBlock);
6448 CGF.incrementProfileCounter(&S);
6449 }
6450
6451 (void)CGF.EmitOMPLinearClauseInit(S);
6452
6453 OMPPrivateScope LoopScope(CGF);
6454 // Emit helper vars inits.
6455 enum { LowerBound = 5, UpperBound, Stride, LastIter };
6456 auto *I = CS->getCapturedDecl()->param_begin();
6457 auto *LBP = std::next(I, LowerBound);
6458 auto *UBP = std::next(I, UpperBound);
6459 auto *STP = std::next(I, Stride);
6460 auto *LIP = std::next(I, LastIter);
6461 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
6462 LoopScope);
6463 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
6464 LoopScope);
6465 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
6466 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
6467 LoopScope);
6468 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
6469 CGF.EmitOMPLinearClause(S, LoopScope);
6470 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
6471 (void)LoopScope.Privatize();
6472 // Emit the loop iteration variable.
6473 const Expr *IVExpr = S.getIterationVariable();
6474 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
6475 CGF.EmitVarDecl(*IVDecl);
6476 CGF.EmitIgnoredExpr(S.getInit());
6477
6478 // Emit the iterations count variable.
6479 // If it is not a variable, Sema decided to calculate iterations count on
6480 // each iteration (e.g., it is foldable into a constant).
6481 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
6482 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
6483 // Emit calculation of the iterations count.
6484 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
6485 }
6486
6487 {
6488 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
6489 emitCommonSimdLoop(
6490 CGF, S,
6491 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6492 if (isOpenMPSimdDirective(S.getDirectiveKind()))
6493 CGF.EmitOMPSimdInit(S);
6494 },
6495 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
6496 CGF.EmitOMPInnerLoop(
6497 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
6498 [&S](CodeGenFunction &CGF) {
6499 emitOMPLoopBodyWithStopPoint(CGF, S,
6500 CodeGenFunction::JumpDest());
6501 },
6502 [](CodeGenFunction &) {});
6503 });
6504 }
6505 // Emit: if (PreCond) - end.
6506 if (ContBlock) {
6507 CGF.EmitBranch(ContBlock);
6508 CGF.EmitBlock(ContBlock, true);
6509 }
6510 // Emit final copy of the lastprivate variables if IsLastIter != 0.
6511 if (HasLastprivateClause) {
6512 CGF.EmitOMPLastprivateClauseFinal(
6513 S, isOpenMPSimdDirective(S.getDirectiveKind()),
6514 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
6515 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
6516 (*LIP)->getType(), S.getBeginLoc())));
6517 }
6518 CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
6519 return CGF.Builder.CreateIsNotNull(
6520 CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
6521 (*LIP)->getType(), S.getBeginLoc()));
6522 });
6523 };
6524 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
6525 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
6526 const OMPTaskDataTy &Data) {
6527 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
6528 &Data](CodeGenFunction &CGF, PrePostActionTy &) {
6529 OMPLoopScope PreInitScope(CGF, S);
6530 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
6531 OutlinedFn, SharedsTy,
6532 CapturedStruct, IfCond, Data);
6533 };
6534 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
6535 CodeGen);
6536 };
6537 if (Data.Nogroup) {
6538 EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
6539 } else {
6540 CGM.getOpenMPRuntime().emitTaskgroupRegion(
6541 *this,
6542 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
6543 PrePostActionTy &Action) {
6544 Action.Enter(CGF);
6545 CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
6546 Data);
6547 },
6548 S.getBeginLoc());
6549 }
6550 }
6551
EmitOMPTaskLoopDirective(const OMPTaskLoopDirective & S)6552 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
6553 auto LPCRegion =
6554 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6555 EmitOMPTaskLoopBasedDirective(S);
6556 }
6557
EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective & S)6558 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
6559 const OMPTaskLoopSimdDirective &S) {
6560 auto LPCRegion =
6561 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6562 OMPLexicalScope Scope(*this, S);
6563 EmitOMPTaskLoopBasedDirective(S);
6564 }
6565
EmitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective & S)6566 void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
6567 const OMPMasterTaskLoopDirective &S) {
6568 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6569 Action.Enter(CGF);
6570 EmitOMPTaskLoopBasedDirective(S);
6571 };
6572 auto LPCRegion =
6573 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6574 OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
6575 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
6576 }
6577
EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective & S)6578 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
6579 const OMPMasterTaskLoopSimdDirective &S) {
6580 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6581 Action.Enter(CGF);
6582 EmitOMPTaskLoopBasedDirective(S);
6583 };
6584 auto LPCRegion =
6585 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6586 OMPLexicalScope Scope(*this, S);
6587 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
6588 }
6589
EmitOMPParallelMasterTaskLoopDirective(const OMPParallelMasterTaskLoopDirective & S)6590 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
6591 const OMPParallelMasterTaskLoopDirective &S) {
6592 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6593 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
6594 PrePostActionTy &Action) {
6595 Action.Enter(CGF);
6596 CGF.EmitOMPTaskLoopBasedDirective(S);
6597 };
6598 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
6599 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
6600 S.getBeginLoc());
6601 };
6602 auto LPCRegion =
6603 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6604 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
6605 emitEmptyBoundParameters);
6606 }
6607
EmitOMPParallelMasterTaskLoopSimdDirective(const OMPParallelMasterTaskLoopSimdDirective & S)6608 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
6609 const OMPParallelMasterTaskLoopSimdDirective &S) {
6610 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6611 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
6612 PrePostActionTy &Action) {
6613 Action.Enter(CGF);
6614 CGF.EmitOMPTaskLoopBasedDirective(S);
6615 };
6616 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
6617 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
6618 S.getBeginLoc());
6619 };
6620 auto LPCRegion =
6621 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6622 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
6623 emitEmptyBoundParameters);
6624 }
6625
6626 // Generate the instructions for '#pragma omp target update' directive.
EmitOMPTargetUpdateDirective(const OMPTargetUpdateDirective & S)6627 void CodeGenFunction::EmitOMPTargetUpdateDirective(
6628 const OMPTargetUpdateDirective &S) {
6629 // If we don't have target devices, don't bother emitting the data mapping
6630 // code.
6631 if (CGM.getLangOpts().OMPTargetTriples.empty())
6632 return;
6633
6634 // Check if we have any if clause associated with the directive.
6635 const Expr *IfCond = nullptr;
6636 if (const auto *C = S.getSingleClause<OMPIfClause>())
6637 IfCond = C->getCondition();
6638
6639 // Check if we have any device clause associated with the directive.
6640 const Expr *Device = nullptr;
6641 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6642 Device = C->getDevice();
6643
6644 OMPLexicalScope Scope(*this, S, OMPD_task);
6645 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
6646 }
6647
EmitSimpleOMPExecutableDirective(const OMPExecutableDirective & D)6648 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
6649 const OMPExecutableDirective &D) {
6650 if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
6651 EmitOMPScanDirective(*SD);
6652 return;
6653 }
6654 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
6655 return;
6656 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
6657 OMPPrivateScope GlobalsScope(CGF);
6658 if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
6659 // Capture global firstprivates to avoid crash.
6660 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
6661 for (const Expr *Ref : C->varlists()) {
6662 const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
6663 if (!DRE)
6664 continue;
6665 const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
6666 if (!VD || VD->hasLocalStorage())
6667 continue;
6668 if (!CGF.LocalDeclMap.count(VD)) {
6669 LValue GlobLVal = CGF.EmitLValue(Ref);
6670 GlobalsScope.addPrivate(
6671 VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
6672 }
6673 }
6674 }
6675 }
6676 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
6677 (void)GlobalsScope.Privatize();
6678 ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
6679 emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
6680 } else {
6681 if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
6682 for (const Expr *E : LD->counters()) {
6683 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
6684 if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
6685 LValue GlobLVal = CGF.EmitLValue(E);
6686 GlobalsScope.addPrivate(
6687 VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
6688 }
6689 if (isa<OMPCapturedExprDecl>(VD)) {
6690 // Emit only those that were not explicitly referenced in clauses.
6691 if (!CGF.LocalDeclMap.count(VD))
6692 CGF.EmitVarDecl(*VD);
6693 }
6694 }
6695 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
6696 if (!C->getNumForLoops())
6697 continue;
6698 for (unsigned I = LD->getCollapsedNumber(),
6699 E = C->getLoopNumIterations().size();
6700 I < E; ++I) {
6701 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
6702 cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
6703 // Emit only those that were not explicitly referenced in clauses.
6704 if (!CGF.LocalDeclMap.count(VD))
6705 CGF.EmitVarDecl(*VD);
6706 }
6707 }
6708 }
6709 }
6710 (void)GlobalsScope.Privatize();
6711 CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
6712 }
6713 };
6714 if (D.getDirectiveKind() == OMPD_atomic ||
6715 D.getDirectiveKind() == OMPD_critical ||
6716 D.getDirectiveKind() == OMPD_section ||
6717 D.getDirectiveKind() == OMPD_master) {
6718 EmitStmt(D.getAssociatedStmt());
6719 } else {
6720 auto LPCRegion =
6721 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
6722 OMPSimdLexicalScope Scope(*this, D);
6723 CGM.getOpenMPRuntime().emitInlinedDirective(
6724 *this,
6725 isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
6726 : D.getDirectiveKind(),
6727 CodeGen);
6728 }
6729 // Check for outer lastprivate conditional update.
6730 checkForLastprivateConditionalUpdate(*this, D);
6731 }
6732