//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
///   CodeGen(true);
/// } else {
///   CodeGen(false);
/// }
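///
/// As an illustrative sketch (an assumed example, not from a test case), for
/// a directive such as
///   #pragma omp parallel if(N > 64)
/// CodeGen(true) would emit the real parallel call and CodeGen(false) the
/// serialized fallback; see emitCommonOMPParallelDirective below.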
static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                            const std::function<void(bool)> &CodeGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    CodeGen(CondConstant);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
  auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
  auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  CodeGen(/*ThenBlock*/ true);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  {
    // There is no need to emit a line number for the unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ElseBlock);
  }
  CodeGen(/*ThenBlock*/ false);
  {
    // There is no need to emit a line number for the unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBranch(ContBlock);
  }
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
}

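// A rough sketch of the control flow this helper emits (the names match the
// basic blocks created below; the per-element copy itself comes from the
// CopyGen callback):
//
//   if (DestBegin == DestEnd) goto omp.arraycpy.done;
//   omp.arraycpy.body:
//     <CopyGen(DestElement, SrcElement)>;
//     advance both element pointers; loop until DestEnd is reached;
//   omp.arraycpy.done: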
void CodeGenFunction::EmitOMPAggregateAssign(
    llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(llvm::Value *, llvm::Value *)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  auto SrcBegin = SrcAddr;
  auto DestBegin = DestAddr;
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
  // Cast from pointer to array type to pointer to single element.
  SrcBegin = Builder.CreatePointerBitCastOrAddrSpaceCast(SrcBegin,
                                                         DestBegin->getType());
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);
  auto SrcElementCurrent =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementCurrent->addIncoming(SrcBegin, EntryBB);
  auto DestElementCurrent = Builder.CreatePHI(DestBegin->getType(), 2,
                                              "omp.arraycpy.destElementPast");
  DestElementCurrent->addIncoming(DestBegin, EntryBB);

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementCurrent, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementCurrent, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementCurrent->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementCurrent->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

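// An illustrative example (assumed, not from a test case): for
//   struct S { S &operator=(const S &); };
//   S arr[10];
// named in a 'copyin' or 'lastprivate' clause, Copy is a call to operator=
// and the element-by-element path below is taken; for 'int arr[10]' the copy
// is a plain assignment and a single memcpy suffices.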
void CodeGenFunction::EmitOMPCopy(CodeGenFunction &CGF,
                                  QualType OriginalType, llvm::Value *DestAddr,
                                  llvm::Value *SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      CGF.EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      CGF.EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [&CGF, Copy, SrcVD, DestVD](llvm::Value *DestElement,
                                      llvm::Value *SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(CGF);
            Remap.addPrivate(DestVD, [DestElement]() -> llvm::Value *{
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> llvm::Value *{ return SrcElement; });
            (void)Remap.Privatize();
            CGF.EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(CGF);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> llvm::Value *{ return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> llvm::Value *{ return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    CGF.EmitIgnoredExpr(Copy);
  }
}

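// Illustrative source-level sketch (an assumed example) of what this handles:
//   int a = 0; int b[10];
//   #pragma omp parallel firstprivate(a, b)
// Each thread gets private copies of 'a' and 'b' initialized from the
// original variables; arrays whose elements need non-trivial construction
// are initialized element by element via EmitOMPAggregateAssign below.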
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  auto FirstprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_firstprivate;
  };
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           FirstprivateFilter)> I(D.clauses(), FirstprivateFilter);
       I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsFirstprivate.count(OrigVD) == 0) {
        EmittedAsFirstprivate.insert(OrigVD);
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        auto *OriginalAddr = EmitLValue(&DRE).getAddress();
        if (OrigVD->getType()->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  (*IRef)->getType());
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr,
                  (*IRef)->getType(),
                  [this, VDInit, Init](llvm::Value *DestElement,
                                       llvm::Value *SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    LocalDeclMap[VDInit] = SrcElement;
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            LocalDeclMap[VDInit] = OriginalAddr;
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef, ++InitsRef;
    }
  }
  return !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_private;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPPrivateClause>(*I);
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef;
    }
  }
}

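// Illustrative usage (an assumed example) of the pattern handled here:
//   int tp;
//   #pragma omp threadprivate(tp)
//   #pragma omp parallel copyin(tp)
// On entry to the parallel region every non-master thread copies the master
// thread's value of 'tp' into its own threadprivate instance, as sketched in
// the comments at the top of this function.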
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  auto CopyinFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_copyin;
  };
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(CopyinFilter)>
           I(D.clauses(), CopyinFilter);
       I; ++I) {
    auto *C = cast<OMPCopyinClause>(*I);
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable.
        auto *MasterAddr = VD->isStaticLocal()
                               ? CGM.getStaticLocalDeclAddress(VD)
                               : CGM.GetAddrOfGlobal(VD);
        // Get the address of the threadprivate variable.
        auto *PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If
          // it is, no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr, CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr, CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(*this, (*IRef)->getType(), PrivateAddr, MasterAddr, DestVD,
                    SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

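// Illustrative usage (an assumed example):
//   #pragma omp for lastprivate(x)
//   for (i = 0; i < n; ++i) x = f(i);
// A private copy of 'x' is set up by this function; the copy back to the
// original 'x' from the thread that executes the sequentially last iteration
// is emitted in EmitOMPLastprivateClauseFinal.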
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  auto LastprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_lastprivate;
  };
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           LastprivateFilter)> I(D.clauses(), LastprivateFilter);
       I; ++I) {
    auto *C = cast<OMPLastprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> llvm::Value *{
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated; initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (!IInit)
          continue;
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "lastprivate var already registered as private");
        HasAtLeastOneLastprivate = HasAtLeastOneLastprivate || IsRegistered;
      }
      ++IRef, ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  auto *ThenBB = createBasicBlock(".omp.lastprivate.then");
  auto *DoneBB = createBasicBlock(".omp.lastprivate.done");
  Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
  EmitBlock(ThenBB);
  {
    auto LastprivateFilter = [](const OMPClause *C) -> bool {
      return C->getClauseKind() == OMPC_lastprivate;
    };
    llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
    for (OMPExecutableDirective::filtered_clause_iterator<decltype(
             LastprivateFilter)> I(D.clauses(), LastprivateFilter);
         I; ++I) {
      auto *C = cast<OMPLastprivateClause>(*I);
      auto IRef = C->varlist_begin();
      auto ISrcRef = C->source_exprs().begin();
      auto IDestRef = C->destination_exprs().begin();
      for (auto *AssignOp : C->assignment_ops()) {
        auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
        if (AlreadyEmittedVars.insert(PrivateVD->getCanonicalDecl()).second) {
          auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
          auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
          // Get the address of the original variable.
          auto *OriginalAddr = GetAddrOfLocalVar(DestVD);
          // Get the address of the private variable.
          auto *PrivateAddr = GetAddrOfLocalVar(PrivateVD);
          EmitOMPCopy(*this, (*IRef)->getType(), OriginalAddr, PrivateAddr,
                      DestVD, SrcVD, AssignOp);
        }
        ++IRef;
        ++ISrcRef;
        ++IDestRef;
      }
    }
  }
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

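// Illustrative usage (an assumed example):
//   #pragma omp parallel reduction(+: sum)
// Sema provides three expression lists per reduction clause: the LHS helpers
// (remapped below to the original variables), the RHS helpers (remapped to
// the new private copies), and the reduction ops themselves; the combination
// step is emitted in EmitOMPReductionClauseFinal.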
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto ReductionFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_reduction;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           ReductionFilter)> I(D.clauses(), ReductionFilter);
       I; ++I) {
    auto *C = cast<OMPReductionClause>(*I);
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (auto IRef : C->varlists()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value *{
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        IRef->getType(), VK_LValue, IRef->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
      // Emit reduction copy.
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value *{
            // Emit private VarDecl with reduction init.
            EmitDecl(*PrivateVD);
            return GetAddrOfLocalVar(PrivateVD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++ILHS, ++IRHS;
    }
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  auto ReductionFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_reduction;
  };
  bool HasAtLeastOneReduction = false;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           ReductionFilter)> I(D.clauses(), ReductionFilter);
       I; ++I) {
    HasAtLeastOneReduction = true;
    auto *C = cast<OMPReductionClause>(*I);
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause(OMPC_nowait) ||
            isOpenMPParallelDirective(D.getDirectiveKind()));
  }
}

/// \brief Emits code for OpenMP parallel directive in the parallel region.
static void emitOMPParallelCall(CodeGenFunction &CGF,
                                const OMPExecutableDirective &S,
                                llvm::Value *OutlinedFn,
                                llvm::Value *CapturedStruct) {
  if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedStruct);
}

static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), CodeGen);
  if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
    auto Cond = cast<OMPIfClause>(C)->getCondition();
    EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) {
      if (ThenBlock)
        emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
      else
        CGF.CGM.getOpenMPRuntime().emitSerialCall(CGF, S.getLocStart(),
                                                  OutlinedFn, CapturedStruct);
    });
  } else
    emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins || Firstprivates) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables or on propagation of the
      // master thread's values of threadprivate variables to the local
      // instances of those variables in all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                                 OMPD_unknown);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit implicit barrier at the end of the 'parallel' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_unknown);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
                                      bool SeparateIter) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (auto I : S.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
  // Emit loop body.
  EmitStmt(S.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (SeparateIter) {
    // TODO: Update lastprivates if the SeparateIter flag is true.
    // This will be implemented in a follow-up OMPLastprivateClause patch, but
    // the result should still be correct without it, as we do not make these
    // variables private yet.
  }
}

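// The block structure emitted below, sketched as pseudo-code (the block
// names match the ones created in the body):
//
//   omp.inner.for.cond:
//     br LoopCond, omp.inner.for.body, omp.inner.for.end (or .cleanup)
//   omp.inner.for.body:
//     <BodyGen(CGF)>;
//   omp.inner.for.inc:
//     <IncExpr>; br omp.inner.for.cond
//   omp.inner.for.end: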
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  auto Cnt = getPGORegionCounter(&S);

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount());
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  Cnt.beginRegion(Builder);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
  auto IC = S.counters().begin();
  for (auto F : S.finals()) {
    if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  // Emit the final values of the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto F : C->finals()) {
      EmitIgnoredExpr(F);
    }
  }
}

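// Illustrative usage (an assumed example):
//   #pragma omp simd aligned(p : 32)
// emits an alignment assumption of 32 bytes for 'p'; when no explicit
// alignment is given, the target's default SIMD alignment for the pointee
// type is used instead.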
static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
                                 const OMPAlignedClause &Clause) {
  unsigned ClauseAlignment = 0;
  if (auto AlignmentExpr = Clause.getAlignment()) {
    auto AlignmentCI =
        cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
    ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
  }
  for (auto E : Clause.varlists()) {
    unsigned Alignment = ClauseAlignment;
    if (Alignment == 0) {
      // OpenMP [2.8.1, Description]
      // If no optional parameter is specified, implementation-defined default
      // alignments for SIMD instructions on the target platforms are assumed.
      Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
          E->getType());
    }
    assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
           "alignment is not power of 2");
    if (Alignment != 0) {
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      CGF.EmitAlignmentAssumption(PtrValue, Alignment);
    }
  }
}

static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters) {
  for (auto *E : Counters) {
    auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
      CGF.EmitAutoVarCleanups(VarEmission);
      return VarEmission.getAllocatedAddress();
    });
    assert(IsRegistered && "counter already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
}

static void
EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (auto Clause : OMPExecutableDirective::linear_filter(D.clauses())) {
    for (auto *E : Clause->varlists()) {
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> llvm::Value * {
        // Emit var without initialization.
        auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
        CGF.EmitAutoVarCleanups(VarEmission);
        return VarEmission.getAllocatedAddress();
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
    }
  }
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // Pragma 'simd' code depends on the presence of 'lastprivate'.
    // If present, we have to separate the last iteration of the loop:
    //
    // if (LastIteration != 0) {
    //   for (IV in 0..LastIteration-1) BODY;
    //   BODY with updates of lastprivate vars;
    //   <Final counter/linear vars updates>;
    // }
    //
    // otherwise (when there's no lastprivate):
    //
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    //

    // Walk clauses and process safelen/lastprivate.
    bool SeparateIter = false;
    CGF.LoopStack.setParallel();
    CGF.LoopStack.setVectorizerEnable(true);
    for (auto C : S.clauses()) {
      switch (C->getClauseKind()) {
      case OMPC_safelen: {
        RValue Len = CGF.EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                                     AggValueSlot::ignored(), true);
        llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
        CGF.LoopStack.setVectorizerWidth(Val->getZExtValue());
        // In presence of finite 'safelen', it may be unsafe to mark all
        // the memory instructions parallel, because loop-carried
        // dependences of 'safelen' iterations are possible.
        CGF.LoopStack.setParallel(false);
        break;
      }
      case OMPC_aligned:
        EmitOMPAlignedClause(CGF, CGF.CGM, cast<OMPAlignedClause>(*C));
        break;
      case OMPC_lastprivate:
        SeparateIter = true;
        break;
      default:
        // Not handled yet
        ;
      }
    }

    // Emit inits for the linear variables.
    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
      for (auto Init : C->inits()) {
        auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
        CGF.EmitVarDecl(*D);
      }
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
      if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
        if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
          CGF.EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
          // Emit calculation of the linear step.
          CGF.EmitIgnoredExpr(CS);
        }
    }

    if (SeparateIter) {
      // Emit: if (LastIteration > 0) - begin.
      RegionCounter Cnt = CGF.getPGORegionCounter(&S);
      auto ThenBlock = CGF.createBasicBlock("simd.if.then");
      auto ContBlock = CGF.createBasicBlock("simd.if.end");
      CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock,
                               Cnt.getCount());
      CGF.EmitBlock(ThenBlock);
      Cnt.beginRegion(CGF.Builder);
      // Emit 'then' code.
      {
        OMPPrivateScope LoopScope(CGF);
        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
        EmitPrivateLinearVars(CGF, S, LoopScope);
        CGF.EmitOMPPrivateClause(S, LoopScope);
        (void)LoopScope.Privatize();
        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                             S.getCond(/*SeparateIter=*/true), S.getInc(),
                             [&S](CodeGenFunction &CGF) {
                               CGF.EmitOMPLoopBody(S);
                               CGF.EmitStopPoint(&S);
                             });
        CGF.EmitOMPLoopBody(S, /* SeparateIter */ true);
      }
      CGF.EmitOMPSimdFinal(S);
      // Emit: if (LastIteration != 0) - end.
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    } else {
      {
        OMPPrivateScope LoopScope(CGF);
        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
        EmitPrivateLinearVars(CGF, S, LoopScope);
        CGF.EmitOMPPrivateClause(S, LoopScope);
        (void)LoopScope.Privatize();
        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                             S.getCond(/*SeparateIter=*/false), S.getInc(),
                             [&S](CodeGenFunction &CGF) {
                               CGF.EmitOMPLoopBody(S);
                               CGF.EmitStopPoint(&S);
                             });
      }
      CGF.EmitOMPSimdFinal(S);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}

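// Illustrative usage (an assumed example) of a loop that needs this outer
// loop:
//   #pragma omp for schedule(dynamic, 4)
//   for (i = 0; i < n; ++i) ...
// Static non-chunked schedules are handled without an outer loop in
// EmitOMPWorksharingLoop; everything else requests chunks from the runtime
// as sketched in the big comment inside this function.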
void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool Dynamic = RT.isDynamic(ScheduleKind);

  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  RT.emitForInit(
      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
      Chunk);

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!Dynamic) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond(false));
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                 IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (Dynamic)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                   S.getCond(/*SeparateIter=*/false), S.getInc(),
                   [&S](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S);
                     CGF.EmitStopPoint(&S);
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!Dynamic) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  // FIXME: Also call fini for ordered loops with dynamic scheduling.
  if (!Dynamic)
    RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
}

/// \brief Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

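// A sketch of the runtime protocol used below for the static non-chunked
// case, assuming the usual libomp entry points behind emitForInit and
// emitForFinish (the helper lvalues LB, UB, ST, IL come from
// EmitOMPHelperVar):
//
//   __kmpc_for_static_init(..., &IL, &LB, &UB, &ST, ...);
//   UB = min(UB, GlobalUB); IV = LB;
//   while (IV <= UB) { BODY; ++IV; }
//   __kmpc_for_static_fini(...);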
bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("omp.precond.then");
    auto ContBlock = createBasicBlock("omp.precond.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                               OMPD_unknown);
      }
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
                       ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                         S.getCond(/*SeparateIter=*/false), S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S);
                           CGF.EmitStopPoint(&S);
                         });
        // Tell the runtime we are done.
        RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    // We're now done with the loop, so jump to the continuation block.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
  return HasLastprivateClause;
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait) || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitScalarInit(Init, LVal);
  return LVal;
}

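// Illustrative usage (an assumed example) of the two shapes handled below:
//   #pragma omp sections
//   {
//     #pragma omp section
//     { ... }   // with more than one section: lowered as a static
//     #pragma omp section
//     { ... }   // worksharing loop over a switch on the section number
//   }
// A 'sections' region containing a single section is emitted as a 'single'
// region instead.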
static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  if (CS && CS->size() > 1) {
    auto &&CodeGen = [&S, CS](CodeGenFunction &CGF) {
      auto &C = CGF.CGM.getContext();
      auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
      // Emit helper vars inits.
      LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                    CGF.Builder.getInt32(0));
      auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1);
      LValue UB =
          createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
      LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                    CGF.Builder.getInt32(1));
      LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                    CGF.Builder.getInt32(0));
      // Loop counter.
      LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
      OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
      OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
      // Generate condition for loop.
      BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                          OK_Ordinary, S.getLocStart(),
                          /*fpContractable=*/false);
      // Increment for loop counter.
      UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue,
                        OK_Ordinary, S.getLocStart());
      auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) {
        // Iterate through all sections and emit a switch construct:
        // switch (IV) {
        //   case 0:
        //     <SectionStmt[0]>;
        //     break;
        // ...
        //   case <NumSection> - 1:
        //     <SectionStmt[<NumSection> - 1]>;
        //     break;
        // }
        // .omp.sections.exit:
        auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
        auto *SwitchStmt = CGF.Builder.CreateSwitch(
            CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
            CS->size());
        unsigned CaseNumber = 0;
        for (auto C = CS->children(); C; ++C, ++CaseNumber) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(*C);
          CGF.EmitBranch(ExitBB);
        }
        CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
      };
      // Emit static non-chunked loop.
      CGF.CGM.getOpenMPRuntime().emitForInit(
          CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
          /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
          ST.getAddress());
      // UB = min(UB, GlobalUB);
      auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
      auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
          CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
      CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
      // IV = LB;
      CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
      // while (idx <= UB) { BODY; ++idx; }
      CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
      // Tell the runtime we are done.
      CGF.CGM.getOpenMPRuntime().emitForFinish(CGF, S.getLocStart(),
                                               OMPC_SCHEDULE_static);
    };

    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, CodeGen);
    return OMPD_sections;
  }
  // If only one section is found, there is no need to generate a loop; emit
  // it as a single region.
  auto &&CodeGen = [Stmt](CodeGenFunction &CGF) {
    CGF.EmitStmt(Stmt);
    CGF.EnsureInsertPoint();
  };
  CGF.CGM.getOpenMPRuntime().emitSingleRegion(CGF, CodeGen, S.getLocStart(),
                                              llvm::None, llvm::None,
                                              llvm::None, llvm::None);
  return OMPD_single;
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  OpenMPDirectiveKind EmittedAs = emitSections(*this, S);
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}

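// Illustrative usage (an assumed example):
//   #pragma omp single copyprivate(x)
//   { x = init(); }
// One thread executes the region; the runtime then broadcasts its value of
// 'x' to the other threads using the <destination> = <source> helper
// expressions collected below.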
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this 'single'
  // construct.
  auto CopyprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_copyprivate;
  };
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  typedef OMPExecutableDirective::filtered_clause_iterator<decltype(
      CopyprivateFilter)> CopyprivateIter;
  for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) {
    auto *C = cast<OMPCopyprivateClause>(*I);
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                          CopyprivateVars, DestExprs, SrcExprs,
                                          AssignmentOps);
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single);
  }
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitCriticalRegion(
      *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitOMPWorksharingLoop(S);
    // Emit implicit barrier at the end of the parallel region. Since this
    // barrier is at the end of the 'for' directive, emit it as the implicit
    // barrier for this 'for' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    (void)emitSections(CGF, S);
    // Emit implicit barrier at the end of the parallel region.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

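// Illustrative usage (an assumed example):
//   #pragma omp task final(depth > 8) untied
//   { ... }
// The region is outlined into a task entry function and launched through the
// runtime's task-creation call; the 'untied' and 'final' clauses only affect
// the Tied/Final values passed along, as handled below.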
EmitOMPTaskDirective(const OMPTaskDirective & S)1312 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
1313 // Emit outlined function for task construct.
1314 LexicalScope Scope(*this, S.getSourceRange());
1315 auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
1316 auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
1317 auto *I = CS->getCapturedDecl()->param_begin();
1318 auto *PartId = std::next(I);
1319 // The first function argument for tasks is a thread id, the second one is a
1320 // part id (0 for tied tasks, >=0 for untied task).
1321 auto &&CodeGen = [PartId, &S](CodeGenFunction &CGF) {
1322 if (*PartId) {
1323 // TODO: emit code for untied tasks.
1324 }
1325 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1326 };
1327 auto OutlinedFn =
1328 CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen);
1329 // Check if we should emit tied or untied task.
1330 bool Tied = !S.getSingleClause(OMPC_untied);
1331 // Check if the task is final
  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
  if (auto *Clause = S.getSingleClause(OMPC_final)) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Final.setInt(CondConstant);
    else
      Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Final.setInt(/*IntVal=*/false);
  }
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
                                      OutlinedFn, SharedsTy, CapturedStruct);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
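  // If the directive carries a 'flush' pseudo-clause, flush only the listed
  // items; an empty list requests a flush of all thread-visible state.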
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
      auto FlushClause = cast<OMPFlushClause>(C);
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal =
        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
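    // Complex-to-complex: convert the real and imaginary parts separately to
    // the destination element type.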
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType);
  }
  return ComplexVal;
}

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
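  // An lvalue living in a global register cannot be accessed with atomic
  // instructions; fall back to a plain load in that case.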
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(XLValue, Loc,
                                        IsSeqCst ? llvm::SequentiallyConsistent
                                                 : llvm::Monotonic,
                                        XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  switch (CGF.getEvaluationKind(V->getType())) {
  case TEK_Scalar:
    CGF.EmitStoreOfScalar(
        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
    break;
  case TEK_Complex:
    CGF.EmitStoreOfComplex(
        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
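  // As with atomic read, a global-register lvalue cannot be stored with an
  // atomic instruction; use a plain register store instead.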
  if (XLValue.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue);
  else
    CGF.EmitAtomicStore(ExprRValue, XLValue,
                        IsSeqCst ? llvm::SequentiallyConsistent
                                 : llvm::Monotonic,
                        XLValue.isVolatile(), /*IsInit=*/false);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static bool emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update,
                             BinaryOperatorKind BO, llvm::AtomicOrdering AO,
                             bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress()->getType()->getPointerElementType())) ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return false;

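  // Map the source-level binary operator onto the matching atomicrmw
  // operation, where one exists.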
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return false;
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
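  // For '<' and '>' the choice between the min and max flavors of atomicrmw
  // depends on which side of the comparison 'x' appears on and on the
  // signedness of its type.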
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return false;
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Assign:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
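  // A constant update operand may be narrower or wider than 'x'; cast it to
  // the element type of 'x', respecting the signedness of 'x'.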
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress()->getType()->getPointerElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
  return true;
}

void CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr binop x; -> expr binop xrval;
  if (!emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart)) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Fall back to a compare-and-exchange sequence to perform the update
      // atomically.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
}

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr binop x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
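  // Re-emit the update expression with the opaque placeholders for 'x' and
  // 'expr' bound to the values computed above, so that 'expr' is evaluated
  // exactly once even if the atomic update has to be retried.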
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
        return CGF.EmitAnyExpr(UE);
      };
  CGF.EmitOMPAtomicSimpleUpdateExpr(XLValue, ExprRValue, BOUE->getOpcode(),
                                    IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, const Expr *X, const Expr *V,
                              const Expr *E, const Expr *UE,
                              bool IsXLHSInRHSPart, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    llvm_unreachable("CodeGen for 'omp atomic capture' is not supported yet.");
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_mergeable:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find first clause (skip seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
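  // If the associated statement is wrapped in cleanups, enter the
  // full-expression scope up front so the cleanups are active while the
  // atomic expression is emitted.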
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
    enterFullExpression(EWC);

  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
                      S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart());
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}