//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
/// \brief Emits code for an OpenMP 'if' clause using the specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
///   CodeGen(true);
/// } else {
///   CodeGen(false);
/// }
static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                            const std::function<void(bool)> &CodeGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    CodeGen(CondConstant);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
  auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
  auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  CodeGen(/*ThenBlock*/ true);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  {
    // There is no need to emit a line number for the unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ElseBlock);
  }
  CodeGen(/*ThenBlock*/ false);
  {
    // There is no need to emit a line number for the unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBranch(ContBlock);
  }
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
}

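/// \brief Emits an element-by-element copy loop over an array. A sketch of
/// the generated structure (the actual IR advances the element pointers with
/// PHI nodes):
///   if (DestBegin != DestEnd) {
///     do {
///       CopyGen(DestElement, SrcElement);
///       ++DestElement; ++SrcElement;
///     } while (DestElement != DestEnd);
///   }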
void CodeGenFunction::EmitOMPAggregateAssign(
    llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(llvm::Value *, llvm::Value *)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  auto SrcBegin = SrcAddr;
  auto DestBegin = DestAddr;
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
  // Cast from pointer to array type to pointer to single element.
  SrcBegin = Builder.CreatePointerBitCastOrAddrSpaceCast(SrcBegin,
                                                         DestBegin->getType());
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);
  auto SrcElementCurrent =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementCurrent->addIncoming(SrcBegin, EntryBB);
  auto DestElementCurrent = Builder.CreatePHI(DestBegin->getType(), 2,
                                              "omp.arraycpy.destElementPast");
  DestElementCurrent->addIncoming(DestBegin, EntryBB);

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementCurrent, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementCurrent, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementCurrent->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementCurrent->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

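/// \brief Emits a copy from \a SrcAddr to \a DestAddr: a plain aggregate
/// assignment (memcpy) for simply assignable arrays, an element-by-element
/// loop for arrays whose copy is not a simple assignment, and otherwise the
/// \a Copy expression with \a SrcVD and \a DestVD remapped to the given
/// addresses.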
void CodeGenFunction::EmitOMPCopy(CodeGenFunction &CGF,
                                  QualType OriginalType, llvm::Value *DestAddr,
                                  llvm::Value *SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform a simple memcpy for simple copying.
      CGF.EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      CGF.EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [&CGF, Copy, SrcVD, DestVD](llvm::Value *DestElement,
                                      llvm::Value *SrcElement) {
            // We are working with a single array element, so we have to remap
            // the destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(CGF);
            Remap.addPrivate(DestVD, [DestElement]() -> llvm::Value *{
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> llvm::Value *{ return SrcElement; });
            (void)Remap.Privatize();
            CGF.EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap the pseudo source variable to the private copy.
    CodeGenFunction::OMPPrivateScope Remap(CGF);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> llvm::Value *{ return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> llvm::Value *{ return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    CGF.EmitIgnoredExpr(Copy);
  }
}

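/// \brief Emits private copies for 'firstprivate' variables, each initialized
/// from the original variable captured in the enclosing region (for example,
/// with '#pragma omp parallel firstprivate(a)' each thread gets its own 'a'
/// copy-initialized from the original 'a'). Returns true if any copies were
/// emitted.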
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  auto FirstprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_firstprivate;
  };
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           FirstprivateFilter)> I(D.clauses(), FirstprivateFilter);
       I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsFirstprivate.count(OrigVD) == 0) {
        EmittedAsFirstprivate.insert(OrigVD);
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        auto *OriginalAddr = EmitLValue(&DRE).getAddress();
        if (OrigVD->getType()->isArrayType()) {
          // Emit a VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform a simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  (*IRef)->getType());
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr,
                  (*IRef)->getType(),
                  [this, VDInit, Init](llvm::Value *DestElement,
                                       llvm::Value *SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for a single element.
                    LocalDeclMap[VDInit] = SrcElement;
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
            // Emit the private VarDecl with copy init. Remap the temp VDInit
            // variable to the address of the original variable (for proper
            // handling of captured global variables).
            LocalDeclMap[VDInit] = OriginalAddr;
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about an unused variable.
        (void)IsRegistered;
      }
      ++IRef, ++InitsRef;
    }
  }
  return !EmittedAsFirstprivate.empty();
}

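/// \brief Emits private copies for 'private' variables and registers them in
/// \a PrivateScope; each copy is emitted with the initializer Sema attached
/// to the private declaration (no copy from the original is performed).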
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_private;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPPrivateClause>(*I);
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
            // Emit the private VarDecl with its init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about an unused variable.
      (void)IsRegistered;
      ++IRef;
    }
  }
}

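/// \brief Emits copying for the 'copyin' clause: the master thread's values
/// of threadprivate variables are copied into the other threads' instances,
/// guarded by a check that the current thread is not the master. Returns true
/// if any copying was emitted.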
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  auto CopyinFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_copyin;
  };
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(CopyinFilter)>
           I(D.clauses(), CopyinFilter);
       I; ++I) {
    auto *C = cast<OMPCopyinClause>(*I);
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable.
        auto *MasterAddr = VD->isStaticLocal()
                               ? CGM.getStaticLocalDeclAddress(VD)
                               : CGM.GetAddrOfGlobal(VD);
        // Get the address of the threadprivate variable.
        auto *PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr, CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr, CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(*this, (*IRef)->getType(), PrivateAddr, MasterAddr, DestVD,
                    SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of the copying procedure for the non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

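/// \brief Emits initialization for the 'lastprivate' clause: captures the
/// address of each original variable for the final update and emits a private
/// copy for it (unless the variable is also firstprivate, in which case the
/// copy is emitted by the firstprivate codegen). Returns true if at least one
/// lastprivate variable was registered.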
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  auto LastprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_lastprivate;
  };
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           LastprivateFilter)> I(D.clauses(), LastprivateFilter);
       I; ++I) {
    auto *C = cast<OMPLastprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for the future update at the
      // end of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> llvm::Value *{
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (!IInit)
          continue;
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
              // Emit the private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "lastprivate var already registered as private");
        HasAtLeastOneLastprivate = HasAtLeastOneLastprivate || IsRegistered;
      }
      ++IRef, ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  auto *ThenBB = createBasicBlock(".omp.lastprivate.then");
  auto *DoneBB = createBasicBlock(".omp.lastprivate.done");
  Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
  EmitBlock(ThenBB);
  {
    auto LastprivateFilter = [](const OMPClause *C) -> bool {
      return C->getClauseKind() == OMPC_lastprivate;
    };
    llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
    for (OMPExecutableDirective::filtered_clause_iterator<decltype(
             LastprivateFilter)> I(D.clauses(), LastprivateFilter);
         I; ++I) {
      auto *C = cast<OMPLastprivateClause>(*I);
      auto IRef = C->varlist_begin();
      auto ISrcRef = C->source_exprs().begin();
      auto IDestRef = C->destination_exprs().begin();
      for (auto *AssignOp : C->assignment_ops()) {
        auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
        if (AlreadyEmittedVars.insert(PrivateVD->getCanonicalDecl()).second) {
          auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
          auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
          // Get the address of the original variable.
          auto *OriginalAddr = GetAddrOfLocalVar(DestVD);
          // Get the address of the private variable.
          auto *PrivateAddr = GetAddrOfLocalVar(PrivateVD);
          EmitOMPCopy(*this, (*IRef)->getType(), OriginalAddr, PrivateAddr,
                      DestVD, SrcVD, AssignOp);
        }
        ++IRef;
        ++ISrcRef;
        ++IDestRef;
      }
    }
  }
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

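/// \brief Emits private copies for 'reduction' variables. The LHS helper
/// variable is mapped to the address of the original, and the private copy is
/// emitted with the initializer Sema attached to it, which is the identity
/// value of the reduction operation (e.g. 0 for '+', 1 for '*').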
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto ReductionFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_reduction;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           ReductionFilter)> I(D.clauses(), ReductionFilter);
       I; ++I) {
    auto *C = cast<OMPReductionClause>(*I);
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (auto IRef : C->varlists()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value *{
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        IRef->getType(), VK_LValue, IRef->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
      // Emit the reduction copy.
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value *{
            // Emit the private VarDecl with the reduction init.
            EmitDecl(*PrivateVD);
            return GetAddrOfLocalVar(PrivateVD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about an unused variable.
      (void)IsRegistered;
      ++ILHS, ++IRHS;
    }
  }
}

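/// \brief Collects the reduction expressions from all 'reduction' clauses and
/// asks the runtime to emit the combined reduction at the end of the region;
/// the reduction is emitted in 'nowait' form if a 'nowait' clause is present
/// or the directive is a parallel directive (which has an implicit barrier).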
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  auto ReductionFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_reduction;
  };
  bool HasAtLeastOneReduction = false;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           ReductionFilter)> I(D.clauses(), ReductionFilter);
       I; ++I) {
    HasAtLeastOneReduction = true;
    auto *C = cast<OMPReductionClause>(*I);
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit a nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause(OMPC_nowait) ||
            isOpenMPParallelDirective(D.getDirectiveKind()));
  }
}

/// \brief Emits the runtime call that executes the outlined function of an
/// OpenMP 'parallel' directive, honoring any 'num_threads' clause.
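/// With libomp as the runtime, the emitted calls amount to, roughly (a
/// sketch, not the exact signatures):
///   __kmpc_push_num_threads(&loc, gtid, NumThreads); // if 'num_threads'
///   __kmpc_fork_call(&loc, <nargs>, OutlinedFn, CapturedStruct);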
static void emitOMPParallelCall(CodeGenFunction &CGF,
                                const OMPExecutableDirective &S,
                                llvm::Value *OutlinedFn,
                                llvm::Value *CapturedStruct) {
  if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedStruct);
}

static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), CodeGen);
  if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
    auto Cond = cast<OMPIfClause>(C)->getCondition();
    EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) {
      if (ThenBlock)
        emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
      else
        CGF.CGM.getOpenMPRuntime().emitSerialCall(CGF, S.getLocStart(),
                                                  OutlinedFn, CapturedStruct);
    });
  } else
    emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit the parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins || Firstprivates) {
      // Emit an implicit barrier to synchronize the threads and avoid data
      // races on the initialization of firstprivate variables and on the
      // propagation of the master thread's values of threadprivate variables
      // to the local instances in all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                                 OMPD_unknown);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit an implicit barrier at the end of the 'parallel' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_unknown);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

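/// \brief Emits one iteration of a loop directive's body: first the updates
/// of the loop counters and linear variables from the iteration variable,
/// then the body statement itself, with a 'continue' destination placed at
/// its end.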
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
                                      bool SeparateIter) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values for the current iteration.
  for (auto I : S.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
  // Emit the loop body.
  EmitStmt(S.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (SeparateIter) {
    // TODO: Update lastprivates if the SeparateIter flag is true.
    // This will be implemented in a follow-up OMPLastprivateClause patch, but
    // the result should still be correct without it, as we do not make these
    // variables private yet.
  }
}

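/// \brief Emits the inner loop of a loop directive. The generated control
/// flow is roughly:
///   omp.inner.for.cond: branch on LoopCond to body or end
///   omp.inner.for.body: BodyGen(*this)
///   omp.inner.for.inc:  IncExpr; branch back to cond
///   omp.inner.for.end: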
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  auto Cnt = getPGORegionCounter(&S);

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit the condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount());
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  Cnt.beginRegion(Builder);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
  auto IC = S.counters().begin();
  for (auto F : S.finals()) {
    if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  // Emit the final values of the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto F : C->finals()) {
      EmitIgnoredExpr(F);
    }
  }
}

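/// For example, '#pragma omp simd aligned(p : 64)' lets the compiler assume
/// inside the loop body that 'p' points to 64-byte aligned memory; the
/// assumption is emitted via EmitAlignmentAssumption below.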
static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
                                 const OMPAlignedClause &Clause) {
  unsigned ClauseAlignment = 0;
  if (auto AlignmentExpr = Clause.getAlignment()) {
    auto AlignmentCI =
        cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
    ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
  }
  for (auto E : Clause.varlists()) {
    unsigned Alignment = ClauseAlignment;
    if (Alignment == 0) {
      // OpenMP [2.8.1, Description]
      // If no optional parameter is specified, implementation-defined default
      // alignments for SIMD instructions on the target platforms are assumed.
      Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
          E->getType());
    }
    assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
           "alignment is not power of 2");
    if (Alignment != 0) {
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      CGF.EmitAlignmentAssumption(PtrValue, Alignment);
    }
  }
}

static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters) {
  for (auto *E : Counters) {
    auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
      CGF.EmitAutoVarCleanups(VarEmission);
      return VarEmission.getAllocatedAddress();
    });
    assert(IsRegistered && "counter already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
}

static void
EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (auto Clause : OMPExecutableDirective::linear_filter(D.clauses())) {
    for (auto *E : Clause->varlists()) {
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> llvm::Value * {
        // Emit var without initialization.
        auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
        CGF.EmitAutoVarCleanups(VarEmission);
        return VarEmission.getAllocatedAddress();
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
    }
  }
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // Codegen for the 'simd' pragma depends on the presence of 'lastprivate'.
    // If present, we have to separate the last iteration of the loop:
    //
    // if (LastIteration != 0) {
    //   for (IV in 0..LastIteration-1) BODY;
    //   BODY with updates of lastprivate vars;
    //   <Final counter/linear vars updates>;
    // }
    //
    // otherwise (when there's no lastprivate):
    //
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    //

    // Walk the clauses and process safelen/lastprivate.
    bool SeparateIter = false;
    CGF.LoopStack.setParallel();
    CGF.LoopStack.setVectorizerEnable(true);
    for (auto C : S.clauses()) {
      switch (C->getClauseKind()) {
      case OMPC_safelen: {
        RValue Len = CGF.EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                                     AggValueSlot::ignored(), true);
        llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
        CGF.LoopStack.setVectorizerWidth(Val->getZExtValue());
        // In the presence of a finite 'safelen', it may be unsafe to mark all
        // the memory instructions parallel, because loop-carried
        // dependences of 'safelen' iterations are possible.
        CGF.LoopStack.setParallel(false);
        break;
      }
      case OMPC_aligned:
        EmitOMPAlignedClause(CGF, CGF.CGM, cast<OMPAlignedClause>(*C));
        break;
      case OMPC_lastprivate:
        SeparateIter = true;
        break;
      default:
        // Not handled yet.
        break;
      }
    }

    // Emit the inits for the linear variables.
    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
      for (auto Init : C->inits()) {
        auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
        CGF.EmitVarDecl(*D);
      }
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
      if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
        if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
          CGF.EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
          // Emit calculation of the linear step.
          CGF.EmitIgnoredExpr(CS);
        }
    }

    if (SeparateIter) {
      // Emit: if (LastIteration > 0) - begin.
      RegionCounter Cnt = CGF.getPGORegionCounter(&S);
      auto ThenBlock = CGF.createBasicBlock("simd.if.then");
      auto ContBlock = CGF.createBasicBlock("simd.if.end");
      CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock,
                               Cnt.getCount());
      CGF.EmitBlock(ThenBlock);
      Cnt.beginRegion(CGF.Builder);
      // Emit the 'then' code.
      {
        OMPPrivateScope LoopScope(CGF);
        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
        EmitPrivateLinearVars(CGF, S, LoopScope);
        CGF.EmitOMPPrivateClause(S, LoopScope);
        (void)LoopScope.Privatize();
        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                             S.getCond(/*SeparateIter=*/true), S.getInc(),
                             [&S](CodeGenFunction &CGF) {
                               CGF.EmitOMPLoopBody(S);
                               CGF.EmitStopPoint(&S);
                             });
        CGF.EmitOMPLoopBody(S, /*SeparateIter=*/true);
      }
      CGF.EmitOMPSimdFinal(S);
      // Emit: if (LastIteration != 0) - end.
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    } else {
      {
        OMPPrivateScope LoopScope(CGF);
        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
        EmitPrivateLinearVars(CGF, S, LoopScope);
        CGF.EmitOMPPrivateClause(S, LoopScope);
        (void)LoopScope.Privatize();
        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                             S.getCond(/*SeparateIter=*/false), S.getInc(),
                             [&S](CodeGenFunction &CGF) {
                               CGF.EmitOMPLoopBody(S);
                               CGF.EmitStopPoint(&S);
                             });
      }
      CGF.EmitOMPSimdFinal(S);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool Dynamic = RT.isDynamic(ScheduleKind);

  assert(!RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");

  // Emit the outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  RT.emitForInit(
      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
      Chunk);

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!Dynamic) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond(false));
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
                                 LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in the case of a static schedule, we have already
  // calculated the new LB for the loop condition and emitted it above).
  if (Dynamic)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                   S.getCond(/*SeparateIter=*/false), S.getInc(),
                   [&S](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S);
                     CGF.EmitStopPoint(&S);
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!Dynamic) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  // FIXME: Also call fini for ordered loops with dynamic scheduling.
  if (!Dynamic)
    RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
}

/// \brief Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

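/// \brief Emits a worksharing loop ('omp for'). If the schedule is static and
/// non-chunked, each thread gets a single chunk and only the inner loop is
/// emitted; otherwise an outer loop is emitted that requests chunks from the
/// runtime. Returns true if the directive has a 'lastprivate' clause.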
bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check the pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("omp.precond.then");
    auto ContBlock = createBasicBlock("omp.precond.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit an implicit barrier to synchronize the threads and avoid data
        // races on the initialization of firstprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                               OMPD_unknown);
      }
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /*Chunked=*/Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
                       ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                         S.getCond(/*SeparateIter=*/false), S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S);
                           CGF.EmitStopPoint(&S);
                         });
        // Tell the runtime we are done.
        RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from the
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
      // Emit the final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    // We're now done with the loop, so jump to the continuation block.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
  return HasLastprivateClause;
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait) || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitScalarInit(Init, LVal);
  return LVal;
}

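/// \brief Emits an OpenMP 'sections' region. With more than one section, the
/// sections are lowered as a statically scheduled loop over a switch that
/// dispatches to the individual section bodies; with a single section, the
/// region is emitted as a 'single' region instead. Returns the directive kind
/// that was actually emitted so the caller can request the matching implicit
/// barrier.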
static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  if (CS && CS->size() > 1) {
    auto &&CodeGen = [&S, CS](CodeGenFunction &CGF) {
      auto &C = CGF.CGM.getContext();
      auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
      // Emit helper vars inits.
      LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                    CGF.Builder.getInt32(0));
      auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1);
      LValue UB =
          createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
      LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                    CGF.Builder.getInt32(1));
      LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                    CGF.Builder.getInt32(0));
      // Loop counter.
      LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
      OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
      OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
      // Generate condition for loop.
      BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                          OK_Ordinary, S.getLocStart(),
                          /*fpContractable=*/false);
      // Increment for loop counter.
      UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue,
                        OK_Ordinary, S.getLocStart());
      auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) {
        // Iterate through all sections and emit a switch construct:
        // switch (IV) {
        //   case 0:
        //     <SectionStmt[0]>;
        //     break;
        // ...
        //   case <NumSection> - 1:
        //     <SectionStmt[<NumSection> - 1]>;
        //     break;
        // }
        // .omp.sections.exit:
        auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
        auto *SwitchStmt = CGF.Builder.CreateSwitch(
            CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
            CS->size());
        unsigned CaseNumber = 0;
        for (auto C = CS->children(); C; ++C, ++CaseNumber) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(*C);
          CGF.EmitBranch(ExitBB);
        }
        CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
      };
      // Emit static non-chunked loop.
      CGF.CGM.getOpenMPRuntime().emitForInit(
          CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
          /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
          ST.getAddress());
      // UB = min(UB, GlobalUB);
      auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
      auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
          CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
      CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
      // IV = LB;
      CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
      // while (IV <= UB) { BODY; ++IV; }
      CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
      // Tell the runtime we are done.
      CGF.CGM.getOpenMPRuntime().emitForFinish(CGF, S.getLocStart(),
                                               OMPC_SCHEDULE_static);
    };

    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, CodeGen);
    return OMPD_sections;
  }
  // If only one section is found, there is no need to generate a loop; emit
  // it as a single region.
  auto &&CodeGen = [Stmt](CodeGenFunction &CGF) {
    CGF.EmitStmt(Stmt);
    CGF.EnsureInsertPoint();
  };
  CGF.CGM.getOpenMPRuntime().emitSingleRegion(CGF, CodeGen, S.getLocStart(),
                                              llvm::None, llvm::None,
                                              llvm::None, llvm::None);
  return OMPD_single;
}
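
// Illustrative sketch (not emitted verbatim): a region such as
//
//   #pragma omp sections
//   {
//     #pragma omp section
//     foo();
//     #pragma omp section
//     bar();
//   }
//
// becomes a statically scheduled worksharing loop over IV in [LB, UB] whose
// body is 'switch (IV) { case 0: foo(); break; case 1: bar(); break; }',
// bracketed by the runtime calls produced by emitForInit and emitForFinish
// (the __kmpc_for_static_init_4/__kmpc_for_static_fini entry points in the
// libomp runtime).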

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  OpenMPDirectiveKind EmittedAs = emitSections(*this, S);
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  auto CopyprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_copyprivate;
  };
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  typedef OMPExecutableDirective::filtered_clause_iterator<decltype(
      CopyprivateFilter)> CopyprivateIter;
  for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) {
    auto *C = cast<OMPCopyprivateClause>(*I);
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                          CopyprivateVars, DestExprs, SrcExprs,
                                          AssignmentOps);
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single);
  }
}
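
// Illustrative sketch (not emitted verbatim): for
//
//   #pragma omp single copyprivate(a)
//   a = init();
//
// one thread executes the region, and the value of 'a' is then broadcast to
// the other threads via the <destination> = <source> assignment expressions
// collected above (in the libomp runtime this is typically done through the
// __kmpc_copyprivate entry point invoked by emitSingleRegion).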

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitCriticalRegion(
      *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitOMPWorksharingLoop(S);
    // Emit the implicit barrier for the enclosing 'parallel' region here, at
    // the end of the 'for' directive, so that it also serves as the 'for'
    // directive's implicit barrier.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}
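
// Sketch of the combined lowering (illustrative only):
//
//   #pragma omp parallel for
//   for (int i = 0; i < n; ++i) body(i);
//
// becomes an outlined parallel function whose body runs the workshared loop
// and then hits a single joint barrier; emitting that barrier with
// OMPD_parallel avoids a second, redundant barrier for the 'for' part.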

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    (void)emitSections(CGF, S);
    // Emit an implicit barrier at the end of the parallel region.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  LexicalScope Scope(*this, S.getSourceRange());
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied tasks).
  auto &&CodeGen = [PartId, &S](CodeGenFunction &CGF) {
    if (*PartId) {
      // TODO: emit code for untied tasks.
    }
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  auto OutlinedFn =
      CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen);
  // Check if we should emit a tied or an untied task.
  bool Tied = !S.getSingleClause(OMPC_untied);
  // Check if the task is final.
  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
  if (auto *Clause = S.getSingleClause(OMPC_final)) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Final.setInt(CondConstant);
    else
      Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Final.setInt(/*IntVal=*/false);
  }
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
                                      OutlinedFn, SharedsTy, CapturedStruct);
}
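
// Illustrative sketch (not emitted verbatim): for
//
//   #pragma omp task final(n > 16)
//   work(n);
//
// the captured body is outlined into a task entry function; 'Final' then
// carries either the folded constant (when 'n > 16' is known at compile time)
// or the evaluated i1 condition, and emitTaskCall passes it on to the
// runtime's task-allocation call (__kmpc_omp_task_alloc in libomp).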

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
      auto FlushClause = cast<OMPFlushClause>(C);
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}
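
// Illustrative sketch: '#pragma omp flush(a, b)' passes the list 'a, b'
// through to emitFlush, while a bare '#pragma omp flush' passes an empty
// list; in the libomp runtime both end up as a __kmpc_flush call, which
// makes the thread's view of memory consistent.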

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal =
        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType);
  }
  return ComplexVal;
}
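
// For example (illustrative): converting the scalar RValue of an 'int' with
// value 3 to '_Complex float' produces the pair (3.0f, 0.0f); the scalar
// becomes the real part and the imaginary part is zero-filled.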

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(XLValue, Loc,
                                        IsSeqCst ? llvm::SequentiallyConsistent
                                                 : llvm::Monotonic,
                                        XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  switch (CGF.getEvaluationKind(V->getType())) {
  case TEK_Scalar:
    CGF.EmitStoreOfScalar(
        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
    break;
  case TEK_Complex:
    CGF.EmitStoreOfComplex(
        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
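
// Illustrative sketch: for
//
//   #pragma omp atomic read seq_cst
//   v = x;
//
// this emits an atomic load of 'x' (sequentially consistent because of
// 'seq_cst', monotonic otherwise), an implicit flush, and then a plain
// store of the converted value into 'v'.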

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  if (XLValue.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue);
  else
    CGF.EmitAtomicStore(ExprRValue, XLValue,
                        IsSeqCst ? llvm::SequentiallyConsistent
                                 : llvm::Monotonic,
                        XLValue.isVolatile(), /*IsInit=*/false);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
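
// Similarly (sketch): '#pragma omp atomic write' for 'x = expr;' first
// evaluates 'expr' non-atomically, then performs a single atomic store into
// 'x'; only the store itself is atomic, not the evaluation of the right-hand
// side.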

static bool emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update,
                             BinaryOperatorKind BO, llvm::AtomicOrdering AO,
                             bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress()->getType()->getPointerElementType())) ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return false;

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return false;
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    // There is no atomicrmw instruction for these operations.
    return false;
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Assign:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress()->getType()->getPointerElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
  return true;
}
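
// Mapping examples (illustrative): with IsXLHSInRHSPart, 'x = x + expr'
// maps to 'atomicrmw add', and 'x = x < expr ? x : expr' on a signed type
// maps to 'atomicrmw min'. 'x = expr - x' cannot use 'atomicrmw sub'
// (subtraction is not commutative, and the instruction always computes
// '*ptr - val'), which is why BO_Sub bails out when 'x' is on the RHS.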

void CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr binop x; -> expr binop xrval;
  if (!emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart)) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform the compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
}
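
// When the atomicrmw fast path is rejected (e.g. a floating-point 'x' or a
// non-simple lvalue), EmitAtomicUpdate falls back to a compare-and-swap
// loop, conceptually:
//
//   old = atomic load x;
//   do {
//     desired = CommonGen(old);   // 'old' binop 'expr' or 'expr' binop 'old'
//   } while (!atomic_compare_exchange(x, &old, desired));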

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr binop x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
        return CGF.EmitAnyExpr(UE);
      };
  CGF.EmitOMPAtomicSimpleUpdateExpr(XLValue, ExprRValue, BOUE->getOpcode(),
                                    IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
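
// Illustrative sketch: for
//
//   #pragma omp atomic update
//   x = expr - x;
//
// sema provides UE as 'opaque(expr) - opaque(x)' with IsXLHSInRHSPart ==
// false; the mappings above bind ExprRValue to the 'expr' opaque value and
// the freshly loaded value of 'x' to the other, so re-emitting UE computes
// the new value inside the compare-and-swap loop.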

static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, const Expr *X, const Expr *V,
                              const Expr *E, const Expr *UE,
                              bool IsXLHSInRHSPart, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    llvm_unreachable("CodeGen for 'omp atomic capture' is not supported yet.");
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_mergeable:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find first clause (skip seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
    enterFullExpression(EWC);

  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
                      S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart());
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}
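
// Clause selection sketch: '#pragma omp atomic read seq_cst' picks Kind ==
// OMPC_read with IsSeqCst == true, while a bare '#pragma omp atomic' leaves
// Kind == OMPC_unknown, which the switch in EmitOMPAtomicExpr treats as an
// update.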

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}