1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// \brief This file implements semantic analysis for CUDA constructs.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/ASTContext.h"
15 #include "clang/AST/Decl.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/Lex/Preprocessor.h"
18 #include "clang/Sema/Lookup.h"
19 #include "clang/Sema/Sema.h"
20 #include "clang/Sema/SemaDiagnostic.h"
21 #include "clang/Sema/Template.h"
22 #include "llvm/ADT/Optional.h"
23 #include "llvm/ADT/SmallVector.h"
24 using namespace clang;
25 
ActOnCUDAExecConfigExpr(Scope * S,SourceLocation LLLLoc,MultiExprArg ExecConfig,SourceLocation GGGLoc)26 ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
27                                          MultiExprArg ExecConfig,
28                                          SourceLocation GGGLoc) {
29   FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
30   if (!ConfigDecl)
31     return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
32                      << "cudaConfigureCall");
33   QualType ConfigQTy = ConfigDecl->getType();
34 
35   DeclRefExpr *ConfigDR = new (Context)
36       DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
37   MarkFunctionReferenced(LLLLoc, ConfigDecl);
38 
39   return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
40                        /*IsExecConfig=*/true);
41 }
42 
43 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
IdentifyCUDATarget(const FunctionDecl * D)44 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
45   if (D->hasAttr<CUDAInvalidTargetAttr>())
46     return CFT_InvalidTarget;
47 
48   if (D->hasAttr<CUDAGlobalAttr>())
49     return CFT_Global;
50 
51   if (D->hasAttr<CUDADeviceAttr>()) {
52     if (D->hasAttr<CUDAHostAttr>())
53       return CFT_HostDevice;
54     return CFT_Device;
55   } else if (D->hasAttr<CUDAHostAttr>()) {
56     return CFT_Host;
57   } else if (D->isImplicit()) {
58     // Some implicit declarations (like intrinsic functions) are not marked.
59     // Set the most lenient target on them for maximal flexibility.
60     return CFT_HostDevice;
61   }
62 
63   return CFT_Host;
64 }
65 
66 // * CUDA Call preference table
67 //
68 // F - from,
69 // T - to
70 // Ph - preference in host mode
71 // Pd - preference in device mode
72 // H  - handled in (x)
73 // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
74 //
75 // | F  | T  | Ph  | Pd  |  H  |
76 // |----+----+-----+-----+-----+
77 // | d  | d  | N   | N   | (c) |
78 // | d  | g  | --  | --  | (a) |
79 // | d  | h  | --  | --  | (e) |
80 // | d  | hd | HD  | HD  | (b) |
81 // | g  | d  | N   | N   | (c) |
82 // | g  | g  | --  | --  | (a) |
83 // | g  | h  | --  | --  | (e) |
84 // | g  | hd | HD  | HD  | (b) |
85 // | h  | d  | --  | --  | (e) |
86 // | h  | g  | N   | N   | (c) |
87 // | h  | h  | N   | N   | (c) |
88 // | h  | hd | HD  | HD  | (b) |
89 // | hd | d  | WS  | SS  | (d) |
90 // | hd | g  | SS  | --  |(d/a)|
91 // | hd | h  | SS  | WS  | (d) |
92 // | hd | hd | HD  | HD  | (b) |
93 
94 Sema::CUDAFunctionPreference
IdentifyCUDAPreference(const FunctionDecl * Caller,const FunctionDecl * Callee)95 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
96                              const FunctionDecl *Callee) {
97   assert(Callee && "Callee must be valid.");
98   CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
99   CUDAFunctionTarget CallerTarget =
100       (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;
101 
102   // If one of the targets is invalid, the check always fails, no matter what
103   // the other target is.
104   if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
105     return CFP_Never;
106 
107   // (a) Can't call global from some contexts until we support CUDA's
108   // dynamic parallelism.
109   if (CalleeTarget == CFT_Global &&
110       (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
111        (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
112     return CFP_Never;
113 
114   // (b) Calling HostDevice is OK for everyone.
115   if (CalleeTarget == CFT_HostDevice)
116     return CFP_HostDevice;
117 
118   // (c) Best case scenarios
119   if (CalleeTarget == CallerTarget ||
120       (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
121       (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
122     return CFP_Native;
123 
124   // (d) HostDevice behavior depends on compilation mode.
125   if (CallerTarget == CFT_HostDevice) {
126     // It's OK to call a compilation-mode matching function from an HD one.
127     if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
128         (!getLangOpts().CUDAIsDevice &&
129          (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
130       return CFP_SameSide;
131 
132     // Calls from HD to non-mode-matching functions (i.e., to host functions
133     // when compiling in device mode or to device functions when compiling in
134     // host mode) are allowed at the sema level, but eventually rejected if
135     // they're ever codegened.  TODO: Reject said calls earlier.
136     return CFP_WrongSide;
137   }
138 
139   // (e) Calling across device/host boundary is not something you should do.
140   if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
141       (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
142       (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
143     return CFP_Never;
144 
145   llvm_unreachable("All cases should've been handled by now.");
146 }
147 
148 template <typename T>
EraseUnwantedCUDAMatchesImpl(Sema & S,const FunctionDecl * Caller,llvm::SmallVectorImpl<T> & Matches,std::function<const FunctionDecl * (const T &)> FetchDecl)149 static void EraseUnwantedCUDAMatchesImpl(
150     Sema &S, const FunctionDecl *Caller, llvm::SmallVectorImpl<T> &Matches,
151     std::function<const FunctionDecl *(const T &)> FetchDecl) {
152   if (Matches.size() <= 1)
153     return;
154 
155   // Gets the CUDA function preference for a call from Caller to Match.
156   auto GetCFP = [&](const T &Match) {
157     return S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
158   };
159 
160   // Find the best call preference among the functions in Matches.
161   Sema::CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
162       Matches.begin(), Matches.end(),
163       [&](const T &M1, const T &M2) { return GetCFP(M1) < GetCFP(M2); }));
164 
165   // Erase all functions with lower priority.
166   Matches.erase(
167       llvm::remove_if(Matches,
168                       [&](const T &Match) { return GetCFP(Match) < BestCFP; }),
169       Matches.end());
170 }
171 
EraseUnwantedCUDAMatches(const FunctionDecl * Caller,SmallVectorImpl<FunctionDecl * > & Matches)172 void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
173                                     SmallVectorImpl<FunctionDecl *> &Matches){
174   EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
175       *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
176 }
177 
EraseUnwantedCUDAMatches(const FunctionDecl * Caller,SmallVectorImpl<DeclAccessPair> & Matches)178 void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
179                                     SmallVectorImpl<DeclAccessPair> &Matches) {
180   EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
181       *this, Caller, Matches, [](const DeclAccessPair &item) {
182         return dyn_cast<FunctionDecl>(item.getDecl());
183       });
184 }
185 
EraseUnwantedCUDAMatches(const FunctionDecl * Caller,SmallVectorImpl<std::pair<DeclAccessPair,FunctionDecl * >> & Matches)186 void Sema::EraseUnwantedCUDAMatches(
187     const FunctionDecl *Caller,
188     SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches){
189   EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
190       *this, Caller, Matches,
191       [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
192         return dyn_cast<FunctionDecl>(item.second);
193       });
194 }
195 
196 /// When an implicitly-declared special member has to invoke more than one
197 /// base/field special member, conflicts may occur in the targets of these
198 /// members. For example, if one base's member __host__ and another's is
199 /// __device__, it's a conflict.
200 /// This function figures out if the given targets \param Target1 and
201 /// \param Target2 conflict, and if they do not it fills in
202 /// \param ResolvedTarget with a target that resolves for both calls.
203 /// \return true if there's a conflict, false otherwise.
204 static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,Sema::CUDAFunctionTarget Target2,Sema::CUDAFunctionTarget * ResolvedTarget)205 resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
206                                 Sema::CUDAFunctionTarget Target2,
207                                 Sema::CUDAFunctionTarget *ResolvedTarget) {
208   // Only free functions and static member functions may be global.
209   assert(Target1 != Sema::CFT_Global);
210   assert(Target2 != Sema::CFT_Global);
211 
212   if (Target1 == Sema::CFT_HostDevice) {
213     *ResolvedTarget = Target2;
214   } else if (Target2 == Sema::CFT_HostDevice) {
215     *ResolvedTarget = Target1;
216   } else if (Target1 != Target2) {
217     return true;
218   } else {
219     *ResolvedTarget = Target1;
220   }
221 
222   return false;
223 }
224 
inferCUDATargetForImplicitSpecialMember(CXXRecordDecl * ClassDecl,CXXSpecialMember CSM,CXXMethodDecl * MemberDecl,bool ConstRHS,bool Diagnose)225 bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
226                                                    CXXSpecialMember CSM,
227                                                    CXXMethodDecl *MemberDecl,
228                                                    bool ConstRHS,
229                                                    bool Diagnose) {
230   llvm::Optional<CUDAFunctionTarget> InferredTarget;
231 
232   // We're going to invoke special member lookup; mark that these special
233   // members are called from this one, and not from its caller.
234   ContextRAII MethodContext(*this, MemberDecl);
235 
236   // Look for special members in base classes that should be invoked from here.
237   // Infer the target of this member base on the ones it should call.
238   // Skip direct and indirect virtual bases for abstract classes.
239   llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
240   for (const auto &B : ClassDecl->bases()) {
241     if (!B.isVirtual()) {
242       Bases.push_back(&B);
243     }
244   }
245 
246   if (!ClassDecl->isAbstract()) {
247     for (const auto &VB : ClassDecl->vbases()) {
248       Bases.push_back(&VB);
249     }
250   }
251 
252   for (const auto *B : Bases) {
253     const RecordType *BaseType = B->getType()->getAs<RecordType>();
254     if (!BaseType) {
255       continue;
256     }
257 
258     CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
259     Sema::SpecialMemberOverloadResult *SMOR =
260         LookupSpecialMember(BaseClassDecl, CSM,
261                             /* ConstArg */ ConstRHS,
262                             /* VolatileArg */ false,
263                             /* RValueThis */ false,
264                             /* ConstThis */ false,
265                             /* VolatileThis */ false);
266 
267     if (!SMOR || !SMOR->getMethod()) {
268       continue;
269     }
270 
271     CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR->getMethod());
272     if (!InferredTarget.hasValue()) {
273       InferredTarget = BaseMethodTarget;
274     } else {
275       bool ResolutionError = resolveCalleeCUDATargetConflict(
276           InferredTarget.getValue(), BaseMethodTarget,
277           InferredTarget.getPointer());
278       if (ResolutionError) {
279         if (Diagnose) {
280           Diag(ClassDecl->getLocation(),
281                diag::note_implicit_member_target_infer_collision)
282               << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
283         }
284         MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
285         return true;
286       }
287     }
288   }
289 
290   // Same as for bases, but now for special members of fields.
291   for (const auto *F : ClassDecl->fields()) {
292     if (F->isInvalidDecl()) {
293       continue;
294     }
295 
296     const RecordType *FieldType =
297         Context.getBaseElementType(F->getType())->getAs<RecordType>();
298     if (!FieldType) {
299       continue;
300     }
301 
302     CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
303     Sema::SpecialMemberOverloadResult *SMOR =
304         LookupSpecialMember(FieldRecDecl, CSM,
305                             /* ConstArg */ ConstRHS && !F->isMutable(),
306                             /* VolatileArg */ false,
307                             /* RValueThis */ false,
308                             /* ConstThis */ false,
309                             /* VolatileThis */ false);
310 
311     if (!SMOR || !SMOR->getMethod()) {
312       continue;
313     }
314 
315     CUDAFunctionTarget FieldMethodTarget =
316         IdentifyCUDATarget(SMOR->getMethod());
317     if (!InferredTarget.hasValue()) {
318       InferredTarget = FieldMethodTarget;
319     } else {
320       bool ResolutionError = resolveCalleeCUDATargetConflict(
321           InferredTarget.getValue(), FieldMethodTarget,
322           InferredTarget.getPointer());
323       if (ResolutionError) {
324         if (Diagnose) {
325           Diag(ClassDecl->getLocation(),
326                diag::note_implicit_member_target_infer_collision)
327               << (unsigned)CSM << InferredTarget.getValue()
328               << FieldMethodTarget;
329         }
330         MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
331         return true;
332       }
333     }
334   }
335 
336   if (InferredTarget.hasValue()) {
337     if (InferredTarget.getValue() == CFT_Device) {
338       MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
339     } else if (InferredTarget.getValue() == CFT_Host) {
340       MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
341     } else {
342       MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
343       MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
344     }
345   } else {
346     // If no target was inferred, mark this member as __host__ __device__;
347     // it's the least restrictive option that can be invoked from any target.
348     MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
349     MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
350   }
351 
352   return false;
353 }
354 
isEmptyCudaConstructor(SourceLocation Loc,CXXConstructorDecl * CD)355 bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
356   if (!CD->isDefined() && CD->isTemplateInstantiation())
357     InstantiateFunctionDefinition(Loc, CD->getFirstDecl());
358 
359   // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
360   // empty at a point in the translation unit, if it is either a
361   // trivial constructor
362   if (CD->isTrivial())
363     return true;
364 
365   // ... or it satisfies all of the following conditions:
366   // The constructor function has been defined.
367   // The constructor function has no parameters,
368   // and the function body is an empty compound statement.
369   if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
370     return false;
371 
372   // Its class has no virtual functions and no virtual base classes.
373   if (CD->getParent()->isDynamicClass())
374     return false;
375 
376   // The only form of initializer allowed is an empty constructor.
377   // This will recursively check all base classes and member initializers
378   if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
379         if (const CXXConstructExpr *CE =
380                 dyn_cast<CXXConstructExpr>(CI->getInit()))
381           return isEmptyCudaConstructor(Loc, CE->getConstructor());
382         return false;
383       }))
384     return false;
385 
386   return true;
387 }
388 
isEmptyCudaDestructor(SourceLocation Loc,CXXDestructorDecl * DD)389 bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
390   // No destructor -> no problem.
391   if (!DD)
392     return true;
393 
394   if (!DD->isDefined() && DD->isTemplateInstantiation())
395     InstantiateFunctionDefinition(Loc, DD->getFirstDecl());
396 
397   // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
398   // empty at a point in the translation unit, if it is either a
399   // trivial constructor
400   if (DD->isTrivial())
401     return true;
402 
403   // ... or it satisfies all of the following conditions:
404   // The destructor function has been defined.
405   // and the function body is an empty compound statement.
406   if (!DD->hasTrivialBody())
407     return false;
408 
409   const CXXRecordDecl *ClassDecl = DD->getParent();
410 
411   // Its class has no virtual functions and no virtual base classes.
412   if (ClassDecl->isDynamicClass())
413     return false;
414 
415   // Only empty destructors are allowed. This will recursively check
416   // destructors for all base classes...
417   if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
418         if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
419           return isEmptyCudaDestructor(Loc, RD->getDestructor());
420         return true;
421       }))
422     return false;
423 
424   // ... and member fields.
425   if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
426         if (CXXRecordDecl *RD = Field->getType()
427                                     ->getBaseElementTypeUnsafe()
428                                     ->getAsCXXRecordDecl())
429           return isEmptyCudaDestructor(Loc, RD->getDestructor());
430         return true;
431       }))
432     return false;
433 
434   return true;
435 }
436 
437 // With -fcuda-host-device-constexpr, an unattributed constexpr function is
438 // treated as implicitly __host__ __device__, unless:
439 //  * it is a variadic function (device-side variadic functions are not
440 //    allowed), or
441 //  * a __device__ function with this signature was already declared, in which
442 //    case in which case we output an error, unless the __device__ decl is in a
443 //    system header, in which case we leave the constexpr function unattributed.
maybeAddCUDAHostDeviceAttrs(Scope * S,FunctionDecl * NewD,const LookupResult & Previous)444 void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
445                                        const LookupResult &Previous) {
446   assert(getLangOpts().CUDA && "May be called only for CUDA compilations.");
447   if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
448       NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
449       NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
450     return;
451 
452   // Is D a __device__ function with the same signature as NewD, ignoring CUDA
453   // attributes?
454   auto IsMatchingDeviceFn = [&](NamedDecl *D) {
455     if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
456       D = Using->getTargetDecl();
457     FunctionDecl *OldD = D->getAsFunction();
458     return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
459            !OldD->hasAttr<CUDAHostAttr>() &&
460            !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
461                        /* ConsiderCudaAttrs = */ false);
462   };
463   auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
464   if (It != Previous.end()) {
465     // We found a __device__ function with the same name and signature as NewD
466     // (ignoring CUDA attrs).  This is an error unless that function is defined
467     // in a system header, in which case we simply return without making NewD
468     // host+device.
469     NamedDecl *Match = *It;
470     if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
471       Diag(NewD->getLocation(),
472            diag::err_cuda_unattributed_constexpr_cannot_overload_device)
473           << NewD->getName();
474       Diag(Match->getLocation(),
475            diag::note_cuda_conflicting_device_function_declared_here);
476     }
477     return;
478   }
479 
480   NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
481   NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
482 }
483