//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// \brief This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/Sema/Sema.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << "cudaConfigureCall");
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}
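
// Illustrative note: for a launch written as `kernel<<<Grid, Block>>>(args)`,
// the parser hands `Grid, Block` to this function, which wraps them in a call
// to the cudaConfigureCall() declaration; that call then becomes the
// execution-config expression of the CUDAKernelCallExpr built for the launch.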

/// IdentifyCUDATarget - Determine the CUDA compilation target for this
/// function.
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (D->hasAttr<CUDADeviceAttr>()) {
    if (D->hasAttr<CUDAHostAttr>())
      return CFT_HostDevice;
    return CFT_Device;
  } else if (D->hasAttr<CUDAHostAttr>()) {
    return CFT_Host;
  } else if (D->isImplicit()) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}
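
// For illustration, hypothetical declarations map to targets as follows:
//   __global__ void kernel();         // CFT_Global
//   __device__ int dev_fn();          // CFT_Device
//   __host__ __device__ int hd_fn();  // CFT_HostDevice
//   __host__ int host_fn();           // CFT_Host
//   int plain_fn();                   // CFT_Host (default for explicit decls)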

// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: b-best, f-fallback, l-last resort, n-never.
//
// | F  | T  | Ph | Pd |  H  |
// |----+----+----+----+-----+
// | d  | d  | b  | b  | (b) |
// | d  | g  | n  | n  | (a) |
// | d  | h  | l  | l  | (e) |
// | d  | hd | f  | f  | (c) |
// | g  | d  | b  | b  | (b) |
// | g  | g  | n  | n  | (a) |
// | g  | h  | l  | l  | (e) |
// | g  | hd | f  | f  | (c) |
// | h  | d  | l  | l  | (e) |
// | h  | g  | b  | b  | (b) |
// | h  | h  | b  | b  | (b) |
// | h  | hd | f  | f  | (c) |
// | hd | d  | l  | f  | (d) |
// | hd | g  | f  | n  |(d/a)|
// | hd | h  | f  | l  | (d) |
// | hd | hd | b  | b  | (b) |
Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(getLangOpts().CUDATargetOverloads &&
         "Should not be called without enabled target overloads.");

  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
  CUDAFunctionTarget CallerTarget =
      (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
       (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
    return CFP_Never;

  // (b) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Best;

  // (c) Calling HostDevice is OK as a fallback that works for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_Fallback;

  // Figure out what should be returned in the 'last resort' cases. Normally
  // those would not be allowed, but we'll consider them if
  // CUDADisableTargetCallChecks is true.
  CUDAFunctionPreference QuestionableResult =
      getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // Calling a function that matches the compilation mode is OK.
    // Calling a function from the other side is frowned upon.
    if (getLangOpts().CUDAIsDevice)
      return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
    else
      return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
                 ? CFP_Fallback
                 : QuestionableResult;
  }

  // (e) Calling across the device/host boundary is not something you should
  // do.
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return QuestionableResult;

  llvm_unreachable("All cases should've been handled by now.");
}
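
// Note: this preference is consumed by overload resolution; the
// EraseUnwantedCUDAMatches overloads below use it to prune candidates that
// rank below the best preference available to the caller.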

bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
                           const FunctionDecl *Callee) {
  // With target overloads enabled, we only disallow calling
  // combinations with CFP_Never.
  if (getLangOpts().CUDATargetOverloads)
    return IdentifyCUDAPreference(Caller, Callee) == CFP_Never;

  // The CUDADisableTargetCallChecks flag short-circuits this check: we assume
  // all cross-target calls are valid.
  if (getLangOpts().CUDADisableTargetCallChecks)
    return false;

  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller),
                     CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return true;

  // CUDA B.1.1 "The __device__ qualifier declares a function that is [...]
  // Callable from the device only."
  if (CallerTarget == CFT_Host && CalleeTarget == CFT_Device)
    return true;

  // CUDA B.1.2 "The __global__ qualifier declares a function that is [...]
  // Callable from the host only."
  // CUDA B.1.3 "The __host__ qualifier declares a function that is [...]
  // Callable from the host only."
  if ((CallerTarget == CFT_Device || CallerTarget == CFT_Global) &&
      (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))
    return true;

  // CUDA B.1.3 "The __device__ and __host__ qualifiers can be used together
  // however, in which case the function is compiled for both the host and the
  // device. The __CUDA_ARCH__ macro [...] can be used to differentiate code
  // paths between host and device."
  if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice) {
    // If the caller is implicit then the check always passes.
    if (Caller->isImplicit()) return false;

    bool InDeviceMode = getLangOpts().CUDAIsDevice;
    if (!InDeviceMode && CalleeTarget != CFT_Host)
      return true;
    if (InDeviceMode && CalleeTarget != CFT_Device) {
      // Allow host device functions to call host functions if explicitly
      // requested.
      if (CalleeTarget == CFT_Host &&
          getLangOpts().CUDAAllowHostCallsFromHostDevice) {
        Diag(Caller->getLocation(),
             diag::warn_host_calls_from_host_device)
            << Callee->getNameAsString() << Caller->getNameAsString();
        return false;
      }

      return true;
    }
  }

  return false;
}
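
// For example (hypothetical declarations), unless the target call checks are
// disabled, calling a __device__ function from a plain host function is
// rejected:
//   __device__ int dev_fn();
//   void host_fn() { dev_fn(); }  // CFT_Host -> CFT_Device: returns true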

template <typename T, typename FetchDeclFn>
static void EraseUnwantedCUDAMatchesImpl(Sema &S, const FunctionDecl *Caller,
                                         llvm::SmallVectorImpl<T> &Matches,
                                         FetchDeclFn FetchDecl) {
  assert(S.getLangOpts().CUDATargetOverloads &&
         "Should not be called without enabled target overloads.");
  if (Matches.size() <= 1)
    return;

  // Find the best call preference among the functions in Matches.
  Sema::CUDAFunctionPreference P, BestCFP = Sema::CFP_Never;
  for (auto const &Match : Matches) {
    P = S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
    if (P > BestCFP)
      BestCFP = P;
  }

  // Erase all functions with a lower priority than the best one found above,
  // replacing each erased element with the current last element.
  for (unsigned I = 0, N = Matches.size(); I != N;)
    if (S.IdentifyCUDAPreference(Caller, FetchDecl(Matches[I])) < BestCFP) {
      Matches[I] = Matches[--N];
      Matches.resize(N);
    } else {
      ++I;
    }
}
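
// Sketch of the pruning effect on a hypothetical overload set: for a
// __device__ caller seeing
//   __device__ int g();           // CFP_Best (d -> d)
//   __host__ __device__ int g();  // CFP_Fallback (d -> hd)
// BestCFP ends up as CFP_Best, so the __host__ __device__ candidate is
// erased and only the __device__ overload survives.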

void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
                                    SmallVectorImpl<FunctionDecl *> &Matches) {
  EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
      *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
}

void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
                                    SmallVectorImpl<DeclAccessPair> &Matches) {
  EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
      *this, Caller, Matches, [](const DeclAccessPair &item) {
        return dyn_cast<FunctionDecl>(item.getDecl());
      });
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
      *this, Caller, Matches,
      [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
        return dyn_cast<FunctionDecl>(item.second);
      });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  if (Target1 == Sema::CFT_Global && Target2 == Sema::CFT_Global) {
    // TODO: this shouldn't happen, really. Methods cannot be marked __global__.
    // Clang should detect this earlier and produce an error. Then this
    // condition can be changed to an assertion.
    return true;
  }

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}
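
// Resolution examples (target pairs only):
//   (HostDevice, Device) -> no conflict, resolves to Device;
//   (Host, Host)         -> no conflict, resolves to Host;
//   (Host, Device)       -> conflict, returns true;
//   (Global, Global)     -> treated as a conflict until the TODO above lands.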

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device) {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    } else if (InferredTarget.getValue() == CFT_Host) {
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    } else {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    }
  } else {
    // If no target was inferred, mark this member as __host__ __device__;
    // it's the least restrictive option that can be invoked from any target.
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }

  return false;
}
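
// For example (hypothetical), inferring the implicit default constructor in
//   struct A { __host__ A(); };
//   struct B { __device__ B(); };
//   struct C : A { B b; };  // base ctor is host, field ctor is device
// hits a conflict: C's implicit default constructor is given
// CUDAInvalidTargetAttr and the collision is reported (when Diagnose is true)
// via note_implicit_member_target_infer_collision.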