//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// \brief This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/Sema/Sema.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

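/// ActOnCUDAExecConfigExpr - Build the execution configuration portion of a
/// CUDA kernel launch `kernel<<<grid, block>>>(args)`: the arguments between
/// `<<<` (at LLLLoc) and `>>>` (at GGGLoc) become a call to the
/// cudaConfigureCall declaration registered on the ASTContext.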
ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << "cudaConfigureCall");
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this
/// function from its CUDA host/device attributes.
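/// For example, a function marked both __host__ and __device__ maps to
/// CFT_HostDevice, while a non-implicit function with no CUDA attributes
/// defaults to CFT_Host.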
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (D->hasAttr<CUDADeviceAttr>()) {
    if (D->hasAttr<CUDAHostAttr>())
      return CFT_HostDevice;
    return CFT_Device;
  } else if (D->hasAttr<CUDAHostAttr>()) {
    return CFT_Host;
  } else if (D->isImplicit()) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}

// * CUDA Call preference table
//
// F  - from,
// T  - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: b-best, f-fallback, l-last resort, n-never.
//
// | F  | T  | Ph | Pd |  H  |
// |----+----+----+----+-----+
// | d  | d  | b  | b  | (b) |
// | d  | g  | n  | n  | (a) |
// | d  | h  | l  | l  | (e) |
// | d  | hd | f  | f  | (c) |
// | g  | d  | b  | b  | (b) |
// | g  | g  | n  | n  | (a) |
// | g  | h  | l  | l  | (e) |
// | g  | hd | f  | f  | (c) |
// | h  | d  | l  | l  | (e) |
// | h  | g  | b  | b  | (b) |
// | h  | h  | b  | b  | (b) |
// | h  | hd | f  | f  | (c) |
// | hd | d  | l  | f  | (d) |
// | hd | g  | f  | n  |(d/a)|
// | hd | h  | f  | l  | (d) |
// | hd | hd | b  | b  | (b) |
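//
// For example, in device mode (column Pd) a hd caller prefers a d callee (f)
// over an h callee (l) and may not call a g kernel at all (n); in host mode
// (column Ph) the h and g callees are preferred (f) and the d callee becomes
// the last resort (l).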

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(getLangOpts().CUDATargetOverloads &&
         "Should not be called w/o enabled target overloads.");

  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
  CUDAFunctionTarget CallerTarget =
      (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
       (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
    return CFP_Never;

  // (b) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Best;

  // (c) Calling HostDevice is OK as a fallback that works for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_Fallback;

  // Figure out what should be returned in 'last resort' cases. Normally
  // those would not be allowed, but we'll consider them if
  // CUDADisableTargetCallChecks is true.
  CUDAFunctionPreference QuestionableResult =
      getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // Calling a function that matches the compilation mode is OK.
    // Calling a function from the other side is frowned upon.
    if (getLangOpts().CUDAIsDevice)
      return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
    else
      return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
                 ? CFP_Fallback
                 : QuestionableResult;
  }

  // (e) Calling across the device/host boundary is not something you should
  // do.
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return QuestionableResult;

  llvm_unreachable("All cases should've been handled by now.");
}

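/// CheckCUDATarget - Check whether calling Callee from Caller would cross
/// the CUDA host/device boundary in a disallowed way; returns true if the
/// call must be rejected.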
bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
                           const FunctionDecl *Callee) {
  // With target overloads enabled, we only disallow calling
  // combinations with CFP_Never.
  if (getLangOpts().CUDATargetOverloads)
    return IdentifyCUDAPreference(Caller, Callee) == CFP_Never;

  // CUDADisableTargetCallChecks short-circuits this check: we assume all
  // cross-target calls are valid.
  if (getLangOpts().CUDADisableTargetCallChecks)
    return false;

  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller),
                     CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return true;

  // CUDA B.1.1 "The __device__ qualifier declares a function that is [...]
  // Callable from the device only."
  if (CallerTarget == CFT_Host && CalleeTarget == CFT_Device)
    return true;

  // CUDA B.1.2 "The __global__ qualifier declares a function that is [...]
  // Callable from the host only."
  // CUDA B.1.3 "The __host__ qualifier declares a function that is [...]
  // Callable from the host only."
  if ((CallerTarget == CFT_Device || CallerTarget == CFT_Global) &&
      (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))
    return true;

  // CUDA B.1.3 "The __device__ and __host__ qualifiers can be used together
  // however, in which case the function is compiled for both the host and the
  // device. The __CUDA_ARCH__ macro [...] can be used to differentiate code
  // paths between host and device."
  if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice) {
    // If the caller is implicit then the check always passes.
    if (Caller->isImplicit()) return false;

    bool InDeviceMode = getLangOpts().CUDAIsDevice;
    if (!InDeviceMode && CalleeTarget != CFT_Host)
      return true;
    if (InDeviceMode && CalleeTarget != CFT_Device) {
      // Allow host device functions to call host functions if explicitly
      // requested.
      if (CalleeTarget == CFT_Host &&
          getLangOpts().CUDAAllowHostCallsFromHostDevice) {
        Diag(Caller->getLocation(), diag::warn_host_calls_from_host_device)
            << Callee->getNameAsString() << Caller->getNameAsString();
        return false;
      }

      return true;
    }
  }

  return false;
}

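/// EraseUnwantedCUDAMatchesImpl - Prune the overload candidates in Matches
/// down to those with the best call preference from Caller. FetchDecl adapts
/// a match's representation to its FunctionDecl; the public
/// EraseUnwantedCUDAMatches overloads below supply it as a small lambda.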
template <typename T, typename FetchDeclFn>
static void EraseUnwantedCUDAMatchesImpl(Sema &S, const FunctionDecl *Caller,
                                         llvm::SmallVectorImpl<T> &Matches,
                                         FetchDeclFn FetchDecl) {
  assert(S.getLangOpts().CUDATargetOverloads &&
         "Should not be called w/o enabled target overloads.");
  if (Matches.size() <= 1)
    return;

  // Find the best call preference among the functions in Matches.
  Sema::CUDAFunctionPreference P, BestCFP = Sema::CFP_Never;
  for (auto const &Match : Matches) {
    P = S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
    if (P > BestCFP)
      BestCFP = P;
  }

  // Erase all functions with a lower preference than BestCFP by overwriting
  // each rejected element with the last one and shrinking the vector.
  for (unsigned I = 0, N = Matches.size(); I != N;)
    if (S.IdentifyCUDAPreference(Caller, FetchDecl(Matches[I])) < BestCFP) {
      Matches[I] = Matches[--N];
      Matches.resize(N);
    } else {
      ++I;
    }
}

void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
                                    SmallVectorImpl<FunctionDecl *> &Matches) {
  EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
      *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
}

void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
                                    SmallVectorImpl<DeclAccessPair> &Matches) {
  EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
      *this, Caller, Matches, [](const DeclAccessPair &item) {
        return dyn_cast<FunctionDecl>(item.getDecl());
      });
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
      *this, Caller, Matches,
      [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
        return dyn_cast<FunctionDecl>(item.second);
      });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  if (Target1 == Sema::CFT_Global && Target2 == Sema::CFT_Global) {
    // TODO: this shouldn't happen, really. Methods cannot be marked
    // __global__. Clang should detect this earlier and produce an error. Then
    // this condition can be changed to an assertion.
    return true;
  }

  // __host__ __device__ is compatible with anything, so the other target
  // wins; otherwise the two targets must match exactly.
  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

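/// inferCUDATargetForImplicitSpecialMember - Infer the CUDA target of the
/// implicitly-declared special member MemberDecl (of kind CSM) of ClassDecl
/// from the base and field special members it needs to invoke, and attach
/// the matching implicit attributes to it. Returns true, after marking
/// MemberDecl with CUDAInvalidTargetAttr, if those targets conflict.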
bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from
  // here. Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

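  // Fold each collected base's special-member target into InferredTarget,
  // diagnosing any host/device conflict along the way.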
  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget BaseMethodTarget =
        IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

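  // Attach the inferred target to the implicit member so that subsequent
  // calls to it are checked against the correct target.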
  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device) {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    } else if (InferredTarget.getValue() == CFT_Host) {
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    } else {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    }
  } else {
    // If no target was inferred, mark this member as __host__ __device__;
    // it's the least restrictive option that can be invoked from any target.
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }

  return false;
}