1 //===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolCollector.h"
10 #include "AST.h"
11 #include "CanonicalIncludes.h"
12 #include "CodeComplete.h"
13 #include "CodeCompletionStrings.h"
14 #include "ExpectedTypes.h"
15 #include "SourceCode.h"
16 #include "SymbolLocation.h"
17 #include "URI.h"
18 #include "index/Relation.h"
19 #include "index/SymbolID.h"
20 #include "support/Logger.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/DeclBase.h"
23 #include "clang/AST/DeclCXX.h"
24 #include "clang/AST/DeclObjC.h"
25 #include "clang/AST/DeclTemplate.h"
26 #include "clang/Basic/SourceLocation.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/Basic/Specifiers.h"
29 #include "clang/Index/IndexSymbol.h"
30 #include "clang/Index/IndexingAction.h"
31 #include "clang/Index/USRGeneration.h"
32 #include "clang/Lex/Preprocessor.h"
33 #include "clang/Tooling/Syntax/Tokens.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/Path.h"
38 
39 namespace clang {
40 namespace clangd {
41 namespace {
42 
43 /// If \p ND is a template specialization, returns the described template.
44 /// Otherwise, returns \p ND.
getTemplateOrThis(const NamedDecl & ND)45 const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
46   if (auto T = ND.getDescribedTemplate())
47     return *T;
48   return ND;
49 }
50 
51 // Returns a URI of \p Path. Firstly, this makes the \p Path absolute using the
52 // current working directory of the given SourceManager if the Path is not an
53 // absolute path. If failed, this resolves relative paths against \p FallbackDir
54 // to get an absolute path. Then, this tries creating an URI for the absolute
55 // path with schemes specified in \p Opts. This returns an URI with the first
56 // working scheme, if there is any; otherwise, this returns None.
57 //
58 // The Path can be a path relative to the build directory, or retrieved from
59 // the SourceManager.
toURI(const SourceManager & SM,llvm::StringRef Path,const SymbolCollector::Options & Opts)60 std::string toURI(const SourceManager &SM, llvm::StringRef Path,
61                   const SymbolCollector::Options &Opts) {
62   llvm::SmallString<128> AbsolutePath(Path);
63   if (auto File = SM.getFileManager().getFile(Path)) {
64     if (auto CanonPath = getCanonicalPath(*File, SM)) {
65       AbsolutePath = *CanonPath;
66     }
67   }
68   // We don't perform is_absolute check in an else branch because makeAbsolute
69   // might return a relative path on some InMemoryFileSystems.
70   if (!llvm::sys::path::is_absolute(AbsolutePath) && !Opts.FallbackDir.empty())
71     llvm::sys::fs::make_absolute(Opts.FallbackDir, AbsolutePath);
72   llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/true);
73   return URI::create(AbsolutePath).toString();
74 }
75 
76 // Checks whether the decl is a private symbol in a header generated by
77 // protobuf compiler.
78 // FIXME: make filtering extensible when there are more use cases for symbol
79 // filters.
isPrivateProtoDecl(const NamedDecl & ND)80 bool isPrivateProtoDecl(const NamedDecl &ND) {
81   const auto &SM = ND.getASTContext().getSourceManager();
82   if (!isProtoFile(nameLocation(ND, SM), SM))
83     return false;
84 
85   // ND without identifier can be operators.
86   if (ND.getIdentifier() == nullptr)
87     return false;
88   auto Name = ND.getIdentifier()->getName();
89   if (!Name.contains('_'))
90     return false;
91   // Nested proto entities (e.g. Message::Nested) have top-level decls
92   // that shouldn't be used (Message_Nested). Ignore them completely.
93   // The nested entities are dangling type aliases, we may want to reconsider
94   // including them in the future.
95   // For enum constants, SOME_ENUM_CONSTANT is not private and should be
96   // indexed. Outer_INNER is private. This heuristic relies on naming style, it
97   // will include OUTER_INNER and exclude some_enum_constant.
98   // FIXME: the heuristic relies on naming style (i.e. no underscore in
99   // user-defined names) and can be improved.
100   return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);
101 }
102 
103 // We only collect #include paths for symbols that are suitable for global code
104 // completion, except for namespaces since #include path for a namespace is hard
105 // to define.
shouldCollectIncludePath(index::SymbolKind Kind)106 bool shouldCollectIncludePath(index::SymbolKind Kind) {
107   using SK = index::SymbolKind;
108   switch (Kind) {
109   case SK::Macro:
110   case SK::Enum:
111   case SK::Struct:
112   case SK::Class:
113   case SK::Union:
114   case SK::TypeAlias:
115   case SK::Using:
116   case SK::Function:
117   case SK::Variable:
118   case SK::EnumConstant:
119     return true;
120   default:
121     return false;
122   }
123 }
124 
125 // Return the symbol range of the token at \p TokLoc.
126 std::pair<SymbolLocation::Position, SymbolLocation::Position>
getTokenRange(SourceLocation TokLoc,const SourceManager & SM,const LangOptions & LangOpts)127 getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
128               const LangOptions &LangOpts) {
129   auto CreatePosition = [&SM](SourceLocation Loc) {
130     auto LSPLoc = sourceLocToPosition(SM, Loc);
131     SymbolLocation::Position Pos;
132     Pos.setLine(LSPLoc.line);
133     Pos.setColumn(LSPLoc.character);
134     return Pos;
135   };
136 
137   auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
138   return {CreatePosition(TokLoc),
139           CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
140 }
141 
142 // Return the symbol location of the token at \p TokLoc.
143 llvm::Optional<SymbolLocation>
getTokenLocation(SourceLocation TokLoc,const SourceManager & SM,const SymbolCollector::Options & Opts,const clang::LangOptions & LangOpts,std::string & FileURIStorage)144 getTokenLocation(SourceLocation TokLoc, const SourceManager &SM,
145                  const SymbolCollector::Options &Opts,
146                  const clang::LangOptions &LangOpts,
147                  std::string &FileURIStorage) {
148   auto Path = SM.getFilename(TokLoc);
149   if (Path.empty())
150     return None;
151   FileURIStorage = toURI(SM, Path, Opts);
152   SymbolLocation Result;
153   Result.FileURI = FileURIStorage.c_str();
154   auto Range = getTokenRange(TokLoc, SM, LangOpts);
155   Result.Start = Range.first;
156   Result.End = Range.second;
157 
158   return Result;
159 }
160 
161 // Checks whether \p ND is a good candidate to be the *canonical* declaration of
162 // its symbol (e.g. a go-to-declaration target). This overrides the default of
163 // using Clang's canonical declaration, which is the first in the TU.
164 //
165 // Example: preferring a class declaration over its forward declaration.
isPreferredDeclaration(const NamedDecl & ND,index::SymbolRoleSet Roles)166 bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
167   const auto &SM = ND.getASTContext().getSourceManager();
168   if (isa<TagDecl>(ND))
169     return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) &&
170            !isInsideMainFile(ND.getLocation(), SM);
171   if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(&ND))
172     return ID->isThisDeclarationADefinition();
173   if (const auto *PD = dyn_cast<ObjCProtocolDecl>(&ND))
174     return PD->isThisDeclarationADefinition();
175   return false;
176 }
177 
toRefKind(index::SymbolRoleSet Roles,bool Spelled=false)178 RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) {
179   RefKind Result = RefKind::Unknown;
180   if (Roles & static_cast<unsigned>(index::SymbolRole::Declaration))
181     Result |= RefKind::Declaration;
182   if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
183     Result |= RefKind::Definition;
184   if (Roles & static_cast<unsigned>(index::SymbolRole::Reference))
185     Result |= RefKind::Reference;
186   if (Spelled)
187     Result |= RefKind::Spelled;
188   return Result;
189 }
190 
indexableRelation(const index::SymbolRelation & R)191 llvm::Optional<RelationKind> indexableRelation(const index::SymbolRelation &R) {
192   if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationBaseOf))
193     return RelationKind::BaseOf;
194   if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationOverrideOf))
195     return RelationKind::OverriddenBy;
196   return None;
197 }
198 
199 } // namespace
200 
SymbolCollector(Options Opts)201 SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
202 
initialize(ASTContext & Ctx)203 void SymbolCollector::initialize(ASTContext &Ctx) {
204   ASTCtx = &Ctx;
205   CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>();
206   CompletionTUInfo =
207       std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
208 }
209 
shouldCollectSymbol(const NamedDecl & ND,const ASTContext & ASTCtx,const Options & Opts,bool IsMainFileOnly)210 bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND,
211                                           const ASTContext &ASTCtx,
212                                           const Options &Opts,
213                                           bool IsMainFileOnly) {
214   // Skip anonymous declarations, e.g (anonymous enum/class/struct).
215   if (ND.getDeclName().isEmpty())
216     return false;
217 
218   // Skip main-file symbols if we are not collecting them.
219   if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
220     return false;
221 
222   // Skip symbols in anonymous namespaces in header files.
223   if (!IsMainFileOnly && ND.isInAnonymousNamespace())
224     return false;
225 
226   // We want most things but not "local" symbols such as symbols inside
227   // FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.
228   // FIXME: Need a matcher for ExportDecl in order to include symbols declared
229   // within an export.
230   const auto *DeclCtx = ND.getDeclContext();
231   switch (DeclCtx->getDeclKind()) {
232   case Decl::TranslationUnit:
233   case Decl::Namespace:
234   case Decl::LinkageSpec:
235   case Decl::Enum:
236   case Decl::ObjCProtocol:
237   case Decl::ObjCInterface:
238   case Decl::ObjCCategory:
239   case Decl::ObjCCategoryImpl:
240   case Decl::ObjCImplementation:
241     break;
242   default:
243     // Record has a few derivations (e.g. CXXRecord, Class specialization), it's
244     // easier to cast.
245     if (!isa<RecordDecl>(DeclCtx))
246       return false;
247   }
248 
249   // Avoid indexing internal symbols in protobuf generated headers.
250   if (isPrivateProtoDecl(ND))
251     return false;
252   return true;
253 }
254 
255 // Always return true to continue indexing.
handleDeclOccurrence(const Decl * D,index::SymbolRoleSet Roles,llvm::ArrayRef<index::SymbolRelation> Relations,SourceLocation Loc,index::IndexDataConsumer::ASTNodeInfo ASTNode)256 bool SymbolCollector::handleDeclOccurrence(
257     const Decl *D, index::SymbolRoleSet Roles,
258     llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
259     index::IndexDataConsumer::ASTNodeInfo ASTNode) {
260   assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
261   assert(CompletionAllocator && CompletionTUInfo);
262   assert(ASTNode.OrigD);
263   // Indexing API puts canonical decl into D, which might not have a valid
264   // source location for implicit/built-in decls. Fallback to original decl in
265   // such cases.
266   if (D->getLocation().isInvalid())
267     D = ASTNode.OrigD;
268   // If OrigD is an declaration associated with a friend declaration and it's
269   // not a definition, skip it. Note that OrigD is the occurrence that the
270   // collector is currently visiting.
271   if ((ASTNode.OrigD->getFriendObjectKind() !=
272        Decl::FriendObjectKind::FOK_None) &&
273       !(Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
274     return true;
275   // A declaration created for a friend declaration should not be used as the
276   // canonical declaration in the index. Use OrigD instead, unless we've already
277   // picked a replacement for D
278   if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None)
279     D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second;
280   // Flag to mark that D should be considered canonical meaning its declaration
281   // will override any previous declaration for the Symbol.
282   bool DeclIsCanonical = false;
283   // Avoid treating ObjCImplementationDecl as a canonical declaration if it has
284   // a corresponding non-implicit and non-forward declared ObjcInterfaceDecl.
285   if (const auto *IID = dyn_cast<ObjCImplementationDecl>(D)) {
286     DeclIsCanonical = true;
287     if (const auto *CID = IID->getClassInterface())
288       if (const auto *DD = CID->getDefinition())
289         if (!DD->isImplicitInterfaceDecl())
290           D = DD;
291   }
292   // Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of
293   // its ObjCCategoryDecl if it has one.
294   if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(D)) {
295     DeclIsCanonical = true;
296     if (const auto *CD = CID->getCategoryDecl())
297       D = CD;
298   }
299   const NamedDecl *ND = dyn_cast<NamedDecl>(D);
300   if (!ND)
301     return true;
302 
303   // Mark D as referenced if this is a reference coming from the main file.
304   // D may not be an interesting symbol, but it's cheaper to check at the end.
305   auto &SM = ASTCtx->getSourceManager();
306   if (Opts.CountReferences &&
307       (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
308       SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
309     ReferencedDecls.insert(ND);
310 
311   auto ID = getSymbolID(ND);
312   if (!ID)
313     return true;
314 
315   // ND is the canonical (i.e. first) declaration. If it's in the main file
316   // (which is not a header), then no public declaration was visible, so assume
317   // it's main-file only.
318   bool IsMainFileOnly =
319       SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
320       !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
321                     ASTCtx->getLangOpts());
322   // In C, printf is a redecl of an implicit builtin! So check OrigD instead.
323   if (ASTNode.OrigD->isImplicit() ||
324       !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
325     return true;
326 
327   // Note: we need to process relations for all decl occurrences, including
328   // refs, because the indexing code only populates relations for specific
329   // occurrences. For example, RelationBaseOf is only populated for the
330   // occurrence inside the base-specifier.
331   processRelations(*ND, ID, Relations);
332 
333   bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));
334   bool IsOnlyRef =
335       !(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
336                  static_cast<unsigned>(index::SymbolRole::Definition)));
337 
338   if (IsOnlyRef && !CollectRef)
339     return true;
340 
341   // Unlike other fields, e.g. Symbols (which use spelling locations), we use
342   // file locations for references (as it aligns the behavior of clangd's
343   // AST-based xref).
344   // FIXME: we should try to use the file locations for other fields.
345   if (CollectRef &&
346       (!IsMainFileOnly || Opts.CollectMainFileRefs ||
347        ND->isExternallyVisible()) &&
348       !isa<NamespaceDecl>(ND) &&
349       (Opts.RefsInHeaders ||
350        SM.getFileID(SM.getFileLoc(Loc)) == SM.getMainFileID()))
351     DeclRefs[ND].push_back(
352         SymbolRef{SM.getFileLoc(Loc), Roles, ASTNode.Parent});
353   // Don't continue indexing if this is a mere reference.
354   if (IsOnlyRef)
355     return true;
356 
357   // FIXME: ObjCPropertyDecl are not properly indexed here:
358   // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
359   // not a NamedDecl.
360   auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD);
361   if (!OriginalDecl)
362     return true;
363 
364   const Symbol *BasicSymbol = Symbols.find(ID);
365   if (isPreferredDeclaration(*OriginalDecl, Roles))
366     // If OriginalDecl is preferred, replace/create the existing canonical
367     // declaration (e.g. a class forward declaration). There should be at most
368     // one duplicate as we expect to see only one preferred declaration per
369     // TU, because in practice they are definitions.
370     BasicSymbol = addDeclaration(*OriginalDecl, std::move(ID), IsMainFileOnly);
371   else if (!BasicSymbol || DeclIsCanonical)
372     BasicSymbol = addDeclaration(*ND, std::move(ID), IsMainFileOnly);
373 
374   if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
375     addDefinition(*OriginalDecl, *BasicSymbol);
376 
377   return true;
378 }
379 
handleMacros(const MainFileMacros & MacroRefsToIndex)380 void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
381   assert(PP.get());
382   const auto &SM = PP->getSourceManager();
383   const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
384   assert(MainFileEntry);
385 
386   const auto MainFileURI = toURI(SM, MainFileEntry->getName(), Opts);
387   // Add macro references.
388   for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
389     for (const auto &Range : IDToRefs.second) {
390       Ref R;
391       R.Location.Start.setLine(Range.start.line);
392       R.Location.Start.setColumn(Range.start.character);
393       R.Location.End.setLine(Range.end.line);
394       R.Location.End.setColumn(Range.end.character);
395       R.Location.FileURI = MainFileURI.c_str();
396       // FIXME: Add correct RefKind information to MainFileMacros.
397       R.Kind = RefKind::Reference;
398       Refs.insert(IDToRefs.first, R);
399     }
400   }
401 }
402 
handleMacroOccurrence(const IdentifierInfo * Name,const MacroInfo * MI,index::SymbolRoleSet Roles,SourceLocation Loc)403 bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
404                                             const MacroInfo *MI,
405                                             index::SymbolRoleSet Roles,
406                                             SourceLocation Loc) {
407   assert(PP.get());
408   // Builtin macros don't have useful locations and aren't needed in completion.
409   if (MI->isBuiltinMacro())
410     return true;
411 
412   const auto &SM = PP->getSourceManager();
413   auto DefLoc = MI->getDefinitionLoc();
414   // Also avoid storing predefined macros like __DBL_MIN__.
415   if (SM.isWrittenInBuiltinFile(DefLoc))
416     return true;
417 
418   auto ID = getSymbolID(Name->getName(), MI, SM);
419   if (!ID)
420     return true;
421 
422   auto SpellingLoc = SM.getSpellingLoc(Loc);
423   bool IsMainFileOnly =
424       SM.isInMainFile(SM.getExpansionLoc(DefLoc)) &&
425       !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
426                     ASTCtx->getLangOpts());
427   // Do not store references to main-file macros.
428   if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly &&
429       (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID()))
430     // FIXME: Populate container information for macro references.
431     MacroRefs[ID].push_back({Loc, Roles, /*Container=*/nullptr});
432 
433   // Collect symbols.
434   if (!Opts.CollectMacro)
435     return true;
436 
437   // Skip main-file macros if we are not collecting them.
438   if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
439     return false;
440 
441   // Mark the macro as referenced if this is a reference coming from the main
442   // file. The macro may not be an interesting symbol, but it's cheaper to check
443   // at the end.
444   if (Opts.CountReferences &&
445       (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
446       SM.getFileID(SpellingLoc) == SM.getMainFileID())
447     ReferencedMacros.insert(Name);
448 
449   // Don't continue indexing if this is a mere reference.
450   // FIXME: remove macro with ID if it is undefined.
451   if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
452         Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
453     return true;
454 
455   // Only collect one instance in case there are multiple.
456   if (Symbols.find(ID) != nullptr)
457     return true;
458 
459   Symbol S;
460   S.ID = std::move(ID);
461   S.Name = Name->getName();
462   if (!IsMainFileOnly) {
463     S.Flags |= Symbol::IndexedForCodeCompletion;
464     S.Flags |= Symbol::VisibleOutsideFile;
465   }
466   S.SymInfo = index::getSymbolInfoForMacro(*MI);
467   S.Origin = Opts.Origin;
468   std::string FileURI;
469   // FIXME: use the result to filter out symbols.
470   shouldIndexFile(SM.getFileID(Loc));
471   if (auto DeclLoc =
472           getTokenLocation(DefLoc, SM, Opts, PP->getLangOpts(), FileURI))
473     S.CanonicalDeclaration = *DeclLoc;
474 
475   CodeCompletionResult SymbolCompletion(Name);
476   const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro(
477       *PP, *CompletionAllocator, *CompletionTUInfo);
478   std::string Signature;
479   std::string SnippetSuffix;
480   getSignature(*CCS, &Signature, &SnippetSuffix);
481   S.Signature = Signature;
482   S.CompletionSnippetSuffix = SnippetSuffix;
483 
484   IndexedMacros.insert(Name);
485   setIncludeLocation(S, DefLoc);
486   Symbols.insert(S);
487   return true;
488 }
489 
processRelations(const NamedDecl & ND,const SymbolID & ID,ArrayRef<index::SymbolRelation> Relations)490 void SymbolCollector::processRelations(
491     const NamedDecl &ND, const SymbolID &ID,
492     ArrayRef<index::SymbolRelation> Relations) {
493   for (const auto &R : Relations) {
494     auto RKind = indexableRelation(R);
495     if (!RKind)
496       continue;
497     const Decl *Object = R.RelatedSymbol;
498 
499     auto ObjectID = getSymbolID(Object);
500     if (!ObjectID)
501       continue;
502 
503     // Record the relation.
504     // TODO: There may be cases where the object decl is not indexed for some
505     // reason. Those cases should probably be removed in due course, but for
506     // now there are two possible ways to handle it:
507     //   (A) Avoid storing the relation in such cases.
508     //   (B) Store it anyways. Clients will likely lookup() the SymbolID
509     //       in the index and find nothing, but that's a situation they
510     //       probably need to handle for other reasons anyways.
511     // We currently do (B) because it's simpler.
512     if (*RKind == RelationKind::BaseOf)
513       this->Relations.insert({ID, *RKind, ObjectID});
514     else if (*RKind == RelationKind::OverriddenBy)
515       this->Relations.insert({ObjectID, *RKind, ID});
516   }
517 }
518 
setIncludeLocation(const Symbol & S,SourceLocation Loc)519 void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation Loc) {
520   if (Opts.CollectIncludePath)
521     if (shouldCollectIncludePath(S.SymInfo.Kind))
522       // Use the expansion location to get the #include header since this is
523       // where the symbol is exposed.
524       IncludeFiles[S.ID] =
525           PP->getSourceManager().getDecomposedExpansionLoc(Loc).first;
526 }
527 
finish()528 void SymbolCollector::finish() {
529   // At the end of the TU, add 1 to the refcount of all referenced symbols.
530   auto IncRef = [this](const SymbolID &ID) {
531     if (const auto *S = Symbols.find(ID)) {
532       Symbol Inc = *S;
533       ++Inc.References;
534       Symbols.insert(Inc);
535     }
536   };
537   for (const NamedDecl *ND : ReferencedDecls) {
538     if (auto ID = getSymbolID(ND)) {
539       IncRef(ID);
540     }
541   }
542   if (Opts.CollectMacro) {
543     assert(PP);
544     // First, drop header guards. We can't identify these until EOF.
545     for (const IdentifierInfo *II : IndexedMacros) {
546       if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
547         if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
548           if (MI->isUsedForHeaderGuard())
549             Symbols.erase(ID);
550     }
551     // Now increment refcounts.
552     for (const IdentifierInfo *II : ReferencedMacros) {
553       if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
554         if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
555           IncRef(ID);
556     }
557   }
558   // Fill in IncludeHeaders.
559   // We delay this until end of TU so header guards are all resolved.
560   // Symbols in slabs aren't mutable, so insert() has to walk all the strings
561   // :-(
562   for (const auto &Entry : IncludeFiles)
563     if (const Symbol *S = Symbols.find(Entry.first)) {
564       if (auto Header = getIncludeHeader(*S, Entry.second)) {
565         Symbol NewSym = *S;
566         NewSym.IncludeHeaders.push_back({std::move(*Header), 1});
567         Symbols.insert(NewSym);
568       }
569     }
570 
571   const auto &SM = ASTCtx->getSourceManager();
572   llvm::DenseMap<FileID, std::string> URICache;
573   auto GetURI = [&](FileID FID) -> llvm::Optional<std::string> {
574     auto Found = URICache.find(FID);
575     if (Found == URICache.end()) {
576       if (auto *FileEntry = SM.getFileEntryForID(FID)) {
577         auto FileURI = toURI(SM, FileEntry->getName(), Opts);
578         Found = URICache.insert({FID, FileURI}).first;
579       } else {
580         // Ignore cases where we can not find a corresponding file entry for
581         // given location, e.g. symbols formed via macro concatenation.
582         return None;
583       }
584     }
585     return Found->second;
586   };
587   auto CollectRef = [&](SymbolID ID, const SymbolRef &LocAndRole,
588                         bool Spelled = false) {
589     auto FileID = SM.getFileID(LocAndRole.Loc);
590     // FIXME: use the result to filter out references.
591     shouldIndexFile(FileID);
592     if (auto FileURI = GetURI(FileID)) {
593       auto Range = getTokenRange(LocAndRole.Loc, SM, ASTCtx->getLangOpts());
594       Ref R;
595       R.Location.Start = Range.first;
596       R.Location.End = Range.second;
597       R.Location.FileURI = FileURI->c_str();
598       R.Kind = toRefKind(LocAndRole.Roles, Spelled);
599       R.Container = getSymbolID(LocAndRole.Container);
600       Refs.insert(ID, R);
601     }
602   };
603   // Populate Refs slab from MacroRefs.
604   // FIXME: All MacroRefs are marked as Spelled now, but this should be checked.
605   for (const auto &IDAndRefs : MacroRefs)
606     for (const auto &LocAndRole : IDAndRefs.second)
607       CollectRef(IDAndRefs.first, LocAndRole, /*Spelled=*/true);
608   // Populate Refs slab from DeclRefs.
609   llvm::DenseMap<FileID, std::vector<syntax::Token>> FilesToTokensCache;
610   for (auto &DeclAndRef : DeclRefs) {
611     if (auto ID = getSymbolID(DeclAndRef.first)) {
612       for (auto &LocAndRole : DeclAndRef.second) {
613         const auto FileID = SM.getFileID(LocAndRole.Loc);
614         // FIXME: It's better to use TokenBuffer by passing spelled tokens from
615         // the caller of SymbolCollector.
616         if (!FilesToTokensCache.count(FileID))
617           FilesToTokensCache[FileID] =
618               syntax::tokenize(FileID, SM, ASTCtx->getLangOpts());
619         llvm::ArrayRef<syntax::Token> Tokens = FilesToTokensCache[FileID];
620         // Check if the referenced symbol is spelled exactly the same way the
621         // corresponding NamedDecl is. If it is, mark this reference as spelled.
622         const auto *IdentifierToken =
623             spelledIdentifierTouching(LocAndRole.Loc, Tokens);
624         DeclarationName Name = DeclAndRef.first->getDeclName();
625         const auto NameKind = Name.getNameKind();
626         bool IsTargetKind = NameKind == DeclarationName::Identifier ||
627                             NameKind == DeclarationName::CXXConstructorName;
628         bool Spelled = IdentifierToken && IsTargetKind &&
629                        Name.getAsString() == IdentifierToken->text(SM);
630         CollectRef(ID, LocAndRole, Spelled);
631       }
632     }
633   }
634 
635   ReferencedDecls.clear();
636   ReferencedMacros.clear();
637   DeclRefs.clear();
638   FilesToIndexCache.clear();
639   HeaderIsSelfContainedCache.clear();
640   IncludeFiles.clear();
641 }
642 
addDeclaration(const NamedDecl & ND,SymbolID ID,bool IsMainFileOnly)643 const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
644                                               bool IsMainFileOnly) {
645   auto &Ctx = ND.getASTContext();
646   auto &SM = Ctx.getSourceManager();
647 
648   Symbol S;
649   S.ID = std::move(ID);
650   std::string QName = printQualifiedName(ND);
651   // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:
652   // for consistency with CodeCompletionString and a clean name/signature split.
653   std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
654   std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND);
655   S.TemplateSpecializationArgs = TemplateSpecializationArgs;
656 
657   // We collect main-file symbols, but do not use them for code completion.
658   if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx))
659     S.Flags |= Symbol::IndexedForCodeCompletion;
660   if (isImplementationDetail(&ND))
661     S.Flags |= Symbol::ImplementationDetail;
662   if (!IsMainFileOnly)
663     S.Flags |= Symbol::VisibleOutsideFile;
664   S.SymInfo = index::getSymbolInfo(&ND);
665   std::string FileURI;
666   auto Loc = nameLocation(ND, SM);
667   assert(Loc.isValid() && "Invalid source location for NamedDecl");
668   // FIXME: use the result to filter out symbols.
669   shouldIndexFile(SM.getFileID(Loc));
670   if (auto DeclLoc =
671           getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
672     S.CanonicalDeclaration = *DeclLoc;
673 
674   S.Origin = Opts.Origin;
675   if (ND.getAvailability() == AR_Deprecated)
676     S.Flags |= Symbol::Deprecated;
677 
678   // Add completion info.
679   // FIXME: we may want to choose a different redecl, or combine from several.
680   assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
681   // We use the primary template, as clang does during code completion.
682   CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);
683   const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
684       *ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,
685       *CompletionTUInfo,
686       /*IncludeBriefComments*/ false);
687   std::string Documentation =
688       formatDocumentation(*CCS, getDocComment(Ctx, SymbolCompletion,
689                                               /*CommentsFromHeaders=*/true));
690   if (!(S.Flags & Symbol::IndexedForCodeCompletion)) {
691     if (Opts.StoreAllDocumentation)
692       S.Documentation = Documentation;
693     Symbols.insert(S);
694     return Symbols.find(S.ID);
695   }
696   S.Documentation = Documentation;
697   std::string Signature;
698   std::string SnippetSuffix;
699   getSignature(*CCS, &Signature, &SnippetSuffix);
700   S.Signature = Signature;
701   S.CompletionSnippetSuffix = SnippetSuffix;
702   std::string ReturnType = getReturnType(*CCS);
703   S.ReturnType = ReturnType;
704 
705   llvm::Optional<OpaqueType> TypeStorage;
706   if (S.Flags & Symbol::IndexedForCodeCompletion) {
707     TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion);
708     if (TypeStorage)
709       S.Type = TypeStorage->raw();
710   }
711 
712   Symbols.insert(S);
713   setIncludeLocation(S, ND.getLocation());
714   return Symbols.find(S.ID);
715 }
716 
addDefinition(const NamedDecl & ND,const Symbol & DeclSym)717 void SymbolCollector::addDefinition(const NamedDecl &ND,
718                                     const Symbol &DeclSym) {
719   if (DeclSym.Definition)
720     return;
721   // If we saw some forward declaration, we end up copying the symbol.
722   // This is not ideal, but avoids duplicating the "is this a definition" check
723   // in clang::index. We should only see one definition.
724   Symbol S = DeclSym;
725   std::string FileURI;
726   const auto &SM = ND.getASTContext().getSourceManager();
727   auto Loc = nameLocation(ND, SM);
728   // FIXME: use the result to filter out symbols.
729   shouldIndexFile(SM.getFileID(Loc));
730   if (auto DefLoc =
731           getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
732     S.Definition = *DefLoc;
733   Symbols.insert(S);
734 }
735 
736 /// Gets a canonical include (URI of the header or <header> or "header") for
737 /// header of \p FID (which should usually be the *expansion* file).
738 /// Returns None if includes should not be inserted for this file.
getIncludeHeader(const Symbol & S,FileID FID)739 llvm::Optional<std::string> SymbolCollector::getIncludeHeader(const Symbol &S,
740                                                               FileID FID) {
741   const SourceManager &SM = ASTCtx->getSourceManager();
742   const FileEntry *FE = SM.getFileEntryForID(FID);
743   if (!FE || FE->getName().empty())
744     return llvm::None;
745   llvm::StringRef Filename = FE->getName();
746   // If a file is mapped by canonical headers, use that mapping, regardless
747   // of whether it's an otherwise-good header (header guards etc).
748   if (Opts.Includes) {
749     llvm::SmallString<256> QName = S.Scope;
750     QName.append(S.Name);
751     llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName);
752     // If we had a mapping, always use it.
753     if (Canonical.startswith("<") || Canonical.startswith("\"")) {
754       // Hack: there are two std::move() overloads from different headers.
755       // CanonicalIncludes returns the common one-arg one from <utility>.
756       if (Canonical == "<utility>" && S.Name == "move" &&
757           S.Signature.contains(','))
758         Canonical = "<algorithm>";
759       return Canonical.str();
760     }
761     if (Canonical != Filename)
762       return toURI(SM, Canonical, Opts);
763   }
764   if (!isSelfContainedHeader(FID)) {
765     // A .inc or .def file is often included into a real header to define
766     // symbols (e.g. LLVM tablegen files).
767     if (Filename.endswith(".inc") || Filename.endswith(".def"))
768       return getIncludeHeader(S, SM.getFileID(SM.getIncludeLoc(FID)));
769     // Conservatively refuse to insert #includes to files without guards.
770     return llvm::None;
771   }
772   // Standard case: just insert the file itself.
773   return toURI(SM, Filename, Opts);
774 }
775 
isSelfContainedHeader(FileID FID)776 bool SymbolCollector::isSelfContainedHeader(FileID FID) {
777   // The real computation (which will be memoized).
778   auto Compute = [&] {
779     const SourceManager &SM = ASTCtx->getSourceManager();
780     const FileEntry *FE = SM.getFileEntryForID(FID);
781     if (!FE)
782       return false;
783     // FIXME: Should files that have been #import'd be considered
784     // self-contained? That's really a property of the includer,
785     // not of the file.
786     if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
787         !PP->getHeaderSearchInfo().hasFileBeenImported(FE))
788       return false;
789     // This pattern indicates that a header can't be used without
790     // particular preprocessor state, usually set up by another header.
791     if (isDontIncludeMeHeader(SM.getBufferData(FID)))
792       return false;
793     return true;
794   };
795 
796   auto R = HeaderIsSelfContainedCache.try_emplace(FID, false);
797   if (R.second)
798     R.first->second = Compute();
799   return R.first->second;
800 }
801 
802 // Is Line an #if or #ifdef directive?
isIf(llvm::StringRef Line)803 static bool isIf(llvm::StringRef Line) {
804   Line = Line.ltrim();
805   if (!Line.consume_front("#"))
806     return false;
807   Line = Line.ltrim();
808   return Line.startswith("if");
809 }
810 // Is Line an #error directive mentioning includes?
isErrorAboutInclude(llvm::StringRef Line)811 static bool isErrorAboutInclude(llvm::StringRef Line) {
812   Line = Line.ltrim();
813   if (!Line.consume_front("#"))
814     return false;
815   Line = Line.ltrim();
816   if (!Line.startswith("error"))
817     return false;
818   return Line.contains_lower("includ"); // Matches "include" or "including".
819 }
820 
isDontIncludeMeHeader(llvm::StringRef Content)821 bool SymbolCollector::isDontIncludeMeHeader(llvm::StringRef Content) {
822   llvm::StringRef Line;
823   // Only sniff up to 100 lines or 10KB.
824   Content = Content.take_front(100 * 100);
825   for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
826     std::tie(Line, Content) = Content.split('\n');
827     if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
828       return true;
829   }
830   return false;
831 }
832 
shouldIndexFile(FileID FID)833 bool SymbolCollector::shouldIndexFile(FileID FID) {
834   if (!Opts.FileFilter)
835     return true;
836   auto I = FilesToIndexCache.try_emplace(FID);
837   if (I.second)
838     I.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID);
839   return I.first->second;
840 }
841 
842 } // namespace clangd
843 } // namespace clang
844