1 //===--- SymbolCollector.h ---------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_COLLECTOR_H
9 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_COLLECTOR_H
10 
11 #include "CanonicalIncludes.h"
12 #include "CollectMacros.h"
13 #include "Index.h"
14 #include "SymbolOrigin.h"
15 #include "clang/AST/ASTContext.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/Basic/SourceLocation.h"
18 #include "clang/Basic/SourceManager.h"
19 #include "clang/Index/IndexDataConsumer.h"
20 #include "clang/Index/IndexSymbol.h"
21 #include "clang/Sema/CodeCompleteConsumer.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/Support/Regex.h"
24 #include <functional>
25 
26 namespace clang {
27 namespace clangd {
28 
29 /// Collect declarations (symbols) from an AST.
30 /// It collects most declarations except:
31 /// - Implicit declarations
32 /// - Anonymous declarations (anonymous enum/class/struct, etc)
33 /// - Declarations in anonymous namespaces in headers
34 /// - Local declarations (in function bodies, blocks, etc)
35 /// - Template specializations
36 /// - Library-specific private declarations (e.g. private declaration generated
37 /// by protobuf compiler)
38 ///
39 /// References to main-file symbols are not collected.
40 ///
41 /// See also shouldCollectSymbol(...).
42 ///
43 /// Clients (e.g. clangd) can use SymbolCollector together with
44 /// index::indexTopLevelDecls to retrieve all symbols when the source file is
45 /// changed.
46 class SymbolCollector : public index::IndexDataConsumer {
47 public:
48   struct Options {
49     /// When symbol paths cannot be resolved to absolute paths (e.g. files in
50     /// VFS that does not have absolute path), combine the fallback directory
51     /// with symbols' paths to get absolute paths. This must be an absolute
52     /// path.
53     std::string FallbackDir;
54     bool CollectIncludePath = false;
55     /// If set, this is used to map symbol #include path to a potentially
56     /// different #include path.
57     const CanonicalIncludes *Includes = nullptr;
58     // Populate the Symbol.References field.
59     bool CountReferences = false;
60     /// The symbol ref kinds that will be collected.
61     /// If not set, SymbolCollector will not collect refs.
62     /// Note that references of namespace decls are not collected, as they
63     /// contribute large part of the index, and they are less useful compared
64     /// with other decls.
65     RefKind RefFilter = RefKind::Unknown;
66     /// If set to true, SymbolCollector will collect all refs (from main file
67     /// and included headers); otherwise, only refs from main file will be
68     /// collected.
69     /// This flag is only meaningful when RefFilter is set.
70     bool RefsInHeaders = false;
71     // Every symbol collected will be stamped with this origin.
72     SymbolOrigin Origin = SymbolOrigin::Unknown;
73     /// Collect macros.
74     /// Note that SymbolCollector must be run with preprocessor in order to
75     /// collect macros. For example, `indexTopLevelDecls` will not index any
76     /// macro even if this is true.
77     bool CollectMacro = false;
78     /// Collect symbols local to main-files, such as static functions
79     /// and symbols inside an anonymous namespace.
80     bool CollectMainFileSymbols = true;
81     /// Collect references to main-file symbols.
82     bool CollectMainFileRefs = false;
83     /// If set to true, SymbolCollector will collect doc for all symbols.
84     /// Note that documents of symbols being indexed for completion will always
85     /// be collected regardless of this option.
86     bool StoreAllDocumentation = false;
87     /// If this is set, only collect symbols/references from a file if
88     /// `FileFilter(SM, FID)` is true. If not set, all files are indexed.
89     std::function<bool(const SourceManager &, FileID)> FileFilter = nullptr;
90   };
91 
92   SymbolCollector(Options Opts);
93 
94   /// Returns true is \p ND should be collected.
95   static bool shouldCollectSymbol(const NamedDecl &ND, const ASTContext &ASTCtx,
96                                   const Options &Opts, bool IsMainFileSymbol);
97 
98   void initialize(ASTContext &Ctx) override;
99 
setPreprocessor(std::shared_ptr<Preprocessor> PP)100   void setPreprocessor(std::shared_ptr<Preprocessor> PP) override {
101     this->PP = std::move(PP);
102   }
103 
104   bool
105   handleDeclOccurrence(const Decl *D, index::SymbolRoleSet Roles,
106                        ArrayRef<index::SymbolRelation> Relations,
107                        SourceLocation Loc,
108                        index::IndexDataConsumer::ASTNodeInfo ASTNode) override;
109 
110   bool handleMacroOccurrence(const IdentifierInfo *Name, const MacroInfo *MI,
111                              index::SymbolRoleSet Roles,
112                              SourceLocation Loc) override;
113 
114   void handleMacros(const MainFileMacros &MacroRefsToIndex);
115 
takeSymbols()116   SymbolSlab takeSymbols() { return std::move(Symbols).build(); }
takeRefs()117   RefSlab takeRefs() { return std::move(Refs).build(); }
takeRelations()118   RelationSlab takeRelations() { return std::move(Relations).build(); }
119 
120   /// Returns true if we are interested in references and declarations from \p
121   /// FID. If this function return false, bodies of functions inside those files
122   /// will be skipped to decrease indexing time.
123   bool shouldIndexFile(FileID FID);
124 
125   void finish() override;
126 
127 private:
128   const Symbol *addDeclaration(const NamedDecl &, SymbolID,
129                                bool IsMainFileSymbol);
130   void addDefinition(const NamedDecl &, const Symbol &DeclSymbol);
131   void processRelations(const NamedDecl &ND, const SymbolID &ID,
132                         ArrayRef<index::SymbolRelation> Relations);
133 
134   llvm::Optional<std::string> getIncludeHeader(const Symbol &S, FileID);
135   bool isSelfContainedHeader(FileID);
136   // Heuristically headers that only want to be included via an umbrella.
137   static bool isDontIncludeMeHeader(llvm::StringRef);
138 
139   // All Symbols collected from the AST.
140   SymbolSlab::Builder Symbols;
141   // File IDs for Symbol.IncludeHeaders.
142   // The final spelling is calculated in finish().
143   llvm::DenseMap<SymbolID, FileID> IncludeFiles;
144   void setIncludeLocation(const Symbol &S, SourceLocation);
145   // Indexed macros, to be erased if they turned out to be include guards.
146   llvm::DenseSet<const IdentifierInfo *> IndexedMacros;
147   // All refs collected from the AST. It includes:
148   //   1) symbols declared in the preamble and referenced from the main file (
149   //     which is not a header), or
150   //   2) symbols declared and referenced from the main file (which is a header)
151   RefSlab::Builder Refs;
152   // All relations collected from the AST.
153   RelationSlab::Builder Relations;
154   ASTContext *ASTCtx;
155   std::shared_ptr<Preprocessor> PP;
156   std::shared_ptr<GlobalCodeCompletionAllocator> CompletionAllocator;
157   std::unique_ptr<CodeCompletionTUInfo> CompletionTUInfo;
158   Options Opts;
159   struct SymbolRef {
160     SourceLocation Loc;
161     index::SymbolRoleSet Roles;
162     const Decl *Container;
163   };
164   // Symbols referenced from the current TU, flushed on finish().
165   llvm::DenseSet<const NamedDecl *> ReferencedDecls;
166   llvm::DenseSet<const IdentifierInfo *> ReferencedMacros;
167   llvm::DenseMap<const NamedDecl *, std::vector<SymbolRef>> DeclRefs;
168   llvm::DenseMap<SymbolID, std::vector<SymbolRef>> MacroRefs;
169   // Maps canonical declaration provided by clang to canonical declaration for
170   // an index symbol, if clangd prefers a different declaration than that
171   // provided by clang. For example, friend declaration might be considered
172   // canonical by clang but should not be considered canonical in the index
173   // unless it's a definition.
174   llvm::DenseMap<const Decl *, const Decl *> CanonicalDecls;
175   // Cache whether to index a file or not.
176   llvm::DenseMap<FileID, bool> FilesToIndexCache;
177   llvm::DenseMap<FileID, bool> HeaderIsSelfContainedCache;
178 };
179 
180 } // namespace clangd
181 } // namespace clang
182 
183 #endif
184