1 //===--- FileIndex.h - Index for files. ---------------------------- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // FileIndex implements SymbolIndex for symbols from a set of files. Symbols are
10 // maintained at source-file granularity (e.g. with ASTs), and files can be
11 // updated dynamically.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
16 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
17 
18 #include "Headers.h"
19 #include "Index.h"
20 #include "MemIndex.h"
21 #include "Merge.h"
22 #include "index/CanonicalIncludes.h"
23 #include "index/Ref.h"
24 #include "index/Relation.h"
25 #include "index/Serialization.h"
26 #include "index/Symbol.h"
27 #include "support/MemoryTree.h"
28 #include "support/Path.h"
29 #include "clang/Lex/Preprocessor.h"
30 #include "clang/Tooling/CompilationDatabase.h"
31 #include "llvm/ADT/DenseSet.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/StringMap.h"
34 #include "llvm/ADT/StringRef.h"
35 #include <memory>
36 #include <vector>
37 
38 namespace clang {
39 class ASTContext;
40 namespace clangd {
41 class ParsedAST;
42 
43 /// Select between in-memory index implementations, which have tradeoffs.
44 enum class IndexType {
45   // MemIndex is trivially cheap to build, but has poor query performance.
46   Light,
47   // Dex is relatively expensive to build and uses more memory, but is fast.
48   Heavy,
49 };
50 
51 /// How to handle duplicated symbols across multiple files.
52 enum class DuplicateHandling {
53   // Pick a random symbol. Less accurate but faster.
54   PickOne,
55   // Merge symbols. More accurate but slower.
56   Merge,
57 };
58 
59 /// A container of slabs associated with a key. It can be updated at key
60 /// granularity, replacing all slabs belonging to a key with a new set. Keys are
61 /// usually file paths/uris.
62 ///
63 /// This implements snapshot semantics. Each update will create a new snapshot
64 /// for all slabs of the Key. Snapshots are managed with shared pointers that
65 /// are shared between this class and the users. For each key, this class only
66 /// stores a pointer pointing to the newest snapshot, and an outdated snapshot
67 /// is deleted by the last owner of the snapshot, either this class or the
68 /// symbol index.
69 ///
70 /// The snapshot semantics keeps critical sections minimal since we only need
71 /// locking when we swap or obtain references to snapshots.
72 class FileSymbols {
73 public:
74   /// Updates all slabs associated with the \p Key.
75   /// If either is nullptr, corresponding data for \p Key will be removed.
76   /// If CountReferences is true, \p Refs will be used for counting references
77   /// during merging.
78   void update(llvm::StringRef Key, std::unique_ptr<SymbolSlab> Symbols,
79               std::unique_ptr<RefSlab> Refs,
80               std::unique_ptr<RelationSlab> Relations, bool CountReferences);
81 
82   /// The index keeps the slabs alive.
83   /// Will count Symbol::References based on number of references in the main
84   /// files, while building the index with DuplicateHandling::Merge option.
85   /// Version is populated with an increasing sequence counter.
86   std::unique_ptr<SymbolIndex>
87   buildIndex(IndexType,
88              DuplicateHandling DuplicateHandle = DuplicateHandling::PickOne,
89              size_t *Version = nullptr);
90 
91   void profile(MemoryTree &MT) const;
92 
93 private:
94   struct RefSlabAndCountReferences {
95     std::shared_ptr<RefSlab> Slab;
96     bool CountReferences = false;
97   };
98   mutable std::mutex Mutex;
99 
100   size_t Version = 0;
101   llvm::StringMap<std::shared_ptr<SymbolSlab>> SymbolsSnapshot;
102   llvm::StringMap<RefSlabAndCountReferences> RefsSnapshot;
103   llvm::StringMap<std::shared_ptr<RelationSlab>> RelationsSnapshot;
104 };
105 
106 /// This manages symbols from files and an in-memory index on all symbols.
107 /// FIXME: Expose an interface to remove files that are closed.
108 class FileIndex : public MergedIndex {
109 public:
110   FileIndex(bool UseDex = true, bool CollectMainFileRefs = false);
111 
112   /// Update preamble symbols of file \p Path with all declarations in \p AST
113   /// and macros in \p PP.
114   void updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST,
115                       std::shared_ptr<Preprocessor> PP,
116                       const CanonicalIncludes &Includes);
117 
118   /// Update symbols and references from main file \p Path with
119   /// `indexMainDecls`.
120   void updateMain(PathRef Path, ParsedAST &AST);
121 
122   void profile(MemoryTree &MT) const;
123 
124 private:
125   bool UseDex; // FIXME: this should be always on.
126   bool CollectMainFileRefs;
127 
128   // Contains information from each file's preamble only. Symbols and relations
129   // are sharded per declaration file to deduplicate multiple symbols and reduce
130   // memory usage.
131   // Missing information:
132   //  - symbol refs (these are always "from the main file")
133   //  - definition locations in the main file
134   //
135   // Note that we store only one version of a header, hence symbols appearing in
136   // different PP states will be missing.
137   FileSymbols PreambleSymbols;
138   SwapIndex PreambleIndex;
139 
140   // Contains information from each file's main AST.
141   // These are updated frequently (on file change), but are relatively small.
142   // Mostly contains:
143   //  - refs to symbols declared in the preamble and referenced from main
144   //  - symbols declared both in the main file and the preamble
145   // (Note that symbols *only* in the main file are not indexed).
146   FileSymbols MainFileSymbols;
147   SwapIndex MainFileIndex;
148 
149   // While both the FileIndex and SwapIndex are threadsafe, we need to track
150   // versions to ensure that we don't overwrite newer indexes with older ones.
151   std::mutex UpdateIndexMu;
152   unsigned MainIndexVersion = 0;
153   unsigned PreambleIndexVersion = 0;
154 };
155 
156 using SlabTuple = std::tuple<SymbolSlab, RefSlab, RelationSlab>;
157 
158 /// Retrieves symbols and refs of local top level decls in \p AST (i.e.
159 /// `AST.getLocalTopLevelDecls()`).
160 /// Exposed to assist in unit tests.
161 SlabTuple indexMainDecls(ParsedAST &AST, bool CollectMainFileRefs = false);
162 
163 /// Index declarations from \p AST and macros from \p PP that are declared in
164 /// included headers.
165 SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
166                              std::shared_ptr<Preprocessor> PP,
167                              const CanonicalIncludes &Includes);
168 
169 /// Takes slabs coming from a TU (multiple files) and shards them per
170 /// declaration location.
171 struct FileShardedIndex {
172   /// \p HintPath is used to convert file URIs stored in symbols into absolute
173   /// paths.
174   explicit FileShardedIndex(IndexFileIn Input);
175 
176   /// Returns uris for all files that has a shard.
177   std::vector<llvm::StringRef> getAllSources() const;
178 
179   /// Generates index shard for the \p Uri. Note that this function results in
180   /// a copy of all the relevant data.
181   /// Returned index will always have Symbol/Refs/Relation Slabs set, even if
182   /// they are empty.
183   llvm::Optional<IndexFileIn> getShard(llvm::StringRef Uri) const;
184 
185 private:
186   // Contains all the information that belongs to a single file.
187   struct FileShard {
188     // Either declared or defined in the file.
189     llvm::DenseSet<const Symbol *> Symbols;
190     // Reference occurs in the file.
191     llvm::DenseSet<const Ref *> Refs;
192     // Subject is declared in the file.
193     llvm::DenseSet<const Relation *> Relations;
194     // Contains edges for only the direct includes.
195     IncludeGraph IG;
196   };
197 
198   // Keeps all the information alive.
199   const IndexFileIn Index;
200   // Mapping from URIs to slab information.
201   llvm::StringMap<FileShard> Shards;
202   // Used to build RefSlabs.
203   llvm::DenseMap<const Ref *, SymbolID> RefToSymID;
204 };
205 
206 } // namespace clangd
207 } // namespace clang
208 
209 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
210