1 //===--- FileIndex.cpp - Indexes for files. ------------------------ C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "FileIndex.h"
10 #include "CollectMacros.h"
11 #include "ParsedAST.h"
12 #include "SymbolCollector.h"
13 #include "index/CanonicalIncludes.h"
14 #include "index/Index.h"
15 #include "index/MemIndex.h"
16 #include "index/Merge.h"
17 #include "index/Ref.h"
18 #include "index/Relation.h"
19 #include "index/Serialization.h"
20 #include "index/Symbol.h"
21 #include "index/SymbolID.h"
22 #include "index/SymbolOrigin.h"
23 #include "index/dex/Dex.h"
24 #include "support/Logger.h"
25 #include "support/MemoryTree.h"
26 #include "support/Path.h"
27 #include "clang/AST/ASTContext.h"
28 #include "clang/Index/IndexingAction.h"
29 #include "clang/Index/IndexingOptions.h"
30 #include "clang/Lex/MacroInfo.h"
31 #include "clang/Lex/Preprocessor.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/StringMap.h"
36 #include "llvm/ADT/StringRef.h"
37 #include "llvm/Support/Error.h"
38 #include <algorithm>
39 #include <memory>
40 #include <tuple>
41 #include <utility>
42 #include <vector>
43 
44 namespace clang {
45 namespace clangd {
46 namespace {
47 
indexSymbols(ASTContext & AST,std::shared_ptr<Preprocessor> PP,llvm::ArrayRef<Decl * > DeclsToIndex,const MainFileMacros * MacroRefsToIndex,const CanonicalIncludes & Includes,bool IsIndexMainAST,llvm::StringRef Version,bool CollectMainFileRefs)48 SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
49                        llvm::ArrayRef<Decl *> DeclsToIndex,
50                        const MainFileMacros *MacroRefsToIndex,
51                        const CanonicalIncludes &Includes, bool IsIndexMainAST,
52                        llvm::StringRef Version, bool CollectMainFileRefs) {
53   SymbolCollector::Options CollectorOpts;
54   CollectorOpts.CollectIncludePath = true;
55   CollectorOpts.Includes = &Includes;
56   CollectorOpts.CountReferences = false;
57   CollectorOpts.Origin = SymbolOrigin::Dynamic;
58   CollectorOpts.CollectMainFileRefs = CollectMainFileRefs;
59 
60   index::IndexingOptions IndexOpts;
61   // We only need declarations, because we don't count references.
62   IndexOpts.SystemSymbolFilter =
63       index::IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly;
64   IndexOpts.IndexFunctionLocals = false;
65   if (IsIndexMainAST) {
66     // We only collect refs when indexing main AST.
67     CollectorOpts.RefFilter = RefKind::All;
68     // Comments for main file can always be obtained from sema, do not store
69     // them in the index.
70     CollectorOpts.StoreAllDocumentation = false;
71   } else {
72     IndexOpts.IndexMacrosInPreprocessor = true;
73     CollectorOpts.CollectMacro = true;
74     CollectorOpts.StoreAllDocumentation = true;
75   }
76 
77   SymbolCollector Collector(std::move(CollectorOpts));
78   Collector.setPreprocessor(PP);
79   if (MacroRefsToIndex)
80     Collector.handleMacros(*MacroRefsToIndex);
81   index::indexTopLevelDecls(AST, *PP, DeclsToIndex, Collector, IndexOpts);
82 
83   const auto &SM = AST.getSourceManager();
84   const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
85   std::string FileName =
86       std::string(MainFileEntry ? MainFileEntry->getName() : "");
87 
88   auto Syms = Collector.takeSymbols();
89   auto Refs = Collector.takeRefs();
90   auto Relations = Collector.takeRelations();
91 
92   vlog("indexed {0} AST for {1} version {2}:\n"
93        "  symbol slab: {3} symbols, {4} bytes\n"
94        "  ref slab: {5} symbols, {6} refs, {7} bytes\n"
95        "  relations slab: {8} relations, {9} bytes",
96        IsIndexMainAST ? "file" : "preamble", FileName, Version, Syms.size(),
97        Syms.bytes(), Refs.size(), Refs.numRefs(), Refs.bytes(),
98        Relations.size(), Relations.bytes());
99   return std::make_tuple(std::move(Syms), std::move(Refs),
100                          std::move(Relations));
101 }
102 
103 // We keep only the node "U" and its edges. Any node other than "U" will be
104 // empty in the resultant graph.
getSubGraph(llvm::StringRef URI,const IncludeGraph & FullGraph)105 IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) {
106   IncludeGraph IG;
107 
108   auto Entry = IG.try_emplace(URI).first;
109   auto &Node = Entry->getValue();
110   Node = FullGraph.lookup(Entry->getKey());
111   Node.URI = Entry->getKey();
112 
113   // URIs inside nodes must point into the keys of the same IncludeGraph.
114   for (auto &Include : Node.DirectIncludes) {
115     auto I = IG.try_emplace(Include).first;
116     I->getValue().URI = I->getKey();
117     Include = I->getKey();
118   }
119   return IG;
120 }
121 } // namespace
122 
FileShardedIndex(IndexFileIn Input)123 FileShardedIndex::FileShardedIndex(IndexFileIn Input)
124     : Index(std::move(Input)) {
125   // Used to build RelationSlabs.
126   llvm::DenseMap<SymbolID, FileShard *> SymbolIDToFile;
127 
128   // Attribute each Symbol to both their declaration and definition locations.
129   if (Index.Symbols) {
130     for (const auto &S : *Index.Symbols) {
131       auto It = Shards.try_emplace(S.CanonicalDeclaration.FileURI);
132       It.first->getValue().Symbols.insert(&S);
133       SymbolIDToFile[S.ID] = &It.first->getValue();
134       // Only bother if definition file is different than declaration file.
135       if (S.Definition &&
136           S.Definition.FileURI != S.CanonicalDeclaration.FileURI) {
137         auto It = Shards.try_emplace(S.Definition.FileURI);
138         It.first->getValue().Symbols.insert(&S);
139       }
140     }
141   }
142   // Attribute references into each file they occured in.
143   if (Index.Refs) {
144     for (const auto &SymRefs : *Index.Refs) {
145       for (const auto &R : SymRefs.second) {
146         const auto It = Shards.try_emplace(R.Location.FileURI);
147         It.first->getValue().Refs.insert(&R);
148         RefToSymID[&R] = SymRefs.first;
149       }
150     }
151   }
152   // The Subject and/or Object shards might be part of multiple TUs. In
153   // such cases there will be a race and the last TU to write the shard
154   // will win and all the other relations will be lost. To avoid this,
155   // we store relations in both shards. A race might still happen if the
156   // same translation unit produces different relations under different
157   // configurations, but that's something clangd doesn't handle in general.
158   if (Index.Relations) {
159     for (const auto &R : *Index.Relations) {
160       // FIXME: RelationSlab shouldn't contain dangling relations.
161       FileShard *SubjectFile = SymbolIDToFile.lookup(R.Subject);
162       FileShard *ObjectFile = SymbolIDToFile.lookup(R.Object);
163       if (SubjectFile)
164         SubjectFile->Relations.insert(&R);
165       if (ObjectFile && ObjectFile != SubjectFile)
166         ObjectFile->Relations.insert(&R);
167     }
168   }
169   // Store only the direct includes of a file in a shard.
170   if (Index.Sources) {
171     const auto &FullGraph = *Index.Sources;
172     for (const auto &It : FullGraph) {
173       auto ShardIt = Shards.try_emplace(It.first());
174       ShardIt.first->getValue().IG = getSubGraph(It.first(), FullGraph);
175     }
176   }
177 }
getAllSources() const178 std::vector<llvm::StringRef> FileShardedIndex::getAllSources() const {
179   // It should be enough to construct a vector with {Shards.keys().begin(),
180   // Shards.keys().end()} but MSVC fails to compile that.
181   std::vector<PathRef> Result;
182   Result.reserve(Shards.size());
183   for (auto Key : Shards.keys())
184     Result.push_back(Key);
185   return Result;
186 }
187 
188 llvm::Optional<IndexFileIn>
getShard(llvm::StringRef Uri) const189 FileShardedIndex::getShard(llvm::StringRef Uri) const {
190   auto It = Shards.find(Uri);
191   if (It == Shards.end())
192     return llvm::None;
193 
194   IndexFileIn IF;
195   IF.Sources = It->getValue().IG;
196   IF.Cmd = Index.Cmd;
197 
198   SymbolSlab::Builder SymB;
199   for (const auto *S : It->getValue().Symbols)
200     SymB.insert(*S);
201   IF.Symbols = std::move(SymB).build();
202 
203   RefSlab::Builder RefB;
204   for (const auto *Ref : It->getValue().Refs) {
205     auto SID = RefToSymID.lookup(Ref);
206     RefB.insert(SID, *Ref);
207   }
208   IF.Refs = std::move(RefB).build();
209 
210   RelationSlab::Builder RelB;
211   for (const auto *Rel : It->getValue().Relations) {
212     RelB.insert(*Rel);
213   }
214   IF.Relations = std::move(RelB).build();
215   // Explicit move here is needed by some compilers.
216   return std::move(IF);
217 }
218 
indexMainDecls(ParsedAST & AST,bool CollectMainFileRefs)219 SlabTuple indexMainDecls(ParsedAST &AST, bool CollectMainFileRefs) {
220   return indexSymbols(
221       AST.getASTContext(), AST.getPreprocessorPtr(),
222       AST.getLocalTopLevelDecls(), &AST.getMacros(), AST.getCanonicalIncludes(),
223       /*IsIndexMainAST=*/true, AST.version(), CollectMainFileRefs);
224 }
225 
indexHeaderSymbols(llvm::StringRef Version,ASTContext & AST,std::shared_ptr<Preprocessor> PP,const CanonicalIncludes & Includes)226 SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
227                              std::shared_ptr<Preprocessor> PP,
228                              const CanonicalIncludes &Includes) {
229   std::vector<Decl *> DeclsToIndex(
230       AST.getTranslationUnitDecl()->decls().begin(),
231       AST.getTranslationUnitDecl()->decls().end());
232   return indexSymbols(AST, std::move(PP), DeclsToIndex,
233                       /*MainFileMacros=*/nullptr, Includes,
234                       /*IsIndexMainAST=*/false, Version,
235                       /*CollectMainFileRefs=*/false);
236 }
237 
update(llvm::StringRef Key,std::unique_ptr<SymbolSlab> Symbols,std::unique_ptr<RefSlab> Refs,std::unique_ptr<RelationSlab> Relations,bool CountReferences)238 void FileSymbols::update(llvm::StringRef Key,
239                          std::unique_ptr<SymbolSlab> Symbols,
240                          std::unique_ptr<RefSlab> Refs,
241                          std::unique_ptr<RelationSlab> Relations,
242                          bool CountReferences) {
243   std::lock_guard<std::mutex> Lock(Mutex);
244   ++Version;
245   if (!Symbols)
246     SymbolsSnapshot.erase(Key);
247   else
248     SymbolsSnapshot[Key] = std::move(Symbols);
249   if (!Refs) {
250     RefsSnapshot.erase(Key);
251   } else {
252     RefSlabAndCountReferences Item;
253     Item.CountReferences = CountReferences;
254     Item.Slab = std::move(Refs);
255     RefsSnapshot[Key] = std::move(Item);
256   }
257   if (!Relations)
258     RelationsSnapshot.erase(Key);
259   else
260     RelationsSnapshot[Key] = std::move(Relations);
261 }
262 
263 std::unique_ptr<SymbolIndex>
buildIndex(IndexType Type,DuplicateHandling DuplicateHandle,size_t * Version)264 FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle,
265                         size_t *Version) {
266   std::vector<std::shared_ptr<SymbolSlab>> SymbolSlabs;
267   std::vector<std::shared_ptr<RefSlab>> RefSlabs;
268   std::vector<std::shared_ptr<RelationSlab>> RelationSlabs;
269   std::vector<RefSlab *> MainFileRefs;
270   {
271     std::lock_guard<std::mutex> Lock(Mutex);
272     for (const auto &FileAndSymbols : SymbolsSnapshot)
273       SymbolSlabs.push_back(FileAndSymbols.second);
274     for (const auto &FileAndRefs : RefsSnapshot) {
275       RefSlabs.push_back(FileAndRefs.second.Slab);
276       if (FileAndRefs.second.CountReferences)
277         MainFileRefs.push_back(RefSlabs.back().get());
278     }
279     for (const auto &FileAndRelations : RelationsSnapshot)
280       RelationSlabs.push_back(FileAndRelations.second);
281 
282     if (Version)
283       *Version = this->Version;
284   }
285   std::vector<const Symbol *> AllSymbols;
286   std::vector<Symbol> SymsStorage;
287   switch (DuplicateHandle) {
288   case DuplicateHandling::Merge: {
289     llvm::DenseMap<SymbolID, Symbol> Merged;
290     for (const auto &Slab : SymbolSlabs) {
291       for (const auto &Sym : *Slab) {
292         assert(Sym.References == 0 &&
293                "Symbol with non-zero references sent to FileSymbols");
294         auto I = Merged.try_emplace(Sym.ID, Sym);
295         if (!I.second)
296           I.first->second = mergeSymbol(I.first->second, Sym);
297       }
298     }
299     for (const RefSlab *Refs : MainFileRefs)
300       for (const auto &Sym : *Refs) {
301         auto It = Merged.find(Sym.first);
302         // This might happen while background-index is still running.
303         if (It == Merged.end())
304           continue;
305         It->getSecond().References += Sym.second.size();
306       }
307     SymsStorage.reserve(Merged.size());
308     for (auto &Sym : Merged) {
309       SymsStorage.push_back(std::move(Sym.second));
310       AllSymbols.push_back(&SymsStorage.back());
311     }
312     break;
313   }
314   case DuplicateHandling::PickOne: {
315     llvm::DenseSet<SymbolID> AddedSymbols;
316     for (const auto &Slab : SymbolSlabs)
317       for (const auto &Sym : *Slab) {
318         assert(Sym.References == 0 &&
319                "Symbol with non-zero references sent to FileSymbols");
320         if (AddedSymbols.insert(Sym.ID).second)
321           AllSymbols.push_back(&Sym);
322       }
323     break;
324   }
325   }
326 
327   std::vector<Ref> RefsStorage; // Contiguous ranges for each SymbolID.
328   llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> AllRefs;
329   {
330     llvm::DenseMap<SymbolID, llvm::SmallVector<Ref, 4>> MergedRefs;
331     size_t Count = 0;
332     for (const auto &RefSlab : RefSlabs)
333       for (const auto &Sym : *RefSlab) {
334         MergedRefs[Sym.first].append(Sym.second.begin(), Sym.second.end());
335         Count += Sym.second.size();
336       }
337     RefsStorage.reserve(Count);
338     AllRefs.reserve(MergedRefs.size());
339     for (auto &Sym : MergedRefs) {
340       auto &SymRefs = Sym.second;
341       // Sorting isn't required, but yields more stable results over rebuilds.
342       llvm::sort(SymRefs);
343       llvm::copy(SymRefs, back_inserter(RefsStorage));
344       AllRefs.try_emplace(
345           Sym.first,
346           llvm::ArrayRef<Ref>(&RefsStorage[RefsStorage.size() - SymRefs.size()],
347                               SymRefs.size()));
348     }
349   }
350 
351   std::vector<Relation> AllRelations;
352   for (const auto &RelationSlab : RelationSlabs) {
353     for (const auto &R : *RelationSlab)
354       AllRelations.push_back(R);
355   }
356   // Sort relations and remove duplicates that could arise due to
357   // relations being stored in both the shards containing their
358   // subject and object.
359   llvm::sort(AllRelations);
360   AllRelations.erase(std::unique(AllRelations.begin(), AllRelations.end()),
361                      AllRelations.end());
362 
363   size_t StorageSize =
364       RefsStorage.size() * sizeof(Ref) + SymsStorage.size() * sizeof(Symbol);
365   for (const auto &Slab : SymbolSlabs)
366     StorageSize += Slab->bytes();
367   for (const auto &RefSlab : RefSlabs)
368     StorageSize += RefSlab->bytes();
369 
370   // Index must keep the slabs and contiguous ranges alive.
371   switch (Type) {
372   case IndexType::Light:
373     return std::make_unique<MemIndex>(
374         llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
375         std::move(AllRelations),
376         std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
377                         std::move(RefsStorage), std::move(SymsStorage)),
378         StorageSize);
379   case IndexType::Heavy:
380     return std::make_unique<dex::Dex>(
381         llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
382         std::move(AllRelations),
383         std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
384                         std::move(RefsStorage), std::move(SymsStorage)),
385         StorageSize);
386   }
387   llvm_unreachable("Unknown clangd::IndexType");
388 }
389 
profile(MemoryTree & MT) const390 void FileSymbols::profile(MemoryTree &MT) const {
391   std::lock_guard<std::mutex> Lock(Mutex);
392   for (const auto &SymSlab : SymbolsSnapshot) {
393     MT.detail(SymSlab.first())
394         .child("symbols")
395         .addUsage(SymSlab.second->bytes());
396   }
397   for (const auto &RefSlab : RefsSnapshot) {
398     MT.detail(RefSlab.first())
399         .child("references")
400         .addUsage(RefSlab.second.Slab->bytes());
401   }
402   for (const auto &RelSlab : RelationsSnapshot) {
403     MT.detail(RelSlab.first())
404         .child("relations")
405         .addUsage(RelSlab.second->bytes());
406   }
407 }
408 
FileIndex(bool UseDex,bool CollectMainFileRefs)409 FileIndex::FileIndex(bool UseDex, bool CollectMainFileRefs)
410     : MergedIndex(&MainFileIndex, &PreambleIndex), UseDex(UseDex),
411       CollectMainFileRefs(CollectMainFileRefs),
412       PreambleIndex(std::make_unique<MemIndex>()),
413       MainFileIndex(std::make_unique<MemIndex>()) {}
414 
updatePreamble(PathRef Path,llvm::StringRef Version,ASTContext & AST,std::shared_ptr<Preprocessor> PP,const CanonicalIncludes & Includes)415 void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
416                                ASTContext &AST,
417                                std::shared_ptr<Preprocessor> PP,
418                                const CanonicalIncludes &Includes) {
419   IndexFileIn IF;
420   std::tie(IF.Symbols, std::ignore, IF.Relations) =
421       indexHeaderSymbols(Version, AST, std::move(PP), Includes);
422   FileShardedIndex ShardedIndex(std::move(IF));
423   for (auto Uri : ShardedIndex.getAllSources()) {
424     auto IF = ShardedIndex.getShard(Uri);
425     // We are using the key received from ShardedIndex, so it should always
426     // exist.
427     assert(IF);
428     PreambleSymbols.update(
429         Uri, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)),
430         std::make_unique<RefSlab>(),
431         std::make_unique<RelationSlab>(std::move(*IF->Relations)),
432         /*CountReferences=*/false);
433   }
434   size_t IndexVersion = 0;
435   auto NewIndex =
436       PreambleSymbols.buildIndex(UseDex ? IndexType::Heavy : IndexType::Light,
437                                  DuplicateHandling::PickOne, &IndexVersion);
438   {
439     std::lock_guard<std::mutex> Lock(UpdateIndexMu);
440     if (IndexVersion <= PreambleIndexVersion) {
441       // We lost the race, some other thread built a later version.
442       return;
443     }
444     PreambleIndexVersion = IndexVersion;
445     PreambleIndex.reset(std::move(NewIndex));
446     vlog(
447         "Build dynamic index for header symbols with estimated memory usage of "
448         "{0} bytes",
449         PreambleIndex.estimateMemoryUsage());
450   }
451 }
452 
updateMain(PathRef Path,ParsedAST & AST)453 void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {
454   auto Contents = indexMainDecls(AST, CollectMainFileRefs);
455   MainFileSymbols.update(
456       Path, std::make_unique<SymbolSlab>(std::move(std::get<0>(Contents))),
457       std::make_unique<RefSlab>(std::move(std::get<1>(Contents))),
458       std::make_unique<RelationSlab>(std::move(std::get<2>(Contents))),
459       /*CountReferences=*/true);
460   size_t IndexVersion = 0;
461   auto NewIndex = MainFileSymbols.buildIndex(
462       IndexType::Light, DuplicateHandling::Merge, &IndexVersion);
463   {
464     std::lock_guard<std::mutex> Lock(UpdateIndexMu);
465     if (IndexVersion <= MainIndexVersion) {
466       // We lost the race, some other thread built a later version.
467       return;
468     }
469     MainIndexVersion = IndexVersion;
470     MainFileIndex.reset(std::move(NewIndex));
471     vlog(
472         "Build dynamic index for main-file symbols with estimated memory usage "
473         "of {0} bytes",
474         MainFileIndex.estimateMemoryUsage());
475   }
476 }
477 
profile(MemoryTree & MT) const478 void FileIndex::profile(MemoryTree &MT) const {
479   PreambleSymbols.profile(MT.child("preamble").child("slabs"));
480   MT.child("preamble")
481       .child("index")
482       .addUsage(PreambleIndex.estimateMemoryUsage());
483   MainFileSymbols.profile(MT.child("main_file").child("slabs"));
484   MT.child("main_file")
485       .child("index")
486       .addUsage(MainFileIndex.estimateMemoryUsage());
487 }
488 } // namespace clangd
489 } // namespace clang
490