1 //===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This defines Dex - a symbol index implementation based on query iterators 11 /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc. 12 /// While consuming more memory and having longer build stage due to 13 /// preprocessing, Dex will have substantially lower latency. It will also allow 14 /// efficient symbol searching which is crucial for operations like code 15 /// completion, and can be very important for a number of different code 16 /// transformations which will be eventually supported by Clangd. 17 /// 18 //===----------------------------------------------------------------------===// 19 20 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H 21 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H 22 23 #include "Iterator.h" 24 #include "PostingList.h" 25 #include "Token.h" 26 #include "Trigram.h" 27 #include "index/Index.h" 28 #include "index/MemIndex.h" 29 #include "index/Relation.h" 30 #include "index/SymbolCollector.h" 31 32 namespace clang { 33 namespace clangd { 34 namespace dex { 35 36 /// In-memory Dex trigram-based index implementation. 37 // FIXME(kbobyrev): Introduce serialization and deserialization of the symbol 38 // index so that it can be loaded from the disk. Since static index is not 39 // changed frequently, it's safe to assume that it has to be built only once 40 // (when the clangd process starts). Therefore, it can be easier to store built 41 // index on disk and then load it if available. 42 class Dex : public SymbolIndex { 43 public: 44 // All data must outlive this index. 45 template <typename SymbolRange, typename RefsRange, typename RelationsRange> Dex(SymbolRange && Symbols,RefsRange && Refs,RelationsRange && Relations)46 Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations) 47 : Corpus(0) { 48 for (auto &&Sym : Symbols) 49 this->Symbols.push_back(&Sym); 50 for (auto &&Ref : Refs) 51 this->Refs.try_emplace(Ref.first, Ref.second); 52 for (auto &&Rel : Relations) 53 this->Relations[std::make_pair(Rel.Subject, 54 static_cast<uint8_t>(Rel.Predicate))] 55 .push_back(Rel.Object); 56 buildIndex(); 57 } 58 // Symbols and Refs are owned by BackingData, Index takes ownership. 59 template <typename SymbolRange, typename RefsRange, typename RelationsRange, 60 typename Payload> Dex(SymbolRange && Symbols,RefsRange && Refs,RelationsRange && Relations,Payload && BackingData,size_t BackingDataSize)61 Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, 62 Payload &&BackingData, size_t BackingDataSize) 63 : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs), 64 std::forward<RelationsRange>(Relations)) { 65 KeepAlive = std::shared_ptr<void>( 66 std::make_shared<Payload>(std::move(BackingData)), nullptr); 67 this->BackingDataSize = BackingDataSize; 68 } 69 70 /// Builds an index from slabs. The index takes ownership of the slab. 71 static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab); 72 73 bool 74 fuzzyFind(const FuzzyFindRequest &Req, 75 llvm::function_ref<void(const Symbol &)> Callback) const override; 76 77 void lookup(const LookupRequest &Req, 78 llvm::function_ref<void(const Symbol &)> Callback) const override; 79 80 bool refs(const RefsRequest &Req, 81 llvm::function_ref<void(const Ref &)> Callback) const override; 82 83 void relations(const RelationsRequest &Req, 84 llvm::function_ref<void(const SymbolID &, const Symbol &)> 85 Callback) const override; 86 87 size_t estimateMemoryUsage() const override; 88 89 private: 90 void buildIndex(); 91 std::unique_ptr<Iterator> iterator(const Token &Tok) const; 92 std::unique_ptr<Iterator> 93 createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const; 94 std::unique_ptr<Iterator> 95 createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const; 96 97 /// Stores symbols sorted in the descending order of symbol quality.. 98 std::vector<const Symbol *> Symbols; 99 /// SymbolQuality[I] is the quality of Symbols[I]. 100 std::vector<float> SymbolQuality; 101 llvm::DenseMap<SymbolID, const Symbol *> LookupTable; 102 /// Inverted index is a mapping from the search token to the posting list, 103 /// which contains all items which can be characterized by such search token. 104 /// For example, if the search token is scope "std::", the corresponding 105 /// posting list would contain all indices of symbols defined in namespace 106 /// std. Inverted index is used to retrieve posting lists which are processed 107 /// during the fuzzyFind process. 108 llvm::DenseMap<Token, PostingList> InvertedIndex; 109 dex::Corpus Corpus; 110 llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs; 111 static_assert(sizeof(RelationKind) == sizeof(uint8_t), 112 "RelationKind should be of same size as a uint8_t"); 113 llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations; 114 std::shared_ptr<void> KeepAlive; // poor man's move-only std::any 115 // Size of memory retained by KeepAlive. 116 size_t BackingDataSize = 0; 117 }; 118 119 /// Returns Search Token for a number of parent directories of given Path. 120 /// Should be used within the index build process. 121 /// 122 /// This function is exposed for testing only. 123 std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath); 124 125 } // namespace dex 126 } // namespace clangd 127 } // namespace clang 128 129 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H 130