//===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This defines Dex - a symbol index implementation based on query iterators /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc. /// While consuming more memory and having longer build stage due to /// preprocessing, Dex will have substantially lower latency. It will also allow /// efficient symbol searching which is crucial for operations like code /// completion, and can be very important for a number of different code /// transformations which will be eventually supported by Clangd. /// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H #include "Iterator.h" #include "PostingList.h" #include "Token.h" #include "Trigram.h" #include "index/Index.h" #include "index/MemIndex.h" #include "index/Relation.h" #include "index/SymbolCollector.h" namespace clang { namespace clangd { namespace dex { /// In-memory Dex trigram-based index implementation. // FIXME(kbobyrev): Introduce serialization and deserialization of the symbol // index so that it can be loaded from the disk. Since static index is not // changed frequently, it's safe to assume that it has to be built only once // (when the clangd process starts). Therefore, it can be easier to store built // index on disk and then load it if available. class Dex : public SymbolIndex { public: // All data must outlive this index. template Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations) : Corpus(0) { for (auto &&Sym : Symbols) this->Symbols.push_back(&Sym); for (auto &&Ref : Refs) this->Refs.try_emplace(Ref.first, Ref.second); for (auto &&Rel : Relations) this->Relations[std::make_pair(Rel.Subject, static_cast(Rel.Predicate))] .push_back(Rel.Object); buildIndex(); } // Symbols and Refs are owned by BackingData, Index takes ownership. template Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, Payload &&BackingData, size_t BackingDataSize) : Dex(std::forward(Symbols), std::forward(Refs), std::forward(Relations)) { KeepAlive = std::shared_ptr( std::make_shared(std::move(BackingData)), nullptr); this->BackingDataSize = BackingDataSize; } /// Builds an index from slabs. The index takes ownership of the slab. static std::unique_ptr build(SymbolSlab, RefSlab, RelationSlab); bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref Callback) const override; void lookup(const LookupRequest &Req, llvm::function_ref Callback) const override; bool refs(const RefsRequest &Req, llvm::function_ref Callback) const override; void relations(const RelationsRequest &Req, llvm::function_ref Callback) const override; size_t estimateMemoryUsage() const override; private: void buildIndex(); std::unique_ptr iterator(const Token &Tok) const; std::unique_ptr createFileProximityIterator(llvm::ArrayRef ProximityPaths) const; std::unique_ptr createTypeBoostingIterator(llvm::ArrayRef Types) const; /// Stores symbols sorted in the descending order of symbol quality.. std::vector Symbols; /// SymbolQuality[I] is the quality of Symbols[I]. std::vector SymbolQuality; llvm::DenseMap LookupTable; /// Inverted index is a mapping from the search token to the posting list, /// which contains all items which can be characterized by such search token. /// For example, if the search token is scope "std::", the corresponding /// posting list would contain all indices of symbols defined in namespace /// std. Inverted index is used to retrieve posting lists which are processed /// during the fuzzyFind process. llvm::DenseMap InvertedIndex; dex::Corpus Corpus; llvm::DenseMap> Refs; static_assert(sizeof(RelationKind) == sizeof(uint8_t), "RelationKind should be of same size as a uint8_t"); llvm::DenseMap, std::vector> Relations; std::shared_ptr KeepAlive; // poor man's move-only std::any // Size of memory retained by KeepAlive. size_t BackingDataSize = 0; }; /// Returns Search Token for a number of parent directories of given Path. /// Should be used within the index build process. /// /// This function is exposed for testing only. std::vector generateProximityURIs(llvm::StringRef URIPath); } // namespace dex } // namespace clangd } // namespace clang #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H