//===--- Token.h - Symbol Search primitive ----------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// Token objects represent a characteristic of a symbol, which can be used to /// perform efficient search. Tokens are keys for inverted index which are /// mapped to the corresponding posting lists. /// /// The symbol std::cout might have the tokens: /// * Scope "std::" /// * Trigram "cou" /// * Trigram "out" /// * Type "std::ostream" /// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H #include "index/Index.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/raw_ostream.h" #include #include namespace clang { namespace clangd { namespace dex { /// A Token represents an attribute of a symbol, such as a particular trigram /// present in the name (used for fuzzy search). /// /// Tokens can be used to perform more sophisticated search queries by /// constructing complex iterator trees. class Token { public: /// Kind specifies Token type which defines semantics for the internal /// representation. Each Kind has different representation stored in Data /// field. // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw // strings. For example, PathURI store URIs of each directory and its parents, // which induces a lot of overhead because these paths tend to be long and // each parent directory is a prefix. enum class Kind { /// Represents trigram used for fuzzy search of unqualified symbol names. /// /// Data contains 3 bytes with trigram contents. Trigram, /// Scope primitives, e.g. "symbol belongs to namespace foo::bar". /// /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global /// scope). Scope, /// Path Proximity URI to symbol declaration. /// /// Data stores path URI of symbol declaration file or its parent. /// /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h" /// and some amount of its parents. ProximityURI, /// Type of symbol (see `Symbol::Type`). Type, /// Internal Token type for invalid/special tokens, e.g. empty tokens for /// llvm::DenseMap. Sentinel, }; Token(Kind TokenKind, llvm::StringRef Data) : Data(Data), TokenKind(TokenKind) {} bool operator==(const Token &Other) const { return TokenKind == Other.TokenKind && Data == Other.Data; } friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) { switch (T.TokenKind) { case Kind::Trigram: OS << "T="; break; case Kind::Scope: OS << "S="; break; case Kind::ProximityURI: OS << "U="; break; case Kind::Type: OS << "Ty="; break; case Kind::Sentinel: OS << "?="; break; } return OS << T.Data; } private: /// Representation which is unique among Token with the same Kind. std::string Data; Kind TokenKind; friend llvm::hash_code hash_value(const Token &Token) { return llvm::hash_combine(static_cast(Token.TokenKind), Token.Data); } }; } // namespace dex } // namespace clangd } // namespace clang namespace llvm { // Support Tokens as DenseMap keys. template <> struct DenseMapInfo { static inline clang::clangd::dex::Token getEmptyKey() { return {clang::clangd::dex::Token::Kind::Sentinel, "EmptyKey"}; } static inline clang::clangd::dex::Token getTombstoneKey() { return {clang::clangd::dex::Token::Kind::Sentinel, "TombstoneKey"}; } static unsigned getHashValue(const clang::clangd::dex::Token &Tag) { return hash_value(Tag); } static bool isEqual(const clang::clangd::dex::Token &LHS, const clang::clangd::dex::Token &RHS) { return LHS == RHS; } }; } // namespace llvm #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H