1 //===--- Token.h - Symbol Search primitive ----------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// Token objects represent a characteristic of a symbol, which can be used to 11 /// perform efficient search. Tokens are keys for inverted index which are 12 /// mapped to the corresponding posting lists. 13 /// 14 /// The symbol std::cout might have the tokens: 15 /// * Scope "std::" 16 /// * Trigram "cou" 17 /// * Trigram "out" 18 /// * Type "std::ostream" 19 /// 20 //===----------------------------------------------------------------------===// 21 22 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H 23 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H 24 25 #include "index/Index.h" 26 #include "llvm/ADT/DenseMap.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <string> 29 #include <vector> 30 31 namespace clang { 32 namespace clangd { 33 namespace dex { 34 35 /// A Token represents an attribute of a symbol, such as a particular trigram 36 /// present in the name (used for fuzzy search). 37 /// 38 /// Tokens can be used to perform more sophisticated search queries by 39 /// constructing complex iterator trees. 40 class Token { 41 public: 42 /// Kind specifies Token type which defines semantics for the internal 43 /// representation. Each Kind has different representation stored in Data 44 /// field. 45 // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw 46 // strings. For example, PathURI store URIs of each directory and its parents, 47 // which induces a lot of overhead because these paths tend to be long and 48 // each parent directory is a prefix. 49 enum class Kind { 50 /// Represents trigram used for fuzzy search of unqualified symbol names. 51 /// 52 /// Data contains 3 bytes with trigram contents. 53 Trigram, 54 /// Scope primitives, e.g. "symbol belongs to namespace foo::bar". 55 /// 56 /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global 57 /// scope). 58 Scope, 59 /// Path Proximity URI to symbol declaration. 60 /// 61 /// Data stores path URI of symbol declaration file or its parent. 62 /// 63 /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h" 64 /// and some amount of its parents. 65 ProximityURI, 66 /// Type of symbol (see `Symbol::Type`). 67 Type, 68 /// Internal Token type for invalid/special tokens, e.g. empty tokens for 69 /// llvm::DenseMap. 70 Sentinel, 71 }; 72 Token(Kind TokenKind,llvm::StringRef Data)73 Token(Kind TokenKind, llvm::StringRef Data) 74 : Data(Data), TokenKind(TokenKind) {} 75 76 bool operator==(const Token &Other) const { 77 return TokenKind == Other.TokenKind && Data == Other.Data; 78 } 79 80 friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) { 81 switch (T.TokenKind) { 82 case Kind::Trigram: 83 OS << "T="; 84 break; 85 case Kind::Scope: 86 OS << "S="; 87 break; 88 case Kind::ProximityURI: 89 OS << "U="; 90 break; 91 case Kind::Type: 92 OS << "Ty="; 93 break; 94 case Kind::Sentinel: 95 OS << "?="; 96 break; 97 } 98 return OS << T.Data; 99 } 100 101 private: 102 /// Representation which is unique among Token with the same Kind. 103 std::string Data; 104 Kind TokenKind; 105 hash_value(const Token & Token)106 friend llvm::hash_code hash_value(const Token &Token) { 107 return llvm::hash_combine(static_cast<int>(Token.TokenKind), Token.Data); 108 } 109 }; 110 111 } // namespace dex 112 } // namespace clangd 113 } // namespace clang 114 115 namespace llvm { 116 117 // Support Tokens as DenseMap keys. 118 template <> struct DenseMapInfo<clang::clangd::dex::Token> { 119 static inline clang::clangd::dex::Token getEmptyKey() { 120 return {clang::clangd::dex::Token::Kind::Sentinel, "EmptyKey"}; 121 } 122 123 static inline clang::clangd::dex::Token getTombstoneKey() { 124 return {clang::clangd::dex::Token::Kind::Sentinel, "TombstoneKey"}; 125 } 126 127 static unsigned getHashValue(const clang::clangd::dex::Token &Tag) { 128 return hash_value(Tag); 129 } 130 131 static bool isEqual(const clang::clangd::dex::Token &LHS, 132 const clang::clangd::dex::Token &RHS) { 133 return LHS == RHS; 134 } 135 }; 136 137 } // namespace llvm 138 139 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H 140