1 //===--- Token.h - Symbol Search primitive ----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
/// Token objects represent a characteristic of a symbol, which can be used to
/// perform efficient search. Tokens are the keys of the inverted index, and
/// each key is mapped to the corresponding posting list.
13 ///
14 /// The symbol std::cout might have the tokens:
15 /// * Scope "std::"
16 /// * Trigram "cou"
17 /// * Trigram "out"
18 /// * Type "std::ostream"
19 ///
20 //===----------------------------------------------------------------------===//
21 
22 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
23 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
24 
25 #include "index/Index.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <string>
29 #include <vector>
30 
31 namespace clang {
32 namespace clangd {
33 namespace dex {
34 
35 /// A Token represents an attribute of a symbol, such as a particular trigram
36 /// present in the name (used for fuzzy search).
37 ///
38 /// Tokens can be used to perform more sophisticated search queries by
39 /// constructing complex iterator trees.
40 class Token {
41 public:
42   /// Kind specifies Token type which defines semantics for the internal
43   /// representation. Each Kind has different representation stored in Data
44   /// field.
45   // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw
46   // strings. For example, PathURI store URIs of each directory and its parents,
47   // which induces a lot of overhead because these paths tend to be long and
48   // each parent directory is a prefix.
49   enum class Kind {
50     /// Represents trigram used for fuzzy search of unqualified symbol names.
51     ///
52     /// Data contains 3 bytes with trigram contents.
53     Trigram,
54     /// Scope primitives, e.g. "symbol belongs to namespace foo::bar".
55     ///
56     /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global
57     /// scope).
58     Scope,
59     /// Path Proximity URI to symbol declaration.
60     ///
61     /// Data stores path URI of symbol declaration file or its parent.
62     ///
63     /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h"
64     /// and some amount of its parents.
65     ProximityURI,
66     /// Type of symbol (see `Symbol::Type`).
67     Type,
68     /// Internal Token type for invalid/special tokens, e.g. empty tokens for
69     /// llvm::DenseMap.
70     Sentinel,
71   };
72 
Token(Kind TokenKind,llvm::StringRef Data)73   Token(Kind TokenKind, llvm::StringRef Data)
74       : Data(Data), TokenKind(TokenKind) {}
75 
76   bool operator==(const Token &Other) const {
77     return TokenKind == Other.TokenKind && Data == Other.Data;
78   }
79 
80   friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) {
81     switch (T.TokenKind) {
82     case Kind::Trigram:
83       OS << "T=";
84       break;
85     case Kind::Scope:
86       OS << "S=";
87       break;
88     case Kind::ProximityURI:
89       OS << "U=";
90       break;
91     case Kind::Type:
92       OS << "Ty=";
93       break;
94     case Kind::Sentinel:
95       OS << "?=";
96       break;
97     }
98     return OS << T.Data;
99   }
100 
101 private:
102   /// Representation which is unique among Token with the same Kind.
103   std::string Data;
104   Kind TokenKind;
105 
hash_value(const Token & Token)106   friend llvm::hash_code hash_value(const Token &Token) {
107     return llvm::hash_combine(static_cast<int>(Token.TokenKind), Token.Data);
108   }
109 };
110 
111 } // namespace dex
112 } // namespace clangd
113 } // namespace clang
114 
115 namespace llvm {
116 
117 // Support Tokens as DenseMap keys.
118 template <> struct DenseMapInfo<clang::clangd::dex::Token> {
119   static inline clang::clangd::dex::Token getEmptyKey() {
120     return {clang::clangd::dex::Token::Kind::Sentinel, "EmptyKey"};
121   }
122 
123   static inline clang::clangd::dex::Token getTombstoneKey() {
124     return {clang::clangd::dex::Token::Kind::Sentinel, "TombstoneKey"};
125   }
126 
127   static unsigned getHashValue(const clang::clangd::dex::Token &Tag) {
128     return hash_value(Tag);
129   }
130 
131   static bool isEqual(const clang::clangd::dex::Token &LHS,
132                       const clang::clangd::dex::Token &RHS) {
133     return LHS == RHS;
134   }
135 };
136 
137 } // namespace llvm
138 
139 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
140