1 //===--- FindSymbols.cpp ------------------------------------*- C++-*------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "FindSymbols.h"
9
10 #include "AST.h"
11 #include "FuzzyMatch.h"
12 #include "ParsedAST.h"
13 #include "Quality.h"
14 #include "SourceCode.h"
15 #include "index/Index.h"
16 #include "support/Logger.h"
17 #include "clang/AST/DeclTemplate.h"
18 #include "clang/Index/IndexDataConsumer.h"
19 #include "clang/Index/IndexSymbol.h"
20 #include "clang/Index/IndexingAction.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/FormatVariadic.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/ScopedPrinter.h"
28 #include <tuple>
29
30 #define DEBUG_TYPE "FindSymbols"
31
32 namespace clang {
33 namespace clangd {
34
35 namespace {
36 using ScoredSymbolInfo = std::pair<float, SymbolInformation>;
37 struct ScoredSymbolGreater {
operator ()clang::clangd::__anon27691a550111::ScoredSymbolGreater38 bool operator()(const ScoredSymbolInfo &L, const ScoredSymbolInfo &R) {
39 if (L.first != R.first)
40 return L.first > R.first;
41 return L.second.name < R.second.name; // Earlier name is better.
42 }
43 };
44
45 // Returns true if \p Query can be found as a sub-sequence inside \p Scope.
approximateScopeMatch(llvm::StringRef Scope,llvm::StringRef Query)46 bool approximateScopeMatch(llvm::StringRef Scope, llvm::StringRef Query) {
47 assert(Scope.empty() || Scope.endswith("::"));
48 assert(Query.empty() || Query.endswith("::"));
49 while (!Scope.empty() && !Query.empty()) {
50 auto Colons = Scope.find("::");
51 assert(Colons != llvm::StringRef::npos);
52
53 llvm::StringRef LeadingSpecifier = Scope.slice(0, Colons + 2);
54 Scope = Scope.slice(Colons + 2, llvm::StringRef::npos);
55 Query.consume_front(LeadingSpecifier);
56 }
57 return Query.empty();
58 }
59
60 } // namespace
61
indexToLSPLocation(const SymbolLocation & Loc,llvm::StringRef TUPath)62 llvm::Expected<Location> indexToLSPLocation(const SymbolLocation &Loc,
63 llvm::StringRef TUPath) {
64 auto Path = URI::resolve(Loc.FileURI, TUPath);
65 if (!Path)
66 return error("Could not resolve path for file '{0}': {1}", Loc.FileURI,
67 Path.takeError());
68 Location L;
69 L.uri = URIForFile::canonicalize(*Path, TUPath);
70 Position Start, End;
71 Start.line = Loc.Start.line();
72 Start.character = Loc.Start.column();
73 End.line = Loc.End.line();
74 End.character = Loc.End.column();
75 L.range = {Start, End};
76 return L;
77 }
78
symbolToLocation(const Symbol & Sym,llvm::StringRef TUPath)79 llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
80 llvm::StringRef TUPath) {
81 // Prefer the definition over e.g. a function declaration in a header
82 return indexToLSPLocation(
83 Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration, TUPath);
84 }
85
86 llvm::Expected<std::vector<SymbolInformation>>
getWorkspaceSymbols(llvm::StringRef Query,int Limit,const SymbolIndex * const Index,llvm::StringRef HintPath)87 getWorkspaceSymbols(llvm::StringRef Query, int Limit,
88 const SymbolIndex *const Index, llvm::StringRef HintPath) {
89 std::vector<SymbolInformation> Result;
90 if (Query.empty() || !Index)
91 return Result;
92
93 // Lookup for qualified names are performed as:
94 // - Exact namespaces are boosted by the index.
95 // - Approximate matches are (sub-scope match) included via AnyScope logic.
96 // - Non-matching namespaces (no sub-scope match) are post-filtered.
97 auto Names = splitQualifiedName(Query);
98
99 FuzzyFindRequest Req;
100 Req.Query = std::string(Names.second);
101
102 // FuzzyFind doesn't want leading :: qualifier.
103 auto HasLeadingColons = Names.first.consume_front("::");
104 // Limit the query to specific namespace if it is fully-qualified.
105 Req.AnyScope = !HasLeadingColons;
106 // Boost symbols from desired namespace.
107 if (HasLeadingColons || !Names.first.empty())
108 Req.Scopes = {std::string(Names.first)};
109 if (Limit) {
110 Req.Limit = Limit;
111 // If we are boosting a specific scope allow more results to be retrieved,
112 // since some symbols from preferred namespaces might not make the cut.
113 if (Req.AnyScope && !Req.Scopes.empty())
114 *Req.Limit *= 5;
115 }
116 TopN<ScoredSymbolInfo, ScoredSymbolGreater> Top(
117 Req.Limit ? *Req.Limit : std::numeric_limits<size_t>::max());
118 FuzzyMatcher Filter(Req.Query);
119
120 Index->fuzzyFind(Req, [HintPath, &Top, &Filter, AnyScope = Req.AnyScope,
121 ReqScope = Names.first](const Symbol &Sym) {
122 llvm::StringRef Scope = Sym.Scope;
123 // Fuzzyfind might return symbols from irrelevant namespaces if query was
124 // not fully-qualified, drop those.
125 if (AnyScope && !approximateScopeMatch(Scope, ReqScope))
126 return;
127
128 auto Loc = symbolToLocation(Sym, HintPath);
129 if (!Loc) {
130 log("Workspace symbols: {0}", Loc.takeError());
131 return;
132 }
133
134 SymbolQualitySignals Quality;
135 Quality.merge(Sym);
136 SymbolRelevanceSignals Relevance;
137 Relevance.Name = Sym.Name;
138 Relevance.Query = SymbolRelevanceSignals::Generic;
139 // If symbol and request scopes do not match exactly, apply a penalty.
140 Relevance.InBaseClass = AnyScope && Scope != ReqScope;
141 if (auto NameMatch = Filter.match(Sym.Name))
142 Relevance.NameMatch = *NameMatch;
143 else {
144 log("Workspace symbol: {0} didn't match query {1}", Sym.Name,
145 Filter.pattern());
146 return;
147 }
148 Relevance.merge(Sym);
149 auto Score = evaluateSymbolAndRelevance(Quality.evaluateHeuristics(),
150 Relevance.evaluateHeuristics());
151 dlog("FindSymbols: {0}{1} = {2}\n{3}{4}\n", Sym.Scope, Sym.Name, Score,
152 Quality, Relevance);
153
154 SymbolInformation Info;
155 Info.name = (Sym.Name + Sym.TemplateSpecializationArgs).str();
156 Info.kind = indexSymbolKindToSymbolKind(Sym.SymInfo.Kind);
157 Info.location = *Loc;
158 Scope.consume_back("::");
159 Info.containerName = Scope.str();
160
161 // Exposed score excludes fuzzy-match component, for client-side re-ranking.
162 Info.score = Score / Relevance.NameMatch;
163 Top.push({Score, std::move(Info)});
164 });
165 for (auto &R : std::move(Top).items())
166 Result.push_back(std::move(R.second));
167 return Result;
168 }
169
170 namespace {
declToSym(ASTContext & Ctx,const NamedDecl & ND)171 llvm::Optional<DocumentSymbol> declToSym(ASTContext &Ctx, const NamedDecl &ND) {
172 auto &SM = Ctx.getSourceManager();
173
174 SourceLocation BeginLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getBeginLoc()));
175 SourceLocation EndLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getEndLoc()));
176 const auto SymbolRange =
177 toHalfOpenFileRange(SM, Ctx.getLangOpts(), {BeginLoc, EndLoc});
178 if (!SymbolRange)
179 return llvm::None;
180
181 index::SymbolInfo SymInfo = index::getSymbolInfo(&ND);
182 // FIXME: This is not classifying constructors, destructors and operators
183 // correctly.
184 SymbolKind SK = indexSymbolKindToSymbolKind(SymInfo.Kind);
185
186 DocumentSymbol SI;
187 SI.name = printName(Ctx, ND);
188 SI.kind = SK;
189 SI.deprecated = ND.isDeprecated();
190 SI.range = Range{sourceLocToPosition(SM, SymbolRange->getBegin()),
191 sourceLocToPosition(SM, SymbolRange->getEnd())};
192
193 SourceLocation NameLoc = ND.getLocation();
194 SourceLocation FallbackNameLoc;
195 if (NameLoc.isMacroID()) {
196 if (isSpelledInSource(NameLoc, SM)) {
197 // Prefer the spelling loc, but save the expansion loc as a fallback.
198 FallbackNameLoc = SM.getExpansionLoc(NameLoc);
199 NameLoc = SM.getSpellingLoc(NameLoc);
200 } else {
201 NameLoc = SM.getExpansionLoc(NameLoc);
202 }
203 }
204 auto ComputeSelectionRange = [&](SourceLocation L) -> Range {
205 Position NameBegin = sourceLocToPosition(SM, L);
206 Position NameEnd = sourceLocToPosition(
207 SM, Lexer::getLocForEndOfToken(L, 0, SM, Ctx.getLangOpts()));
208 return Range{NameBegin, NameEnd};
209 };
210
211 SI.selectionRange = ComputeSelectionRange(NameLoc);
212 if (!SI.range.contains(SI.selectionRange) && FallbackNameLoc.isValid()) {
213 // 'selectionRange' must be contained in 'range'. In cases where clang
214 // reports unrelated ranges, we first try falling back to the expansion
215 // loc for the selection range.
216 SI.selectionRange = ComputeSelectionRange(FallbackNameLoc);
217 }
218 if (!SI.range.contains(SI.selectionRange)) {
219 // If the containment relationship still doesn't hold, throw away
220 // 'range' and use 'selectionRange' for both.
221 SI.range = SI.selectionRange;
222 }
223 return SI;
224 }
225
226 /// A helper class to build an outline for the parse AST. It traverses the AST
227 /// directly instead of using RecursiveASTVisitor (RAV) for three main reasons:
228 /// - there is no way to keep RAV from traversing subtrees we are not
229 /// interested in. E.g. not traversing function locals or implicit template
230 /// instantiations.
231 /// - it's easier to combine results of recursive passes,
232 /// - visiting decls is actually simple, so we don't hit the complicated
233 /// cases that RAV mostly helps with (types, expressions, etc.)
234 class DocumentOutline {
235 public:
DocumentOutline(ParsedAST & AST)236 DocumentOutline(ParsedAST &AST) : AST(AST) {}
237
238 /// Builds the document outline for the generated AST.
build()239 std::vector<DocumentSymbol> build() {
240 std::vector<DocumentSymbol> Results;
241 for (auto &TopLevel : AST.getLocalTopLevelDecls())
242 traverseDecl(TopLevel, Results);
243 return Results;
244 }
245
246 private:
247 enum class VisitKind { No, OnlyDecl, OnlyChildren, DeclAndChildren };
248
traverseDecl(Decl * D,std::vector<DocumentSymbol> & Results)249 void traverseDecl(Decl *D, std::vector<DocumentSymbol> &Results) {
250 if (auto *Templ = llvm::dyn_cast<TemplateDecl>(D)) {
251 // TemplatedDecl might be null, e.g. concepts.
252 if (auto *TD = Templ->getTemplatedDecl())
253 D = TD;
254 }
255
256 VisitKind Visit = shouldVisit(D);
257 if (Visit == VisitKind::No)
258 return;
259
260 if (Visit == VisitKind::OnlyChildren)
261 return traverseChildren(D, Results);
262
263 auto *ND = llvm::cast<NamedDecl>(D);
264 auto Sym = declToSym(AST.getASTContext(), *ND);
265 if (!Sym)
266 return;
267 Results.push_back(std::move(*Sym));
268
269 if (Visit == VisitKind::OnlyDecl)
270 return;
271
272 assert(Visit == VisitKind::DeclAndChildren && "Unexpected VisitKind");
273 traverseChildren(ND, Results.back().children);
274 }
275
traverseChildren(Decl * D,std::vector<DocumentSymbol> & Results)276 void traverseChildren(Decl *D, std::vector<DocumentSymbol> &Results) {
277 auto *Scope = llvm::dyn_cast<DeclContext>(D);
278 if (!Scope)
279 return;
280 for (auto *C : Scope->decls())
281 traverseDecl(C, Results);
282 }
283
shouldVisit(Decl * D)284 VisitKind shouldVisit(Decl *D) {
285 if (D->isImplicit())
286 return VisitKind::No;
287
288 if (llvm::isa<LinkageSpecDecl>(D) || llvm::isa<ExportDecl>(D))
289 return VisitKind::OnlyChildren;
290
291 if (!llvm::isa<NamedDecl>(D))
292 return VisitKind::No;
293
294 if (auto Func = llvm::dyn_cast<FunctionDecl>(D)) {
295 // Some functions are implicit template instantiations, those should be
296 // ignored.
297 if (auto *Info = Func->getTemplateSpecializationInfo()) {
298 if (!Info->isExplicitInstantiationOrSpecialization())
299 return VisitKind::No;
300 }
301 // Only visit the function itself, do not visit the children (i.e.
302 // function parameters, etc.)
303 return VisitKind::OnlyDecl;
304 }
305 // Handle template instantiations. We have three cases to consider:
306 // - explicit instantiations, e.g. 'template class std::vector<int>;'
307 // Visit the decl itself (it's present in the code), but not the
308 // children.
309 // - implicit instantiations, i.e. not written by the user.
310 // Do not visit at all, they are not present in the code.
311 // - explicit specialization, e.g. 'template <> class vector<bool> {};'
312 // Visit both the decl and its children, both are written in the code.
313 if (auto *TemplSpec = llvm::dyn_cast<ClassTemplateSpecializationDecl>(D)) {
314 if (TemplSpec->isExplicitInstantiationOrSpecialization())
315 return TemplSpec->isExplicitSpecialization()
316 ? VisitKind::DeclAndChildren
317 : VisitKind::OnlyDecl;
318 return VisitKind::No;
319 }
320 if (auto *TemplSpec = llvm::dyn_cast<VarTemplateSpecializationDecl>(D)) {
321 if (TemplSpec->isExplicitInstantiationOrSpecialization())
322 return TemplSpec->isExplicitSpecialization()
323 ? VisitKind::DeclAndChildren
324 : VisitKind::OnlyDecl;
325 return VisitKind::No;
326 }
327 // For all other cases, visit both the children and the decl.
328 return VisitKind::DeclAndChildren;
329 }
330
331 ParsedAST &AST;
332 };
333
collectDocSymbols(ParsedAST & AST)334 std::vector<DocumentSymbol> collectDocSymbols(ParsedAST &AST) {
335 return DocumentOutline(AST).build();
336 }
337 } // namespace
338
getDocumentSymbols(ParsedAST & AST)339 llvm::Expected<std::vector<DocumentSymbol>> getDocumentSymbols(ParsedAST &AST) {
340 return collectDocSymbols(AST);
341 }
342
343 } // namespace clangd
344 } // namespace clang
345