1 //===--- Selection.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "Selection.h"
10 #include "SourceCode.h"
11 #include "support/Logger.h"
12 #include "support/Trace.h"
13 #include "clang/AST/ASTTypeTraits.h"
14 #include "clang/AST/Decl.h"
15 #include "clang/AST/DeclCXX.h"
16 #include "clang/AST/Expr.h"
17 #include "clang/AST/ExprCXX.h"
18 #include "clang/AST/PrettyPrinter.h"
19 #include "clang/AST/RecursiveASTVisitor.h"
20 #include "clang/AST/TypeLoc.h"
21 #include "clang/Basic/OperatorKinds.h"
22 #include "clang/Basic/SourceLocation.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Basic/TokenKinds.h"
25 #include "clang/Lex/Lexer.h"
26 #include "clang/Tooling/Syntax/Tokens.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/ADT/StringExtras.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <algorithm>
32 #include <string>
33
34 namespace clang {
35 namespace clangd {
36 namespace {
37 using Node = SelectionTree::Node;
38 using ast_type_traits::DynTypedNode;
39
40 // Measure the fraction of selections that were enabled by recovery AST.
recordMetrics(const SelectionTree & S,const LangOptions & Lang)41 void recordMetrics(const SelectionTree &S, const LangOptions &Lang) {
42 if (!trace::enabled())
43 return;
44 const char *LanguageLabel = Lang.CPlusPlus ? "C++" : Lang.ObjC ? "ObjC" : "C";
45 static constexpr trace::Metric SelectionUsedRecovery(
46 "selection_recovery", trace::Metric::Distribution, "language");
47 static constexpr trace::Metric RecoveryType(
48 "selection_recovery_type", trace::Metric::Distribution, "language");
49 const auto *Common = S.commonAncestor();
50 for (const auto *N = Common; N; N = N->Parent) {
51 if (const auto *RE = N->ASTNode.get<RecoveryExpr>()) {
52 SelectionUsedRecovery.record(1, LanguageLabel); // used recovery ast.
53 RecoveryType.record(RE->isTypeDependent() ? 0 : 1, LanguageLabel);
54 return;
55 }
56 }
57 if (Common)
58 SelectionUsedRecovery.record(0, LanguageLabel); // unused.
59 }
60
61 // An IntervalSet maintains a set of disjoint subranges of an array.
62 //
63 // Initially, it contains the entire array.
64 // [-----------------------------------------------------------]
65 //
66 // When a range is erased(), it will typically split the array in two.
67 // Claim: [--------------------]
68 // after: [----------------] [-------------------]
69 //
70 // erase() returns the segments actually erased. Given the state above:
71 // Claim: [---------------------------------------]
72 // Out: [---------] [------]
73 // After: [-----] [-----------]
74 //
75 // It is used to track (expanded) tokens not yet associated with an AST node.
76 // On traversing an AST node, its token range is erased from the unclaimed set.
77 // The tokens actually removed are associated with that node, and hit-tested
78 // against the selection to determine whether the node is selected.
79 template <typename T> class IntervalSet {
80 public:
IntervalSet(llvm::ArrayRef<T> Range)81 IntervalSet(llvm::ArrayRef<T> Range) { UnclaimedRanges.insert(Range); }
82
83 // Removes the elements of Claim from the set, modifying or removing ranges
84 // that overlap it.
85 // Returns the continuous subranges of Claim that were actually removed.
erase(llvm::ArrayRef<T> Claim)86 llvm::SmallVector<llvm::ArrayRef<T>, 4> erase(llvm::ArrayRef<T> Claim) {
87 llvm::SmallVector<llvm::ArrayRef<T>, 4> Out;
88 if (Claim.empty())
89 return Out;
90
91 // General case:
92 // Claim: [-----------------]
93 // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-]
94 // Overlap: ^first ^second
95 // Ranges C and D are fully included. Ranges B and E must be trimmed.
96 auto Overlap = std::make_pair(
97 UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C
98 UnclaimedRanges.lower_bound({Claim.end(), Claim.end()})); // F
99 // Rewind to cover B.
100 if (Overlap.first != UnclaimedRanges.begin()) {
101 --Overlap.first;
102 // ...unless B isn't selected at all.
103 if (Overlap.first->end() <= Claim.begin())
104 ++Overlap.first;
105 }
106 if (Overlap.first == Overlap.second)
107 return Out;
108
109 // First, copy all overlapping ranges into the output.
110 auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second);
111 // If any of the overlapping ranges were sliced by the claim, split them:
112 // - restrict the returned range to the claimed part
113 // - save the unclaimed part so it can be reinserted
114 llvm::ArrayRef<T> RemainingHead, RemainingTail;
115 if (Claim.begin() > OutFirst->begin()) {
116 RemainingHead = {OutFirst->begin(), Claim.begin()};
117 *OutFirst = {Claim.begin(), OutFirst->end()};
118 }
119 if (Claim.end() < Out.back().end()) {
120 RemainingTail = {Claim.end(), Out.back().end()};
121 Out.back() = {Out.back().begin(), Claim.end()};
122 }
123
124 // Erase all the overlapping ranges (invalidating all iterators).
125 UnclaimedRanges.erase(Overlap.first, Overlap.second);
126 // Reinsert ranges that were merely trimmed.
127 if (!RemainingHead.empty())
128 UnclaimedRanges.insert(RemainingHead);
129 if (!RemainingTail.empty())
130 UnclaimedRanges.insert(RemainingTail);
131
132 return Out;
133 }
134
135 private:
136 using TokenRange = llvm::ArrayRef<T>;
137 struct RangeLess {
operator ()clang::clangd::__anoncd6088310111::IntervalSet::RangeLess138 bool operator()(llvm::ArrayRef<T> L, llvm::ArrayRef<T> R) const {
139 return L.begin() < R.begin();
140 }
141 };
142
143 // Disjoint sorted unclaimed ranges of expanded tokens.
144 std::set<llvm::ArrayRef<T>, RangeLess> UnclaimedRanges;
145 };
146
147 // Sentinel value for the selectedness of a node where we've seen no tokens yet.
148 // This resolves to Unselected if no tokens are ever seen.
149 // But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete.
150 // This value is never exposed publicly.
151 constexpr SelectionTree::Selection NoTokens =
152 static_cast<SelectionTree::Selection>(
153 static_cast<unsigned char>(SelectionTree::Complete + 1));
154
155 // Nodes start with NoTokens, and then use this function to aggregate the
156 // selectedness as more tokens are found.
update(SelectionTree::Selection & Result,SelectionTree::Selection New)157 void update(SelectionTree::Selection &Result, SelectionTree::Selection New) {
158 if (New == NoTokens)
159 return;
160 if (Result == NoTokens)
161 Result = New;
162 else if (Result != New)
163 // Can only be completely selected (or unselected) if all tokens are.
164 Result = SelectionTree::Partial;
165 }
166
167 // As well as comments, don't count semicolons as real tokens.
168 // They're not properly claimed as expr-statement is missing from the AST.
shouldIgnore(const syntax::Token & Tok)169 bool shouldIgnore(const syntax::Token &Tok) {
170 return Tok.kind() == tok::comment || Tok.kind() == tok::semi;
171 }
172
173 // Determine whether 'Target' is the first expansion of the macro
174 // argument whose top-level spelling location is 'SpellingLoc'.
isFirstExpansion(FileID Target,SourceLocation SpellingLoc,const SourceManager & SM)175 bool isFirstExpansion(FileID Target, SourceLocation SpellingLoc,
176 const SourceManager &SM) {
177 SourceLocation Prev = SpellingLoc;
178 while (true) {
179 // If the arg is expanded multiple times, getMacroArgExpandedLocation()
180 // returns the first expansion.
181 SourceLocation Next = SM.getMacroArgExpandedLocation(Prev);
182 // So if we reach the target, target is the first-expansion of the
183 // first-expansion ...
184 if (SM.getFileID(Next) == Target)
185 return true;
186
187 // Otherwise, if the FileID stops changing, we've reached the innermost
188 // macro expansion, and Target was on a different branch.
189 if (SM.getFileID(Next) == SM.getFileID(Prev))
190 return false;
191
192 Prev = Next;
193 }
194 return false;
195 }
196
197 // SelectionTester can determine whether a range of tokens from the PP-expanded
198 // stream (corresponding to an AST node) is considered selected.
199 //
200 // When the tokens result from macro expansions, the appropriate tokens in the
201 // main file are examined (macro invocation or args). Similarly for #includes.
202 // However, only the first expansion of a given spelled token is considered
203 // selected.
204 //
205 // It tests each token in the range (not just the endpoints) as contiguous
206 // expanded tokens may not have contiguous spellings (with macros).
207 //
208 // Non-token text, and tokens not modeled in the AST (comments, semicolons)
209 // are ignored when determining selectedness.
210 class SelectionTester {
211 public:
212 // The selection is offsets [SelBegin, SelEnd) in SelFile.
SelectionTester(const syntax::TokenBuffer & Buf,FileID SelFile,unsigned SelBegin,unsigned SelEnd,const SourceManager & SM)213 SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile,
214 unsigned SelBegin, unsigned SelEnd, const SourceManager &SM)
215 : SelFile(SelFile), SM(SM) {
216 // Find all tokens (partially) selected in the file.
217 auto AllSpelledTokens = Buf.spelledTokens(SelFile);
218 const syntax::Token *SelFirst =
219 llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) {
220 return SM.getFileOffset(Tok.endLocation()) <= SelBegin;
221 });
222 const syntax::Token *SelLimit = std::partition_point(
223 SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) {
224 return SM.getFileOffset(Tok.location()) < SelEnd;
225 });
226 auto Sel = llvm::makeArrayRef(SelFirst, SelLimit);
227 // Find which of these are preprocessed to nothing and should be ignored.
228 std::vector<bool> PPIgnored(Sel.size(), false);
229 for (const syntax::TokenBuffer::Expansion &X :
230 Buf.expansionsOverlapping(Sel)) {
231 if (X.Expanded.empty()) {
232 for (const syntax::Token &Tok : X.Spelled) {
233 if (&Tok >= SelFirst && &Tok < SelLimit)
234 PPIgnored[&Tok - SelFirst] = true;
235 }
236 }
237 }
238 // Precompute selectedness and offset for selected spelled tokens.
239 for (unsigned I = 0; I < Sel.size(); ++I) {
240 if (shouldIgnore(Sel[I]) || PPIgnored[I])
241 continue;
242 SpelledTokens.emplace_back();
243 Tok &S = SpelledTokens.back();
244 S.Offset = SM.getFileOffset(Sel[I].location());
245 if (S.Offset >= SelBegin && S.Offset + Sel[I].length() <= SelEnd)
246 S.Selected = SelectionTree::Complete;
247 else
248 S.Selected = SelectionTree::Partial;
249 }
250 }
251
252 // Test whether a consecutive range of tokens is selected.
253 // The tokens are taken from the expanded token stream.
254 SelectionTree::Selection
test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const255 test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const {
256 if (SpelledTokens.empty())
257 return NoTokens;
258 SelectionTree::Selection Result = NoTokens;
259 while (!ExpandedTokens.empty()) {
260 // Take consecutive tokens from the same context together for efficiency.
261 FileID FID = SM.getFileID(ExpandedTokens.front().location());
262 auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) {
263 return SM.getFileID(T.location()) == FID;
264 });
265 assert(!Batch.empty());
266 ExpandedTokens = ExpandedTokens.drop_front(Batch.size());
267
268 update(Result, testChunk(FID, Batch));
269 }
270 return Result;
271 }
272
273 // Cheap check whether any of the tokens in R might be selected.
274 // If it returns false, test() will return NoTokens or Unselected.
275 // If it returns true, test() may return any value.
mayHit(SourceRange R) const276 bool mayHit(SourceRange R) const {
277 if (SpelledTokens.empty())
278 return false;
279 auto B = SM.getDecomposedLoc(R.getBegin());
280 auto E = SM.getDecomposedLoc(R.getEnd());
281 if (B.first == SelFile && E.first == SelFile)
282 if (E.second < SpelledTokens.front().Offset ||
283 B.second > SpelledTokens.back().Offset)
284 return false;
285 return true;
286 }
287
288 private:
289 // Hit-test a consecutive range of tokens from a single file ID.
290 SelectionTree::Selection
testChunk(FileID FID,llvm::ArrayRef<syntax::Token> Batch) const291 testChunk(FileID FID, llvm::ArrayRef<syntax::Token> Batch) const {
292 assert(!Batch.empty());
293 SourceLocation StartLoc = Batch.front().location();
294 // There are several possible categories of FileID depending on how the
295 // preprocessor was used to generate these tokens:
296 // main file, #included file, macro args, macro bodies.
297 // We need to identify the main-file tokens that represent Batch, and
298 // determine whether we want to exclusively claim them. Regular tokens
299 // represent one AST construct, but a macro invocation can represent many.
300
301 // Handle tokens written directly in the main file.
302 if (FID == SelFile) {
303 return testTokenRange(SM.getFileOffset(Batch.front().location()),
304 SM.getFileOffset(Batch.back().location()));
305 }
306
307 // Handle tokens in another file #included into the main file.
308 // Check if the #include is selected, but don't claim it exclusively.
309 if (StartLoc.isFileID()) {
310 for (SourceLocation Loc = Batch.front().location(); Loc.isValid();
311 Loc = SM.getIncludeLoc(SM.getFileID(Loc))) {
312 if (SM.getFileID(Loc) == SelFile)
313 // FIXME: use whole #include directive, not just the filename string.
314 return testToken(SM.getFileOffset(Loc));
315 }
316 return NoTokens;
317 }
318
319 assert(StartLoc.isMacroID());
320 // Handle tokens that were passed as a macro argument.
321 SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc);
322 if (SM.getFileID(ArgStart) == SelFile) {
323 if (isFirstExpansion(FID, ArgStart, SM)) {
324 SourceLocation ArgEnd =
325 SM.getTopMacroCallerLoc(Batch.back().location());
326 return testTokenRange(SM.getFileOffset(ArgStart),
327 SM.getFileOffset(ArgEnd));
328 } else {
329 /* fall through and treat as part of the macro body */
330 }
331 }
332
333 // Handle tokens produced by non-argument macro expansion.
334 // Check if the macro name is selected, don't claim it exclusively.
335 auto Expansion = SM.getDecomposedExpansionLoc(StartLoc);
336 if (Expansion.first == SelFile)
337 // FIXME: also check ( and ) for function-like macros?
338 return testToken(Expansion.second);
339 else
340 return NoTokens;
341 }
342
343 // Is the closed token range [Begin, End] selected?
testTokenRange(unsigned Begin,unsigned End) const344 SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const {
345 assert(Begin <= End);
346 // Outside the selection entirely?
347 if (End < SpelledTokens.front().Offset ||
348 Begin > SpelledTokens.back().Offset)
349 return SelectionTree::Unselected;
350
351 // Compute range of tokens.
352 auto B = llvm::partition_point(
353 SpelledTokens, [&](const Tok &T) { return T.Offset < Begin; });
354 auto E = std::partition_point(
355 B, SpelledTokens.end(), [&](const Tok &T) { return T.Offset <= End; });
356
357 // Aggregate selectedness of tokens in range.
358 bool ExtendsOutsideSelection = Begin < SpelledTokens.front().Offset ||
359 End > SpelledTokens.back().Offset;
360 SelectionTree::Selection Result =
361 ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens;
362 for (auto It = B; It != E; ++It)
363 update(Result, It->Selected);
364 return Result;
365 }
366
367 // Is the token at `Offset` selected?
testToken(unsigned Offset) const368 SelectionTree::Selection testToken(unsigned Offset) const {
369 // Outside the selection entirely?
370 if (Offset < SpelledTokens.front().Offset ||
371 Offset > SpelledTokens.back().Offset)
372 return SelectionTree::Unselected;
373 // Find the token, if it exists.
374 auto It = llvm::partition_point(
375 SpelledTokens, [&](const Tok &T) { return T.Offset < Offset; });
376 if (It != SpelledTokens.end() && It->Offset == Offset)
377 return It->Selected;
378 return NoTokens;
379 }
380
381 struct Tok {
382 unsigned Offset;
383 SelectionTree::Selection Selected;
384 };
385 std::vector<Tok> SpelledTokens;
386 FileID SelFile;
387 const SourceManager &SM;
388 };
389
390 // Show the type of a node for debugging.
printNodeKind(llvm::raw_ostream & OS,const DynTypedNode & N)391 void printNodeKind(llvm::raw_ostream &OS, const DynTypedNode &N) {
392 if (const TypeLoc *TL = N.get<TypeLoc>()) {
393 // TypeLoc is a hierarchy, but has only a single ASTNodeKind.
394 // Synthesize the name from the Type subclass (except for QualifiedTypeLoc).
395 if (TL->getTypeLocClass() == TypeLoc::Qualified)
396 OS << "QualifiedTypeLoc";
397 else
398 OS << TL->getType()->getTypeClassName() << "TypeLoc";
399 } else {
400 OS << N.getNodeKind().asStringRef();
401 }
402 }
403
404 #ifndef NDEBUG
printNodeToString(const DynTypedNode & N,const PrintingPolicy & PP)405 std::string printNodeToString(const DynTypedNode &N, const PrintingPolicy &PP) {
406 std::string S;
407 llvm::raw_string_ostream OS(S);
408 printNodeKind(OS, N);
409 OS << " ";
410 return std::move(OS.str());
411 }
412 #endif
413
isImplicit(const Stmt * S)414 bool isImplicit(const Stmt *S) {
415 // Some Stmts are implicit and shouldn't be traversed, but there's no
416 // "implicit" attribute on Stmt/Expr.
417 // Unwrap implicit casts first if present (other nodes too?).
418 if (auto *ICE = llvm::dyn_cast<ImplicitCastExpr>(S))
419 S = ICE->getSubExprAsWritten();
420 // Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor.
421 // It would be nice if RAV handled this (!shouldTraverseImplicitCode()).
422 if (auto *CTI = llvm::dyn_cast<CXXThisExpr>(S))
423 if (CTI->isImplicit())
424 return true;
425 // Refs to operator() and [] are (almost?) always implicit as part of calls.
426 if (auto *DRE = llvm::dyn_cast<DeclRefExpr>(S)) {
427 if (auto *FD = llvm::dyn_cast<FunctionDecl>(DRE->getDecl())) {
428 switch (FD->getOverloadedOperator()) {
429 case OO_Call:
430 case OO_Subscript:
431 return true;
432 default:
433 break;
434 }
435 }
436 }
437 return false;
438 }
439
440 // We find the selection by visiting written nodes in the AST, looking for nodes
441 // that intersect with the selected character range.
442 //
443 // While traversing, we maintain a parent stack. As nodes pop off the stack,
444 // we decide whether to keep them or not. To be kept, they must either be
445 // selected or contain some nodes that are.
446 //
447 // For simple cases (not inside macros) we prune subtrees that don't intersect.
448 class SelectionVisitor : public RecursiveASTVisitor<SelectionVisitor> {
449 public:
450 // Runs the visitor to gather selected nodes and their ancestors.
451 // If there is any selection, the root (TUDecl) is the first node.
collect(ASTContext & AST,const syntax::TokenBuffer & Tokens,const PrintingPolicy & PP,unsigned Begin,unsigned End,FileID File)452 static std::deque<Node> collect(ASTContext &AST,
453 const syntax::TokenBuffer &Tokens,
454 const PrintingPolicy &PP, unsigned Begin,
455 unsigned End, FileID File) {
456 SelectionVisitor V(AST, Tokens, PP, Begin, End, File);
457 V.TraverseAST(AST);
458 assert(V.Stack.size() == 1 && "Unpaired push/pop?");
459 assert(V.Stack.top() == &V.Nodes.front());
460 return std::move(V.Nodes);
461 }
462
463 // We traverse all "well-behaved" nodes the same way:
464 // - push the node onto the stack
465 // - traverse its children recursively
466 // - pop it from the stack
467 // - hit testing: is intersection(node, selection) - union(children) empty?
468 // - attach it to the tree if it or any children hit the selection
469 //
470 // Two categories of nodes are not "well-behaved":
471 // - those without source range information, we don't record those
472 // - those that can't be stored in DynTypedNode.
473 // We're missing some interesting things like Attr due to the latter.
TraverseDecl(Decl * X)474 bool TraverseDecl(Decl *X) {
475 if (X && isa<TranslationUnitDecl>(X))
476 return Base::TraverseDecl(X); // Already pushed by constructor.
477 // Base::TraverseDecl will suppress children, but not this node itself.
478 if (X && X->isImplicit())
479 return true;
480 return traverseNode(X, [&] { return Base::TraverseDecl(X); });
481 }
TraverseTypeLoc(TypeLoc X)482 bool TraverseTypeLoc(TypeLoc X) {
483 return traverseNode(&X, [&] { return Base::TraverseTypeLoc(X); });
484 }
TraverseTemplateArgumentLoc(const TemplateArgumentLoc & X)485 bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &X) {
486 return traverseNode(&X,
487 [&] { return Base::TraverseTemplateArgumentLoc(X); });
488 }
TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X)489 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) {
490 return traverseNode(
491 &X, [&] { return Base::TraverseNestedNameSpecifierLoc(X); });
492 }
TraverseConstructorInitializer(CXXCtorInitializer * X)493 bool TraverseConstructorInitializer(CXXCtorInitializer *X) {
494 return traverseNode(
495 X, [&] { return Base::TraverseConstructorInitializer(X); });
496 }
497 // Stmt is the same, but this form allows the data recursion optimization.
dataTraverseStmtPre(Stmt * X)498 bool dataTraverseStmtPre(Stmt *X) {
499 if (!X || isImplicit(X))
500 return false;
501 auto N = DynTypedNode::create(*X);
502 if (canSafelySkipNode(N))
503 return false;
504 push(std::move(N));
505 if (shouldSkipChildren(X)) {
506 pop();
507 return false;
508 }
509 return true;
510 }
dataTraverseStmtPost(Stmt * X)511 bool dataTraverseStmtPost(Stmt *X) {
512 pop();
513 return true;
514 }
515 // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
516 // TraverseTypeLoc is not called for the inner UnqualTypeLoc.
517 // This means we'd never see 'int' in 'const int'! Work around that here.
518 // (The reason for the behavior is to avoid traversing the nested Type twice,
519 // but we ignore TraverseType anyway).
TraverseQualifiedTypeLoc(QualifiedTypeLoc QX)520 bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX) {
521 return traverseNode<TypeLoc>(
522 &QX, [&] { return TraverseTypeLoc(QX.getUnqualifiedLoc()); });
523 }
524 // Uninteresting parts of the AST that don't have locations within them.
TraverseNestedNameSpecifier(NestedNameSpecifier *)525 bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; }
TraverseType(QualType)526 bool TraverseType(QualType) { return true; }
527
528 // The DeclStmt for the loop variable claims to cover the whole range
529 // inside the parens, this causes the range-init expression to not be hit.
530 // Traverse the loop VarDecl instead, which has the right source range.
TraverseCXXForRangeStmt(CXXForRangeStmt * S)531 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
532 return traverseNode(S, [&] {
533 return TraverseStmt(S->getInit()) && TraverseDecl(S->getLoopVariable()) &&
534 TraverseStmt(S->getRangeInit()) && TraverseStmt(S->getBody());
535 });
536 }
537 // OpaqueValueExpr blocks traversal, we must explicitly traverse it.
TraverseOpaqueValueExpr(OpaqueValueExpr * E)538 bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) {
539 return traverseNode(E, [&] { return TraverseStmt(E->getSourceExpr()); });
540 }
541 // We only want to traverse the *syntactic form* to understand the selection.
TraversePseudoObjectExpr(PseudoObjectExpr * E)542 bool TraversePseudoObjectExpr(PseudoObjectExpr *E) {
543 return traverseNode(E, [&] { return TraverseStmt(E->getSyntacticForm()); });
544 }
545
546 private:
547 using Base = RecursiveASTVisitor<SelectionVisitor>;
548
SelectionVisitor(ASTContext & AST,const syntax::TokenBuffer & Tokens,const PrintingPolicy & PP,unsigned SelBegin,unsigned SelEnd,FileID SelFile)549 SelectionVisitor(ASTContext &AST, const syntax::TokenBuffer &Tokens,
550 const PrintingPolicy &PP, unsigned SelBegin, unsigned SelEnd,
551 FileID SelFile)
552 : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()),
553 #ifndef NDEBUG
554 PrintPolicy(PP),
555 #endif
556 TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM),
557 UnclaimedExpandedTokens(Tokens.expandedTokens()) {
558 // Ensure we have a node for the TU decl, regardless of traversal scope.
559 Nodes.emplace_back();
560 Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl());
561 Nodes.back().Parent = nullptr;
562 Nodes.back().Selected = SelectionTree::Unselected;
563 Stack.push(&Nodes.back());
564 }
565
566 // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo.
567 // Node is always a pointer so the generic code can handle any null checks.
568 template <typename T, typename Func>
traverseNode(T * Node,const Func & Body)569 bool traverseNode(T *Node, const Func &Body) {
570 if (Node == nullptr)
571 return true;
572 auto N = DynTypedNode::create(*Node);
573 if (canSafelySkipNode(N))
574 return true;
575 push(DynTypedNode::create(*Node));
576 bool Ret = Body();
577 pop();
578 return Ret;
579 }
580
581 // HIT TESTING
582 //
583 // We do rough hit testing on the way down the tree to avoid traversing
584 // subtrees that don't touch the selection (canSafelySkipNode), but
585 // fine-grained hit-testing is mostly done on the way back up (in pop()).
586 // This means children get to claim parts of the selection first, and parents
587 // are only selected if they own tokens that no child owned.
588 //
589 // Nodes *usually* nest nicely: a child's getSourceRange() lies within the
590 // parent's, and a node (transitively) owns all tokens in its range.
591 //
592 // Exception 1: child range claims tokens that should be owned by the parent.
593 // e.g. in `void foo(int);`, the FunctionTypeLoc should own
594 // `void (int)` but the parent FunctionDecl should own `foo`.
595 // To handle this case, certain nodes claim small token ranges *before*
596 // their children are traversed. (see earlySourceRange).
597 //
598 // Exception 2: siblings both claim the same node.
599 // e.g. `int x, y;` produces two sibling VarDecls.
600 // ~~~~~ x
601 // ~~~~~~~~ y
602 // Here the first ("leftmost") sibling claims the tokens it wants, and the
603 // other sibling gets what's left. So selecting "int" only includes the left
604 // VarDecl in the selection tree.
605
606 // An optimization for a common case: nodes outside macro expansions that
607 // don't intersect the selection may be recursively skipped.
canSafelySkipNode(const DynTypedNode & N)608 bool canSafelySkipNode(const DynTypedNode &N) {
609 SourceRange S = N.getSourceRange();
610 if (auto *TL = N.get<TypeLoc>()) {
611 // FIXME: TypeLoc::getBeginLoc()/getEndLoc() are pretty fragile
612 // heuristics. We should consider only pruning critical TypeLoc nodes, to
613 // be more robust.
614
615 // DeclTypeTypeLoc::getSourceRange() is incomplete, which would lead to
616 // failing
617 // to descend into the child expression.
618 // decltype(2+2);
619 // ~~~~~~~~~~~~~ <-- correct range
620 // ~~~~~~~~ <-- range reported by getSourceRange()
621 // ~~~~~~~~~~~~ <-- range with this hack(i.e, missing closing paren)
622 // FIXME: Alter DecltypeTypeLoc to contain parentheses locations and get
623 // rid of this patch.
624 if (auto DT = TL->getAs<DecltypeTypeLoc>())
625 S.setEnd(DT.getUnderlyingExpr()->getEndLoc());
626 // AttributedTypeLoc may point to the attribute's range, NOT the modified
627 // type's range.
628 if (auto AT = TL->getAs<AttributedTypeLoc>())
629 S = AT.getModifiedLoc().getSourceRange();
630 }
631 if (!SelChecker.mayHit(S)) {
632 dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent());
633 dlog("{1}skipped range = {0}", S.printToString(SM), indent(1));
634 return true;
635 }
636 return false;
637 }
638
639 // There are certain nodes we want to treat as leaves in the SelectionTree,
640 // although they do have children.
shouldSkipChildren(const Stmt * X) const641 bool shouldSkipChildren(const Stmt *X) const {
642 // UserDefinedLiteral (e.g. 12_i) has two children (12 and _i).
643 // Unfortunately TokenBuffer sees 12_i as one token and can't split it.
644 // So we treat UserDefinedLiteral as a leaf node, owning the token.
645 return llvm::isa<UserDefinedLiteral>(X);
646 }
647
648 // Pushes a node onto the ancestor stack. Pairs with pop().
649 // Performs early hit detection for some nodes (on the earlySourceRange).
push(DynTypedNode Node)650 void push(DynTypedNode Node) {
651 SourceRange Early = earlySourceRange(Node);
652 dlog("{1}push: {0}", printNodeToString(Node, PrintPolicy), indent());
653 Nodes.emplace_back();
654 Nodes.back().ASTNode = std::move(Node);
655 Nodes.back().Parent = Stack.top();
656 Nodes.back().Selected = NoTokens;
657 Stack.push(&Nodes.back());
658 claimRange(Early, Nodes.back().Selected);
659 }
660
661 // Pops a node off the ancestor stack, and finalizes it. Pairs with push().
662 // Performs primary hit detection.
pop()663 void pop() {
664 Node &N = *Stack.top();
665 dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1));
666 claimRange(N.ASTNode.getSourceRange(), N.Selected);
667 if (N.Selected == NoTokens)
668 N.Selected = SelectionTree::Unselected;
669 if (N.Selected || !N.Children.empty()) {
670 // Attach to the tree.
671 N.Parent->Children.push_back(&N);
672 } else {
673 // Neither N any children are selected, it doesn't belong in the tree.
674 assert(&N == &Nodes.back());
675 Nodes.pop_back();
676 }
677 Stack.pop();
678 }
679
680 // Returns the range of tokens that this node will claim directly, and
681 // is not available to the node's children.
682 // Usually empty, but sometimes children cover tokens but shouldn't own them.
earlySourceRange(const DynTypedNode & N)683 SourceRange earlySourceRange(const DynTypedNode &N) {
684 if (const Decl *D = N.get<Decl>()) {
685 // We want constructor name to be claimed by TypeLoc not the constructor
686 // itself. Similar for deduction guides, we rather want to select the
687 // underlying TypeLoc.
688 // FIXME: Unfortunately this doesn't work, even though RecursiveASTVisitor
689 // traverses the underlying TypeLoc inside DeclarationName, it is null for
690 // constructors.
691 if (isa<CXXConstructorDecl>(D) || isa<CXXDeductionGuideDecl>(D))
692 return SourceRange();
693 // This will capture Field, Function, MSProperty, NonTypeTemplateParm and
694 // VarDecls. We want the name in the declarator to be claimed by the decl
695 // and not by any children. For example:
696 // void [[foo]]();
697 // int (*[[s]])();
698 // struct X { int [[hash]] [32]; [[operator]] int();}
699 if (const auto *DD = llvm::dyn_cast<DeclaratorDecl>(D))
700 return DD->getLocation();
701 } else if (const auto *CCI = N.get<CXXCtorInitializer>()) {
702 // : [[b_]](42)
703 return CCI->getMemberLocation();
704 }
705 return SourceRange();
706 }
707
708 // Perform hit-testing of a complete Node against the selection.
709 // This runs for every node in the AST, and must be fast in common cases.
710 // This is usually called from pop(), so we can take children into account.
711 // The existing state of Result is relevant (early/late claims can interact).
claimRange(SourceRange S,SelectionTree::Selection & Result)712 void claimRange(SourceRange S, SelectionTree::Selection &Result) {
713 for (const auto &ClaimedRange :
714 UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S)))
715 update(Result, SelChecker.test(ClaimedRange));
716
717 if (Result && Result != NoTokens)
718 dlog("{1}hit selection: {0}", S.printToString(SM), indent());
719 }
720
indent(int Offset=0)721 std::string indent(int Offset = 0) {
722 // Cast for signed arithmetic.
723 int Amount = int(Stack.size()) + Offset;
724 assert(Amount >= 0);
725 return std::string(Amount, ' ');
726 }
727
728 SourceManager &SM;
729 const LangOptions &LangOpts;
730 #ifndef NDEBUG
731 const PrintingPolicy &PrintPolicy;
732 #endif
733 const syntax::TokenBuffer &TokenBuf;
734 std::stack<Node *> Stack;
735 SelectionTester SelChecker;
736 IntervalSet<syntax::Token> UnclaimedExpandedTokens;
737 std::deque<Node> Nodes; // Stable pointers as we add more nodes.
738 };
739
740 } // namespace
741
abbreviatedString(DynTypedNode N,const PrintingPolicy & PP)742 llvm::SmallString<256> abbreviatedString(DynTypedNode N,
743 const PrintingPolicy &PP) {
744 llvm::SmallString<256> Result;
745 {
746 llvm::raw_svector_ostream OS(Result);
747 N.print(OS, PP);
748 }
749 auto Pos = Result.find('\n');
750 if (Pos != llvm::StringRef::npos) {
751 bool MoreText =
752 !llvm::all_of(llvm::StringRef(Result).drop_front(Pos), llvm::isSpace);
753 Result.resize(Pos);
754 if (MoreText)
755 Result.append(" …");
756 }
757 return Result;
758 }
759
print(llvm::raw_ostream & OS,const SelectionTree::Node & N,int Indent) const760 void SelectionTree::print(llvm::raw_ostream &OS, const SelectionTree::Node &N,
761 int Indent) const {
762 if (N.Selected)
763 OS.indent(Indent - 1) << (N.Selected == SelectionTree::Complete ? '*'
764 : '.');
765 else
766 OS.indent(Indent);
767 printNodeKind(OS, N.ASTNode);
768 OS << ' ' << abbreviatedString(N.ASTNode, PrintPolicy) << "\n";
769 for (const Node *Child : N.Children)
770 print(OS, *Child, Indent + 2);
771 }
772
kind() const773 std::string SelectionTree::Node::kind() const {
774 std::string S;
775 llvm::raw_string_ostream OS(S);
776 printNodeKind(OS, ASTNode);
777 return std::move(OS.str());
778 }
779
780 // Decide which selections emulate a "point" query in between characters.
781 // If it's ambiguous (the neighboring characters are selectable tokens), returns
782 // both possibilities in preference order.
783 // Always returns at least one range - if no tokens touched, and empty range.
784 static llvm::SmallVector<std::pair<unsigned, unsigned>, 2>
pointBounds(unsigned Offset,const syntax::TokenBuffer & Tokens)785 pointBounds(unsigned Offset, const syntax::TokenBuffer &Tokens) {
786 const auto &SM = Tokens.sourceManager();
787 SourceLocation Loc = SM.getComposedLoc(SM.getMainFileID(), Offset);
788 llvm::SmallVector<std::pair<unsigned, unsigned>, 2> Result;
789 // Prefer right token over left.
790 for (const syntax::Token &Tok :
791 llvm::reverse(spelledTokensTouching(Loc, Tokens))) {
792 if (shouldIgnore(Tok))
793 continue;
794 unsigned Offset = Tokens.sourceManager().getFileOffset(Tok.location());
795 Result.emplace_back(Offset, Offset + Tok.length());
796 }
797 if (Result.empty())
798 Result.emplace_back(Offset, Offset);
799 return Result;
800 }
801
createEach(ASTContext & AST,const syntax::TokenBuffer & Tokens,unsigned Begin,unsigned End,llvm::function_ref<bool (SelectionTree)> Func)802 bool SelectionTree::createEach(ASTContext &AST,
803 const syntax::TokenBuffer &Tokens,
804 unsigned Begin, unsigned End,
805 llvm::function_ref<bool(SelectionTree)> Func) {
806 if (Begin != End)
807 return Func(SelectionTree(AST, Tokens, Begin, End));
808 for (std::pair<unsigned, unsigned> Bounds : pointBounds(Begin, Tokens))
809 if (Func(SelectionTree(AST, Tokens, Bounds.first, Bounds.second)))
810 return true;
811 return false;
812 }
813
createRight(ASTContext & AST,const syntax::TokenBuffer & Tokens,unsigned int Begin,unsigned int End)814 SelectionTree SelectionTree::createRight(ASTContext &AST,
815 const syntax::TokenBuffer &Tokens,
816 unsigned int Begin, unsigned int End) {
817 llvm::Optional<SelectionTree> Result;
818 createEach(AST, Tokens, Begin, End, [&](SelectionTree T) {
819 Result = std::move(T);
820 return true;
821 });
822 return std::move(*Result);
823 }
824
SelectionTree(ASTContext & AST,const syntax::TokenBuffer & Tokens,unsigned Begin,unsigned End)825 SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
826 unsigned Begin, unsigned End)
827 : PrintPolicy(AST.getLangOpts()) {
828 // No fundamental reason the selection needs to be in the main file,
829 // but that's all clangd has needed so far.
830 const SourceManager &SM = AST.getSourceManager();
831 FileID FID = SM.getMainFileID();
832 PrintPolicy.TerseOutput = true;
833 PrintPolicy.IncludeNewlines = false;
834
835 dlog("Computing selection for {0}",
836 SourceRange(SM.getComposedLoc(FID, Begin), SM.getComposedLoc(FID, End))
837 .printToString(SM));
838 Nodes = SelectionVisitor::collect(AST, Tokens, PrintPolicy, Begin, End, FID);
839 Root = Nodes.empty() ? nullptr : &Nodes.front();
840 recordMetrics(*this, AST.getLangOpts());
841 dlog("Built selection tree\n{0}", *this);
842 }
843
commonAncestor() const844 const Node *SelectionTree::commonAncestor() const {
845 const Node *Ancestor = Root;
846 while (Ancestor->Children.size() == 1 && !Ancestor->Selected)
847 Ancestor = Ancestor->Children.front();
848 // Returning nullptr here is a bit unprincipled, but it makes the API safer:
849 // the TranslationUnitDecl contains all of the preamble, so traversing it is a
850 // performance cliff. Callers can check for null and use root() if they want.
851 return Ancestor != Root ? Ancestor : nullptr;
852 }
853
getDeclContext() const854 const DeclContext &SelectionTree::Node::getDeclContext() const {
855 for (const Node *CurrentNode = this; CurrentNode != nullptr;
856 CurrentNode = CurrentNode->Parent) {
857 if (const Decl *Current = CurrentNode->ASTNode.get<Decl>()) {
858 if (CurrentNode != this)
859 if (auto *DC = dyn_cast<DeclContext>(Current))
860 return *DC;
861 return *Current->getDeclContext();
862 }
863 }
864 llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
865 }
866
ignoreImplicit() const867 const SelectionTree::Node &SelectionTree::Node::ignoreImplicit() const {
868 if (Children.size() == 1 &&
869 Children.front()->ASTNode.getSourceRange() == ASTNode.getSourceRange())
870 return Children.front()->ignoreImplicit();
871 return *this;
872 }
873
outerImplicit() const874 const SelectionTree::Node &SelectionTree::Node::outerImplicit() const {
875 if (Parent && Parent->ASTNode.getSourceRange() == ASTNode.getSourceRange())
876 return Parent->outerImplicit();
877 return *this;
878 }
879
880 } // namespace clangd
881 } // namespace clang
882