1 //===--- ExtractVariable.cpp ------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "ParsedAST.h"
9 #include "Protocol.h"
10 #include "Selection.h"
11 #include "SourceCode.h"
12 #include "refactor/Tweak.h"
13 #include "support/Logger.h"
14 #include "clang/AST/ASTContext.h"
15 #include "clang/AST/Expr.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/AST/OperationKinds.h"
18 #include "clang/AST/RecursiveASTVisitor.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtCXX.h"
21 #include "clang/Basic/LangOptions.h"
22 #include "clang/Basic/SourceLocation.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Tooling/Core/Replacement.h"
25 #include "llvm/ADT/None.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/Error.h"
30 #include "llvm/Support/raw_ostream.h"
31 
32 namespace clang {
33 namespace clangd {
34 namespace {
35 // information regarding the Expr that is being extracted
36 class ExtractionContext {
37 public:
38   ExtractionContext(const SelectionTree::Node *Node, const SourceManager &SM,
39                     const ASTContext &Ctx);
getExpr() const40   const clang::Expr *getExpr() const { return Expr; }
getExprNode() const41   const SelectionTree::Node *getExprNode() const { return ExprNode; }
isExtractable() const42   bool isExtractable() const { return Extractable; }
43   // The half-open range for the expression to be extracted.
44   SourceRange getExtractionChars() const;
45   // Generate Replacement for replacing selected expression with given VarName
46   tooling::Replacement replaceWithVar(SourceRange Chars,
47                                       llvm::StringRef VarName) const;
48   // Generate Replacement for declaring the selected Expr as a new variable
49   tooling::Replacement insertDeclaration(llvm::StringRef VarName,
50                                          SourceRange InitChars) const;
51 
52 private:
53   bool Extractable = false;
54   const clang::Expr *Expr;
55   const SelectionTree::Node *ExprNode;
56   // Stmt before which we will extract
57   const clang::Stmt *InsertionPoint = nullptr;
58   const SourceManager &SM;
59   const ASTContext &Ctx;
60   // Decls referenced in the Expr
61   std::vector<clang::Decl *> ReferencedDecls;
62   // returns true if the Expr doesn't reference any variable declared in scope
63   bool exprIsValidOutside(const clang::Stmt *Scope) const;
64   // computes the Stmt before which we will extract out Expr
65   const clang::Stmt *computeInsertionPoint() const;
66 };
67 
68 // Returns all the Decls referenced inside the given Expr
69 static std::vector<clang::Decl *>
computeReferencedDecls(const clang::Expr * Expr)70 computeReferencedDecls(const clang::Expr *Expr) {
71   // RAV subclass to find all DeclRefs in a given Stmt
72   class FindDeclRefsVisitor
73       : public clang::RecursiveASTVisitor<FindDeclRefsVisitor> {
74   public:
75     std::vector<Decl *> ReferencedDecls;
76     bool VisitDeclRefExpr(DeclRefExpr *DeclRef) { // NOLINT
77       ReferencedDecls.push_back(DeclRef->getDecl());
78       return true;
79     }
80   };
81   FindDeclRefsVisitor Visitor;
82   Visitor.TraverseStmt(const_cast<Stmt *>(dyn_cast<Stmt>(Expr)));
83   return Visitor.ReferencedDecls;
84 }
85 
ExtractionContext(const SelectionTree::Node * Node,const SourceManager & SM,const ASTContext & Ctx)86 ExtractionContext::ExtractionContext(const SelectionTree::Node *Node,
87                                      const SourceManager &SM,
88                                      const ASTContext &Ctx)
89     : ExprNode(Node), SM(SM), Ctx(Ctx) {
90   Expr = Node->ASTNode.get<clang::Expr>();
91   ReferencedDecls = computeReferencedDecls(Expr);
92   InsertionPoint = computeInsertionPoint();
93   if (InsertionPoint)
94     Extractable = true;
95 }
96 
97 // checks whether extracting before InsertionPoint will take a
98 // variable reference out of scope
exprIsValidOutside(const clang::Stmt * Scope) const99 bool ExtractionContext::exprIsValidOutside(const clang::Stmt *Scope) const {
100   SourceLocation ScopeBegin = Scope->getBeginLoc();
101   SourceLocation ScopeEnd = Scope->getEndLoc();
102   for (const Decl *ReferencedDecl : ReferencedDecls) {
103     if (SM.isPointWithin(ReferencedDecl->getBeginLoc(), ScopeBegin, ScopeEnd) &&
104         SM.isPointWithin(ReferencedDecl->getEndLoc(), ScopeBegin, ScopeEnd))
105       return false;
106   }
107   return true;
108 }
109 
110 // Return the Stmt before which we need to insert the extraction.
111 // To find the Stmt, we go up the AST Tree and if the Parent of the current
112 // Stmt is a CompoundStmt, we can extract inside this CompoundStmt just before
113 // the current Stmt. We ALWAYS insert before a Stmt whose parent is a
114 // CompoundStmt
115 //
116 // FIXME: Extraction from label, switch and case statements
117 // FIXME: Doens't work for FoldExpr
118 // FIXME: Ensure extraction from loops doesn't change semantics.
computeInsertionPoint() const119 const clang::Stmt *ExtractionContext::computeInsertionPoint() const {
120   // returns true if we can extract before InsertionPoint
121   auto CanExtractOutside =
122       [](const SelectionTree::Node *InsertionPoint) -> bool {
123     if (const clang::Stmt *Stmt = InsertionPoint->ASTNode.get<clang::Stmt>()) {
124       // Allow all expressions except LambdaExpr since we don't want to extract
125       // from the captures/default arguments of a lambda
126       if (isa<clang::Expr>(Stmt))
127         return !isa<LambdaExpr>(Stmt);
128       // We don't yet allow extraction from switch/case stmt as we would need to
129       // jump over the switch stmt even if there is a CompoundStmt inside the
130       // switch. And there are other Stmts which we don't care about (e.g.
131       // continue and break) as there can never be anything to extract from
132       // them.
133       return isa<AttributedStmt>(Stmt) || isa<CompoundStmt>(Stmt) ||
134              isa<CXXForRangeStmt>(Stmt) || isa<DeclStmt>(Stmt) ||
135              isa<DoStmt>(Stmt) || isa<ForStmt>(Stmt) || isa<IfStmt>(Stmt) ||
136              isa<ReturnStmt>(Stmt) || isa<WhileStmt>(Stmt);
137     }
138     if (InsertionPoint->ASTNode.get<VarDecl>())
139       return true;
140     return false;
141   };
142   for (const SelectionTree::Node *CurNode = getExprNode();
143        CurNode->Parent && CanExtractOutside(CurNode);
144        CurNode = CurNode->Parent) {
145     const clang::Stmt *CurInsertionPoint = CurNode->ASTNode.get<Stmt>();
146     // give up if extraction will take a variable out of scope
147     if (CurInsertionPoint && !exprIsValidOutside(CurInsertionPoint))
148       break;
149     if (const clang::Stmt *CurParent = CurNode->Parent->ASTNode.get<Stmt>()) {
150       if (isa<CompoundStmt>(CurParent)) {
151         // Ensure we don't write inside a macro.
152         if (CurParent->getBeginLoc().isMacroID())
153           continue;
154         return CurInsertionPoint;
155       }
156     }
157   }
158   return nullptr;
159 }
160 
161 // returns the replacement for substituting the extraction with VarName
162 tooling::Replacement
replaceWithVar(SourceRange Chars,llvm::StringRef VarName) const163 ExtractionContext::replaceWithVar(SourceRange Chars,
164                                   llvm::StringRef VarName) const {
165   unsigned ExtractionLength =
166       SM.getFileOffset(Chars.getEnd()) - SM.getFileOffset(Chars.getBegin());
167   return tooling::Replacement(SM, Chars.getBegin(), ExtractionLength, VarName);
168 }
169 // returns the Replacement for declaring a new variable storing the extraction
170 tooling::Replacement
insertDeclaration(llvm::StringRef VarName,SourceRange InitializerChars) const171 ExtractionContext::insertDeclaration(llvm::StringRef VarName,
172                                      SourceRange InitializerChars) const {
173   llvm::StringRef ExtractionCode = toSourceCode(SM, InitializerChars);
174   const SourceLocation InsertionLoc =
175       toHalfOpenFileRange(SM, Ctx.getLangOpts(),
176                           InsertionPoint->getSourceRange())
177           ->getBegin();
178   // FIXME: Replace auto with explicit type and add &/&& as necessary
179   std::string ExtractedVarDecl = std::string("auto ") + VarName.str() + " = " +
180                                  ExtractionCode.str() + "; ";
181   return tooling::Replacement(SM, InsertionLoc, 0, ExtractedVarDecl);
182 }
183 
184 // Helpers for handling "binary subexpressions" like a + [[b + c]] + d.
185 //
186 // These are special, because the formal AST doesn't match what users expect:
187 // - the AST is ((a + b) + c) + d, so the ancestor expression is `a + b + c`.
188 // - but extracting `b + c` is reasonable, as + is (mathematically) associative.
189 //
190 // So we try to support these cases with some restrictions:
191 //  - the operator must be associative
192 //  - no mixing of operators is allowed
193 //  - we don't look inside macro expansions in the subexpressions
194 //  - we only adjust the extracted range, so references in the unselected parts
195 //    of the AST expression (e.g. `a`) are still considered referenced for
196 //    the purposes of calculating the insertion point.
197 //    FIXME: it would be nice to exclude these references, by micromanaging
198 //    the computeReferencedDecls() calls around the binary operator tree.
199 
200 // Information extracted about a binary operator encounted in a SelectionTree.
201 // It can represent either an overloaded or built-in operator.
202 struct ParsedBinaryOperator {
203   BinaryOperatorKind Kind;
204   SourceLocation ExprLoc;
205   llvm::SmallVector<const SelectionTree::Node*, 8> SelectedOperands;
206 
207   // If N is a binary operator, populate this and return true.
parseclang::clangd::__anon462339cf0111::ParsedBinaryOperator208   bool parse(const SelectionTree::Node &N) {
209     SelectedOperands.clear();
210 
211     if (const BinaryOperator *Op =
212         llvm::dyn_cast_or_null<BinaryOperator>(N.ASTNode.get<Expr>())) {
213       Kind = Op->getOpcode();
214       ExprLoc = Op->getExprLoc();
215       SelectedOperands = N.Children;
216       return true;
217     }
218     if (const CXXOperatorCallExpr *Op =
219             llvm::dyn_cast_or_null<CXXOperatorCallExpr>(
220                 N.ASTNode.get<Expr>())) {
221       if (!Op->isInfixBinaryOp())
222         return false;
223 
224       Kind = BinaryOperator::getOverloadedOpcode(Op->getOperator());
225       ExprLoc = Op->getExprLoc();
226       // Not all children are args, there's also the callee (operator).
227       for (const auto* Child : N.Children) {
228         const Expr *E = Child->ASTNode.get<Expr>();
229         assert(E && "callee and args should be Exprs!");
230         if (E == Op->getArg(0) || E == Op->getArg(1))
231           SelectedOperands.push_back(Child);
232       }
233       return true;
234     }
235     return false;
236   }
237 
associativeclang::clangd::__anon462339cf0111::ParsedBinaryOperator238   bool associative() const {
239     // Must also be left-associative, or update getBinaryOperatorRange()!
240     switch (Kind) {
241     case BO_Add:
242     case BO_Mul:
243     case BO_And:
244     case BO_Or:
245     case BO_Xor:
246     case BO_LAnd:
247     case BO_LOr:
248       return true;
249     default:
250       return false;
251     }
252   }
253 
crossesMacroBoundaryclang::clangd::__anon462339cf0111::ParsedBinaryOperator254   bool crossesMacroBoundary(const SourceManager &SM) {
255     FileID F = SM.getFileID(ExprLoc);
256     for (const SelectionTree::Node *Child : SelectedOperands)
257       if (SM.getFileID(Child->ASTNode.get<Expr>()->getExprLoc()) != F)
258         return true;
259     return false;
260   }
261 };
262 
263 // If have an associative operator at the top level, then we must find
264 // the start point (rightmost in LHS) and end point (leftmost in RHS).
265 // We can only descend into subtrees where the operator matches.
266 //
267 // e.g. for a + [[b + c]] + d
268 //        +
269 //       / \
270 //  N-> +   d
271 //     / \
272 //    +   c <- End
273 //   / \
274 //  a   b <- Start
getBinaryOperatorRange(const SelectionTree::Node & N,const SourceManager & SM,const LangOptions & LangOpts)275 const SourceRange getBinaryOperatorRange(const SelectionTree::Node &N,
276                                          const SourceManager &SM,
277                                          const LangOptions &LangOpts) {
278   // If N is not a suitable binary operator, bail out.
279   ParsedBinaryOperator Op;
280   if (!Op.parse(N.ignoreImplicit()) || !Op.associative() ||
281       Op.crossesMacroBoundary(SM) || Op.SelectedOperands.size() != 2)
282     return SourceRange();
283   BinaryOperatorKind OuterOp = Op.Kind;
284 
285   // Because the tree we're interested in contains only one operator type, and
286   // all eligible operators are left-associative, the shape of the tree is
287   // very restricted: it's a linked list along the left edges.
288   // This simplifies our implementation.
289   const SelectionTree::Node *Start = Op.SelectedOperands.front(); // LHS
290   const SelectionTree::Node *End = Op.SelectedOperands.back();    // RHS
291   // End is already correct: it can't be an OuterOp (as it's left-associative).
292   // Start needs to be pushed down int the subtree to the right spot.
293   while (Op.parse(Start->ignoreImplicit()) && Op.Kind == OuterOp &&
294          !Op.crossesMacroBoundary(SM)) {
295     assert(!Op.SelectedOperands.empty() && "got only operator on one side!");
296     if (Op.SelectedOperands.size() == 1) { // Only Op.RHS selected
297       Start = Op.SelectedOperands.back();
298       break;
299     }
300     // Op.LHS is (at least partially) selected, so descend into it.
301     Start = Op.SelectedOperands.front();
302   }
303 
304   return SourceRange(
305       toHalfOpenFileRange(SM, LangOpts, Start->ASTNode.getSourceRange())
306           ->getBegin(),
307       toHalfOpenFileRange(SM, LangOpts, End->ASTNode.getSourceRange())
308           ->getEnd());
309 }
310 
getExtractionChars() const311 SourceRange ExtractionContext::getExtractionChars() const {
312   // Special case: we're extracting an associative binary subexpression.
313   SourceRange BinaryOperatorRange =
314       getBinaryOperatorRange(*ExprNode, SM, Ctx.getLangOpts());
315   if (BinaryOperatorRange.isValid())
316     return BinaryOperatorRange;
317 
318   // Usual case: we're extracting the whole expression.
319   return *toHalfOpenFileRange(SM, Ctx.getLangOpts(), Expr->getSourceRange());
320 }
321 
322 // Find the CallExpr whose callee is the (possibly wrapped) DeclRef
getCallExpr(const SelectionTree::Node * DeclRef)323 const SelectionTree::Node *getCallExpr(const SelectionTree::Node *DeclRef) {
324   const SelectionTree::Node &MaybeCallee = DeclRef->outerImplicit();
325   const SelectionTree::Node *MaybeCall = MaybeCallee.Parent;
326   if (!MaybeCall)
327     return nullptr;
328   const CallExpr *CE =
329       llvm::dyn_cast_or_null<CallExpr>(MaybeCall->ASTNode.get<Expr>());
330   if (!CE)
331     return nullptr;
332   if (CE->getCallee() != MaybeCallee.ASTNode.get<Expr>())
333     return nullptr;
334   return MaybeCall;
335 }
336 
337 // Returns true if Inner (which is a direct child of Outer) is appearing as
338 // a statement rather than an expression whose value can be used.
childExprIsStmt(const Stmt * Outer,const Expr * Inner)339 bool childExprIsStmt(const Stmt *Outer, const Expr *Inner) {
340   if (!Outer || !Inner)
341     return false;
342   // Exclude the most common places where an expr can appear but be unused.
343   if (llvm::isa<CompoundStmt>(Outer))
344     return true;
345   if (llvm::isa<SwitchCase>(Outer))
346     return true;
347   // Control flow statements use condition etc, but not the body.
348   if (const auto* WS = llvm::dyn_cast<WhileStmt>(Outer))
349     return Inner == WS->getBody();
350   if (const auto* DS = llvm::dyn_cast<DoStmt>(Outer))
351     return Inner == DS->getBody();
352   if (const auto* FS = llvm::dyn_cast<ForStmt>(Outer))
353     return Inner == FS->getBody();
354   if (const auto* FS = llvm::dyn_cast<CXXForRangeStmt>(Outer))
355     return Inner == FS->getBody();
356   if (const auto* IS = llvm::dyn_cast<IfStmt>(Outer))
357     return Inner == IS->getThen() || Inner == IS->getElse();
358   // Assume all other cases may be actual expressions.
359   // This includes the important case of subexpressions (where Outer is Expr).
360   return false;
361 }
362 
363 // check if N can and should be extracted (e.g. is not void-typed).
eligibleForExtraction(const SelectionTree::Node * N)364 bool eligibleForExtraction(const SelectionTree::Node *N) {
365   const Expr *E = N->ASTNode.get<Expr>();
366   if (!E)
367     return false;
368 
369   // Void expressions can't be assigned to variables.
370   if (const Type *ExprType = E->getType().getTypePtrOrNull())
371     if (ExprType->isVoidType())
372       return false;
373 
374   // A plain reference to a name (e.g. variable) isn't  worth extracting.
375   // FIXME: really? What if it's e.g. `std::is_same<void, void>::value`?
376   if (llvm::isa<DeclRefExpr>(E) || llvm::isa<MemberExpr>(E))
377     return false;
378 
379   // Extracting Exprs like a = 1 gives dummy = a = 1 which isn't useful.
380   // FIXME: we could still hoist the assignment, and leave the variable there?
381   ParsedBinaryOperator BinOp;
382   if (BinOp.parse(*N) && BinaryOperator::isAssignmentOp(BinOp.Kind))
383     return false;
384 
385   const SelectionTree::Node &OuterImplicit = N->outerImplicit();
386   const auto *Parent = OuterImplicit.Parent;
387   if (!Parent)
388     return false;
389   // We don't want to extract expressions used as statements, that would leave
390   // a `dummy;` around that has no effect.
391   // Unfortunately because the AST doesn't have ExprStmt, we have to check in
392   // this roundabout way.
393   if (childExprIsStmt(Parent->ASTNode.get<Stmt>(),
394                       OuterImplicit.ASTNode.get<Expr>()))
395     return false;
396 
397   // Disable extraction of full RHS on assignment operations, e.g:
398   // auto x = [[RHS_EXPR]];
399   // This would just result in duplicating the code.
400   if (const auto *BO = Parent->ASTNode.get<BinaryOperator>()) {
401     if (BO->isAssignmentOp() &&
402         BO->getRHS() == OuterImplicit.ASTNode.get<Expr>())
403       return false;
404   }
405 
406   return true;
407 }
408 
409 // Find the Expr node that we're going to extract.
410 // We don't want to trigger for assignment expressions and variable/field
411 // DeclRefs. For function/member function, we want to extract the entire
412 // function call.
computeExtractedExpr(const SelectionTree::Node * N)413 const SelectionTree::Node *computeExtractedExpr(const SelectionTree::Node *N) {
414   if (!N)
415     return nullptr;
416   const SelectionTree::Node *TargetNode = N;
417   const clang::Expr *SelectedExpr = N->ASTNode.get<clang::Expr>();
418   if (!SelectedExpr)
419     return nullptr;
420   // For function and member function DeclRefs, extract the whole call.
421   if (llvm::isa<DeclRefExpr>(SelectedExpr) ||
422       llvm::isa<MemberExpr>(SelectedExpr))
423     if (const SelectionTree::Node *Call = getCallExpr(N))
424       TargetNode = Call;
425   // Extracting Exprs like a = 1 gives dummy = a = 1 which isn't useful.
426   if (const BinaryOperator *BinOpExpr =
427           dyn_cast_or_null<BinaryOperator>(SelectedExpr)) {
428     if (BinOpExpr->getOpcode() == BinaryOperatorKind::BO_Assign)
429       return nullptr;
430   }
431   if (!TargetNode || !eligibleForExtraction(TargetNode))
432     return nullptr;
433   return TargetNode;
434 }
435 
436 /// Extracts an expression to the variable dummy
437 /// Before:
438 /// int x = 5 + 4 * 3;
439 ///         ^^^^^
440 /// After:
441 /// auto dummy = 5 + 4;
442 /// int x = dummy * 3;
443 class ExtractVariable : public Tweak {
444 public:
445   const char *id() const override final;
446   bool prepare(const Selection &Inputs) override;
447   Expected<Effect> apply(const Selection &Inputs) override;
title() const448   std::string title() const override {
449     return "Extract subexpression to variable";
450   }
kind() const451   llvm::StringLiteral kind() const override {
452     return CodeAction::REFACTOR_KIND;
453   }
454 
455 private:
456   // the expression to extract
457   std::unique_ptr<ExtractionContext> Target;
458 };
REGISTER_TWEAK(ExtractVariable)459 REGISTER_TWEAK(ExtractVariable)
460 bool ExtractVariable::prepare(const Selection &Inputs) {
461   // we don't trigger on empty selections for now
462   if (Inputs.SelectionBegin == Inputs.SelectionEnd)
463     return false;
464   const ASTContext &Ctx = Inputs.AST->getASTContext();
465   // FIXME: Enable non-C++ cases once we start spelling types explicitly instead
466   // of making use of auto.
467   if (!Ctx.getLangOpts().CPlusPlus)
468     return false;
469   const SourceManager &SM = Inputs.AST->getSourceManager();
470   if (const SelectionTree::Node *N =
471           computeExtractedExpr(Inputs.ASTSelection.commonAncestor()))
472     Target = std::make_unique<ExtractionContext>(N, SM, Ctx);
473   return Target && Target->isExtractable();
474 }
475 
apply(const Selection & Inputs)476 Expected<Tweak::Effect> ExtractVariable::apply(const Selection &Inputs) {
477   tooling::Replacements Result;
478   // FIXME: get variable name from user or suggest based on type
479   std::string VarName = "dummy";
480   SourceRange Range = Target->getExtractionChars();
481   // insert new variable declaration
482   if (auto Err = Result.add(Target->insertDeclaration(VarName, Range)))
483     return std::move(Err);
484   // replace expression with variable name
485   if (auto Err = Result.add(Target->replaceWithVar(Range, VarName)))
486     return std::move(Err);
487   return Effect::mainFileEdit(Inputs.AST->getSourceManager(),
488                               std::move(Result));
489 }
490 
491 } // namespace
492 } // namespace clangd
493 } // namespace clang
494