1 //===--- InefficientVectorOperationCheck.cpp - clang-tidy------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InefficientVectorOperationCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Lex/Lexer.h"
13 #include "../utils/DeclRefExprUtils.h"
14 #include "../utils/OptionsUtils.h"
15 
16 using namespace clang::ast_matchers;
17 
18 namespace clang {
19 namespace tidy {
20 namespace performance {
21 
22 namespace {
23 
24 // Matcher names. Given the code:
25 //
26 // \code
27 // void f() {
28 //   vector<T> v;
29 //   for (int i = 0; i < 10 + 1; ++i) {
30 //     v.push_back(i);
31 //   }
32 //
33 //   SomeProto p;
34 //   for (int i = 0; i < 10 + 1; ++i) {
35 //     p.add_xxx(i);
36 //   }
37 // }
38 // \endcode
39 //
40 // The matcher names are bound to following parts of the AST:
41 //   - LoopCounterName: The entire for loop (as ForStmt).
42 //   - LoopParentName: The body of function f (as CompoundStmt).
43 //   - VectorVarDeclName: 'v' (as VarDecl).
44 //   - VectorVarDeclStmatName: The entire 'std::vector<T> v;' statement (as
45 //     DeclStmt).
46 //   - PushBackOrEmplaceBackCallName: 'v.push_back(i)' (as cxxMemberCallExpr).
47 //   - LoopInitVarName: 'i' (as VarDecl).
48 //   - LoopEndExpr: '10+1' (as Expr).
49 // If EnableProto, the proto related names are bound to the following parts:
50 //   - ProtoVarDeclName: 'p' (as VarDecl).
51 //   - ProtoVarDeclStmtName: The entire 'SomeProto p;' statement (as DeclStmt).
52 //   - ProtoAddFieldCallName: 'p.add_xxx(i)' (as cxxMemberCallExpr).
53 static const char LoopCounterName[] = "for_loop_counter";
54 static const char LoopParentName[] = "loop_parent";
55 static const char VectorVarDeclName[] = "vector_var_decl";
56 static const char VectorVarDeclStmtName[] = "vector_var_decl_stmt";
57 static const char PushBackOrEmplaceBackCallName[] = "append_call";
58 static const char ProtoVarDeclName[] = "proto_var_decl";
59 static const char ProtoVarDeclStmtName[] = "proto_var_decl_stmt";
60 static const char ProtoAddFieldCallName[] = "proto_add_field";
61 static const char LoopInitVarName[] = "loop_init_var";
62 static const char LoopEndExprName[] = "loop_end_expr";
63 static const char RangeLoopName[] = "for_range_loop";
64 
supportedContainerTypesMatcher()65 ast_matchers::internal::Matcher<Expr> supportedContainerTypesMatcher() {
66   return hasType(cxxRecordDecl(hasAnyName(
67       "::std::vector", "::std::set", "::std::unordered_set", "::std::map",
68       "::std::unordered_map", "::std::array", "::std::deque")));
69 }
70 
71 } // namespace
72 
InefficientVectorOperationCheck(StringRef Name,ClangTidyContext * Context)73 InefficientVectorOperationCheck::InefficientVectorOperationCheck(
74     StringRef Name, ClangTidyContext *Context)
75     : ClangTidyCheck(Name, Context),
76       VectorLikeClasses(utils::options::parseStringList(
77           Options.get("VectorLikeClasses", "::std::vector"))),
78       EnableProto(Options.getLocalOrGlobal("EnableProto", false)) {}
79 
storeOptions(ClangTidyOptions::OptionMap & Opts)80 void InefficientVectorOperationCheck::storeOptions(
81     ClangTidyOptions::OptionMap &Opts) {
82   Options.store(Opts, "VectorLikeClasses",
83                 utils::options::serializeStringList(VectorLikeClasses));
84   Options.store(Opts, "EnableProto", EnableProto);
85 }
86 
AddMatcher(const DeclarationMatcher & TargetRecordDecl,StringRef VarDeclName,StringRef VarDeclStmtName,const DeclarationMatcher & AppendMethodDecl,StringRef AppendCallName,MatchFinder * Finder)87 void InefficientVectorOperationCheck::AddMatcher(
88     const DeclarationMatcher &TargetRecordDecl, StringRef VarDeclName,
89     StringRef VarDeclStmtName, const DeclarationMatcher &AppendMethodDecl,
90     StringRef AppendCallName, MatchFinder *Finder) {
91   const auto DefaultConstructorCall = cxxConstructExpr(
92       hasType(TargetRecordDecl),
93       hasDeclaration(cxxConstructorDecl(isDefaultConstructor())));
94   const auto TargetVarDecl =
95       varDecl(hasInitializer(DefaultConstructorCall)).bind(VarDeclName);
96   const auto TargetVarDefStmt =
97       declStmt(hasSingleDecl(equalsBoundNode(std::string(VarDeclName))))
98           .bind(VarDeclStmtName);
99 
100   const auto AppendCallExpr =
101       cxxMemberCallExpr(
102           callee(AppendMethodDecl), on(hasType(TargetRecordDecl)),
103           onImplicitObjectArgument(declRefExpr(to(TargetVarDecl))))
104           .bind(AppendCallName);
105   const auto AppendCall = expr(ignoringImplicit(AppendCallExpr));
106   const auto LoopVarInit =
107       declStmt(hasSingleDecl(varDecl(hasInitializer(integerLiteral(equals(0))))
108                                  .bind(LoopInitVarName)));
109   const auto RefersToLoopVar = ignoringParenImpCasts(
110       declRefExpr(to(varDecl(equalsBoundNode(LoopInitVarName)))));
111 
112   // Matchers for the loop whose body has only 1 push_back/emplace_back calling
113   // statement.
114   const auto HasInterestingLoopBody = hasBody(
115       anyOf(compoundStmt(statementCountIs(1), has(AppendCall)), AppendCall));
116   const auto InInterestingCompoundStmt =
117       hasParent(compoundStmt(has(TargetVarDefStmt)).bind(LoopParentName));
118 
119   // Match counter-based for loops:
120   //  for (int i = 0; i < n; ++i) {
121   //    v.push_back(...);
122   //    // Or: proto.add_xxx(...);
123   //  }
124   //
125   // FIXME: Support more types of counter-based loops like decrement loops.
126   Finder->addMatcher(
127       forStmt(
128           hasLoopInit(LoopVarInit),
129           hasCondition(binaryOperator(
130               hasOperatorName("<"), hasLHS(RefersToLoopVar),
131               hasRHS(expr(unless(hasDescendant(expr(RefersToLoopVar))))
132                          .bind(LoopEndExprName)))),
133           hasIncrement(unaryOperator(hasOperatorName("++"),
134                                      hasUnaryOperand(RefersToLoopVar))),
135           HasInterestingLoopBody, InInterestingCompoundStmt)
136           .bind(LoopCounterName),
137       this);
138 
139   // Match for-range loops:
140   //   for (const auto& E : data) {
141   //     v.push_back(...);
142   //     // Or: proto.add_xxx(...);
143   //   }
144   //
145   // FIXME: Support more complex range-expressions.
146   Finder->addMatcher(
147       cxxForRangeStmt(
148           hasRangeInit(declRefExpr(supportedContainerTypesMatcher())),
149           HasInterestingLoopBody, InInterestingCompoundStmt)
150           .bind(RangeLoopName),
151       this);
152 }
153 
registerMatchers(MatchFinder * Finder)154 void InefficientVectorOperationCheck::registerMatchers(MatchFinder *Finder) {
155   const auto VectorDecl = cxxRecordDecl(hasAnyName(SmallVector<StringRef, 5>(
156       VectorLikeClasses.begin(), VectorLikeClasses.end())));
157   const auto AppendMethodDecl =
158       cxxMethodDecl(hasAnyName("push_back", "emplace_back"));
159   AddMatcher(VectorDecl, VectorVarDeclName, VectorVarDeclStmtName,
160              AppendMethodDecl, PushBackOrEmplaceBackCallName, Finder);
161 
162   if (EnableProto) {
163     const auto ProtoDecl =
164         cxxRecordDecl(isDerivedFrom("::proto2::MessageLite"));
165 
166     // A method's name starts with "add_" might not mean it's an add field
167     // call; it could be the getter for a proto field of which the name starts
168     // with "add_". So we exclude const methods.
169     const auto AddFieldMethodDecl =
170         cxxMethodDecl(matchesName("::add_"), unless(isConst()));
171     AddMatcher(ProtoDecl, ProtoVarDeclName, ProtoVarDeclStmtName,
172                AddFieldMethodDecl, ProtoAddFieldCallName, Finder);
173   }
174 }
175 
check(const MatchFinder::MatchResult & Result)176 void InefficientVectorOperationCheck::check(
177     const MatchFinder::MatchResult &Result) {
178   auto* Context = Result.Context;
179   if (Context->getDiagnostics().hasUncompilableErrorOccurred())
180     return;
181 
182   const SourceManager &SM = *Result.SourceManager;
183   const auto *VectorVarDecl =
184       Result.Nodes.getNodeAs<VarDecl>(VectorVarDeclName);
185   const auto *ForLoop = Result.Nodes.getNodeAs<ForStmt>(LoopCounterName);
186   const auto *RangeLoop =
187       Result.Nodes.getNodeAs<CXXForRangeStmt>(RangeLoopName);
188   const auto *VectorAppendCall =
189       Result.Nodes.getNodeAs<CXXMemberCallExpr>(PushBackOrEmplaceBackCallName);
190   const auto *ProtoVarDecl = Result.Nodes.getNodeAs<VarDecl>(ProtoVarDeclName);
191   const auto *ProtoAddFieldCall =
192       Result.Nodes.getNodeAs<CXXMemberCallExpr>(ProtoAddFieldCallName);
193   const auto *LoopEndExpr = Result.Nodes.getNodeAs<Expr>(LoopEndExprName);
194   const auto *LoopParent = Result.Nodes.getNodeAs<CompoundStmt>(LoopParentName);
195 
196   const CXXMemberCallExpr *AppendCall =
197       VectorAppendCall ? VectorAppendCall : ProtoAddFieldCall;
198   assert(AppendCall && "no append call expression");
199 
200   const Stmt *LoopStmt = ForLoop;
201   if (!LoopStmt)
202     LoopStmt = RangeLoop;
203 
204   const auto *TargetVarDecl = VectorVarDecl;
205   if (!TargetVarDecl)
206     TargetVarDecl = ProtoVarDecl;
207 
208   llvm::SmallPtrSet<const DeclRefExpr *, 16> AllVarRefs =
209       utils::decl_ref_expr::allDeclRefExprs(*TargetVarDecl, *LoopParent,
210                                             *Context);
211   for (const auto *Ref : AllVarRefs) {
212     // Skip cases where there are usages (defined as DeclRefExpr that refers
213     // to "v") of vector variable / proto variable `v` before the for loop. We
214     // consider these usages are operations causing memory preallocation (e.g.
215     // "v.resize(n)", "v.reserve(n)").
216     //
217     // FIXME: make it more intelligent to identify the pre-allocating
218     // operations before the for loop.
219     if (SM.isBeforeInTranslationUnit(Ref->getLocation(),
220                                      LoopStmt->getBeginLoc())) {
221       return;
222     }
223   }
224 
225   std::string PartialReserveStmt;
226   if (VectorAppendCall != nullptr) {
227     PartialReserveStmt = ".reserve";
228   } else {
229     llvm::StringRef FieldName = ProtoAddFieldCall->getMethodDecl()->getName();
230     FieldName.consume_front("add_");
231     std::string MutableFieldName = ("mutable_" + FieldName).str();
232     PartialReserveStmt = "." + MutableFieldName +
233                          "()->Reserve"; // e.g., ".mutable_xxx()->Reserve"
234   }
235 
236   llvm::StringRef VarName = Lexer::getSourceText(
237       CharSourceRange::getTokenRange(
238           AppendCall->getImplicitObjectArgument()->getSourceRange()),
239       SM, Context->getLangOpts());
240 
241   std::string ReserveSize;
242   // Handle for-range loop cases.
243   if (RangeLoop) {
244     // Get the range-expression in a for-range statement represented as
245     // `for (range-declarator: range-expression)`.
246     StringRef RangeInitExpName =
247         Lexer::getSourceText(CharSourceRange::getTokenRange(
248                                  RangeLoop->getRangeInit()->getSourceRange()),
249                              SM, Context->getLangOpts());
250     ReserveSize = (RangeInitExpName + ".size()").str();
251   } else if (ForLoop) {
252     // Handle counter-based loop cases.
253     StringRef LoopEndSource = Lexer::getSourceText(
254         CharSourceRange::getTokenRange(LoopEndExpr->getSourceRange()), SM,
255         Context->getLangOpts());
256     ReserveSize = std::string(LoopEndSource);
257   }
258 
259   auto Diag = diag(AppendCall->getBeginLoc(),
260                    "%0 is called inside a loop; consider pre-allocating the "
261                    "container capacity before the loop")
262               << AppendCall->getMethodDecl()->getDeclName();
263   if (!ReserveSize.empty()) {
264     std::string ReserveStmt =
265         (VarName + PartialReserveStmt + "(" + ReserveSize + ");\n").str();
266     Diag << FixItHint::CreateInsertion(LoopStmt->getBeginLoc(), ReserveStmt);
267   }
268 }
269 
270 } // namespace performance
271 } // namespace tidy
272 } // namespace clang
273