1 //===--- InefficientVectorOperationCheck.cpp - clang-tidy------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "InefficientVectorOperationCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Lex/Lexer.h"
13 #include "../utils/DeclRefExprUtils.h"
14 #include "../utils/OptionsUtils.h"
15
16 using namespace clang::ast_matchers;
17
18 namespace clang {
19 namespace tidy {
20 namespace performance {
21
22 namespace {
23
24 // Matcher names. Given the code:
25 //
26 // \code
27 // void f() {
28 // vector<T> v;
29 // for (int i = 0; i < 10 + 1; ++i) {
30 // v.push_back(i);
31 // }
32 //
33 // SomeProto p;
34 // for (int i = 0; i < 10 + 1; ++i) {
35 // p.add_xxx(i);
36 // }
37 // }
38 // \endcode
39 //
40 // The matcher names are bound to following parts of the AST:
41 // - LoopCounterName: The entire for loop (as ForStmt).
42 // - LoopParentName: The body of function f (as CompoundStmt).
43 // - VectorVarDeclName: 'v' (as VarDecl).
44 // - VectorVarDeclStmatName: The entire 'std::vector<T> v;' statement (as
45 // DeclStmt).
46 // - PushBackOrEmplaceBackCallName: 'v.push_back(i)' (as cxxMemberCallExpr).
47 // - LoopInitVarName: 'i' (as VarDecl).
48 // - LoopEndExpr: '10+1' (as Expr).
49 // If EnableProto, the proto related names are bound to the following parts:
50 // - ProtoVarDeclName: 'p' (as VarDecl).
51 // - ProtoVarDeclStmtName: The entire 'SomeProto p;' statement (as DeclStmt).
52 // - ProtoAddFieldCallName: 'p.add_xxx(i)' (as cxxMemberCallExpr).
53 static const char LoopCounterName[] = "for_loop_counter";
54 static const char LoopParentName[] = "loop_parent";
55 static const char VectorVarDeclName[] = "vector_var_decl";
56 static const char VectorVarDeclStmtName[] = "vector_var_decl_stmt";
57 static const char PushBackOrEmplaceBackCallName[] = "append_call";
58 static const char ProtoVarDeclName[] = "proto_var_decl";
59 static const char ProtoVarDeclStmtName[] = "proto_var_decl_stmt";
60 static const char ProtoAddFieldCallName[] = "proto_add_field";
61 static const char LoopInitVarName[] = "loop_init_var";
62 static const char LoopEndExprName[] = "loop_end_expr";
63 static const char RangeLoopName[] = "for_range_loop";
64
supportedContainerTypesMatcher()65 ast_matchers::internal::Matcher<Expr> supportedContainerTypesMatcher() {
66 return hasType(cxxRecordDecl(hasAnyName(
67 "::std::vector", "::std::set", "::std::unordered_set", "::std::map",
68 "::std::unordered_map", "::std::array", "::std::deque")));
69 }
70
71 } // namespace
72
InefficientVectorOperationCheck(StringRef Name,ClangTidyContext * Context)73 InefficientVectorOperationCheck::InefficientVectorOperationCheck(
74 StringRef Name, ClangTidyContext *Context)
75 : ClangTidyCheck(Name, Context),
76 VectorLikeClasses(utils::options::parseStringList(
77 Options.get("VectorLikeClasses", "::std::vector"))),
78 EnableProto(Options.getLocalOrGlobal("EnableProto", false)) {}
79
storeOptions(ClangTidyOptions::OptionMap & Opts)80 void InefficientVectorOperationCheck::storeOptions(
81 ClangTidyOptions::OptionMap &Opts) {
82 Options.store(Opts, "VectorLikeClasses",
83 utils::options::serializeStringList(VectorLikeClasses));
84 Options.store(Opts, "EnableProto", EnableProto);
85 }
86
AddMatcher(const DeclarationMatcher & TargetRecordDecl,StringRef VarDeclName,StringRef VarDeclStmtName,const DeclarationMatcher & AppendMethodDecl,StringRef AppendCallName,MatchFinder * Finder)87 void InefficientVectorOperationCheck::AddMatcher(
88 const DeclarationMatcher &TargetRecordDecl, StringRef VarDeclName,
89 StringRef VarDeclStmtName, const DeclarationMatcher &AppendMethodDecl,
90 StringRef AppendCallName, MatchFinder *Finder) {
91 const auto DefaultConstructorCall = cxxConstructExpr(
92 hasType(TargetRecordDecl),
93 hasDeclaration(cxxConstructorDecl(isDefaultConstructor())));
94 const auto TargetVarDecl =
95 varDecl(hasInitializer(DefaultConstructorCall)).bind(VarDeclName);
96 const auto TargetVarDefStmt =
97 declStmt(hasSingleDecl(equalsBoundNode(std::string(VarDeclName))))
98 .bind(VarDeclStmtName);
99
100 const auto AppendCallExpr =
101 cxxMemberCallExpr(
102 callee(AppendMethodDecl), on(hasType(TargetRecordDecl)),
103 onImplicitObjectArgument(declRefExpr(to(TargetVarDecl))))
104 .bind(AppendCallName);
105 const auto AppendCall = expr(ignoringImplicit(AppendCallExpr));
106 const auto LoopVarInit =
107 declStmt(hasSingleDecl(varDecl(hasInitializer(integerLiteral(equals(0))))
108 .bind(LoopInitVarName)));
109 const auto RefersToLoopVar = ignoringParenImpCasts(
110 declRefExpr(to(varDecl(equalsBoundNode(LoopInitVarName)))));
111
112 // Matchers for the loop whose body has only 1 push_back/emplace_back calling
113 // statement.
114 const auto HasInterestingLoopBody = hasBody(
115 anyOf(compoundStmt(statementCountIs(1), has(AppendCall)), AppendCall));
116 const auto InInterestingCompoundStmt =
117 hasParent(compoundStmt(has(TargetVarDefStmt)).bind(LoopParentName));
118
119 // Match counter-based for loops:
120 // for (int i = 0; i < n; ++i) {
121 // v.push_back(...);
122 // // Or: proto.add_xxx(...);
123 // }
124 //
125 // FIXME: Support more types of counter-based loops like decrement loops.
126 Finder->addMatcher(
127 forStmt(
128 hasLoopInit(LoopVarInit),
129 hasCondition(binaryOperator(
130 hasOperatorName("<"), hasLHS(RefersToLoopVar),
131 hasRHS(expr(unless(hasDescendant(expr(RefersToLoopVar))))
132 .bind(LoopEndExprName)))),
133 hasIncrement(unaryOperator(hasOperatorName("++"),
134 hasUnaryOperand(RefersToLoopVar))),
135 HasInterestingLoopBody, InInterestingCompoundStmt)
136 .bind(LoopCounterName),
137 this);
138
139 // Match for-range loops:
140 // for (const auto& E : data) {
141 // v.push_back(...);
142 // // Or: proto.add_xxx(...);
143 // }
144 //
145 // FIXME: Support more complex range-expressions.
146 Finder->addMatcher(
147 cxxForRangeStmt(
148 hasRangeInit(declRefExpr(supportedContainerTypesMatcher())),
149 HasInterestingLoopBody, InInterestingCompoundStmt)
150 .bind(RangeLoopName),
151 this);
152 }
153
registerMatchers(MatchFinder * Finder)154 void InefficientVectorOperationCheck::registerMatchers(MatchFinder *Finder) {
155 const auto VectorDecl = cxxRecordDecl(hasAnyName(SmallVector<StringRef, 5>(
156 VectorLikeClasses.begin(), VectorLikeClasses.end())));
157 const auto AppendMethodDecl =
158 cxxMethodDecl(hasAnyName("push_back", "emplace_back"));
159 AddMatcher(VectorDecl, VectorVarDeclName, VectorVarDeclStmtName,
160 AppendMethodDecl, PushBackOrEmplaceBackCallName, Finder);
161
162 if (EnableProto) {
163 const auto ProtoDecl =
164 cxxRecordDecl(isDerivedFrom("::proto2::MessageLite"));
165
166 // A method's name starts with "add_" might not mean it's an add field
167 // call; it could be the getter for a proto field of which the name starts
168 // with "add_". So we exclude const methods.
169 const auto AddFieldMethodDecl =
170 cxxMethodDecl(matchesName("::add_"), unless(isConst()));
171 AddMatcher(ProtoDecl, ProtoVarDeclName, ProtoVarDeclStmtName,
172 AddFieldMethodDecl, ProtoAddFieldCallName, Finder);
173 }
174 }
175
check(const MatchFinder::MatchResult & Result)176 void InefficientVectorOperationCheck::check(
177 const MatchFinder::MatchResult &Result) {
178 auto* Context = Result.Context;
179 if (Context->getDiagnostics().hasUncompilableErrorOccurred())
180 return;
181
182 const SourceManager &SM = *Result.SourceManager;
183 const auto *VectorVarDecl =
184 Result.Nodes.getNodeAs<VarDecl>(VectorVarDeclName);
185 const auto *ForLoop = Result.Nodes.getNodeAs<ForStmt>(LoopCounterName);
186 const auto *RangeLoop =
187 Result.Nodes.getNodeAs<CXXForRangeStmt>(RangeLoopName);
188 const auto *VectorAppendCall =
189 Result.Nodes.getNodeAs<CXXMemberCallExpr>(PushBackOrEmplaceBackCallName);
190 const auto *ProtoVarDecl = Result.Nodes.getNodeAs<VarDecl>(ProtoVarDeclName);
191 const auto *ProtoAddFieldCall =
192 Result.Nodes.getNodeAs<CXXMemberCallExpr>(ProtoAddFieldCallName);
193 const auto *LoopEndExpr = Result.Nodes.getNodeAs<Expr>(LoopEndExprName);
194 const auto *LoopParent = Result.Nodes.getNodeAs<CompoundStmt>(LoopParentName);
195
196 const CXXMemberCallExpr *AppendCall =
197 VectorAppendCall ? VectorAppendCall : ProtoAddFieldCall;
198 assert(AppendCall && "no append call expression");
199
200 const Stmt *LoopStmt = ForLoop;
201 if (!LoopStmt)
202 LoopStmt = RangeLoop;
203
204 const auto *TargetVarDecl = VectorVarDecl;
205 if (!TargetVarDecl)
206 TargetVarDecl = ProtoVarDecl;
207
208 llvm::SmallPtrSet<const DeclRefExpr *, 16> AllVarRefs =
209 utils::decl_ref_expr::allDeclRefExprs(*TargetVarDecl, *LoopParent,
210 *Context);
211 for (const auto *Ref : AllVarRefs) {
212 // Skip cases where there are usages (defined as DeclRefExpr that refers
213 // to "v") of vector variable / proto variable `v` before the for loop. We
214 // consider these usages are operations causing memory preallocation (e.g.
215 // "v.resize(n)", "v.reserve(n)").
216 //
217 // FIXME: make it more intelligent to identify the pre-allocating
218 // operations before the for loop.
219 if (SM.isBeforeInTranslationUnit(Ref->getLocation(),
220 LoopStmt->getBeginLoc())) {
221 return;
222 }
223 }
224
225 std::string PartialReserveStmt;
226 if (VectorAppendCall != nullptr) {
227 PartialReserveStmt = ".reserve";
228 } else {
229 llvm::StringRef FieldName = ProtoAddFieldCall->getMethodDecl()->getName();
230 FieldName.consume_front("add_");
231 std::string MutableFieldName = ("mutable_" + FieldName).str();
232 PartialReserveStmt = "." + MutableFieldName +
233 "()->Reserve"; // e.g., ".mutable_xxx()->Reserve"
234 }
235
236 llvm::StringRef VarName = Lexer::getSourceText(
237 CharSourceRange::getTokenRange(
238 AppendCall->getImplicitObjectArgument()->getSourceRange()),
239 SM, Context->getLangOpts());
240
241 std::string ReserveSize;
242 // Handle for-range loop cases.
243 if (RangeLoop) {
244 // Get the range-expression in a for-range statement represented as
245 // `for (range-declarator: range-expression)`.
246 StringRef RangeInitExpName =
247 Lexer::getSourceText(CharSourceRange::getTokenRange(
248 RangeLoop->getRangeInit()->getSourceRange()),
249 SM, Context->getLangOpts());
250 ReserveSize = (RangeInitExpName + ".size()").str();
251 } else if (ForLoop) {
252 // Handle counter-based loop cases.
253 StringRef LoopEndSource = Lexer::getSourceText(
254 CharSourceRange::getTokenRange(LoopEndExpr->getSourceRange()), SM,
255 Context->getLangOpts());
256 ReserveSize = std::string(LoopEndSource);
257 }
258
259 auto Diag = diag(AppendCall->getBeginLoc(),
260 "%0 is called inside a loop; consider pre-allocating the "
261 "container capacity before the loop")
262 << AppendCall->getMethodDecl()->getDeclName();
263 if (!ReserveSize.empty()) {
264 std::string ReserveStmt =
265 (VarName + PartialReserveStmt + "(" + ReserveSize + ");\n").str();
266 Diag << FixItHint::CreateInsertion(LoopStmt->getBeginLoc(), ReserveStmt);
267 }
268 }
269
270 } // namespace performance
271 } // namespace tidy
272 } // namespace clang
273