1 //===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "BranchCloneCheck.h"
11 #include "clang/AST/ASTContext.h"
12 #include "clang/ASTMatchers/ASTMatchFinder.h"
13 #include "clang/Analysis/CloneDetection.h"
14 #include "clang/Lex/Lexer.h"
15 #include "llvm/Support/Casting.h"
16 
17 using namespace clang;
18 using namespace clang::ast_matchers;
19 
20 /// Returns true when the statements are Type I clones of each other.
areStatementsIdentical(const Stmt * LHS,const Stmt * RHS,const ASTContext & Context)21 static bool areStatementsIdentical(const Stmt *LHS, const Stmt *RHS,
22                                    const ASTContext &Context) {
23   llvm::FoldingSetNodeID DataLHS, DataRHS;
24   LHS->Profile(DataLHS, Context, false);
25   RHS->Profile(DataRHS, Context, false);
26   return (DataLHS == DataRHS);
27 }
28 
29 namespace {
30 /// A branch in a switch may consist of several statements; while a branch in
31 /// an if/else if/else chain is one statement (which may be a CompoundStmt).
32 using SwitchBranch = llvm::SmallVector<const Stmt *, 2>;
33 } // anonymous namespace
34 
35 /// Determines if the bodies of two branches in a switch statements are Type I
36 /// clones of each other. This function only examines the body of the branch
37 /// and ignores the `case X:` or `default:` at the start of the branch.
areSwitchBranchesIdentical(const SwitchBranch LHS,const SwitchBranch RHS,const ASTContext & Context)38 static bool areSwitchBranchesIdentical(const SwitchBranch LHS,
39                                        const SwitchBranch RHS,
40                                        const ASTContext &Context) {
41   if (LHS.size() != RHS.size())
42     return false;
43 
44   for (size_t i = 0, Size = LHS.size(); i < Size; i++) {
45     // NOTE: We strip goto labels and annotations in addition to stripping
46     // the `case X:` or `default:` labels, but it is very unlikely that this
47     // would casue false positives in real-world code.
48     if (!areStatementsIdentical(LHS[i]->stripLabelLikeStatements(),
49                                 RHS[i]->stripLabelLikeStatements(), Context)) {
50       return false;
51     }
52   }
53 
54   return true;
55 }
56 
57 namespace clang {
58 namespace tidy {
59 namespace bugprone {
60 
registerMatchers(MatchFinder * Finder)61 void BranchCloneCheck::registerMatchers(MatchFinder *Finder) {
62   Finder->addMatcher(
63       ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())),
64              stmt().bind("if"),
65              hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))),
66              hasElse(stmt().bind("else"))),
67       this);
68   Finder->addMatcher(switchStmt().bind("switch"), this);
69   Finder->addMatcher(conditionalOperator().bind("condOp"), this);
70 }
71 
check(const MatchFinder::MatchResult & Result)72 void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
73   const ASTContext &Context = *Result.Context;
74 
75   if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("if")) {
76     const Stmt *Then = IS->getThen();
77     assert(Then && "An IfStmt must have a `then` branch!");
78 
79     const Stmt *Else = Result.Nodes.getNodeAs<Stmt>("else");
80     assert(Else && "We only look for `if` statements with an `else` branch!");
81 
82     if (!isa<IfStmt>(Else)) {
83       // Just a simple if with no `else if` branch.
84       if (areStatementsIdentical(Then->IgnoreContainers(),
85                                  Else->IgnoreContainers(), Context)) {
86         diag(IS->getBeginLoc(), "if with identical then and else branches");
87         diag(IS->getElseLoc(), "else branch starts here", DiagnosticIDs::Note);
88       }
89       return;
90     }
91 
92     // This is the complicated case when we start an if/else if/else chain.
93     // To find all the duplicates, we collect all the branches into a vector.
94     llvm::SmallVector<const Stmt *, 4> Branches;
95     const IfStmt *Cur = IS;
96     while (true) {
97       // Store the `then` branch.
98       Branches.push_back(Cur->getThen());
99 
100       Else = Cur->getElse();
101       // The chain ends if there is no `else` branch.
102       if (!Else)
103         break;
104 
105       // Check if there is another `else if`...
106       Cur = dyn_cast<IfStmt>(Else);
107       if (!Cur) {
108         // ...this is just a plain `else` branch at the end of the chain.
109         Branches.push_back(Else);
110         break;
111       }
112     }
113 
114     size_t N = Branches.size();
115     llvm::BitVector KnownAsClone(N);
116 
117     for (size_t i = 0; i + 1 < N; i++) {
118       // We have already seen Branches[i] as a clone of an earlier branch.
119       if (KnownAsClone[i])
120         continue;
121 
122       int NumCopies = 1;
123 
124       for (size_t j = i + 1; j < N; j++) {
125         if (KnownAsClone[j] ||
126             !areStatementsIdentical(Branches[i]->IgnoreContainers(),
127                                     Branches[j]->IgnoreContainers(), Context))
128           continue;
129 
130         NumCopies++;
131         KnownAsClone[j] = true;
132 
133         if (NumCopies == 2) {
134           // We report the first occurrence only when we find the second one.
135           diag(Branches[i]->getBeginLoc(),
136                "repeated branch in conditional chain");
137           SourceLocation End =
138               Lexer::getLocForEndOfToken(Branches[i]->getEndLoc(), 0,
139                                          *Result.SourceManager, getLangOpts());
140           if (End.isValid()) {
141             diag(End, "end of the original", DiagnosticIDs::Note);
142           }
143         }
144 
145         diag(Branches[j]->getBeginLoc(), "clone %0 starts here",
146              DiagnosticIDs::Note)
147             << (NumCopies - 1);
148       }
149     }
150     return;
151   }
152 
153   if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>("condOp")) {
154     // We do not try to detect chains of ?: operators.
155     if (areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(), Context))
156       diag(CO->getQuestionLoc(),
157            "conditional operator with identical true and false expressions");
158 
159     return;
160   }
161 
162   if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>("switch")) {
163     const CompoundStmt *Body = dyn_cast_or_null<CompoundStmt>(SS->getBody());
164 
165     // Code like
166     //   switch (x) case 0: case 1: foobar();
167     // is legal and calls foobar() if and only if x is either 0 or 1;
168     // but we do not try to distinguish branches in such code.
169     if (!Body)
170       return;
171 
172     // We will first collect the branches of the switch statements. For the
173     // sake of simplicity we say that branches are delimited by the SwitchCase
174     // (`case:` or `default:`) children of Body; that is, we ignore `case:` or
175     // `default:` labels embedded inside other statements and we do not follow
176     // the effects of `break` and other manipulation of the control-flow.
177     llvm::SmallVector<SwitchBranch, 4> Branches;
178     for (const Stmt *S : Body->body()) {
179       // If this is a `case` or `default`, we start a new, empty branch.
180       if (isa<SwitchCase>(S))
181         Branches.emplace_back();
182 
183       // There may be code before the first branch (which can be dead code
184       // and can be code reached either through goto or through case labels
185       // that are embedded inside e.g. inner compound statements); we do not
186       // store those statements in branches.
187       if (!Branches.empty())
188         Branches.back().push_back(S);
189     }
190 
191     auto End = Branches.end();
192     auto BeginCurrent = Branches.begin();
193     while (BeginCurrent < End) {
194       auto EndCurrent = BeginCurrent + 1;
195       while (EndCurrent < End &&
196              areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) {
197         ++EndCurrent;
198       }
199       // At this point the iterator range {BeginCurrent, EndCurrent} contains a
200       // complete family of consecutive identical branches.
201       if (EndCurrent > BeginCurrent + 1) {
202         diag(BeginCurrent->front()->getBeginLoc(),
203              "switch has %0 consecutive identical branches")
204             << static_cast<int>(std::distance(BeginCurrent, EndCurrent));
205 
206         SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc();
207         // If the case statement is generated from a macro, it's SourceLocation
208         // may be invalid, resulting in an assertion failure down the line.
209         // While not optimal, try the begin location in this case, it's still
210         // better then nothing.
211         if (EndLoc.isInvalid())
212           EndLoc = (EndCurrent - 1)->back()->getBeginLoc();
213 
214         if (EndLoc.isMacroID())
215           EndLoc = Context.getSourceManager().getExpansionLoc(EndLoc);
216         EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager,
217                                             getLangOpts());
218 
219         if (EndLoc.isValid()) {
220           diag(EndLoc, "last of these clones ends here", DiagnosticIDs::Note);
221         }
222       }
223       BeginCurrent = EndCurrent;
224     }
225     return;
226   }
227 
228   llvm_unreachable("No if statement and no switch statement.");
229 }
230 
231 } // namespace bugprone
232 } // namespace tidy
233 } // namespace clang
234