1 //===-- SymbolIndexManager.cpp - Managing multiple SymbolIndices-*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolIndexManager.h"
10 #include "find-all-symbols/SymbolInfo.h"
11 #include "llvm/ADT/DenseMap.h"
12 #include "llvm/ADT/SmallVector.h"
13 #include "llvm/Support/Debug.h"
14 #include "llvm/Support/Path.h"
15 
16 #define DEBUG_TYPE "clang-include-fixer"
17 
18 namespace clang {
19 namespace include_fixer {
20 
21 using find_all_symbols::SymbolInfo;
22 using find_all_symbols::SymbolAndSignals;
23 
24 // Calculate a score based on whether we think the given header is closely
25 // related to the given source file.
similarityScore(llvm::StringRef FileName,llvm::StringRef Header)26 static double similarityScore(llvm::StringRef FileName,
27                               llvm::StringRef Header) {
28   // Compute the maximum number of common path segments between Header and
29   // a suffix of FileName.
30   // We do not do a full longest common substring computation, as Header
31   // specifies the path we would directly #include, so we assume it is rooted
32   // relatively to a subproject of the repository.
33   int MaxSegments = 1;
34   for (auto FileI = llvm::sys::path::begin(FileName),
35             FileE = llvm::sys::path::end(FileName);
36        FileI != FileE; ++FileI) {
37     int Segments = 0;
38     for (auto HeaderI = llvm::sys::path::begin(Header),
39               HeaderE = llvm::sys::path::end(Header), I = FileI;
40          HeaderI != HeaderE && *I == *HeaderI && I != FileE; ++I, ++HeaderI) {
41       ++Segments;
42     }
43     MaxSegments = std::max(Segments, MaxSegments);
44   }
45   return MaxSegments;
46 }
47 
rank(std::vector<SymbolAndSignals> & Symbols,llvm::StringRef FileName)48 static void rank(std::vector<SymbolAndSignals> &Symbols,
49                  llvm::StringRef FileName) {
50   llvm::DenseMap<llvm::StringRef, double> Score;
51   for (const auto &Symbol : Symbols) {
52     // Calculate a score from the similarity of the header the symbol is in
53     // with the current file and the popularity of the symbol.
54     double NewScore = similarityScore(FileName, Symbol.Symbol.getFilePath()) *
55                       (1.0 + std::log2(1 + Symbol.Signals.Seen));
56     double &S = Score[Symbol.Symbol.getFilePath()];
57     S = std::max(S, NewScore);
58   }
59   // Sort by the gathered scores. Use file name as a tie breaker so we can
60   // deduplicate.
61   std::sort(Symbols.begin(), Symbols.end(),
62             [&](const SymbolAndSignals &A, const SymbolAndSignals &B) {
63               auto AS = Score[A.Symbol.getFilePath()];
64               auto BS = Score[B.Symbol.getFilePath()];
65               if (AS != BS)
66                 return AS > BS;
67               return A.Symbol.getFilePath() < B.Symbol.getFilePath();
68             });
69 }
70 
71 std::vector<find_all_symbols::SymbolInfo>
search(llvm::StringRef Identifier,bool IsNestedSearch,llvm::StringRef FileName) const72 SymbolIndexManager::search(llvm::StringRef Identifier,
73                            bool IsNestedSearch,
74                            llvm::StringRef FileName) const {
75   // The identifier may be fully qualified, so split it and get all the context
76   // names.
77   llvm::SmallVector<llvm::StringRef, 8> Names;
78   Identifier.split(Names, "::");
79 
80   bool IsFullyQualified = false;
81   if (Identifier.startswith("::")) {
82     Names.erase(Names.begin()); // Drop first (empty) element.
83     IsFullyQualified = true;
84   }
85 
86   // As long as we don't find a result keep stripping name parts from the end.
87   // This is to support nested classes which aren't recorded in the database.
88   // Eventually we will either hit a class (namespaces aren't in the database
89   // either) and can report that result.
90   bool TookPrefix = false;
91   std::vector<SymbolAndSignals> MatchedSymbols;
92   do {
93     std::vector<SymbolAndSignals> Symbols;
94     for (const auto &DB : SymbolIndices) {
95       auto Res = DB.get()->search(Names.back());
96       Symbols.insert(Symbols.end(), Res.begin(), Res.end());
97     }
98 
99     LLVM_DEBUG(llvm::dbgs() << "Searching " << Names.back() << "... got "
100                             << Symbols.size() << " results...\n");
101 
102     for (auto &SymAndSig : Symbols) {
103       const SymbolInfo &Symbol = SymAndSig.Symbol;
104       // Match the identifier name without qualifier.
105       bool IsMatched = true;
106       auto SymbolContext = Symbol.getContexts().begin();
107       auto IdentiferContext = Names.rbegin() + 1; // Skip identifier name.
108       // Match the remaining context names.
109       while (IdentiferContext != Names.rend() &&
110              SymbolContext != Symbol.getContexts().end()) {
111         if (SymbolContext->second == *IdentiferContext) {
112           ++IdentiferContext;
113           ++SymbolContext;
114         } else if (SymbolContext->first ==
115                    find_all_symbols::SymbolInfo::ContextType::EnumDecl) {
116           // Skip non-scoped enum context.
117           ++SymbolContext;
118         } else {
119           IsMatched = false;
120           break;
121         }
122       }
123 
124       // If the name was qualified we only want to add results if we evaluated
125       // all contexts.
126       if (IsFullyQualified)
127         IsMatched &= (SymbolContext == Symbol.getContexts().end());
128 
129       // FIXME: Support full match. At this point, we only find symbols in
130       // database which end with the same contexts with the identifier.
131       if (IsMatched && IdentiferContext == Names.rend()) {
132         // If we're in a situation where we took a prefix but the thing we
133         // found couldn't possibly have a nested member ignore it.
134         if (TookPrefix &&
135             (Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Function ||
136              Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Variable ||
137              Symbol.getSymbolKind() ==
138                  SymbolInfo::SymbolKind::EnumConstantDecl ||
139              Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Macro))
140           continue;
141 
142         MatchedSymbols.push_back(std::move(SymAndSig));
143       }
144     }
145     Names.pop_back();
146     TookPrefix = true;
147   } while (MatchedSymbols.empty() && !Names.empty() && IsNestedSearch);
148 
149   rank(MatchedSymbols, FileName);
150   // Strip signals, they are no longer needed.
151   std::vector<SymbolInfo> Res;
152   for (auto &SymAndSig : MatchedSymbols)
153     Res.push_back(std::move(SymAndSig.Symbol));
154   return Res;
155 }
156 
157 } // namespace include_fixer
158 } // namespace clang
159