1 //===--- GlobalCompilationDatabase.cpp ---------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "GlobalCompilationDatabase.h"
10 #include "FS.h"
11 #include "support/Logger.h"
12 #include "support/Path.h"
13 #include "clang/Frontend/CompilerInvocation.h"
14 #include "clang/Tooling/ArgumentsAdjusters.h"
15 #include "clang/Tooling/CompilationDatabase.h"
16 #include "llvm/ADT/None.h"
17 #include "llvm/ADT/Optional.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/ScopeExit.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/Support/FileSystem.h"
22 #include "llvm/Support/FileUtilities.h"
23 #include "llvm/Support/Path.h"
24 #include "llvm/Support/Program.h"
25 #include <chrono>
26 #include <string>
27 #include <tuple>
28 #include <vector>
29 
30 namespace clang {
31 namespace clangd {
32 namespace {
33 
34 // Runs the given action on all parent directories of filename, starting from
35 // deepest directory and going up to root. Stops whenever action succeeds.
actOnAllParentDirectories(PathRef FileName,llvm::function_ref<bool (PathRef)> Action)36 void actOnAllParentDirectories(PathRef FileName,
37                                llvm::function_ref<bool(PathRef)> Action) {
38   for (auto Path = llvm::sys::path::parent_path(FileName);
39        !Path.empty() && !Action(Path);
40        Path = llvm::sys::path::parent_path(Path))
41     ;
42 }
43 
44 } // namespace
45 
46 tooling::CompileCommand
getFallbackCommand(PathRef File) const47 GlobalCompilationDatabase::getFallbackCommand(PathRef File) const {
48   std::vector<std::string> Argv = {"clang"};
49   // Clang treats .h files as C by default and files without extension as linker
50   // input, resulting in unhelpful diagnostics.
51   // Parsing as Objective C++ is friendly to more cases.
52   auto FileExtension = llvm::sys::path::extension(File);
53   if (FileExtension.empty() || FileExtension == ".h")
54     Argv.push_back("-xobjective-c++-header");
55   Argv.push_back(std::string(File));
56   tooling::CompileCommand Cmd(llvm::sys::path::parent_path(File),
57                               llvm::sys::path::filename(File), std::move(Argv),
58                               /*Output=*/"");
59   Cmd.Heuristic = "clangd fallback";
60   return Cmd;
61 }
62 
63 // Loads and caches the CDB from a single directory.
64 //
65 // This class is threadsafe, which is to say we have independent locks for each
66 // directory we're searching for a CDB.
67 // Loading is deferred until first access.
68 //
69 // The DirectoryBasedCDB keeps a map from path => DirectoryCache.
70 // Typical usage is to:
71 //  - 1) determine all the paths that might be searched
72 //  - 2) acquire the map lock and get-or-create all the DirectoryCache entries
73 //  - 3) release the map lock and query the caches as desired
74 //
75 // FIXME: this should revalidate the cache sometimes
76 // FIXME: IO should go through a VFS
77 class DirectoryBasedGlobalCompilationDatabase::DirectoryCache {
78   // Absolute canonical path that we're the cache for. (Not case-folded).
79   const std::string Path;
80 
81   // True if we've looked for a CDB here and found none.
82   // (This makes it possible for get() to return without taking a lock)
83   // FIXME: this should have an expiry time instead of lasting forever.
84   std::atomic<bool> FinalizedNoCDB = {false};
85 
86   // Guards following cache state.
87   std::mutex Mu;
88   // Has cache been filled from disk? FIXME: this should be an expiry time.
89   bool CachePopulated = false;
90   // Whether a new CDB has been loaded but not broadcast yet.
91   bool NeedsBroadcast = false;
92   // Last loaded CDB, meaningful if CachePopulated is set.
93   // shared_ptr so we can overwrite this when callers are still using the CDB.
94   std::shared_ptr<tooling::CompilationDatabase> CDB;
95 
96 public:
DirectoryCache(llvm::StringRef Path)97   DirectoryCache(llvm::StringRef Path) : Path(Path) {
98     assert(llvm::sys::path::is_absolute(Path));
99   }
100 
101   // Get the CDB associated with this directory.
102   // ShouldBroadcast:
103   //  - as input, signals whether the caller is willing to broadcast a
104   //    newly-discovered CDB. (e.g. to trigger background indexing)
105   //  - as output, signals whether the caller should do so.
106   // (If a new CDB is discovered and ShouldBroadcast is false, we mark the
107   // CDB as needing broadcast, and broadcast it next time we can).
108   std::shared_ptr<const tooling::CompilationDatabase>
get(bool & ShouldBroadcast)109   get(bool &ShouldBroadcast) {
110     // Fast path for common case without taking lock.
111     if (FinalizedNoCDB.load()) {
112       ShouldBroadcast = false;
113       return nullptr;
114     }
115     std::lock_guard<std::mutex> Lock(Mu);
116     auto RequestBroadcast = llvm::make_scope_exit([&, OldCDB(CDB.get())] {
117       // If we loaded a new CDB, it should be broadcast at some point.
118       if (CDB != nullptr && CDB.get() != OldCDB)
119         NeedsBroadcast = true;
120       else if (CDB == nullptr) // nothing to broadcast anymore!
121         NeedsBroadcast = false;
122       // If we have something to broadcast, then do so iff allowed.
123       if (!ShouldBroadcast)
124         return;
125       ShouldBroadcast = NeedsBroadcast;
126       NeedsBroadcast = false;
127     });
128 
129     // For now, we never actually attempt to revalidate a populated cache.
130     if (CachePopulated)
131       return CDB;
132     assert(CDB == nullptr);
133 
134     load();
135     CachePopulated = true;
136 
137     if (!CDB)
138       FinalizedNoCDB.store(true);
139     return CDB;
140   }
141 
path() const142   llvm::StringRef path() const { return Path; }
143 
144 private:
145   // Updates `CDB` from disk state.
load()146   void load() {
147     std::string Error; // ignored, because it's often "didn't find anything".
148     CDB = tooling::CompilationDatabase::loadFromDirectory(Path, Error);
149     if (!CDB) {
150       // Fallback: check for $src/build, the conventional CMake build root.
151       // Probe existence first to avoid each plugin doing IO if it doesn't
152       // exist.
153       llvm::SmallString<256> BuildDir(Path);
154       llvm::sys::path::append(BuildDir, "build");
155       if (llvm::sys::fs::is_directory(BuildDir)) {
156         vlog("Found candidate build directory {0}", BuildDir);
157         CDB = tooling::CompilationDatabase::loadFromDirectory(BuildDir, Error);
158       }
159     }
160     if (CDB) {
161       log("Loaded compilation database from {0}", Path);
162     } else {
163       vlog("No compilation database at {0}", Path);
164     }
165   }
166 };
167 
168 DirectoryBasedGlobalCompilationDatabase::
DirectoryBasedGlobalCompilationDatabase(llvm::Optional<Path> CompileCommandsDir)169     DirectoryBasedGlobalCompilationDatabase(
170         llvm::Optional<Path> CompileCommandsDir) {
171   if (CompileCommandsDir)
172     OnlyDirCache = std::make_unique<DirectoryCache>(*CompileCommandsDir);
173 }
174 
175 DirectoryBasedGlobalCompilationDatabase::
176     ~DirectoryBasedGlobalCompilationDatabase() = default;
177 
178 llvm::Optional<tooling::CompileCommand>
getCompileCommand(PathRef File) const179 DirectoryBasedGlobalCompilationDatabase::getCompileCommand(PathRef File) const {
180   CDBLookupRequest Req;
181   Req.FileName = File;
182   Req.ShouldBroadcast = true;
183 
184   auto Res = lookupCDB(Req);
185   if (!Res) {
186     log("Failed to find compilation database for {0}", File);
187     return llvm::None;
188   }
189 
190   auto Candidates = Res->CDB->getCompileCommands(File);
191   if (!Candidates.empty())
192     return std::move(Candidates.front());
193 
194   return None;
195 }
196 
// For platforms where paths are case-insensitive (but case-preserving),
// we need to do case-insensitive comparisons and use lowercase keys.
// FIXME: Make Path a real class with desired semantics instead.
//        This class is not the only place this problem exists.
// FIXME: Mac filesystems default to case-insensitive, but may be
//        case-sensitive.
202 
maybeCaseFoldPath(PathRef Path)203 static std::string maybeCaseFoldPath(PathRef Path) {
204 #if defined(_WIN32) || defined(__APPLE__)
205   return Path.lower();
206 #else
207   return std::string(Path);
208 #endif
209 }
210 
pathEqual(PathRef A,PathRef B)211 static bool pathEqual(PathRef A, PathRef B) {
212 #if defined(_WIN32) || defined(__APPLE__)
213   return A.equals_lower(B);
214 #else
215   return A == B;
216 #endif
217 }
218 
219 std::vector<DirectoryBasedGlobalCompilationDatabase::DirectoryCache *>
getDirectoryCaches(llvm::ArrayRef<llvm::StringRef> Dirs) const220 DirectoryBasedGlobalCompilationDatabase::getDirectoryCaches(
221     llvm::ArrayRef<llvm::StringRef> Dirs) const {
222   std::vector<std::string> FoldedDirs;
223   FoldedDirs.reserve(Dirs.size());
224   for (const auto &Dir : Dirs)
225     FoldedDirs.push_back(maybeCaseFoldPath(Dir));
226 
227   std::vector<DirectoryCache *> Ret;
228   Ret.reserve(Dirs.size());
229 
230   std::lock_guard<std::mutex> Lock(DirCachesMutex);
231   for (unsigned I = 0; I < Dirs.size(); ++I)
232     Ret.push_back(&DirCaches.try_emplace(FoldedDirs[I], Dirs[I]).first->second);
233   return Ret;
234 }
235 
236 llvm::Optional<DirectoryBasedGlobalCompilationDatabase::CDBLookupResult>
lookupCDB(CDBLookupRequest Request) const237 DirectoryBasedGlobalCompilationDatabase::lookupCDB(
238     CDBLookupRequest Request) const {
239   assert(llvm::sys::path::is_absolute(Request.FileName) &&
240          "path must be absolute");
241 
242   bool ShouldBroadcast = false;
243   DirectoryCache *DirCache = nullptr;
244   std::shared_ptr<const tooling::CompilationDatabase> CDB = nullptr;
245   if (OnlyDirCache) {
246     DirCache = OnlyDirCache.get();
247     ShouldBroadcast = Request.ShouldBroadcast;
248     CDB = DirCache->get(ShouldBroadcast);
249   } else {
250     // Traverse the canonical version to prevent false positives. i.e.:
251     // src/build/../a.cc can detect a CDB in /src/build if not canonicalized.
252     std::string CanonicalPath = removeDots(Request.FileName);
253     std::vector<llvm::StringRef> SearchDirs;
254     actOnAllParentDirectories(CanonicalPath, [&](PathRef Path) {
255       SearchDirs.push_back(Path);
256       return false;
257     });
258     for (DirectoryCache *Candidate : getDirectoryCaches(SearchDirs)) {
259       bool CandidateShouldBroadcast = Request.ShouldBroadcast;
260       if ((CDB = Candidate->get(CandidateShouldBroadcast))) {
261         DirCache = Candidate;
262         ShouldBroadcast = CandidateShouldBroadcast;
263         break;
264       }
265     }
266   }
267 
268   if (!CDB)
269     return llvm::None;
270 
271   CDBLookupResult Result;
272   Result.CDB = std::move(CDB);
273   Result.PI.SourceRoot = DirCache->path().str();
274 
275   // FIXME: Maybe make the following part async, since this can block
276   // retrieval of compile commands.
277   if (ShouldBroadcast)
278     broadcastCDB(Result);
279   return Result;
280 }
281 
broadcastCDB(CDBLookupResult Result) const282 void DirectoryBasedGlobalCompilationDatabase::broadcastCDB(
283     CDBLookupResult Result) const {
284   assert(Result.CDB && "Trying to broadcast an invalid CDB!");
285 
286   std::vector<std::string> AllFiles = Result.CDB->getAllFiles();
287   // We assume CDB in CompileCommandsDir owns all of its entries, since we don't
288   // perform any search in parent paths whenever it is set.
289   if (OnlyDirCache) {
290     assert(OnlyDirCache->path() == Result.PI.SourceRoot &&
291            "Trying to broadcast a CDB outside of CompileCommandsDir!");
292     OnCommandChanged.broadcast(std::move(AllFiles));
293     return;
294   }
295 
296   // Uniquify all parent directories of all files.
297   llvm::StringMap<bool> DirectoryHasCDB;
298   std::vector<llvm::StringRef> FileAncestors;
299   for (llvm::StringRef File : AllFiles) {
300     actOnAllParentDirectories(File, [&](PathRef Path) {
301       auto It = DirectoryHasCDB.try_emplace(Path);
302       // Already seen this path, and all of its parents.
303       if (!It.second)
304         return true;
305 
306       FileAncestors.push_back(It.first->getKey());
307       return pathEqual(Path, Result.PI.SourceRoot);
308     });
309   }
310   // Work out which ones have CDBs in them.
311   for (DirectoryCache *Dir : getDirectoryCaches(FileAncestors)) {
312     bool ShouldBroadcast = false;
313     if (Dir->get(ShouldBroadcast))
314       DirectoryHasCDB.find(Dir->path())->setValue(true);
315   }
316 
317   std::vector<std::string> GovernedFiles;
318   for (llvm::StringRef File : AllFiles) {
319     // A file is governed by this CDB if lookup for the file would find it.
320     // Independent of whether it has an entry for that file or not.
321     actOnAllParentDirectories(File, [&](PathRef Path) {
322       if (DirectoryHasCDB.lookup(Path)) {
323         if (pathEqual(Path, Result.PI.SourceRoot))
324           // Make sure listeners always get a canonical path for the file.
325           GovernedFiles.push_back(removeDots(File));
326         // Stop as soon as we hit a CDB.
327         return true;
328       }
329       return false;
330     });
331   }
332 
333   OnCommandChanged.broadcast(std::move(GovernedFiles));
334 }
335 
336 llvm::Optional<ProjectInfo>
getProjectInfo(PathRef File) const337 DirectoryBasedGlobalCompilationDatabase::getProjectInfo(PathRef File) const {
338   CDBLookupRequest Req;
339   Req.FileName = File;
340   Req.ShouldBroadcast = false;
341   auto Res = lookupCDB(Req);
342   if (!Res)
343     return llvm::None;
344   return Res->PI;
345 }
346 
OverlayCDB(const GlobalCompilationDatabase * Base,std::vector<std::string> FallbackFlags,tooling::ArgumentsAdjuster Adjuster)347 OverlayCDB::OverlayCDB(const GlobalCompilationDatabase *Base,
348                        std::vector<std::string> FallbackFlags,
349                        tooling::ArgumentsAdjuster Adjuster)
350     : Base(Base), ArgsAdjuster(std::move(Adjuster)),
351       FallbackFlags(std::move(FallbackFlags)) {
352   if (Base)
353     BaseChanged = Base->watch([this](const std::vector<std::string> Changes) {
354       OnCommandChanged.broadcast(Changes);
355     });
356 }
357 
358 llvm::Optional<tooling::CompileCommand>
getCompileCommand(PathRef File) const359 OverlayCDB::getCompileCommand(PathRef File) const {
360   llvm::Optional<tooling::CompileCommand> Cmd;
361   {
362     std::lock_guard<std::mutex> Lock(Mutex);
363     auto It = Commands.find(removeDots(File));
364     if (It != Commands.end())
365       Cmd = It->second;
366   }
367   if (!Cmd && Base)
368     Cmd = Base->getCompileCommand(File);
369   if (!Cmd)
370     return llvm::None;
371   if (ArgsAdjuster)
372     Cmd->CommandLine = ArgsAdjuster(Cmd->CommandLine, Cmd->Filename);
373   return Cmd;
374 }
375 
getFallbackCommand(PathRef File) const376 tooling::CompileCommand OverlayCDB::getFallbackCommand(PathRef File) const {
377   auto Cmd = Base ? Base->getFallbackCommand(File)
378                   : GlobalCompilationDatabase::getFallbackCommand(File);
379   std::lock_guard<std::mutex> Lock(Mutex);
380   Cmd.CommandLine.insert(Cmd.CommandLine.end(), FallbackFlags.begin(),
381                          FallbackFlags.end());
382   if (ArgsAdjuster)
383     Cmd.CommandLine = ArgsAdjuster(Cmd.CommandLine, Cmd.Filename);
384   return Cmd;
385 }
386 
setCompileCommand(PathRef File,llvm::Optional<tooling::CompileCommand> Cmd)387 void OverlayCDB::setCompileCommand(
388     PathRef File, llvm::Optional<tooling::CompileCommand> Cmd) {
389   // We store a canonical version internally to prevent mismatches between set
390   // and get compile commands. Also it assures clients listening to broadcasts
391   // doesn't receive different names for the same file.
392   std::string CanonPath = removeDots(File);
393   {
394     std::unique_lock<std::mutex> Lock(Mutex);
395     if (Cmd)
396       Commands[CanonPath] = std::move(*Cmd);
397     else
398       Commands.erase(CanonPath);
399   }
400   OnCommandChanged.broadcast({CanonPath});
401 }
402 
getProjectInfo(PathRef File) const403 llvm::Optional<ProjectInfo> OverlayCDB::getProjectInfo(PathRef File) const {
404   // It wouldn't make much sense to treat files with overridden commands
405   // specially when we can't do the same for the (unknown) local headers they
406   // include or changing behavior mid-air after receiving an override.
407   if (Base)
408     return Base->getProjectInfo(File);
409   return llvm::None;
410 }
411 } // namespace clangd
412 } // namespace clang
413