//===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This library measures the distance between file paths. // It's used for ranking symbols, e.g. in code completion. // |foo/bar.h -> foo/bar.h| = 0. // |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|. // This is an edit-distance, where edits go up or down the directory tree. // It's not symmetrical, the costs of going up and down may not match. // // Dealing with multiple sources: // In practice we care about the distance from a source file, but files near // its main-header and #included files are considered "close". // So we start with a set of (anchor, cost) pairs, and call the distance to a // path the minimum of `cost + |source -> path|`. // // We allow each source to limit the number of up-traversals paths may start // with. Up-traversals may reach things that are not "semantically near". // // Symbol URI schemes: // Symbol locations may be represented by URIs rather than file paths directly. // In this case we want to perform distance computations in URI space rather // than in file-space, without performing redundant conversions. // Therefore we have a lookup structure that accepts URIs, so that intermediate // calculations for the same scheme can be reused. // // Caveats: // Assuming up and down traversals each have uniform costs is simplistic. // Often there are "semantic roots" whose children are almost unrelated. // (e.g. /usr/include/, or / in an umbrella repository). We ignore this. // //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H #include "URI.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Path.h" #include "llvm/Support/StringSaver.h" #include namespace clang { namespace clangd { struct FileDistanceOptions { unsigned UpCost = 2; // |foo/bar.h -> foo| unsigned DownCost = 1; // |foo -> foo/bar.h| unsigned IncludeCost = 2; // |foo.cc -> included_header.h| bool AllowDownTraversalFromRoot = true; // | / -> /a | }; struct SourceParams { // Base cost for paths starting at this source. unsigned Cost = 0; // Limits the number of upwards traversals allowed from this source. unsigned MaxUpTraversals = std::numeric_limits::max(); }; // Supports lookups to find the minimum distance to a file from any source. // This object should be reused, it memoizes intermediate computations. class FileDistance { public: static constexpr unsigned Unreachable = std::numeric_limits::max(); static const llvm::hash_code RootHash; FileDistance(llvm::StringMap Sources, const FileDistanceOptions &Opts = {}); // Computes the minimum distance from any source to the file path. unsigned distance(llvm::StringRef Path); private: // Costs computed so far. Always contains sources and their ancestors. // We store hash codes only. Collisions are rare and consequences aren't dire. llvm::DenseMap Cache; FileDistanceOptions Opts; }; // Supports lookups like FileDistance, but the lookup keys are URIs. // We convert each of the sources to the scheme of the URI and do a FileDistance // comparison on the bodies. class URIDistance { public: // \p Sources must contain absolute paths, not URIs. URIDistance(llvm::StringMap Sources, const FileDistanceOptions &Opts = {}) : Sources(Sources), Opts(Opts) {} // Computes the minimum distance from any source to the URI. // Only sources that can be mapped into the URI's scheme are considered. unsigned distance(llvm::StringRef URI); private: // Returns the FileDistance for a URI scheme, creating it if needed. FileDistance &forScheme(llvm::StringRef Scheme); // We cache the results using the original strings so we can skip URI parsing. llvm::DenseMap Cache; llvm::StringMap Sources; llvm::StringMap> ByScheme; FileDistanceOptions Opts; }; /// Support lookups like FileDistance, but the lookup keys are symbol scopes. /// For example, a scope "na::nb::" is converted to "/na/nb". class ScopeDistance { public: /// QueryScopes[0] is the preferred scope. ScopeDistance(llvm::ArrayRef QueryScopes); unsigned distance(llvm::StringRef SymbolScope); private: FileDistance Distance; }; } // namespace clangd } // namespace clang #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H