1# Copyright 2020 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""Helpful commands for working with a Git repository."""
15
16import logging
17from pathlib import Path
18import subprocess
19from typing import Collection, Iterable, Iterator, List, NamedTuple, Optional
20from typing import Pattern, Set, Tuple, Union
21
22from pw_presubmit.tools import log_run, plural
23
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)
# Alias for values that may be given as either a pathlib.Path or a str.
PathOrStr = Union[Path, str]
26
27
def git_stdout(*args: PathOrStr,
               show_stderr=False,
               repo: PathOrStr = '.') -> str:
    """Runs a git command through log_run and returns its stdout as text.

    Args:
      *args: arguments placed after 'git -C <repo>' on the command line
      show_stderr: when False, stderr is redirected to subprocess.DEVNULL;
          when True, no stderr redirection is requested
      repo: directory handed to git's -C option

    Returns:
      The captured stdout, decoded and with surrounding whitespace stripped.
    """
    command = ['git', '-C', repo, *args]
    stderr_target = None if show_stderr else subprocess.DEVNULL

    # check=True is forwarded to log_run; a failing command is expected to
    # surface as subprocess.CalledProcessError (within_repo relies on this).
    completed = log_run(command,
                        stdout=subprocess.PIPE,
                        stderr=stderr_target,
                        check=True)
    return completed.stdout.decode().strip()
35
36
def _ls_files(args: Collection[PathOrStr], repo: Path) -> Iterable[Path]:
    """Yields the files reported by git ls-files as absolute paths.

    Args:
      args: pathspecs passed to git ls-files after the '--' separator
      repo: directory in which git is run; each output line is joined onto
          the resolved form of this directory
    """
    base_dir = repo.resolve()
    listing = git_stdout('ls-files', '--', *args, repo=repo)
    yield from (base_dir / name for name in listing.splitlines())
42
43
def _diff_names(commit: str, pathspecs: Collection[PathOrStr],
                repo: Path) -> Iterable[Path]:
    """Yields absolute paths of files changed since the specified commit.

    Args:
      commit: commit passed to git diff as the base of the comparison
      pathspecs: pathspecs passed to git diff after the '--' separator
      repo: directory in which git is run; output lines are joined onto the
          repository root found from this directory
    """
    top_level = root(repo)

    # The lowercase 'd' in --diff-filter excludes deleted files.
    diff_args = ('diff', '--name-only', '--diff-filter=d', commit, '--',
                 *pathspecs)
    changed = git_stdout(*diff_args, repo=repo)

    for relative_name in changed.splitlines():
        yield top_level / relative_name
56
57
def list_files(commit: Optional[str] = None,
               pathspecs: Collection[PathOrStr] = (),
               repo_path: Optional[Path] = None) -> List[Path]:
    """Lists files with git ls-files or git diff --name-only.

    When a commit is given, only files reported by git diff against that
    commit are listed; otherwise the files reported by git ls-files are.

    Args:
      commit: commit to use as a base for git diff
      pathspecs: Git pathspecs to use in git ls-files or diff
      repo_path: repo path from which to run commands; defaults to Path.cwd()

    Returns:
      A sorted list of absolute paths
    """
    working_dir = Path.cwd() if repo_path is None else repo_path

    files = (_diff_names(commit, pathspecs, working_dir)
             if commit else _ls_files(pathspecs, working_dir))
    return sorted(files)
78
79
def has_uncommitted_changes(repo: Optional[Path] = None) -> bool:
    """Returns True if the Git repo has uncommitted changes in it.

    This does not check for untracked files.

    Args:
      repo: directory in which git is run; defaults to Path.cwd()
    """
    git_in_repo = ['git', '-C', Path.cwd() if repo is None else repo]

    # The index has to be refreshed first, otherwise diff-index may not give
    # an accurate answer.
    log_run([*git_in_repo, 'update-index', '-q', '--refresh'], check=True)

    # An exit status of 1 from diff-index means uncommitted changes exist.
    status = log_run([*git_in_repo, 'diff-index', '--quiet', 'HEAD', '--'])
    return status.returncode == 1
94
95
def _describe_constraints(git_root: Path, repo_path: Path,
                          commit: Optional[str],
                          pathspecs: Collection[PathOrStr],
                          exclude: Collection[Pattern[str]]) -> Iterable[str]:
    """Yields one phrase for each restriction placed on a set of files.

    Phrases are produced, in this order, for: a repo_path that is not the
    same location as git_root, a commit, pathspecs, and exclude patterns.
    Restrictions that are absent or empty produce nothing.
    """
    if not git_root.samefile(repo_path):
        subdirectory = repo_path.resolve().relative_to(git_root.resolve())
        yield f'under the {subdirectory} subdirectory'

    if commit:
        yield f'that have changed since {commit}'

    if pathspecs:
        joined_specs = ', '.join(map(str, pathspecs))
        yield f'that match {plural(pathspecs, "pathspec")} ({joined_specs})'

    if exclude:
        counted = plural(exclude, "pattern")
        joined_patterns = ', '.join(regex.pattern for regex in exclude)
        yield f'that do not match {counted} ({joined_patterns})'
115
116
def describe_files(git_root: Path, repo_path: Path, commit: Optional[str],
                   pathspecs: Collection[PathOrStr],
                   exclude: Collection[Pattern]) -> str:
    """Completes 'Doing something to ...' for a set of files in a Git repo.

    Returns:
      'all files in the <name> repo' when nothing restricts the file set, a
      single line when there is exactly one restriction, and otherwise a
      heading followed by one indented '- ' bullet line per restriction.
    """
    restrictions = [
        *_describe_constraints(git_root, repo_path, commit, pathspecs,
                               exclude)
    ]
    repo_name = git_root.name

    if not restrictions:
        return f'all files in the {repo_name} repo'

    heading = f'files in the {repo_name} repo'
    if len(restrictions) == 1:
        (only,) = restrictions
        return f'{heading} {only}'

    bullets = [f'\n    - {entry}' for entry in restrictions]
    return heading + ''.join(bullets)
131
132
def root(repo_path: PathOrStr = '.', *, show_stderr: bool = True) -> Path:
    """Returns the top-level directory of the repository containing a path.

    The result is whatever 'git rev-parse --show-toplevel' prints. Git is run
    from repo_path itself when it is a directory, otherwise from its parent.

    Args:
      repo_path: file or directory expected to be inside a Git repository
      show_stderr: forwarded to git_stdout to control whether git's stderr
          is shown

    Raises:
      FileNotFoundError: the path does not exist
      subprocess.CalledProcessError: the path is not in a Git repo
    """
    location = Path(repo_path)
    if not location.exists():
        raise FileNotFoundError(f'{location} does not exist')

    run_dir = location if location.is_dir() else location.parent
    top_level = git_stdout('rev-parse',
                           '--show-toplevel',
                           repo=run_dir,
                           show_stderr=show_stderr)
    return Path(top_level)
149
150
def within_repo(repo_path: PathOrStr = '.') -> Optional[Path]:
    """Like root(repo_path), but returns None if the path is not in a repo.

    Git's stderr is suppressed for the lookup. Only
    subprocess.CalledProcessError is translated to None; other errors from
    root() propagate.
    """
    try:
        repo_root = root(repo_path, show_stderr=False)
    except subprocess.CalledProcessError:
        return None
    else:
        return repo_root
157
158
def is_repo(repo_path: PathOrStr = '.') -> bool:
    """True if within_repo() finds a Git repository root for the path."""
    found_root = within_repo(repo_path)
    return found_root is not None
162
163
def path(repo_path: PathOrStr,
         *additional_repo_paths: PathOrStr,
         repo: PathOrStr = '.') -> Path:
    """Joins path components onto the root of a Git repository.

    Args:
      repo_path: first component to append to the repository root
      *additional_repo_paths: further components to append, in order
      repo: path used to locate the repository root via root()
    """
    repo_root = root(repo)
    return repo_root.joinpath(repo_path, *additional_repo_paths)
169
170
class PythonPackage(NamedTuple):
    """Paths that make up one Python package found via its setup.py."""
    root: Path  # Directory that contains the setup.py
    package: Path  # The main package directory
    packaged_files: Tuple[Path, ...]  # Sources in the main package dir
    other_files: Tuple[Path, ...]  # Remaining Python files under root

    def all_files(self) -> Tuple[Path, ...]:
        """Returns packaged_files followed by other_files as one tuple."""
        return (*self.packaged_files, *self.other_files)
179
180
def all_python_packages(repo: PathOrStr = '.') -> Iterator[PythonPackage]:
    """Finds all Python packages in the repo based on setup.py locations.

    Every directory holding a setup.py listed by git ls-files is treated as
    a package root. The package directory is the first subdirectory of that
    root found to contain a listed .py file. Roots without such a
    subdirectory are skipped. If .py files turn up under more than one
    subdirectory, a warning is logged and the first subdirectory is used.

    Args:
      repo: directory in which git ls-files is run to locate setup.py files

    Yields:
      A PythonPackage for each setup.py directory with a package directory.
    """
    root_py_dirs = [
        file.parent
        for file in _ls_files(['setup.py', '*/setup.py'], Path(repo))
    ]

    for py_dir in root_py_dirs:
        all_packaged_files = _ls_files([py_dir / '*' / '*.py'], repo=py_dir)
        common_dir: Optional[str] = None

        # Make sure there is only one package directory with Python files in
        # it.
        for file in all_packaged_files:
            package_dir = file.relative_to(py_dir).parts[0]

            if common_dir is None:
                common_dir = package_dir
            elif common_dir != package_dir:
                _LOG.warning(
                    'There are multiple Python package directories in %s: %s '
                    'and %s. This is not supported by pw presubmit. Each '
                    'setup.py should correspond with a single Python package',
                    py_dir, common_dir, package_dir)
                break

        if common_dir is not None:
            packaged_files = tuple(_ls_files(['*/*.py'], repo=py_dir))

            # Hash-based lookup keeps the filter below linear; testing
            # membership against the tuple would rescan it for every file.
            packaged_lookup = frozenset(packaged_files)
            other_files = tuple(
                f for f in _ls_files(['*.py'], repo=py_dir)
                if f.name != 'setup.py' and f not in packaged_lookup)

            yield PythonPackage(py_dir, py_dir / common_dir, packaged_files,
                                other_files)
214
215
def python_packages_containing(
        python_paths: Iterable[Path],
        repo: PathOrStr = '.') -> Tuple[List[PythonPackage], List[Path]]:
    """Finds all Python packages containing the provided Python paths.

    A path belongs to the first package, in the order produced by
    all_python_packages(), whose root is one of the path's parents.

    Args:
      python_paths: paths to look up; they are compared against the package
          roots produced by all_python_packages()
      repo: forwarded to all_python_packages()

    Returns:
      ([packages], [files_not_in_packages]); each package appears once, in
      the order in which it was first matched, and the unmatched files keep
      their input order.
    """
    all_packages = list(all_python_packages(repo))

    # The list keeps the result order stable from run to run (iterating a
    # set of packages would not); the set provides the duplicate check.
    packages: List[PythonPackage] = []
    seen: Set[PythonPackage] = set()
    files_not_in_packages: List[Path] = []

    for python_path in python_paths:
        parents = python_path.parents
        owner = next(
            (package for package in all_packages if package.root in parents),
            None)

        if owner is None:
            files_not_in_packages.append(python_path)
        elif owner not in seen:
            seen.add(owner)
            packages.append(owner)

    return packages, files_not_in_packages
238
239
def commit_message(commit: str = 'HEAD', repo: PathOrStr = '.') -> str:
    """Returns the output of 'git log --format=%B -n1' for a commit.

    Args:
      commit: revision whose log entry is read; defaults to HEAD
      repo: directory in which git is run
    """
    log_args = ('log', '--format=%B', '-n1', commit)
    return git_stdout(*log_args, repo=repo)
242