1# Copyright 2013-2014 Sebastian Kreft
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Functions to get information from git."""
15
16import os.path
17import subprocess
18
19import gitlint.utils as utils
20
21
def repository_root():
    """Returns the root of the repository as an absolute path.

    Returns: the output of 'git rev-parse --show-toplevel' decoded as unicode,
      or None if git exits with a non-zero status (e.g. when the current
      directory is not inside a repository).
    """
    # Discard stderr instead of merging it into stdout: anything git prints
    # there (warnings on success, the error message on failure) would
    # otherwise end up inside the returned path.
    with open(os.devnull, 'wb') as devnull:
        try:
            root = subprocess.check_output(
                ['git', 'rev-parse', '--show-toplevel'], stderr=devnull)
        except subprocess.CalledProcessError:
            return None

    # Convert to unicode first
    return root.strip().decode('utf-8')
31
32
def last_commit():
    """Returns the SHA1 of the last commit.

    Returns: the output of 'git rev-parse HEAD' decoded as unicode, or None if
      git exits with a non-zero status (e.g. when HEAD cannot be resolved).
    """
    # Discard stderr instead of merging it into stdout: anything git prints
    # there (warnings on success, the error message on failure) would
    # otherwise end up inside the returned SHA1.
    with open(os.devnull, 'wb') as devnull:
        try:
            sha = subprocess.check_output(
                ['git', 'rev-parse', 'HEAD'], stderr=devnull)
        except subprocess.CalledProcessError:
            return None

    # Convert to unicode first
    return sha.strip().decode('utf-8')
42
43
def _remove_filename_quotes(filename):
    """Strips the enclosing double quotes from a filename given by git status.

    The name is returned unchanged unless it both starts and ends with a
    double quote. Only the two enclosing characters are dropped; the text in
    between is not modified.
    """
    is_quoted = filename.startswith('"') and filename.endswith('"')
    if not is_quoted:
        return filename

    return filename[1:-1]
50
51
def modified_files(root, tracked_only=False, commit=None):
    """Returns the files that have been modified since the last commit.

    Args:
      root: the root of the repository, it has to be an absolute path.
      tracked_only: exclude untracked files when True.
      commit: SHA1 of the commit. If None, it will get the modified files in the
        working copy.

    Returns: a dictionary with the modified files as keys, and additional
      information as value. In this case it adds the status returned by
      git status.

    Raises:
      AssertionError: if root is not an absolute path.
      subprocess.CalledProcessError: if the git command exits with a non-zero
        status.
    """
    assert os.path.isabs(root), "Root has to be absolute, got: %s" % root

    if commit:
        return _modified_files_with_commit(root, commit)

    # Convert to unicode and split. git terminates every porcelain line with a
    # line feed, so the split must not depend on os.linesep: where the native
    # separator is CR LF the whole output would stay as one single line.
    status_output = subprocess.check_output([
        'git', 'status', '--porcelain', '--untracked-files=all',
        '--ignore-submodules=all']).decode('utf-8')
    status_lines = status_output.split('\n')

    # Two-character status codes of the entries to report.
    modes = ['M ', ' M', 'A ', 'AM', 'MM']
    if not tracked_only:
        # Untracked files; the question marks are escaped for the regex.
        modes.append(r'\?\?')
    modes_str = '|'.join(modes)

    modified_file_status = utils.filter_lines(
        status_lines,
        r'(?P<mode>%s) (?P<filename>.+)' % modes_str,
        groups=('filename', 'mode'))

    return dict((os.path.join(root, _remove_filename_quotes(filename)), mode)
                for filename, mode in modified_file_status)
87
88
def _modified_files_with_commit(root, commit):
    """Returns the files added or modified by the given commit.

    Args:
      root: the root of the repository; the reported names are joined to it.
      commit: the commit handed to 'git diff-tree'.

    Returns: a dictionary with the added or modified files as keys and the
      status letter followed by a space ('A ' or 'M ') as value.

    Raises:
      subprocess.CalledProcessError: if the git command exits with a non-zero
        status.
    """
    # Convert to unicode and split. git terminates every line with a line
    # feed, so the split must not depend on os.linesep: where the native
    # separator is CR LF the whole output would stay as one single line.
    status_output = subprocess.check_output(
        ['git', 'diff-tree', '-r', '--root', '--no-commit-id', '--name-status',
         commit]).decode('utf-8')
    status_lines = status_output.split('\n')

    modified_file_status = utils.filter_lines(
        status_lines,
        r'(?P<mode>A|M)\s(?P<filename>.+)',
        groups=('filename', 'mode'))

    # We need to add a space to the mode, so to be compatible with the output
    # generated by modified files.
    return dict((os.path.join(root, _remove_filename_quotes(filename)),
                 mode + ' ') for filename, mode in modified_file_status)
104
105
def modified_lines(filename, extra_data, commit=None):
    """Returns the lines that have been modified for this file.

    Args:
      filename: the file to check.
      extra_data: is the extra_data returned by modified_files. Additionally, a
        value of None means that the file was not modified.
      commit: the complete sha1 (40 chars) of the commit. If None, the lines
        of the working copy that are not committed yet are reported.

    Returns: a list of lines that were modified, or None in case all lines are
      new.

    Raises:
      subprocess.CalledProcessError: if git blame exits with a non-zero status.
    """
    if extra_data is None:
        return []
    if extra_data not in ('M ', ' M', 'MM'):
        return None

    blame_command = ['git', 'blame']
    if commit is None:
        # Blame the working copy: git attributes the lines that are not
        # committed yet to the all-zero SHA1. That value is not an existing
        # revision, so it is only used to match the output below and is never
        # passed to git blame.
        commit = '0' * 40
    else:
        blame_command.append(commit)
    blame_command.extend(['--porcelain', '--', filename])
    commit = commit.encode('utf-8')

    # Split as bytes, as the output may have some non unicode characters. The
    # lines end with a line feed regardless of os.linesep.
    blame_lines = subprocess.check_output(blame_command).split(b'\n')
    # Keep the first number that follows the SHA1 on the lines naming this
    # commit.
    modified_line_numbers = utils.filter_lines(
        blame_lines,
        commit + br' (?P<line>\d+) (\d+)',
        groups=('line',))

    return list(map(int, modified_line_numbers))
137