1# Copyright 2020 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Provide some basic utility functions for libchrome tools."""
6
7import collections
8import enum
9import os
10import re
11import subprocess
12
class DiffOperations(enum.Enum):
    """Kinds of change that can be applied to a single file."""

    # The file is newly added.
    ADD = 1
    # The file is deleted.
    DEL = 2
    # An existing file is replaced by another version.
    REP = 3
20
# A file as recorded in a git tree: its path together with the mode and the
# object id that git reports for it.
GitFile = collections.namedtuple('GitFile', ('path', 'mode', 'id'))

# One entry of a tree diff: the operation (a DiffOperations member) and the
# GitFile it applies to.
GitDiffTree = collections.namedtuple('GitDiffTree', ('op', 'file'))

# One line of `git blame -p` output: the line content (`data`), the commit
# named in the line's header, and the two fields that follow the commit in
# that header (`old_line`, `new_line`).
GitBlameLine = collections.namedtuple(
    'GitBlameLine', ('data', 'commit', 'old_line', 'new_line'))
35
36
# Matches one record of `git diff-tree -r` output. Capture groups, in order:
# old mode, new mode, old object id, new object id, type of change, path.
GIT_DIFFTREE_RE_LINE = re.compile(
    rb'^:([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*)\t(.*)$')
38
39
40def _reverse(files):
41    """Creates a reverse map from file path to file.
42
43    Asserts if a file path exist only once in files.
44
45    Args:
46        files: list of files.
47    """
48    files_map = {}
49    for i in files:
50        if i.path in files_map:
51            assert i.path not in files_map
52        files_map[i.path] = i
53    return files_map
54
55
def get_file_list(commit):
    """Gets a list of the files of the commit.

    Runs `git ls-tree -r` on the commit and turns every blob record of its
    output into a GitFile. Records whose type is `commit` are skipped; any
    other non-blob type trips an assertion.

    Args:
        commit: commit hash or refs.

    Returns:
        A list of GitFile(path, mode, id), in the order git printed them.
        All fields are bytes.
    """
    ls_tree_cmd = ['git', 'ls-tree', '-r', commit]
    output = subprocess.check_output(ls_tree_cmd).split(b'\n')

    # Every record looks like
    #   mode<space>type<space>id<tab>file name
    record_re = re.compile(rb'^([^ ]*) ([^ ]*) ([^ ]*)\t(.*)$')

    files = []
    # filter(None, ...) drops the empty strings produced by splitting on
    # newlines.
    for line in filter(None, output):
        mode, gittype, blobhash, path = record_re.match(line).groups()
        if gittype != b'commit':
            assert gittype == b'blob', '%s\n\n%s' % (str(output), line)
            files.append(GitFile(path, mode, blobhash))
    return files
80
81
def git_difftree(treeish1, treeish2):
    """Gets diffs between treeish1 and treeish2.

    Each record printed by `git diff-tree -r` becomes one GitDiffTree:
    type A maps to ADD, D to DEL, and both M and T to REP. The GitFile of a
    DEL carries the old mode and id; for ADD and REP it carries the new ones.

    Args:
        treeish1, treeish2: treeish to diff.
            treeish can be tree hash or commit hash. If treeish1 is None, only
            treeish2 is handed to git, which then generates difftrees with its
            parent.

    Returns:
        A list of GitDiffTree in the order git printed the records.

    Raises:
        AssertionError: if a record's type of change is not contained in
            b'ADMT'.
        Exception: if the type of change passes that check but is none of
            A, D, M or T.
    """
    command = ['git', 'diff-tree', '-r']
    single_treeish = treeish1 is None
    if single_treeish:
        command.append(treeish2)
    else:
        command.extend([treeish1, treeish2])
    out = subprocess.check_output(command).split(b'\n')
    if single_treeish:
        # Remove first line since it's tree hash printed.
        out = out[1:]

    diff = []
    for line in out:
        if not line:
            continue
        (oldmode, newmode, oldhash, newhash, typeofchange,
         path) = GIT_DIFFTREE_RE_LINE.match(line).groups()
        assert typeofchange in b'ADMT', (treeish1, treeish2, line)
        if typeofchange == b'D':
            # A deleted file only has meaningful "old" attributes.
            change = GitDiffTree(DiffOperations.DEL,
                                 GitFile(path, oldmode, oldhash))
        else:
            if typeofchange == b'A':
                operation = DiffOperations.ADD
            elif typeofchange in (b'M', b'T'):
                operation = DiffOperations.REP
            else:
                raise Exception(b"Unsupported type: " + line)
            change = GitDiffTree(operation, GitFile(path, newmode, newhash))
        diff.append(change)
    return diff
123
124
def gen_op(current_files, target_files):
    """Returns an operation list to convert files to target_files.

    Generates list of operations (add/delete/replace files) if we want to
    convert current_files in directory to target_files.

    Args:
        current_files: list of files in current directory.
        target_files: list of files we want it to be in current directory.

    Returns:
        A list of GitDiffTree(op, file), the same record type git_difftree
        returns. GitDiffTree is a tuple subclass, so each item still unpacks
        and compares like the plain (op, file) pair. All DEL operations come
        first, in sorted path order, followed by the ADD and REP operations,
        also in sorted path order. A DEL carries the current file; ADD and REP
        carry the target file.

    Raises:
        AssertionError: if either list contains the same path twice (raised by
            _reverse).
    """
    current_file_map = _reverse(current_files)
    target_file_map = _reverse(target_files)

    # Files that exist now but are absent from the target are deleted.
    ops = [
        GitDiffTree(DiffOperations.DEL, current_file_map[path])
        for path in sorted(current_file_map)
        if path not in target_file_map
    ]
    for path in sorted(target_file_map):
        target = target_file_map[path]
        if path not in current_file_map:
            ops.append(GitDiffTree(DiffOperations.ADD, target))
        elif current_file_map[path] != target:
            # Same path but a differing record: replace it.
            ops.append(GitDiffTree(DiffOperations.REP, target))
    return ops
147
148
def git_mktree(files):
    """Returns a git tree object hash after mktree recursively.

    The flat list of files is first arranged into nested dicts that mirror
    the directory layout. `git mktree` is then run once per directory, with
    sub-directories created before the directory that contains them.

    Args:
        files: iterable of GitFile whose paths are bytes using b'/' as the
            separator.

    Returns:
        The output of `git mktree` for the root directory (bytes, trailing
        newline stripped).

    Raises:
        AssertionError: if a path is listed twice, or if the same path is used
            both as a file and as a directory.
        subprocess.CalledProcessError: if `git mktree` exits with an error.
    """

    def recursive_default_dict():
        return collections.defaultdict(recursive_default_dict)

    tree = recursive_default_dict()
    for f in files:
        *directories, filename = f.path.split(b'/')
        cwd = tree
        for directory in directories:
            cwd = cwd[directory]
            # Check AFTER descending, so the last path component is covered
            # too: if cwd is a file rather than a dict, a file and a directory
            # share the same name. Raised explicitly (not via `assert`) so the
            # check also runs under `python -O`.
            if not isinstance(cwd, collections.defaultdict):
                raise AssertionError(
                    'a directory component of %r is also listed as a file'
                    % (f.path,))
        if filename in cwd:
            raise AssertionError(
                '%r is listed twice or is also used as a directory'
                % (f.path,))
        cwd[filename] = f

    def _mktree(prefix, node):
        """Runs `git mktree` for node and, first, for its sub-directories."""
        objects = []
        for name, val in node.items():
            if isinstance(val, collections.defaultdict):
                tree_hash = _mktree(prefix + [name], val)
                entry = b' '.join([b'040000', b'tree', tree_hash])
            else:
                # Sanity check: the position in the nested dicts must spell
                # out the file's own path.
                path = b'/'.join(prefix + [name])
                assert path == val.path, '%s\n%s' % (str(path), str(val.path))
                entry = b' '.join([val.mode, b'blob', val.id])
            # mktree input record: mode<space>type<space>id<tab>name
            objects.append(b'\t'.join([entry, name]))
        return subprocess.check_output(['git', 'mktree'],
                                       input=b'\n'.join(objects)).strip(b'\n')

    return _mktree([], tree)
186
187
def git_commit(tree, parents, message=b"", extra_env=None):
    """Creates a commit with `git commit-tree`.

    Args:
        tree: tree object id.
        parents: iterable of parent commit ids; each one is passed to git
            with its own `-p`. May be empty.
        message: commit message (bytes), fed to git on stdin.
        extra_env: optional mapping of extra environment variables passed to
            git; they are layered on top of a copy of os.environ.

    Returns:
        The output of `git commit-tree` (bytes, trailing newline stripped).

    Raises:
        subprocess.CalledProcessError: if git exits with an error.
    """
    parent_args = []
    for parent in parents:
        parent_args += ['-p', parent]

    # Work on a copy so neither os.environ nor the caller's mapping changes.
    env = dict(os.environ)
    if extra_env:
        env.update(extra_env)

    return subprocess.check_output(
        ['git', 'commit-tree', tree] + parent_args,
        input=message,
        env=env).strip(b'\n')
205
206
def git_revlist(from_commit, to_commit):
    """Returns a list of commits and their parents.

    Each item in the list is a tuple, containing two elements.
    The first element is the commit hash; the second element is a list of parent
    commits' hash.

    Args:
        from_commit: one end of the range (str or bytes), or None to pass
            to_commit to git on its own.
        to_commit: the other end of the range (str or bytes).
            When from_commit is given, the revision handed to git is
            `from_commit...to_commit` (three dots).
            NOTE(review): in git, three dots select the symmetric difference
            of the two histories; confirm that this, not `..`, is intended.

    Returns:
        The (commit, parents) tuples in the reverse of the order printed by
        `git rev-list --topo-order --parents`. All hashes are bytes.

    Raises:
        subprocess.CalledProcessError: if git exits with an error.
    """
    if from_commit is None:
        revision = to_commit
    else:
        # b'...'.join() requires both ends to be bytes.
        if isinstance(from_commit, str):
            from_commit = from_commit.encode('ascii')
        if isinstance(to_commit, str):
            to_commit = to_commit.encode('ascii')
        revision = b'...'.join([from_commit, to_commit])

    output = subprocess.check_output(['git', 'rev-list', revision,
                                      '--topo-order', '--parents'])
    commits = []
    for line in output.split(b'\n'):
        if not line:
            continue
        # With --parents each line is: commit<space>parent<space>parent...
        commit, *parents = line.split(b' ')
        commits.append((commit, parents))
    commits.reverse()
    return commits
236
237
def git_blame(commit, filepath):
    """Returns line-by-line git blame.

    Parses the output of `git blame -p`. A header line (hex digits followed
    by a space) supplies the commit and the two fields after it; the next
    line that starts with a tab is the content those values belong to.

    Args:
        commit: commit hash to blame at.
        filepath: file to blame.

    Returns:
        A list of GitBlameLine(data, commit, old_line, new_line), one per
        content line, where data is the content without its leading tab. All
        fields are bytes.
    """
    porcelain = subprocess.check_output(['git', 'blame', '-p',
                                         commit, filepath])
    header_re = re.compile(b'^[0-9a-f]* ')

    blames = []
    # Values taken from the most recent header line, not yet consumed by a
    # content line.
    blamed_commit = old_line = new_line = None
    for line in porcelain.split(b'\n'):
        if not line:
            continue
        if line.startswith(b'\t'):
            # A content line must have been preceded by its header line.
            assert blamed_commit is not None
            blames.append(
                GitBlameLine(line[1:], blamed_commit, old_line, new_line))
            blamed_commit = old_line = new_line = None
        elif header_re.match(line):
            blamed_commit, old_line, new_line = line.split(b' ', 3)[:3]
    return blames
262