1#!/usr/bin/env python3
2# Copyright 2020 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6import argparse
7import collections
8import datetime
9import os
10import subprocess
11import sys
12import time
13
14import filtered_utils
15import filters
16import lazytree
17import utils
18
19# Use avg speed of last TIMING_DISTANCE commits.
20_TIMING_DISTANCE = 100
21# Verify the tree is consistent (diff-based, and actual) when a commit is made
22# after every _VERIFY_INTEGRITY_DISTANCE in browser repository.
23# Merge commits are always verified.
24_VERIFY_INTEGRITY_DISTANCE = 1000
25
26
27def timing(timing_deque, update=True):
28    """Returns a speed (c/s), and updates timing_deque.
29
30    Args:
31        timing_deque: a deque to store the timing of past _TIMING_DISTANCE.
32        update: adds current timestamp to timing_deque if True. It needs to set
33            to False, if it wants to be called multiple times with the current
34            timestamp.
35    """
36    first = timing_deque[0]
37    now = time.time()
38    if update:
39        timing_deque.append(now)
40        if len(timing_deque) > _TIMING_DISTANCE:
41            timing_deque.popleft()
42    return _TIMING_DISTANCE / (now - first)
43
44
45def get_start_commit_of_browser_tree(parent_filtered):
46    """Returns the last commit committed by the script, and its metadata.
47
48    Args:
49        parent_filtered: the commit hash of the tip of the filtered branch.
50    """
51    current = parent_filtered
52    while True:
53        meta = filtered_utils.get_metadata(current)
54        if meta.original_commits:
55            return current, meta
56        if not meta.parents:
57            return None, None
58        # Follow main line only
59        current = meta.parents[0]
60
61
62def find_filtered_commit(commit, commits_map):
63    """Finds the corresponding parent of a browser commit in filtered branch.
64
65    If not found, the corresponding commit of its least ancestor is used.
66
67    Args:
68        commit: commit hash in browser repository.
69        commits_map: commit hash mapping from original commit to the one in the
70            filtered branch. commits_map may be altered.
71    """
72    look_for = commit
73    while look_for not in commits_map:
74        meta = filtered_utils.get_metadata(look_for)
75        assert len(meta.parents) <= 1
76        if len(meta.parents) == 1:
77            look_for = meta.parents[0]
78        else:
79            look_for = 'ROOT'
80    commits_map[commit] = commits_map[look_for]
81    return commits_map[look_for]
82
83
84def do_commit(treehash, commithash, meta, commits_map):
85    """Makes a commit with the given arguments.
86
87    This creates a commit on the filtered branch with preserving the original
88    commiter name, email, authored timestamp and the message.
89    Also, the special annotation `CrOS-Libchrome-Original-Commit:
90    <original-commit-hash>' is appended at the end of commit message.
91    The parent commits are identified by the parents of the original commit and
92    commits_map.
93
94    Args:
95        treehash: tree object id for this commit.
96        commithash: original commit hash, used to append to commit message.
97        meta: meta data of the original commit.
98        commits_map: current known commit mapping. commits_map may be altered.
99    """
100    parents_parameters = []
101    for parent in meta.parents:
102        parents_parameters.append('-p')
103        parents_parameters.append(find_filtered_commit(parent, commits_map))
104    msg = (meta.message + b'\n\n' +
105           filtered_utils.CROS_LIBCHROME_ORIGINAL_COMMIT +
106           b': ' + commithash + b'\n')
107    return subprocess.check_output(
108        ['git', 'commit-tree'] + parents_parameters + [treehash],
109        env=dict(os.environ,
110                 GIT_AUTHOR_NAME=meta.authorship.name,
111                 GIT_AUTHOR_EMAIL=meta.authorship.email,
112                 GIT_AUTHOR_DATE=b' '.join([meta.authorship.time,
113                                            meta.authorship.timezone])),
114        input=msg).strip(b'\n')
115
116
117def verify_commit(original_commit, new_tree):
118    """Verifies if new_tree is exactly original_commit after filters.
119
120    Args:
121        original_commit: commit hash in Chromium browser tree.
122        new_tree: tree hash created for upstream branch commit.
123    """
124    expected_file_list = filters.filter_file([], utils.get_file_list(original_commit))
125    assert utils.git_mktree(expected_file_list) == new_tree
126
127
128def process_commits(pending_commits, commits_map, progress_callback, commit_callback):
129    """Processes new commits in browser repository.
130
131    Returns the commit hash of the last commit made.
132
133    Args:
134        pending_commits: list of tuple (commit hash, parent hashes) to process,
135            in topological order.
136        commits_map: current known commit mapping. may be altered.
137            progress_callback: callback for every commit in pending_commits. It
138            should take (idx, total, orig_commit_hash, meta) as parameters.
139        commit_callback: callback when a commit is made to filtered branch. It
140            should take (orig_commit_hash, new_commit_hash, meta) as parameters.
141    """
142    last_commit = None
143    last_verified = -1
144    for i, commit in enumerate(pending_commits, start=1):
145        meta = filtered_utils.get_metadata(commit[0])
146        if progress_callback:
147            progress_callback(i, len(pending_commits), commit[0], meta)
148        diff_with_parent = filters.filter_diff(utils.git_difftree(
149            meta.parents[0] if meta.parents else None, commit[0]))
150        git_lazytree = lazytree.LazyTree(
151            filtered_utils.get_metadata(
152                find_filtered_commit(meta.parents[0], commits_map)).tree
153            if meta.parents else None)
154        if len(meta.parents) <= 1 and len(diff_with_parent) == 0:
155            # not merge commit    AND no diff
156            if len(meta.parents) == 1 and meta.parents[0] in commits_map:
157                commits_map[commit[0]] = commits_map[meta.parents[0]]
158            continue
159        for op, f in diff_with_parent:
160            if op == utils.DiffOperations.ADD or op == utils.DiffOperations.REP:
161                git_lazytree[f.path] = f
162            elif op == utils.DiffOperations.DEL:
163                del git_lazytree[f.path]
164        treehash_after_diff_applied = git_lazytree.hash()
165        filtered_commit = do_commit(treehash_after_diff_applied, commit[0],
166                                    meta, commits_map)
167        if commit_callback:
168            commit_callback(commit[0], filtered_commit, meta)
169        commits_map[commit[0]] = filtered_commit
170        last_commit = filtered_commit
171        if len(meta.parents) > 1 or (i - last_verified >=
172                                     _VERIFY_INTEGRITY_DISTANCE):
173            # merge commit    OR  every _VERIFY_INTEGRITY_DISTANCE
174            last_verified = i
175            verify_commit(commit[0], treehash_after_diff_applied)
176    # Verify last commit
177    verify_commit(pending_commits[-1][0], filtered_utils.get_metadata(last_commit).tree)
178    return last_commit
179
180
181def main():
182    # Init args
183    parser = argparse.ArgumentParser(
184        description='Copy file from given commits')
185    parser.add_argument(
186        'parent_filtered', metavar='parent_filtered', type=str, nargs=1,
187        help='commit hash in filtered branch to continue from. usually HEAD of that branch.')
188    parser.add_argument(
189        'goal_browser', metavar='goal_browser', type=str, nargs=1,
190        help='commit hash in browser master branch.')
191    parser.add_argument(
192        '--dry_run', dest='dry_run', action='store_const', const=True, default=False)
193    arg = parser.parse_args(sys.argv[1:])
194
195    # Look for last known commit made by the script in filtered branch.
196    print('Looking for last known commit from', arg.parent_filtered[0])
197    last_known, meta_last_known = get_start_commit_of_browser_tree(
198        arg.parent_filtered[0])
199    if last_known:
200        print('Continuing from', last_known, meta_last_known)
201    else:
202        print('No known last commit')
203    print('parent on filter branch', arg.parent_filtered[0])
204
205    # Get a mapping between browser repository and filtered branch for commits
206    # in filtered branch.
207    print('reading commits details for commits mapping')
208    timing_deque = collections.deque([time.time()])
209    commits_map = filtered_utils.get_commits_map(
210        arg.parent_filtered[0],
211        lambda cur_idx, tot_cnt, cur_hash:
212        (
213            print('Reading', cur_hash, '%d/%d' % (cur_idx, tot_cnt),
214                  '%f c/s' % timing(timing_deque),
215                  end='\r', flush=True),
216        ))
217    if not 'ROOT' in commits_map:
218        commits_map['ROOT'] =subprocess.check_output(
219            ['git', 'commit-tree', '-p', arg.parent_filtered[0],
220             utils.git_mktree([])],
221            input=filtered_utils.CROS_LIBCHROME_INITIAL_COMMIT).strip(b'\n')
222    print()
223    print('loaded commit mapping of', len(commits_map), 'commit')
224
225    # Process newer commits in browser repository from
226    # last_known.original_commits
227    print('search for commits to filter')
228    timing_deque= collections.deque([time.time()])
229    pending_commits = utils.git_revlist(
230        meta_last_known.original_commits[0] if meta_last_known else None,
231        arg.goal_browser[0])
232    print(len(pending_commits), 'commits to process')
233    new_head = process_commits(
234        pending_commits,
235        commits_map,
236        # Print progress
237        lambda cur_idx, tot_cnt, cur_hash, cur_meta: (
238            print('Processing',
239                  cur_hash, '%d/%d' % (cur_idx, tot_cnt),
240                  '%f c/s' % timing(timing_deque, update=False),
241                  'eta %s' % (
242                      datetime.timedelta(
243                          seconds=int((tot_cnt - cur_idx) / timing(timing_deque)))),
244                  cur_meta.title[:50],
245                  end='\r', flush=True),
246        ),
247        # Print new commits
248        lambda orig_hash, new_hash, commit_meta:
249            print(b'%s is commited as %s: %s' % (orig_hash, new_hash,
250                                                 commit_meta.title[:50]))
251    )
252    print()
253    print('New HEAD should be', new_head.decode('ascii'))
254
255
256if __name__ == '__main__':
257    main()
258