#!/usr/bin/env python3
# Copyright 2020 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
Utility to disconnect history of files from a branch, and reconnect with base on
a different branch.
"""

import argparse
import collections
import subprocess
import sys

import filtered_utils
import lazytree
import utils


class CommitMetadataFactory(dict):
    """Dict-like class to read commit metadata.

    Looking up a commit hash that is not cached yet fetches its metadata via
    filtered_utils.get_metadata and stores it, so each commit is read once.
    """

    def __missing__(self, key):
        """Reads commit metadata if missing, caches it and returns it."""
        value = filtered_utils.get_metadata(key)
        self[key] = value
        return value


def disconnect(source_commit, ref_commit):
    """Creates a commit that disconnects files from source_commit.

    All files existing in ref_commit will be removed from source_commit.

    Args:
        source_commit: commit hash to disconnect from.
        ref_commit: commit hash to be a file list reference.

    Returns:
        Whatever utils.git_commit returns for the new commit, whose only
        parent is source_commit.
    """
    source_files = utils.get_file_list(source_commit)
    ref_files = utils.get_file_list(ref_commit)
    ref_files_set = {ref.path for ref in ref_files}
    kept_files = [ref for ref in source_files if ref.path not in ref_files_set]
    tree = utils.git_mktree(kept_files)
    return utils.git_commit(
        tree, [source_commit],
        message=b'Disconnect history from %s' % (source_commit.encode('ascii')))


def connect_base(current_commit, base_commit):
    """Creates a merge commit that takes files from base_commit.

    Literally it's identical to git merge base_commit in current_commit.

    Args:
        current_commit: commit hash to commit on top of.
        base_commit: commit hash that contains the file histories.

    Returns:
        Whatever utils.git_commit returns for the new commit, whose parents
        are current_commit and base_commit (in that order).
    """
    current_files = utils.get_file_list(current_commit)
    base_files = utils.get_file_list(base_commit)
    # The tree is simply the concatenation of both file lists.
    # NOTE(review): presumably the two lists never share a path because
    # disconnect() removed base's paths first -- confirm against callers.
    tree = utils.git_mktree(current_files + base_files)
    return utils.git_commit(
        tree, [current_commit, base_commit],
        message=b'Connect history with base %s' % (base_commit.encode('ascii')))


def blame_files(commithash, files):
    """Blames files on given commithash.

    Args:
        commithash: commit hash to run the blame on.
        files: iterable of file paths to blame.

    Returns:
        A dict from path to the result of utils.git_blame for that path.
    """
    return {path: utils.git_blame(commithash, path) for path in files}


def search_blame_line(blames, amend_commits, target_commit_hash):
    """Searches blames matching target_commit_hash in amend_commits.

    Returns a map from file path to a list of tuples, each tuple has
    len(amend_commits) + 1 elements. 0-th element is the line in blames, and
    1st to n-th elements are the corresponding lines in amend_commits blames.
    Lines are paired positionally with zip(), so pairing stops at the shortest
    blame of a file.

    Args:
        blames: a dict from path to list of GitBlameLine, for files blamed on
            target_commit_hash.
        amend_commits: a list of commit hashes to provide actual history.
        target_commit_hash: commit hash that blames are blamed on.
    """
    blames_combined = {}
    for blame_file_path, blame_file in blames.items():
        blames_amend = [
            utils.git_blame(commit, blame_file_path) for commit in amend_commits
        ]
        # Keep only the lines that blame attributes to target_commit_hash.
        blames_combined[blame_file_path] = [
            blame_combined for blame_combined in zip(blame_file, *blames_amend)
            if blame_combined[0].commit == target_commit_hash
        ]
    return blames_combined


def get_track_from_blames(blames_combined, virtual_goal_commit, amend_commits,
                          commit_choice_cache, commit_msg_cache):
    """Blames diffs and locates the amend commits.

    When a combination of candidate commits has not been seen before, the
    candidates are printed and the user is asked on stdin which one to use.

    Returns a tuple containing:
     - a set of commit hashes in amend_commits tree;
     - a line-by-line mapping for files in diff to commit hashes in
       amend_commits tree of diffed lines.

    Args:
        blames_combined: a map from path to a list of tuples. Each tuple
            reflects one line, and has len(amend_commits)+1 elements. See more
            details in search_blame_line.
        virtual_goal_commit: a commit that contains no useful history for diffs.
            Currently unused; kept for interface compatibility.
        amend_commits: list of HEAD commit hashes that refer to trees that can
            amend the diffs. Currently unused; kept for interface
            compatibility.
        commit_choice_cache: caches user choice on which amend commit to use.
            Updated in place.
        commit_msg_cache: caches commit metadata.
    """
    blame_untracked_lines = {}
    commits_to_track = set()

    for blame_file_path, blame_lines in blames_combined.items():
        blame_untracked_lines[blame_file_path] = []
        for blame_line in blame_lines:
            # Element 0 is the goal line; the rest are the amend candidates.
            original_commits = tuple(
                blame_amend.commit for blame_amend in blame_line[1:])
            chosen = commit_choice_cache.get(original_commits)
            if chosen is None:
                for idx, original_commit in enumerate(original_commits):
                    print('%d: %s' % (idx,
                                      commit_msg_cache[original_commit].title))
                # No validation on user_choice since no untrusted user.
                # Also the developer can rerun if entered wrongly by accident.
                user_choice = int(input('Choose patch: '))
                chosen = original_commits[user_choice]
                commit_choice_cache[original_commits] = chosen
            commits_to_track.add(chosen)
            blame_untracked_lines[blame_file_path].append((blame_line[0],
                                                           chosen))

    return commits_to_track, blame_untracked_lines


def reconstruct_file(blame_goal, blame_base, lines_to_reconstruct,
                     virtual_goal_commit):
    """Reconstructs a file to reflect changes in lines_to_reconstruct.

    Walks blame_base and blame_goal in lockstep. Lines from blame_goal that are
    listed in lines_to_reconstruct are taken; base lines they replace (the base
    lines that directly follow) are dropped. Everything else is kept as in
    blame_base.

    Returns the new file content as bytes, every line terminated with b'\\n'.

    Args:
        blame_goal: a list of utils.GitBlameLine blaming the file on
            virtual_goal_commit.
        blame_base: a list of utils.GitBlameLine blaming the file on last
            committed commit.
        lines_to_reconstruct: only to reconstruct these lines, instead of
            everything in blame_goal. It is represented in a list of
            GitBlameLine.
        virtual_goal_commit: commit hash where blame_goal is based on.

    Raises:
        ValueError: if a line in blame_goal that is not attributed to
            virtual_goal_commit has no counterpart left in blame_base.
    """
    idx_base, idx_goal = 0, 0
    reconstructed_file = []
    # Must be initialised before the loop: the last branch below reads it and
    # may be the very first branch taken (e.g. first line removed in goal).
    should_skip_base = False

    print('Changed lines are', [line.data for line in lines_to_reconstruct])
    line_iter = iter(lines_to_reconstruct)
    line = next(line_iter, None)
    while idx_base < len(blame_base) or idx_goal < len(blame_goal):
        # Either side may run out before the other (e.g. the last line of the
        # file was modified or removed), so never index past the end.
        base_line = blame_base[idx_base] if idx_base < len(blame_base) else None
        goal_line = blame_goal[idx_goal] if idx_goal < len(blame_goal) else None

        # Both sides are identical. We can't compare blame_base, and line
        # directly due to blame commit difference could end up different lineno.
        if (base_line is not None and goal_line is not None and
                base_line.data == goal_line.data and
                base_line.commit == goal_line.commit):
            # We append this line if both sides are identical.
            reconstructed_file.append(base_line.data)
            idx_base += 1
            idx_goal += 1
            should_skip_base = False
        elif goal_line is not None and line and goal_line == line:
            # We append the line from goal, if blame_goal[idx_goal] is the line
            # we're interested in.
            reconstructed_file.append(line.data)
            line = next(line_iter, None)
            idx_goal += 1
            should_skip_base = True
        elif goal_line is not None and goal_line.commit == virtual_goal_commit:
            # We skip the line from goal, if the change is not in the commit
            # we're interested. Thus, changed lines in other commits will not be
            # reflected.
            idx_goal += 1
        else:
            if base_line is None:
                # A goal line inherited from history should always be found in
                # base; without this check the loop would never terminate.
                raise ValueError(
                    'Line %d of goal blame has no counterpart in base blame' %
                    (idx_goal,))
            # We should skip base if we just appended some lines from goal.
            # This would treat modified lines and append first and skip later.
            # If we didn't append something from goal, lines from base should be
            # preserved because the modified lines are not in the commit we're
            # currently interested in.
            if not should_skip_base:
                reconstructed_file.append(base_line.data)
            idx_base += 1

    return b''.join(data + b'\n' for data in reconstructed_file)


def reconstruct_files(track_commit, blame_untracked_lines, blames,
                      current_base_commit, virtual_goal_commit):
    """Reconstructs files to reflect changes in track_commit.

    Returns a map from file path to file content for reconstructed files. Only
    files with at least one line assigned to track_commit are included.

    Args:
        track_commit: commit hash to track, and reconstruct from.
        blame_untracked_lines: a line-by-line mapping regarding selected amend
            commits for diffs. See get_track_from_blames for more.
        blames: a map from filename to list of utils.GitBlameLine.
        current_base_commit: commit hash for HEAD of base that contains base
            history + already committed amend history.
        virtual_goal_commit: commit hash for one giant commit that has no
            history. virtual_goal_commit is one commit ahead of
            current_base_commit.
    """
    # Collect, per file, the goal lines the user assigned to track_commit.
    lines_to_track = collections.defaultdict(list)
    for path, lines in blame_untracked_lines.items():
        for goal_line, chosen_commit in lines:
            if chosen_commit == track_commit:
                lines_to_track[path].append(goal_line)

    constructed_files = {}
    for current_file, current_file_lines in lines_to_track.items():
        print('Reconstructing', current_file, 'for', track_commit)
        blame_base = utils.git_blame(current_base_commit, current_file)
        constructed_files[current_file] = reconstruct_file(
            blames[current_file], blame_base, current_file_lines,
            virtual_goal_commit)
    return constructed_files


def main():
    """Parses the command line and rebuilds history commit by commit.

    Disconnects the files of base_commit from disconnect_from, merges
    base_commit back in, then repeatedly replays one amend commit at a time
    until the tree matches disconnect_from or nothing is left to track. A final
    commit with the tree of disconnect_from is created and printed.
    """
    # Init args
    parser = argparse.ArgumentParser(description='Reconnect git history')
    parser.add_argument(
        'disconnect_from',
        metavar='disconnect_from',
        type=str,
        nargs=1,
        help='disconnect history from this commit')
    parser.add_argument(
        'base_commit',
        metavar='base_commit',
        type=str,
        nargs=1,
        help='base commit to use the history')
    parser.add_argument(
        'amend_commits',
        metavar='amend_commits',
        type=str,
        nargs='+',
        help='commits to amend histories from base_commit')

    arg = parser.parse_args(sys.argv[1:])
    empty_commit = disconnect(arg.disconnect_from[0], arg.base_commit[0])
    connected_base = connect_base(empty_commit, arg.base_commit[0])

    commit_msg_cache = CommitMetadataFactory()
    commit_choice_cache = {}
    last_commit = connected_base
    # In each iteration of the loop, it
    #  - re-creates the new goal commit, (base + committed history + (one giant)
    #    uncommitted history).
    #  - blames on new goal commit and tot of amend commits. Maps line-by-line
    #    from uncommitted to past histories.
    #  - chooses one of the past commits, reconstructs files to reflect changes
    #    in that commit, and creates a new commit.
    # last_commit, commit_msg_cache, commit_choice_cache will be persistent
    # across iterations.
    while True:
        # One commit is processed per iteration.

        # Create virtual target commit, and its diff.
        virtual_goal = utils.git_commit(arg.disconnect_from[0] + '^{tree}',
                                        [last_commit])
        diffs = utils.git_difftree(None, virtual_goal)
        if not diffs:
            print('No diffs are found between %s and goal.' %
                  (last_commit.decode('ascii'),))
            break

        blames = blame_files(virtual_goal,
                             [diff.file.path for diff in diffs])
        blames_combined = search_blame_line(blames, arg.amend_commits,
                                            virtual_goal)

        commits_to_track, blame_untracked_lines = get_track_from_blames(
            blames_combined, virtual_goal, arg.amend_commits,
            commit_choice_cache, commit_msg_cache)
        if not commits_to_track:
            print('no commits to track, stopping')
            break

        # Stably choose one commit from commits_to_track, and reconstruct it.
        track_commit = min(commits_to_track)
        print('Reconstructing commit %s: %s' %
              (track_commit, commit_msg_cache[track_commit].title))
        constructed_files = reconstruct_files(track_commit,
                                              blame_untracked_lines, blames,
                                              last_commit, virtual_goal)

        # Mktree and commit with re-constructed_files.
        tree = lazytree.LazyTree(filtered_utils.get_metadata(last_commit).tree)
        for filename, filedata in constructed_files.items():
            # Store the reconstructed content as a blob in the object database.
            blob = subprocess.check_output(
                ['git', 'hash-object', '-w', '/dev/stdin'],
                input=filedata).strip()
            tree[filename] = utils.GitFile(filename, tree[filename].mode, blob)
        meta = commit_msg_cache[track_commit]
        # The new commit reuses the message and authorship of track_commit.
        last_commit = utils.git_commit(
            tree.hash(), [last_commit],
            (meta.message + b'\n(Reconstructed from ' + track_commit + b')\n'),
            dict(
                GIT_AUTHOR_NAME=meta.authorship.name,
                GIT_AUTHOR_EMAIL=meta.authorship.email,
                GIT_AUTHOR_DATE=b' '.join(
                    [meta.authorship.time, meta.authorship.timezone])))
        print('Reconstructed as', last_commit)
    # Make last commit for history reconstruction.
    print(
        utils.git_commit(
            filtered_utils.get_metadata(arg.disconnect_from[0]).tree,
            [last_commit],
            b'Finished history reconstruction\n\nRemoving unnecessary lines\n'))


if __name__ == '__main__':
    main()