#!/usr/bin/python3 -B

# Copyright 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Read the EXPECTED_UPSTREAM and merge the files from the upstream."""
import argparse
import datetime
import logging
# pylint: disable=g-importing-member
import os.path
from pathlib import Path
import random
import re
import string
import sys
from typing import List, Tuple, Set, Dict, Optional
from typing import Sequence

# pylint: disable=g-multiple-import
from common_util import (
    ExpectedUpstreamEntry,
    ExpectedUpstreamFile,
    has_file_in_tree,
    LIBCORE_DIR,
    OjluniFinder,
    TEST_PATH,
)

from git import (
    Commit,
    DiffIndex,
    GitCommandError,
    Head,
    IndexFile,
    Repo,
)

# Enable INFO logging for error emitted by GitPython
logging.basicConfig(level=logging.INFO)


def validate_and_remove_unmodified_entries(
    entries: List[ExpectedUpstreamEntry],
    repo: Repo, commit: Commit) -> List[ExpectedUpstreamEntry]:
  """Returns a list of entries of which the file content needs to be updated.

  Args:
    entries: entries to validate against the git database
    repo: the repository object
    commit: the commit whose tree holds the current destination files

  Returns:
    the entries whose destination file is missing from the tree of the given
    commit, or whose content differs from the upstream source blob

  Raises:
    Exception: whatever the git lookup raises for an invalid entry; the
      offending entry is printed to stderr before re-raising.
  """
  commit_tree = commit.tree
  result: List[ExpectedUpstreamEntry] = []

  for e in entries:
    try:
      # The following step validate each entry by querying the git database
      upstream_commit = repo.commit(e.git_ref)
      source_blob = upstream_commit.tree.join(e.src_path)
      if not has_file_in_tree(e.dst_path, commit_tree):
        # Add the entry if the file is missing in the HEAD
        result.append(e)
        continue

      dst_blob = commit_tree.join(e.dst_path)
      # Add the entry if the content is different.
      # data_stream will be close during GC.
      if source_blob.data_stream.read() != dst_blob.data_stream.read():
        result.append(e)
    except Exception:
      print(f"ERROR: reading entry: {e}", file=sys.stderr)
      raise

  return result


THIS_TOOL_PATH = Path(__file__).relative_to(LIBCORE_DIR)

TEMP_EXPECTED_BRANCH_PREFIX = "expected_upstream_"

MSG_FIRST_COMMIT = ("Import {summary}\n"
                    "\n"
                    "List of files:\n"
                    " {files}\n"
                    "\n"
                    f"Generated by {THIS_TOOL_PATH}\n"
                    "\n"
                    "{bug}\n"
                    "Test: N/A\n"
                    "No-Typo-Check: Imported files"
                    "{change_id_str}")

MSG_SECOND_COMMIT = ("Merge {summary} into the "
                     "aosp/main branch\n"
                     "\n"
                     "List of files:\n"
                     " {files}\n"
                     "\n"
                     "{bug}\n"
                     "Test: N/A"
                     "{change_id_str}")

# Sentinel returned by get_diff_entries() when there is nothing to process.
INVALID_DIFF = (None, None)

LICENSE_BLOCK = r"\/\*(?:\*(?!\/)|[^*])*\*\/[ ]*\n+"
REGEX_LICENSE_AND_IMPORT = re.compile(
    r"^(" + LICENSE_BLOCK + ")(import .+;)$", re.MULTILINE)


def create_commit_staging_diff(repo: Repo) -> None:
  r"""Save the current EXPECTED_UPSTREAM file in a new git commit.

  It can be retrieved later if this script fails.

  Args:
    repo: the repository object
  """
  head = repo.head
  index = IndexFile.from_tree(repo, head.commit)
  index.add("EXPECTED_UPSTREAM")

  now_str = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
  msg = f"Staging EXPECTED_UPSTREAM at {now_str}"
  # head=False: create a dangling commit without moving any branch.
  commit = index.commit(message=msg, parent_commits=[head.commit], head=False)

  print(
      f"The current EXPECTED_UPSTREAM file is saved in {commit.hexsha}.\n"
      "If this script fails in the later stage, please retrieve the file by:\n"
      f" git checkout {commit.hexsha} -- EXPECTED_UPSTREAM")


def create_commit_summary(diff_entries: List[ExpectedUpstreamEntry]) -> str:
  r"""Create a commit summary message.

  Args:
    diff_entries: list of new / modified entries

  Returns:
    a string message
  """

  default_msg = "files"
  entries_and_names = [
      (e, OjluniFinder.translate_ojluni_path_to_class_name(e.dst_path))
      for e in diff_entries]

  # Non-test entries
  important_entries: List[Tuple[ExpectedUpstreamEntry, str]] = [
      t for t in entries_and_names
      if t[1] is not None and not t[1].startswith("test.")]
  if not important_entries:
    # Test entries
    important_entries = [t for t in entries_and_names if t[1] is not None and
                         t[1].startswith("test.")]
    # no path is under OJLUNI_JAVA_BASE_PATH or OJLUNI_TEST_PATH
    if not important_entries:
      return default_msg

  # Get ref if all entries come from the same OpenJDK revision
  git_ref = important_entries[0][0].git_ref
  if any(e.git_ref != git_ref for e, _ in important_entries):
    git_ref = None

  if len(important_entries) == 1:
    classes_summary = important_entries[0][1].split(".")[-1]
  else:
    common_prefix = os.path.commonprefix(
        [name for _, name in important_entries])
    prefix_split = common_prefix.split(".")

    # short java package, e.g. javax. or java.n, doesn't provide meaningful
    # commit summary.
    if len(prefix_split) <= 2:
      classes_summary = default_msg
    else:
      # Assume that package name isn't title-case.
      is_package = (not prefix_split[-1] or prefix_split[-1][0].islower())
      if is_package:
        # Discard the prefix after the last "."
        classes_summary = ".".join(prefix_split[:-1])
      else:
        classes_summary = common_prefix + "*"

  if git_ref is None:
    return classes_summary

  abbv_ref = git_ref.split("/", 1)[-1]
  return f"{classes_summary} from {abbv_ref}"


def create_commit_at_expected_upstream(
    repo: Repo, head: Head, new_entries: List[ExpectedUpstreamEntry],
    removed_paths: Set[str], bug_id: str,
    last_expected_change_id: str, discard_working_tree: bool) -> Head:
  r"""Create a new commit importing the given files at the head.

  Args:
    repo: the repository object
    head: the temp expected_upstream branch
    new_entries: a list of entries
    removed_paths: removed paths
    bug_id: bug id
    last_expected_change_id: Gerrit's change Id
    discard_working_tree: discard the working tree.

  Returns:
    the head after it has been moved to the newly created commit
  """
  affected_paths = [e.dst_path for e in new_entries] + list(removed_paths)
  str_affected_paths = "\n ".join(affected_paths)

  for entry in new_entries:
    ref = entry.git_ref
    upstream_commit = repo.commit(ref)
    src_blob = upstream_commit.tree[entry.src_path]
    # Write into the file system directly because GitPython provides no API
    # writing into the index in memory. IndexFile.move doesn't help here,
    # because the API requires the file on the working tree too.
    # However, it's fine, because we later reset the HEAD.
    absolute_dst_path = Path(LIBCORE_DIR, entry.dst_path)
    absolute_dst_path.parent.mkdir(parents=True, exist_ok=True)
    with absolute_dst_path.open("wb") as file:
      file.write(src_blob.data_stream.read())

  entries = ExpectedUpstreamFile(head.commit.tree["EXPECTED_UPSTREAM"]
                                 .data_stream.read()).read_all_entries()
  entries = overlay_entries(entries, new_entries)
  entries = [e for e in entries if e.dst_path not in removed_paths]
  # Write the entries to the file system.
  ExpectedUpstreamFile().sort_and_write_all_entries(entries)

  if discard_working_tree:
    repo.head.reference = head
    repo.head.reset(index=True)
    index = repo.index
  else:
    index = IndexFile.from_tree(repo, head.commit)
  index.add("EXPECTED_UPSTREAM")
  for entry in new_entries:
    index.add(entry.dst_path)

  for p in removed_paths:
    index.remove(p)

  summary_msg = create_commit_summary(new_entries)
  str_bug = "" if bug_id is None else f"Bug: {bug_id}"
  change_id_str = ""
  if last_expected_change_id:
    change_id_str = f"\nChange-Id: {last_expected_change_id}"
  msg = MSG_FIRST_COMMIT.format(summary=summary_msg, files=str_affected_paths,
                                bug=str_bug, change_id_str=change_id_str)
  commit = index.commit(message=msg, parent_commits=[head.commit], head=False)
  new_head = head.set_commit(commit)

  print(f"Create a new commit {commit.hexsha} at {head.name}")

  return new_head


def overlay_entries(
    existing_entries: List[ExpectedUpstreamEntry],
    new_entries: List[ExpectedUpstreamEntry]) -> List[ExpectedUpstreamEntry]:
  r"""Return a list of entries after overlaying the new_entries.

  An entry in new_entries replaces the existing entry with the same dst_path.

  Args:
    existing_entries: current entries
    new_entries: entries being overlaid
  Returns:
    a list of entries
  """
  entries_map = {e.dst_path: e for e in existing_entries}
  entries_map.update((e.dst_path, e) for e in new_entries)
  return list(entries_map.values())


REGEX_CHANGE_ID = r"^Change-Id: (I[0-9a-f]+)$"
REGEX_BUG_ID = r"^Bug: ([0-9]+)$"


def extract_change_id(commit: Commit) -> Optional[str]:
  r"""Extract gerrit's Change-Id from a commit message.

  Args:
    commit: commit

  Returns:
    Change-Id, or None if the message has no Change-Id line
  """
  result = re.search(REGEX_CHANGE_ID, commit.message, re.M)
  return result.group(1) if result else None


def extract_bug_id(commit: Commit) -> Optional[str]:
  r"""Extract the bug id from a commit message.

  Args:
    commit: commit

  Returns:
    Buganizer Id, or None if the message has no Bug line
  """
  result = re.search(REGEX_BUG_ID, commit.message, re.M)
  return result.group(1) if result else None


def get_diff_entries(repo: Repo, base_expected_commit: Commit) -> Tuple[
    List[ExpectedUpstreamEntry], Set[str]]:
  """Get a list of entries different from the head commit.

  Validate EXPECTED_UPSTREAM file and return the list of
  modified or new entries between the working tree and HEAD.

  Args:
    repo: Repo
    base_expected_commit: the base commit

  Returns:
    a tuple of (new or modified entries, removed paths), or INVALID_DIFF,
    i.e. (None, None), when there is nothing valid to process
  """
  current_tracking_branch = repo.active_branch.tracking_branch()
  # tracking_branch() is None when the active branch tracks no remote branch.
  if (current_tracking_branch is None or
      current_tracking_branch.name != "aosp/main"):
    print("This script should only run on aosp/main branch. "
          f"Currently, this is on branch {repo.active_branch} "
          f"tracking {current_tracking_branch}", file=sys.stderr)
    return INVALID_DIFF

  print("Reading EXPECTED_UPSTREAM file...")
  head_commit = repo.head.commit
  # Diff HEAD against the working tree.
  diff_index = head_commit.diff(None)
  no_file_change = len(diff_index)
  if no_file_change == 0:
    print("Can't find any EXPECTED_UPSTREAM file change", file=sys.stderr)
    return INVALID_DIFF
  elif no_file_change > 1 or diff_index[0].a_rawpath != b"EXPECTED_UPSTREAM":
    print("Expect modification in the EXPECTED_UPSTREAM file only.\n"
          "Please remove / commit the other changes. The below file changes "
          "are detected: ", file=sys.stderr)
    print_diff_index(diff_index, file=sys.stderr)
    return INVALID_DIFF

  prev_file = ExpectedUpstreamFile(head_commit.tree["EXPECTED_UPSTREAM"]
                                   .data_stream.read())
  curr_file = ExpectedUpstreamFile()
  diff_entries = prev_file.get_new_or_modified_entries(curr_file)
  removed_paths = prev_file.get_removed_paths(curr_file)

  modified_entries = validate_and_remove_unmodified_entries(
      diff_entries, repo, base_expected_commit)

  if not modified_entries and not removed_paths:
    print("No need to update. All files are updated.")
    return INVALID_DIFF

  print("The following entries will be updated from upstream")
  for e in modified_entries:
    print(f" {e.dst_path}")
  for p in removed_paths:
    print(f" {p}")

  # NOTE(review): the unfiltered diff_entries (not modified_entries) are
  # returned, so entries whose file content is unchanged are still imported.
  # Presumably intentional to record the new EXPECTED_UPSTREAM refs - confirm.
  return diff_entries, removed_paths


def compute_absorbed_diff_entries(
    repo: Repo, base_commit: Commit, commit: Commit, overlaid_entries: List[
        ExpectedUpstreamEntry], removed_paths: Set[
            str]) -> Tuple[List[ExpectedUpstreamEntry], Set[str]]:
  r"""Compute the combined entries after absorbing the new changes.

  Args:
    repo: Repo
    base_commit: the base commit in the expected_upstream
    commit: The commit diff-ed against from the base_commit
    overlaid_entries: Additional entries overlaid on top of the diff.
    removed_paths: removed paths

  Returns:
    Combined diff entries and the remaining removed paths
  """
  prev_file = ExpectedUpstreamFile(base_commit.tree["EXPECTED_UPSTREAM"]
                                   .data_stream.read())
  curr_file = ExpectedUpstreamFile(commit.tree["EXPECTED_UPSTREAM"]
                                   .data_stream.read())
  diff_entries = prev_file.get_new_or_modified_entries(curr_file)
  diff_entries = overlay_entries(diff_entries, overlaid_entries)
  # Paths that are both imported by the diff and removed cancel each other
  # out. The set must hold path strings (not entry objects) because it is
  # compared against dst_path / removed_paths below.
  cancelled_paths = {e.dst_path for e in diff_entries
                     if e.dst_path in removed_paths}
  diff_entries = [e for e in diff_entries
                  if e.dst_path not in cancelled_paths]
  new_removed_paths = {p for p in removed_paths if p not in cancelled_paths}
  return validate_and_remove_unmodified_entries(
      diff_entries, repo, base_commit), new_removed_paths


def main_run(
    repo: Repo, expected_upstream_base: str,
    bug_id: str, use_rerere: bool, is_absorbed: bool,
    discard_working_tree: bool) -> None:
  """Create the commits importing files according to the EXPECTED_UPSTREAM.

  Args:
    repo: Repo
    expected_upstream_base: the base commit in the expected_upstream branch.
    bug_id: bug id
    use_rerere: Reuses the recorded resolution from git
    is_absorbed: Absorb the new changes from EXPECTED_UPSTREAM into the
      existing commits created by this script
    discard_working_tree: discard working tree flag.
  """
  last_master_commit = repo.head.commit
  last_master_change_id = None
  last_expected_change_id = None
  if is_absorbed:
    head = repo.head
    if len(head.commit.parents) != 2:
      print("Error: HEAD isn't a merge commit.", file=sys.stderr)
      return

    last_branch = None
    last_expected_commit = None
    for commit in head.commit.parents:
      name_rev: List[str] = commit.name_rev.split(" ", 1)
      if (len(name_rev) > 1 and  # name_rev[1] is usually the branch name
          name_rev[1].startswith(TEMP_EXPECTED_BRANCH_PREFIX)):
        last_branch = name_rev[1]
        last_expected_commit = commit
      else:
        last_master_commit = commit

    if last_branch is None:
      print("Error: Can't find the last commit in the expected_upstream "
            "branch.", file=sys.stderr)
      return

    if len(last_expected_commit.parents) != 1:
      print(f"Error: The head commit at {last_branch} isn't in the expected "
            "state.", file=sys.stderr)
      return

    base_expected_branch_commit = last_expected_commit.parents[0]
    last_expected_change_id = extract_change_id(last_expected_commit)
    last_master_change_id = extract_change_id(head.commit)
    if bug_id is None:
      bug_id = extract_bug_id(last_expected_commit)
  else:
    if expected_upstream_base is None:
      expected_upstream_base = "aosp/expected_upstream"
    # Pre-bind the name so that the finally clause can test it even when
    # repo.commit() raises; the original exception then propagates intact.
    base_expected_branch_commit = None
    try:
      base_expected_branch_commit = repo.commit(expected_upstream_base)
    finally:
      if base_expected_branch_commit is None:
        print(f"{expected_upstream_base} is not found in this repository.",
              file=sys.stderr)

  diff_entries, removed_paths = get_diff_entries(repo,
                                                 base_expected_branch_commit)
  if not diff_entries and not removed_paths:
    return

  if is_absorbed:
    diff_entries, removed_paths = compute_absorbed_diff_entries(
        repo, base_expected_branch_commit, last_expected_commit, diff_entries,
        removed_paths)

  # Due to a limitation in GitPython, index.remove requires switching branch
  # and discard the working tree.
  if removed_paths and not discard_working_tree:
    print("-r option is required to discard the current working tree.",
          file=sys.stderr)
    return

  create_commit_staging_diff(repo)

  master_head = repo.active_branch
  branch_name = create_random_branch_name()
  new_branch = repo.create_head(branch_name, base_expected_branch_commit.hexsha)
  new_branch.set_tracking_branch(repo.remotes.aosp.refs.expected_upstream)
  new_branch = create_commit_at_expected_upstream(
      repo, new_branch, diff_entries, removed_paths, bug_id,
      last_expected_change_id, discard_working_tree)

  # Clean the working tree before merging branch
  if discard_working_tree:
    repo.head.reference = master_head

  repo.head.reset(commit=last_master_commit, working_tree=True)
  # Delete the imported files that do not exist in the reset HEAD; otherwise
  # they would be left behind as untracked files before the merge.
  for e in diff_entries:
    if not has_file_in_tree(e.dst_path, repo.head.commit.tree):
      path = Path(LIBCORE_DIR, e.dst_path)
      path.unlink(missing_ok=True)

  affected_paths = [e.dst_path for e in diff_entries] + list(removed_paths)
  str_affected_paths = "\n ".join(affected_paths)
  summary_msg = create_commit_summary(diff_entries)
  str_bug = "" if bug_id is None else f"Bug: {bug_id}"
  change_id_str = ""
  if last_master_change_id:
    change_id_str = f"\nChange-Id: {last_master_change_id}"
  msg = MSG_SECOND_COMMIT.format(
      summary=summary_msg, files=str_affected_paths, bug=str_bug,
      change_id_str=change_id_str)
  rerere_str = "rerere.enabled="
  rerere_str += "true" if use_rerere else "false"

  # Map each imported test file to the java package name to be inserted.
  test_dst_paths = {}
  for e in diff_entries:
    if e.dst_path.startswith(TEST_PATH):
      class_name = OjluniFinder.translate_ojluni_path_to_class_name(e.dst_path)
      if class_name is not None:
        package_name = class_name[:class_name.rfind(".")]
        test_dst_paths[e.dst_path] = package_name

  # Run git-merge command here, and will let the user to handle
  # any errors and merge conflicts
  try:
    repo.git.execute(["git", "-c", rerere_str, "merge",
                      new_branch.commit.hexsha, "-m", msg])
  except GitCommandError as err:
    print(f"Error: {err}", file=sys.stderr)

  insert_package_name_to_tests(test_dst_paths)


def insert_package_name_to_tests(test_dst_paths: Dict[str, str]):
  """Insert package name into the test file before the java import statement.

  The package statement is inserted between the leading license block and the
  first import statement. Files without that pattern are left unchanged.

  Args:
    test_dst_paths: Map the file path to package names. Relative paths are
      resolved against LIBCORE_DIR.
  """
  for dst_path, package_name in test_dst_paths.items():
    # Resolve against LIBCORE_DIR, like the other file accesses in this
    # script, so that the result doesn't depend on the current directory.
    absolute_path = Path(LIBCORE_DIR, dst_path)
    if not absolute_path.is_file():
      print(f"Warning: {dst_path} isn't found. Skip inserting the package "
            "name.", file=sys.stderr)
      continue

    with absolute_path.open("r", encoding="utf-8") as file:
      src = file.read()
    replacement = r"\1package " + package_name + r";\n\n\2"
    modified = REGEX_LICENSE_AND_IMPORT.sub(replacement, src, count=1)
    if modified != src:
      with absolute_path.open("w", encoding="utf-8") as out:
        out.write(modified)


def create_random_branch_name():
  """Return a temp branch name with a random 10-character suffix."""
  rand_suffix = "".join(random.choice(string.ascii_lowercase +
                                      string.digits) for _ in range(10))
  return f"{TEMP_EXPECTED_BRANCH_PREFIX}{rand_suffix}"


def print_diff_index(index: DiffIndex, file=sys.stdout) -> None:
  """Print the raw path of each diff in the index to the given file."""
  for diff in index:
    print(f" {diff.a_rawpath}", file=file)


def main(argv: Sequence[str]) -> None:
  """Parse the command line arguments and run the import and merge."""
  arg_parser = argparse.ArgumentParser(
      description="Read the EXPECTED_UPSTREAM and update the files from the "
                  "OpenJDK. This script imports the files from OpenJDK into "
                  "the expected_upstream branch and merges it into the "
                  "current branch.")
  arg_parser.add_argument(
      "-a", "--absorbed-to-last-merge", action="store_true",
      help="Import more files but absorb them into the last commits created "
           "by this script.")
  arg_parser.add_argument(
      "--disable-rerere", action="store_true",
      help="Do not re-use the recorded resolution from git.")
  arg_parser.add_argument(
      "-r", "--reset", action="store_true",
      help="Discard the current working tree. Experimental flag to "
           "support file removal from ojluni/.")
  arg_parser.add_argument(
      "-b", "--bug", nargs="?",
      help="Buganizer Id")
  arg_parser.add_argument(
      "-e", "--expected_upstream_base", nargs="?",
      help="The base commit in the expected_upstream branch")

  args = arg_parser.parse_args(argv)

  bug_id = args.bug
  expected_upstream_base = args.expected_upstream_base
  use_rerere = not args.disable_rerere
  is_absorbed = args.absorbed_to_last_merge
  discard_working_tree = args.reset
  if is_absorbed and expected_upstream_base is not None:
    print("Error: -a and -e options can't be used together.", file=sys.stderr)
    return

  repo = Repo(LIBCORE_DIR.as_posix())
  try:
    main_run(repo, expected_upstream_base, bug_id, use_rerere, is_absorbed,
             discard_working_tree)
  finally:
    repo.close()


if __name__ == "__main__":
  main(sys.argv[1:])