1#!/usr/bin/python3 -B
2
3# Copyright 2022 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Read the EXPECTED_UPSTREAM and  merge the files from the upstream."""
18import argparse
19import datetime
20import logging
21# pylint: disable=g-importing-member
22import os.path
23from pathlib import Path
24import random
25import re
26import string
27import sys
28from typing import List, Tuple, Set, Dict
29from typing import Sequence
30
31# pylint: disable=g-multiple-import
32from common_util import (
33    ExpectedUpstreamEntry,
34    ExpectedUpstreamFile,
35    has_file_in_tree,
36    LIBCORE_DIR,
37    OjluniFinder,
38    TEST_PATH,
39)
40
41from git import (
42    Commit,
43    DiffIndex,
44    GitCommandError,
45    Head,
46    IndexFile,
47    Repo,
48)
49
# Enable INFO-level logging so that errors emitted by GitPython are shown.
logging.basicConfig(level=logging.INFO)
52
53
def validate_and_remove_unmodified_entries(
    entries: List[ExpectedUpstreamEntry],
    repo: Repo, commit: Commit) -> List[ExpectedUpstreamEntry]:
  """Validates the entries and keeps those whose file content must change.

  Every entry is first resolved against the git database (its git_ref and
  src_path must exist), which doubles as validation. An entry is kept when
  its dst_path is absent from the tree of `commit`, or when the upstream
  content differs from the content stored in that tree.

  Args:
    entries: entries to validate
    repo: the repository object
    commit: the commit whose tree holds the current dst files

  Returns:
    the entries that need to be updated, in their original order

  Raises:
    Whatever the git lookup raises for a bad entry; the offending entry is
    printed to stderr before the exception is re-raised.
  """
  current_tree = commit.tree
  outdated: List[ExpectedUpstreamEntry] = []

  for entry in entries:
    try:
      # Resolve the upstream blob first so that an invalid ref / path fails
      # even when the destination file doesn't exist yet.
      upstream_blob = repo.commit(entry.git_ref).tree.join(entry.src_path)
      if has_file_in_tree(entry.dst_path, current_tree):
        current_blob = current_tree.join(entry.dst_path)
        # The data streams are closed during GC.
        needs_update = (upstream_blob.data_stream.read() !=
                        current_blob.data_stream.read())
      else:
        # The file is missing in the tree, so it always has to be imported.
        needs_update = True

      if needs_update:
        outdated.append(entry)
    except BaseException:
      print(f"ERROR: reading entry: {entry}", file=sys.stderr)
      raise

  return outdated
81
82
# Path of this script relative to LIBCORE_DIR; embedded in the message of the
# import commit.
THIS_TOOL_PATH = Path(__file__).relative_to(LIBCORE_DIR)

# Name prefix of the temporary local branches created by this script. main_run
# also uses it to recognize the expected_upstream parent of a merge commit.
TEMP_EXPECTED_BRANCH_PREFIX = "expected_upstream_"

# Message template of the commit created on the temporary expected_upstream
# branch. Only the "Generated by" fragment is an f-string (resolved at import
# time); the remaining {placeholders} are filled later via str.format().
# {change_id_str} has no preceding newline here, so callers pass either an
# empty string or a string starting with "\n".
MSG_FIRST_COMMIT = ("Import {summary}\n"
                    "\n"
                    "List of files:\n"
                    "  {files}\n"
                    "\n"
                    f"Generated by {THIS_TOOL_PATH}\n"
                    "\n"
                    "{bug}\n"
                    "Test: N/A\n"
                    "No-Typo-Check: Imported files"
                    "{change_id_str}")

# Message template of the merge commit; filled via str.format() with the same
# placeholders as MSG_FIRST_COMMIT.
MSG_SECOND_COMMIT = ("Merge {summary} into the "
                     "aosp/main branch\n"
                     "\n"
                     "List of files:\n"
                     "  {files}\n"
                     "\n"
                     "{bug}\n"
                     "Test: N/A"
                     "{change_id_str}")

# Sentinel returned by get_diff_entries when there is nothing to process.
INVALID_DIFF = (None, None)

# Matches a /* ... */ block comment followed by optional spaces and at least
# one newline.
LICENSE_BLOCK = r"\/\*(?:\*(?!\/)|[^*])*\*\/[ ]*\n+"
# Matches a block comment that starts a line and is directly followed by a
# java import statement. Group 1 is the comment (with its trailing newlines),
# group 2 is the import line.
REGEX_LICENSE_AND_IMPORT = re.compile(
    r"^(" + LICENSE_BLOCK + ")(import .+;)$", re.MULTILINE)
114
115
def create_commit_staging_diff(repo: Repo) -> None:
  r"""Save the current EXPECTED_UPSTREAM file in a new git commit.

  The commit is created on top of the HEAD commit without moving HEAD, and
  its hash is printed so that the file can be retrieved if this script fails
  later.

  Args:
    repo: the repository object
  """
  parent = repo.head.commit
  staging_index = IndexFile.from_tree(repo, parent)
  staging_index.add("EXPECTED_UPSTREAM")

  timestamp = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
  # head=False keeps HEAD where it is; the commit is only a backup.
  backup = staging_index.commit(
      message=f"Staging EXPECTED_UPSTREAM at {timestamp}",
      parent_commits=[parent],
      head=False)
  sha = backup.hexsha

  print(
      f"The current EXPECTED_UPSTREAM file is saved in {sha}.\n"
      "If this script fails in the later stage, please retrieve the file by:\n"
      f"  git checkout {sha} -- EXPECTED_UPSTREAM")
137
def create_commit_summary(diff_entries: List[ExpectedUpstreamEntry]) -> str:
  r"""Build the one-line summary used in the commit messages.

  Non-test classes are summarized when there are any; otherwise the test
  classes are used. The summary is either a single class name, a package
  name, a class-name prefix followed by "*", or the generic word "files".
  When all summarized entries share one git ref, " from <ref>" is appended.

  Args:
    diff_entries: list of new / modified entries

  Returns:
    a string message
  """
  fallback = "files"
  named_entries = [
      (entry, OjluniFinder.translate_ojluni_path_to_class_name(entry.dst_path))
      for entry in diff_entries]
  # Entries without a class name are outside of the known ojluni paths.
  resolved = [pair for pair in named_entries if pair[1] is not None]

  # Prefer the non-test classes, and fall back to the test classes.
  selected = [pair for pair in resolved if not pair[1].startswith("test.")]
  if not selected:
    selected = [pair for pair in resolved if pair[1].startswith("test.")]
  if not selected:
    return fallback

  # Keep the ref only if every selected entry comes from the same revision.
  first_ref = selected[0][0].git_ref
  has_mixed_refs = any(entry.git_ref != first_ref for entry, _ in selected)
  shared_ref = None if has_mixed_refs else first_ref

  class_names = [name for _, name in selected]
  if len(class_names) == 1:
    summary = class_names[0].split(".")[-1]
  else:
    prefix = os.path.commonprefix(class_names)
    parts = prefix.split(".")
    tail = parts[-1]
    if len(parts) <= 2:
      # A short java package, e.g. javax. or java.n, isn't meaningful.
      summary = fallback
    elif not tail or tail[0].islower():
      # Assume a package name isn't title-case; drop the partial segment
      # after the last ".".
      summary = ".".join(parts[:-1])
    else:
      summary = prefix + "*"

  if shared_ref is None:
    return summary

  short_ref = shared_ref.split("/", 1)[-1]
  return f"{summary} from {short_ref}"
198
199
def create_commit_at_expected_upstream(
    repo: Repo, head: Head, new_entries: List[ExpectedUpstreamEntry],
    removed_paths: Set[str], bug_id: str,
    last_expected_change_id: str, discard_working_tree: bool) -> Head:
  r"""Create a new commit importing the given files at the head.

  The upstream content of every new entry is written into the working tree,
  the EXPECTED_UPSTREAM file is regenerated from the version stored in the
  head commit plus the new entries minus the removed paths, and the result
  is committed on top of `head`.

  Args:
    repo: the repository object
    head: the temp expected_upstream branch
    new_entries: a list of entries
    removed_paths: removed paths
    bug_id: bug id, or None to leave the "Bug:" line out
    last_expected_change_id: Gerrit's change Id to reuse; a falsy value omits
      the Change-Id line
    discard_working_tree: if true, HEAD is pointed at `head` and the
      repository index is reset before staging; otherwise a separate index
      built from the head commit is used.

  Returns:
    the `head` branch after it has been moved to the new commit
  """
  affected_paths = [e.dst_path for e in new_entries] + list(removed_paths)
  str_affected_paths = "\n  ".join(affected_paths)

  for entry in new_entries:
    ref = entry.git_ref
    upstream_commit = repo.commit(ref)
    src_blob = upstream_commit.tree[entry.src_path]
    # Write into the file system directly because GitPython provides no API
    # writing into the index in memory. IndexFile.move doesn't help here,
    # because the API requires the file on the working tree too.
    # However, it's fine, because we later reset the HEAD.
    absolute_dst_path = Path(LIBCORE_DIR, entry.dst_path)
    absolute_dst_path.parent.mkdir(parents=True, exist_ok=True)
    with absolute_dst_path.open("wb") as file:
      file.write(src_blob.data_stream.read())

  # Start from the EXPECTED_UPSTREAM stored in the head commit rather than the
  # one in the working tree.
  entries = ExpectedUpstreamFile(head.commit.tree["EXPECTED_UPSTREAM"]
                                 .data_stream.read()).read_all_entries()
  entries = overlay_entries(entries, new_entries)
  entries = list(filter(lambda e: e.dst_path not in removed_paths, entries))
  # Write the entries to the file system.
  ExpectedUpstreamFile().sort_and_write_all_entries(entries)

  if discard_working_tree:
    # Switch HEAD to the temp branch and reset the index only; the files
    # written above stay in the working tree.
    repo.head.reference = head
    repo.head.reset(index=True)
    index = repo.index
  else:
    index = IndexFile.from_tree(repo, head.commit)
  index.add("EXPECTED_UPSTREAM")
  for entry in new_entries:
    index.add(entry.dst_path)

  for p in removed_paths:
    index.remove(p)

  summary_msg = create_commit_summary(new_entries)
  str_bug = "" if bug_id is None else f"Bug: {bug_id}"
  change_id_str = ""
  if last_expected_change_id:
    change_id_str = f"\nChange-Id: {last_expected_change_id}"
  msg = MSG_FIRST_COMMIT.format(summary=summary_msg, files=str_affected_paths,
                                bug=str_bug, change_id_str=change_id_str)
  # head=False: HEAD isn't advanced by the commit; the branch is moved
  # explicitly below.
  commit = index.commit(message=msg, parent_commits=[head.commit], head=False)
  new_head = head.set_commit(commit)

  print(f"Create a new commit {commit.hexsha} at {head.name}")

  return new_head
267
268
def overlay_entries(
    existing_entries: List[ExpectedUpstreamEntry],
    new_entries: List[ExpectedUpstreamEntry]) -> List[ExpectedUpstreamEntry]:
  r"""Merge two entry lists, letting new_entries win on the same dst_path.

  An overridden entry keeps the position of the entry it replaces; entries
  with a previously unseen dst_path are appended in order.

  Args:
    existing_entries: current entries
    new_entries: entries being overlaid
  Returns:
    a list of entries
  """
  by_dst_path = {entry.dst_path: entry for entry in existing_entries}
  by_dst_path.update((entry.dst_path, entry) for entry in new_entries)
  return list(by_dst_path.values())
288
289
# Line patterns for the commit message footers. They are searched with re.M so
# that ^ and $ match at every line of the message. Group 1 is the id.
REGEX_CHANGE_ID = r"^Change-Id: (I[0-9a-f]+)$"
REGEX_BUG_ID = r"^Bug: ([0-9]+)$"
292
293
def extract_change_id(commit: Commit) -> str:
  r"""Find the first gerrit Change-Id line in a commit message.

  Args:
     commit: commit

  Returns:
    the Change-Id, or None if the message has no such line
  """
  found = re.search(REGEX_CHANGE_ID, commit.message, re.MULTILINE)
  if found is None:
    return None
  return found.group(1)
305
306
def extract_bug_id(commit: Commit) -> str:
  r"""Find the first "Bug:" line in a commit message.

  Args:
     commit: commit

  Returns:
    the Buganizer Id, or None if the message has no such line
  """
  found = re.search(REGEX_BUG_ID, commit.message, re.MULTILINE)
  if found is None:
    return None
  return found.group(1)
318
319
def get_diff_entries(repo: Repo, base_expected_commit: Commit) -> Tuple[
    List[ExpectedUpstreamEntry], Set[str]]:
  """Get a list of entries different from the head commit.

  Validate EXPECTED_UPSTREAM file and return the list of
  modified or new entries between the working tree and HEAD.

  Args:
    repo: Repo
    base_expected_commit: the commit whose tree is compared against the
      upstream content to decide whether any file really has to be updated

  Returns:
    A tuple of (new or modified entries, removed paths), or INVALID_DIFF,
    i.e. (None, None), when the repository isn't in the expected state or
    there is nothing to update. The reason is printed in that case.
  """
  current_tracking_branch = repo.active_branch.tracking_branch()
  # tracking_branch() gives None for a branch without an upstream; treat it
  # the same as tracking the wrong branch instead of failing on `.name`.
  if (current_tracking_branch is None or
      current_tracking_branch.name != "aosp/main"):
    print("This script should only run on aosp/main branch. "
          f"Currently, this is on branch {repo.active_branch} "
          f"tracking {current_tracking_branch}", file=sys.stderr)
    return INVALID_DIFF

  print("Reading EXPECTED_UPSTREAM file...")
  head_commit = repo.head.commit
  # diff(None) compares the HEAD commit against the working tree.
  diff_index = head_commit.diff(None)
  changed_file_count = len(diff_index)
  if changed_file_count == 0:
    print("Can't find any EXPECTED_UPSTREAM file change", file=sys.stderr)
    return INVALID_DIFF
  if (changed_file_count > 1 or
      diff_index[0].a_rawpath != b"EXPECTED_UPSTREAM"):
    print("Expect modification in the EXPECTED_UPSTREAM file only.\n"
          "Please remove / commit the other changes. The below file changes "
          "are detected: ", file=sys.stderr)
    print_diff_index(diff_index, file=sys.stderr)
    return INVALID_DIFF

  prev_file = ExpectedUpstreamFile(head_commit.tree["EXPECTED_UPSTREAM"]
                                   .data_stream.read())
  curr_file = ExpectedUpstreamFile()
  diff_entries = prev_file.get_new_or_modified_entries(curr_file)
  removed_paths = prev_file.get_removed_paths(curr_file)

  # This call validates every entry, and tells which files really differ.
  modified_entries = validate_and_remove_unmodified_entries(
      diff_entries, repo, base_expected_commit)

  if not modified_entries and not removed_paths:
    print("No need to update. All files are updated.")
    return INVALID_DIFF

  print("The following entries will be updated from upstream")
  for e in modified_entries:
    print(f"  {e.dst_path}")
  for p in removed_paths:
    print(f"  {p}")

  # NOTE(review): the complete diff_entries list is returned, not only
  # modified_entries. Presumably this keeps entries whose metadata changed but
  # whose content didn't -- confirm before narrowing it.
  return diff_entries, removed_paths
375
376
def compute_absorbed_diff_entries(
    repo: Repo, base_commit: Commit, commit: Commit, overlaid_entries: List[
        ExpectedUpstreamEntry], removed_paths: Set[
            str]) -> Tuple[List[ExpectedUpstreamEntry], Set[str]]:
  r"""Compute the combined entries after absorbing the new changes.

  The entries that differ between the EXPECTED_UPSTREAM files of base_commit
  and commit are overlaid with overlaid_entries, and then validated against
  the tree of base_commit.

  Args:
    repo: Repo
    base_commit: the base commit in the expected_upstream
    commit: The commit diff-ed against from the base_commit
    overlaid_entries: Additional entries overlaid on top of the diff.
    removed_paths: removed paths

  Returns:
    A tuple of (combined diff entries that need an update, removed paths)
  """
  prev_file = ExpectedUpstreamFile(base_commit.tree["EXPECTED_UPSTREAM"]
                                   .data_stream.read())
  curr_file = ExpectedUpstreamFile(commit.tree["EXPECTED_UPSTREAM"]
                                   .data_stream.read())
  diff_entries = prev_file.get_new_or_modified_entries(curr_file)
  diff_entries = overlay_entries(diff_entries, overlaid_entries)
  # NOTE(review): `intersection` is a set of entry objects, while the two
  # filters below look up path strings in it. Unless ExpectedUpstreamEntry
  # compares equal to its dst_path, neither filter drops anything. Making this
  # a set of paths would change which files get removed -- confirm the intent
  # before changing it. Building the set also needs hashable entries.
  intersection = set(filter(lambda e: e.dst_path in removed_paths,
                            diff_entries))
  diff_entries = list(filter(lambda e: e.dst_path not in intersection, diff_entries))
  new_removed_paths = set(filter(lambda p: p not in intersection,
                                 removed_paths))
  return validate_and_remove_unmodified_entries(
      diff_entries, repo, base_commit), new_removed_paths
406
407
def main_run(
    repo: Repo, expected_upstream_base: str,
    bug_id: str, use_rerere: bool, is_absorbed: bool,
    discard_working_tree: bool) -> None:
  """Create the commits importing files according to the EXPECTED_UPSTREAM.

  A commit importing the files is created on a new temporary
  expected_upstream branch, and that branch is then merged into the current
  branch with `git merge`. Merge errors are printed and left to the user.

  Args:
    repo: Repo
    expected_upstream_base: the base commit in the expected_upstream branch.
      Defaults to "aosp/expected_upstream" when None. Ignored if is_absorbed.
    bug_id: bug id. If None while absorbing, it is taken from the last
      commit in the expected_upstream branch.
    use_rerere: Reuses the recorded resolution from git
    is_absorbed: Absorb the new changes from EXPECTED_UPSTREAM into the
      existing commits created by this script
    discard_working_tree: discard working tree flag.

  Raises:
    Whatever repo.commit() raises when expected_upstream_base can't be
    resolved; a message is printed to stderr first.
  """
  last_master_commit = repo.head.commit
  last_master_change_id = None
  last_expected_change_id = None
  if is_absorbed:
    head = repo.head
    if len(head.commit.parents) != 2:
      print("Error: HEAD isn't a merge commit.", file=sys.stderr)
      return

    # Tell the two parents of the merge commit apart by branch name.
    last_branch = None
    last_expected_commit = None
    for commit in head.commit.parents:
      name_rev: list[str] = commit.name_rev.split(" ", 1)
      if (len(name_rev) > 1 and  # name_rev[1] is usually the branch name
          name_rev[1].startswith(TEMP_EXPECTED_BRANCH_PREFIX)):
        last_branch = name_rev[1]
        last_expected_commit = commit
      else:
        last_master_commit = commit

    if last_branch is None:
      print("Error: Can't find the last commit in the expected_upstream "
            "branch.", file=sys.stderr)
      return

    if len(last_expected_commit.parents) != 1:
      print(f"Error: The head commit at {last_branch} isn't in the expected "
            "state.", file=sys.stderr)
      return

    base_expected_branch_commit = last_expected_commit.parents[0]
    # Reuse the Change-Ids so that the new commits replace the old ones.
    last_expected_change_id = extract_change_id(last_expected_commit)
    last_master_change_id = extract_change_id(head.commit)
    if bug_id is None:
      bug_id = extract_bug_id(last_expected_commit)
  else:
    if expected_upstream_base is None:
      expected_upstream_base = "aosp/expected_upstream"
    try:
      base_expected_branch_commit = repo.commit(expected_upstream_base)
    except Exception:
      # Explain the failure, and let the original exception propagate.
      print(f"{expected_upstream_base} is not found in this repository.",
            file=sys.stderr)
      raise

  diff_entries, removed_paths = get_diff_entries(repo,
                                                 base_expected_branch_commit)
  if not diff_entries and not removed_paths:
    return

  if is_absorbed:
    diff_entries, removed_paths = compute_absorbed_diff_entries(
        repo, base_expected_branch_commit, last_expected_commit, diff_entries,
        removed_paths)

  # Due to a limitation in GitPython, index.remove requires switching branch
  # and discard the working tree.
  if removed_paths and not discard_working_tree:
    print("-r option is required to discard the current working tree.",
          file=sys.stderr)
    return

  create_commit_staging_diff(repo)

  master_head = repo.active_branch
  branch_name = create_random_branch_name()
  new_branch = repo.create_head(branch_name, base_expected_branch_commit.hexsha)
  new_branch.set_tracking_branch(repo.remotes.aosp.refs.expected_upstream)
  new_branch = create_commit_at_expected_upstream(
      repo, new_branch, diff_entries, removed_paths, bug_id,
      last_expected_change_id, discard_working_tree)

  # Clean the working tree before merging branch
  if discard_working_tree:
    repo.head.reference = master_head

  repo.head.reset(commit=last_master_commit, working_tree=True)
  # The reset leaves files unknown to the target commit behind; delete the
  # imported ones so that they can't get in the way of the merge.
  for e in diff_entries:
    if not has_file_in_tree(e.dst_path, repo.head.commit.tree):
      path = Path(LIBCORE_DIR, e.dst_path)
      path.unlink(missing_ok=True)

  affected_paths = [e.dst_path for e in diff_entries] + list(removed_paths)
  str_affected_paths = "\n  ".join(affected_paths)
  summary_msg = create_commit_summary(diff_entries)
  str_bug = "" if bug_id is None else f"Bug: {bug_id}"
  change_id_str = ""
  if last_master_change_id:
    change_id_str = f"\nChange-Id: {last_master_change_id}"
  msg = MSG_SECOND_COMMIT.format(
      summary=summary_msg, files=str_affected_paths, bug=str_bug,
      change_id_str=change_id_str)
  rerere_str = "rerere.enabled="
  rerere_str += "true" if use_rerere else "false"

  # Map every imported test file to the java package it belongs to.
  test_dst_paths = {}
  for e in diff_entries:
    if e.dst_path.startswith(TEST_PATH):
      class_name = OjluniFinder.translate_ojluni_path_to_class_name(e.dst_path)
      if class_name is not None:
        package_name = class_name[:class_name.rfind(".")]
        test_dst_paths[e.dst_path] = package_name

  # Run git-merge command here, and will let the user to handle
  # any errors and merge conflicts
  try:
    repo.git.execute(["git", "-c", rerere_str, "merge",
                      new_branch.commit.hexsha, "-m", msg])
  except GitCommandError as err:
    print(f"Error: {err}", file=sys.stderr)

  # This runs even if the merge reported an error.
  insert_package_name_to_tests(test_dst_paths)
534
535
def insert_package_name_to_tests(test_dst_paths: Dict[str, str]):
  """Insert package name into the test file before the java import statement.

  The package statement is placed between the first block comment that is
  directly followed by an import statement and that import. A file without
  such a match is left untouched.

  Args:
    test_dst_paths: Map the file path to package names. A relative path is
      resolved against LIBCORE_DIR.
  """
  for dst_path, package_name in test_dst_paths.items():
    # Resolve against LIBCORE_DIR like the other file accesses in this script,
    # so that the result doesn't depend on the current working directory.
    file_path = Path(LIBCORE_DIR, dst_path)
    # Read as UTF-8 explicitly instead of relying on the locale encoding.
    src = file_path.read_text(encoding="utf-8")
    replacement = r"\1package " + package_name + r";\n\n\2"
    modified = REGEX_LICENSE_AND_IMPORT.sub(replacement, src, count=1)
    # Don't rewrite a file when no package statement has been inserted.
    if modified != src:
      file_path.write_text(modified, encoding="utf-8")
549
550
def create_random_branch_name():
  """Return TEMP_EXPECTED_BRANCH_PREFIX plus 10 random [a-z0-9] characters."""
  alphabet = string.ascii_lowercase + string.digits
  suffix_chars = [random.choice(alphabet) for _ in range(10)]
  return TEMP_EXPECTED_BRANCH_PREFIX + "".join(suffix_chars)
555
556
def print_diff_index(index: DiffIndex, file=sys.stdout) -> None:
  """Print the a_rawpath of every diff in the index, one per indented line.

  Args:
    index: the diffs to list
    file: the stream to print to
  """
  raw_paths = (changed.a_rawpath for changed in index)
  for raw_path in raw_paths:
    print(f"  {raw_path}", file=file)
560
561
def main(argv: Sequence[str]) -> None:
  """Parse the command line options and run the import on the libcore repo.

  Args:
    argv: the command line arguments, without the program name
  """
  parser = argparse.ArgumentParser(
      description="Read the EXPECTED_UPSTREAM and update the files from the "
                  "OpenJDK. This script imports the files from OpenJDK into "
                  "the expected_upstream branch and merges it into the "
                  "current branch.")

  # Boolean switches, in the order they are listed in the help output.
  switches = (
      (("-a", "--absorbed-to-last-merge"),
       "Import more files but absorb them into the last commits created "
       "by this script."),
      (("--disable-rerere",),
       "Do not re-use the recorded resolution from git."),
      (("-r", "--reset"),
       "Discard the current working tree. Experimental flag to "
       "support file removal from ojluni/."),
  )
  for names, help_text in switches:
    parser.add_argument(*names, action="store_true", help=help_text)

  # Options taking an optional value.
  valued_options = (
      (("-b", "--bug"), "Buganizer Id"),
      (("-e", "--expected_upstream_base"),
       "The base commit in the expected_upstream branch"),
  )
  for names, help_text in valued_options:
    parser.add_argument(*names, nargs="?", help=help_text)

  options = parser.parse_args(argv)

  absorbing = options.absorbed_to_last_merge
  base = options.expected_upstream_base
  if absorbing and base is not None:
    print("Error: -a and -e options can't be used together.", file=sys.stderr)
    return

  repo = Repo(LIBCORE_DIR.as_posix())
  try:
    main_run(repo, base, options.bug, not options.disable_rerere, absorbing,
             options.reset)
  finally:
    repo.close()
603
604
# Run only when executed as a script; the program name is stripped from the
# arguments.
if __name__ == "__main__":
  main(sys.argv[1:])
607