1#!/usr/bin/python3 -B
2
3# Copyright 2021 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16"""Read the EXPECTED_UPSTREAM and update the files from the upstream."""
17import argparse
18import logging
19# pylint: disable=g-importing-member
20from pathlib import Path
21import sys
22from typing import List
23from typing import Sequence
24
25# pylint: disable=g-multiple-import
26from common_util import (
27    ExpectedUpstreamEntry,
28    ExpectedUpstreamFile,
29    has_file_in_tree,
30    LIBCORE_DIR,
31)
32
33from git import (
34    Blob,
35    IndexFile,
36    Repo,
37)
38
# Enable INFO-level logging so that errors emitted by GitPython are shown
logging.basicConfig(level=logging.INFO)

# SHA of an arbitrary existing commit with an empty tree. It is used below to
# build an index that initially contains no staged files.
EMPTY_COMMIT_SHA = "d85bc16ba1cdcc20bec6fcbfe46dc90f9fcd2f78"
44
45
def validate_and_remove_updated_entries(
    entries: List[ExpectedUpstreamEntry],
    repo: Repo) -> List[ExpectedUpstreamEntry]:
  """Returns a list of entries of which the file content needs to be updated.

  Each entry is validated by resolving its git_ref and src_path in the git
  database. An entry is kept in the result when its dst_path is missing in the
  HEAD tree, or when the object at dst_path differs from the upstream object.

  Args:
    entries: the entries to be validated
    repo: the repository object

  Returns:
    The subset of entries, in the original order, that are outdated in HEAD.

  Raises:
    Exception: any error raised while resolving an entry is re-raised after the
      offending entry has been printed to stderr.
  """
  head_tree = repo.head.commit.tree
  result: List[ExpectedUpstreamEntry] = []

  for e in entries:
    try:
      # The following steps validate each entry by querying the git database
      commit = repo.commit(e.git_ref)
      source_blob = commit.tree.join(e.src_path)
      if not has_file_in_tree(e.dst_path, head_tree):
        # Add the entry if the file is missing in the HEAD
        result.append(e)
        continue

      dst_blob = head_tree.join(e.dst_path)
      # Add the entry if the content is different. Git objects are
      # content-addressed, so comparing the object ids is equivalent to
      # comparing the file contents, without reading both files into memory
      # and without leaving data streams open until GC.
      if source_blob.hexsha != dst_blob.hexsha:
        result.append(e)
    except Exception:
      # Report which entry is broken, then let the original error propagate.
      # KeyboardInterrupt / SystemExit are deliberately not reported as
      # entry errors.
      print(f"ERROR: reading entry: {e}", file=sys.stderr)
      raise

  return result
73
74
def partition_entries_by_ref(
    entries: List[ExpectedUpstreamEntry]) -> List[List[ExpectedUpstreamEntry]]:
  """Groups the entries sharing the same git_ref.

  Args:
    entries: the entries to be grouped

  Returns:
    One list per distinct git_ref, in the order in which each git_ref is first
    seen. The entries inside a list keep their relative order.
  """
  entries_by_ref = {}
  for entry in entries:
    entries_by_ref.setdefault(entry.git_ref, []).append(entry)

  return list(entries_by_ref.values())
84
85
# Path of this script relative to LIBCORE_DIR. It is embedded in the commit
# messages below.
THIS_TOOL_PATH = Path(__file__).relative_to(LIBCORE_DIR)

# Tail shared by both commit messages. Only THIS_TOOL_PATH is interpolated
# here; {files} remains a placeholder to be filled by str.format() later.
_MSG_COMMON_TAIL = ("\n"
                    "\n"
                    "List of files:\n"
                    "  {files}\n"
                    "\n"
                    f"Generated by {THIS_TOOL_PATH}\n"
                    "Test: N/A")

# Template of the first commit message. The placeholders are {summary}, {ref}
# and {files}.
MSG_FIRST_COMMIT = "Import {summary} from {ref}" + _MSG_COMMON_TAIL

# Template of the second (merge) commit message, with the same placeholders.
MSG_SECOND_COMMIT = (
    "Merge {summary} from {ref} into the expected_upstream branch"
    + _MSG_COMMON_TAIL)
105
106
def merge_files_and_create_commit(entry_set: List[ExpectedUpstreamEntry],
                                  repo: Repo, checkout_only: bool) -> None:
  r"""Imports the files of entry_set from upstream into the current branch.

  `--------<ref>---------------   aosp/upstream_openjdkXXX
             \
        <first_commit>
              \
  -------<second_commit>------   expected_upstream

  Two commits, first_commit and second_commit in the diagram, are created.
  The purpose is to check out the subset of files listed in entry_set and to
  merge them into the expected_upstream branch, so that the git-blame history
  of every individual file is kept. first_commit exists in order to move the
  files listed in entry_set to their destination paths.

  Implementation-wise, first_commit is not derived from the tree of the ref.
  It starts from an empty tree, and every file in entry_set is added to it.
  second_commit is a merge commit based on the parent in the expected_upstream
  branch, in which the content of any file present in first_commit overrides
  the content from that parent.

  The following git commands should produce the same commits and may help
  understanding, but the python implementation is cleaner because it neither
  changes the working tree nor creates a new branch.
  first_commit:
      git checkout -b temp_branch <entry.git_ref>
      rm -r * .jcheck/ .hgignore .hgtags # Remove hidden files
      git checkout <entry.git_ref> <entry.src_path>
      mkdir -p <entry.dst_path>.directory && git mv <entry.src_path>
      <entry.dst_path>
      git commit -a
  second_commit:
      git merge temp_branch
      git checkout HEAD -- ojluni/ # Force checkout to resolve merge conflict
      git checkout temp_branch -- <entry.dst_path>
      git commit

  Args:
    entry_set: a list of entries. The git_ref of the first entry is used for
      the whole set.
    repo: the repository object
    checkout_only: True if it creates no commit
  """
  ref = entry_set[0].git_ref
  upstream_commit = repo.commit(ref)

  str_dst_paths = "\n  ".join(e.dst_path for e in entry_set)

  # The files are written into the file system directly, because GitPython has
  # no API writing into an in-memory index, and IndexFile.move requires the
  # file in the working tree as well. This is fine: HEAD is later reset to the
  # second commit, the user expects to see the files in the file system, and
  # they are neither staged nor untracked since the second commit has them.
  for entry in entry_set:
    upstream_blob = upstream_commit.tree[entry.src_path]
    target_file = Path(LIBCORE_DIR, entry.dst_path)
    target_file.parent.mkdir(parents=True, exist_ok=True)
    with target_file.open("wb") as out:
      out.write(upstream_blob.data_stream.read())

  if checkout_only:
    print(f"Checked out the following files from {ref}:")
    print(f"  {str_dst_paths}")
    return

  # Start from an index without any staged file. The empty commit is not the
  # parent; the parents are given when the commit is created.
  first_index = IndexFile.from_tree(repo, repo.commit(EMPTY_COMMIT_SHA))
  for entry in entry_set:
    first_index.add(entry.dst_path)

  if len(entry_set) == 1:
    summary_msg = Path(entry_set[0].dst_path).stem
  else:
    summary_msg = "files"
  msg_fields = dict(summary=summary_msg, ref=ref, files=str_dst_paths)

  first_commit = first_index.commit(
      message=MSG_FIRST_COMMIT.format(**msg_fields),
      parent_commits=[upstream_commit],
      head=False)

  # The merge commit is built on a separate index rather than repo.index, so
  # the files currently staged by the user are not affected.
  prev_head = repo.active_branch.commit
  second_index = IndexFile.from_tree(repo, prev_head)
  imported_blobs = first_commit.tree.traverse(
      lambda obj, depth: isinstance(obj, Blob))
  second_index.add(imported_blobs)
  second_commit = second_index.commit(
      message=MSG_SECOND_COMMIT.format(**msg_fields),
      parent_commits=[prev_head, first_commit],
      head=True)

  # HEAD now points to the second commit. Reset the current index, i.e.
  # repo.index, otherwise it shows the imported files as deleted.
  repo.index.reset()

  print(f"New merge commit {second_commit} contains:")
  print(f"  {str_dst_paths}")
207
208
def create_commits(repo: Repo, checkout_only: bool) -> None:
  """Create the commits importing files according to the EXPECTED_UPSTREAM.

  Nothing is done, apart from printing a message, when the active branch does
  not track aosp/expected_upstream or when no entry is outdated.

  Args:
    repo: the repository object
    checkout_only: True if the files are checked out without creating commits
  """
  current_tracking_branch = repo.active_branch.tracking_branch()
  # tracking_branch() returns None when the active branch tracks no remote
  # branch. Treat it the same as tracking a wrong branch instead of crashing
  # on the attribute access.
  if (current_tracking_branch is None or
      current_tracking_branch.name != "aosp/expected_upstream"):
    print("This script should only run on aosp/expected_upstream branch. "
          f"Currently, this is on branch {repo.active_branch} "
          f"tracking {current_tracking_branch}")
    return

  print("Reading EXPECTED_UPSTREAM file...")
  expected_upstream_entries = ExpectedUpstreamFile().read_all_entries()

  outdated_entries = validate_and_remove_updated_entries(
      expected_upstream_entries, repo)

  if not outdated_entries:
    print("No need to update. All files are updated.")
    return

  print("The following entries will be updated from upstream")
  for e in outdated_entries:
    print(f"  {e.dst_path}")

  # One pair of commits is created per upstream ref.
  entry_sets_to_be_merged = partition_entries_by_ref(outdated_entries)

  for entry_set in entry_sets_to_be_merged:
    merge_files_and_create_commit(entry_set, repo, checkout_only)
236
237
def main(argv: Sequence[str]) -> None:
  """Parses the command-line arguments and runs create_commits.

  The repository at LIBCORE_DIR is opened for the duration of the call and is
  closed even when create_commits raises.

  Args:
    argv: the command-line arguments, without the program name
  """
  description = ("Read the EXPECTED_UPSTREAM and update the files from the "
                 "OpenJDK. By default, it creates commits forking from "
                 "the upstream version in order to preserve the line history.")
  parser = argparse.ArgumentParser(description=description)
  parser.add_argument(
      "--checkout-only",
      action="store_true",
      help="Checkout the files, but creates no commits")
  options = parser.parse_args(argv)

  repo = Repo(LIBCORE_DIR.as_posix())
  try:
    create_commits(repo, options.checkout_only)
  finally:
    repo.close()


if __name__ == "__main__":
  main(sys.argv[1:])
260