#!/usr/bin/python3 -B

# Copyright 2021 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Read the EXPECTED_UPSTREAM and update the files from the upstream."""
import argparse
import logging
# pylint: disable=g-importing-member
from pathlib import Path
import sys
from typing import List
from typing import Sequence

# pylint: disable=g-multiple-import
from common_util import (
    ExpectedUpstreamEntry,
    ExpectedUpstreamFile,
    has_file_in_tree,
    LIBCORE_DIR,
)

from git import (
    Blob,
    IndexFile,
    Repo,
)

# Enable INFO logging for errors emitted by GitPython
logging.basicConfig(level=logging.INFO)

# Pick an arbitrary existing commit with an empty tree
EMPTY_COMMIT_SHA = "d85bc16ba1cdcc20bec6fcbfe46dc90f9fcd2f78"


def validate_and_remove_updated_entries(
    entries: List[ExpectedUpstreamEntry],
    repo: Repo) -> List[ExpectedUpstreamEntry]:
  """Returns a list of entries of which the file content needs to be updated."""
  head_tree = repo.head.commit.tree
  result: List[ExpectedUpstreamEntry] = []

  for e in entries:
    try:
      # The following step validates each entry by querying the git database
      commit = repo.commit(e.git_ref)
      source_blob = commit.tree.join(e.src_path)
      if not has_file_in_tree(e.dst_path, head_tree):
        # Add the entry if the file is missing in the HEAD
        result.append(e)
        continue

      dst_blob = head_tree.join(e.dst_path)
      # Add the entry if the content is different.
      # data_stream will be closed during GC.
      if source_blob.data_stream.read() != dst_blob.data_stream.read():
        result.append(e)
    except Exception:
      print(f"ERROR: reading entry: {e}", file=sys.stderr)
      raise

  return result


def partition_entries_by_ref(
    entries: List[ExpectedUpstreamEntry]) -> List[List[ExpectedUpstreamEntry]]:
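  """Groups the entries by git_ref, one list of entries per distinct ref."""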
  result_map = {}
  for e in entries:
    result_map.setdefault(e.git_ref, []).append(e)

  return list(result_map.values())


THIS_TOOL_PATH = Path(__file__).relative_to(LIBCORE_DIR)
MSG_FIRST_COMMIT = ("Import {summary} from {ref}\n"
                    "\n"
                    "List of files:\n"
                    "  {files}\n"
                    "\n"
                    f"Generated by {THIS_TOOL_PATH}"
                    "\n"
                    "Test: N/A")

MSG_SECOND_COMMIT = ("Merge {summary} from {ref} into the "
                     "expected_upstream branch\n"
                     "\n"
                     "List of files:\n"
                     "  {files}\n"
                     "\n"
                     f"Generated by {THIS_TOOL_PATH}"
                     "\n"
                     "Test: N/A")


def merge_files_and_create_commit(entry_set: List[ExpectedUpstreamEntry],
                                  repo: Repo, checkout_only: bool) -> None:
  r"""Create the commits importing the given files into the current branch.

  `--------<ref>---------------   aosp/upstream_openjdkXXX
             \
        <first_commit>
              \
  -------<second_commit>------   expected_upstream

  This function creates the two commits in the diagram, i.e. first_commit
  and second_commit. The goal is to check out the subset of files specified
  in entry_set and merge them into the expected_upstream branch in order to
  keep the git-blame history of the individual files. first_commit is needed
  in order to move the files specified in entry_set to their destination
  paths.

  In the implementation, first_commit isn't really derived from the ref, but
  is created from an empty tree, and all files in entry_set are added to it.
  second_commit is a merge commit derived from the parent in the
  expected_upstream branch, and any file content in the first commit
  overrides the corresponding file content in the second commit.

  The following git commands, which should create the same commits, may help
  in understanding the process; the Python implementation is cleaner because
  it doesn't change the working tree or create a new branch.
  first_commit:
      git checkout -b temp_branch <entry.git_ref>
      rm -r * .jcheck/ .hgignore .hgtags # Remove hidden files
      git checkout <entry.git_ref> <entry.src_path>
      mkdir -p $(dirname <entry.dst_path>) && \
          git mv <entry.src_path> <entry.dst_path>
      git commit -a
  second_commit:
      git merge temp_branch
      git checkout HEAD -- ojluni/ # Force checkout to resolve merge conflict
      git checkout temp_branch -- <entry.dst_path>
      git commit

  Args:
    entry_set: a list of entries
    repo: the repository object
    checkout_only: if True, only check out the files and create no commits
  """
  ref = entry_set[0].git_ref
  upstream_commit = repo.commit(ref)

  dst_paths = [e.dst_path for e in entry_set]
  str_dst_paths = "\n  ".join(dst_paths)

  for entry in entry_set:
    src_blob = upstream_commit.tree[entry.src_path]
    # Write into the file system directly because GitPython provides no API
    # for writing into the in-memory index. IndexFile.move doesn't help here,
    # because that API requires the file to be in the working tree too.
    # However, this is fine, because HEAD and the index are later reset to the
    # second commit. The user expects the file to appear in the file system,
    # and the file is neither staged nor untracked because it is also in the
    # second commit.
    absolute_dst_path = Path(LIBCORE_DIR, entry.dst_path)
    absolute_dst_path.parent.mkdir(parents=True, exist_ok=True)
    with absolute_dst_path.open("wb") as file:
      file.write(src_blob.data_stream.read())

  if not checkout_only:
    # We need an initially empty index, i.e. no staged files. Note that the
    # empty commit is not set as a parent; the parents are set later.
    first_index = IndexFile.from_tree(repo, repo.commit(EMPTY_COMMIT_SHA))
    for entry in entry_set:
      first_index.add([entry.dst_path])

    summary_msg = "files"
    if len(entry_set) == 1:
      summary_msg = Path(entry_set[0].dst_path).stem
    msg = MSG_FIRST_COMMIT.format(
        summary=summary_msg, ref=ref, files=str_dst_paths)

    first_commit = first_index.commit(
        message=msg, parent_commits=[upstream_commit], head=False)
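    # first_commit has the upstream commit as its parent, so git-blame and
    # git-log can follow the imported files back into the upstream history.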

    # The second commit is a merge commit. It doesn't use the current index,
    # i.e. repo.index, to avoid affecting the current staged files.
    prev_head = repo.active_branch.commit
    second_index = IndexFile.from_tree(repo, prev_head)
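    # Stage every blob from first_commit's tree so that its file contents
    # override the corresponding files in the merge result.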
    blob_filter = lambda obj, i: isinstance(obj, Blob)
    blobs = first_commit.tree.traverse(blob_filter)
    second_index.add(blobs)
    msg = MSG_SECOND_COMMIT.format(
        summary=summary_msg, ref=ref, files=str_dst_paths)
    second_commit = second_index.commit(
        message=msg, parent_commits=[prev_head, first_commit], head=True)

    # HEAD has been updated to the second commit, so reset the current index,
    # i.e. repo.index, to match it. Otherwise, the current index would show
    # the files as deleted.
    repo.index.reset()

  if checkout_only:
    print(f"Checked out the following files from {ref}:")
  else:
    print(f"New merge commit {second_commit} contains:")
  print(f"  {str_dst_paths}")


def create_commits(repo: Repo, checkout_only: bool) -> None:
  """Create the commits importing files according to the EXPECTED_UPSTREAM."""
  current_tracking_branch = repo.active_branch.tracking_branch()
  if (current_tracking_branch is None or
      current_tracking_branch.name != "aosp/expected_upstream"):
    print("This script should only run on the aosp/expected_upstream branch. "
          f"Currently, this is on branch {repo.active_branch} "
          f"tracking {current_tracking_branch}")
    return

  print("Reading EXPECTED_UPSTREAM file...")
  expected_upstream_entries = ExpectedUpstreamFile().read_all_entries()

  outdated_entries = validate_and_remove_updated_entries(
      expected_upstream_entries, repo)

  if not outdated_entries:
    print("No need to update. All files are updated.")
    return

  print("The following entries will be updated from upstream")
  for e in outdated_entries:
    print(f"  {e.dst_path}")

  entry_sets_to_be_merged = partition_entries_by_ref(outdated_entries)
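  # Each entry set contains the outdated entries that share one upstream ref
  # and is imported as its own pair of commits.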

  for entry_set in entry_sets_to_be_merged:
    merge_files_and_create_commit(entry_set, repo, checkout_only)


def main(argv: Sequence[str]) -> None:
  arg_parser = argparse.ArgumentParser(
      description="Read the EXPECTED_UPSTREAM and update the files from the "
                  "OpenJDK. By default, it creates commits forking from "
                  "the upstream version in order to preserve the line history.")
  arg_parser.add_argument(
      "--checkout-only", action="store_true",
      help="Checkout the files, but creates no commits")

  args = arg_parser.parse_args(argv)

  checkout_only = args.checkout_only

  repo = Repo(LIBCORE_DIR.as_posix())
  try:
    create_commits(repo, checkout_only)
  finally:
    repo.close()


if __name__ == "__main__":
  main(sys.argv[1:])