1#!/usr/bin/env python3
2#
3# Copyright (C) 2020 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16"""Fetch a Rust package from crates.io.
17
18Usage: get_rust_pkg.py -v syn-1.0.7
19Get the package syn-1.0.7 from crates.io and untar it into ./syn-1.0.7.
20
21Usage: get_rust_pkg.py -v -o tmp syn
22Get the latest version of package syn, say 1.0.17,
23and untar it into tmp/syn-1.0.17.
24
25Usage: get_rust_pkg.py -show bindgen cxx
26Count dependent packages of bindgen and cxx.
27
28When downloading a package, its target directory should not exist,
29or the download will be skipped.
30"""
31
32import argparse
33import functools
34import json
35import os
36import re
37import shutil
38import sys
39import tarfile
40import tempfile
41import urllib.request
42
43PKG_VERSION_PATTERN = r"(.*)-([0-9]+\.[0-9]+\.[0-9]+.*)"
44
45PKG_VERSION_MATCHER = re.compile(PKG_VERSION_PATTERN)
46
47VERSION_PATTERN = r"([0-9]+)\.([0-9]+)\.([0-9]+)"
48
49VERSION_MATCHER = re.compile(VERSION_PATTERN)
50
51
52def parse_args():
53  """Parse main arguments."""
54  parser = argparse.ArgumentParser("get_rust_pkg")
55  parser.add_argument(
56      "-v", action="store_true", default=False, help="echo executed commands")
57  parser.add_argument(
58      "-o", metavar="out_dir", default=".", help="output directory")
59  parser.add_argument(
60      "-add3prf", "--add3prf",
61      action="store_true",
62      default=False,
63      help="call add3prf.py to add third party review files")
64  parser.add_argument(
65      "-show", "--show",
66      action="store_true",
67      default=False,
68      help="show all default dependent packages, using crates.io api")
69  parser.add_argument(
70      dest="pkgs",
71      metavar="pkg_name",
72      nargs="+",
73      help="name of Rust package to be fetched from crates.io")
74  return parser.parse_args()
75
76
77def set2list(a_set):
78  return (" " + " ".join(sorted(a_set))) if a_set else ""
79
80
81def echo(args, msg):
82  if args.v:
83    print("INFO: {}".format(msg), flush=True)
84
85
86def echo_all_deps(args, kind, deps):
87  if args.v and deps:
88    print("INFO: now {} in {}:{}".format(len(deps), kind, set2list(deps)))
89
90
91def pkg_base_name(pkg):
92  match = PKG_VERSION_MATCHER.match(pkg)
93  if match is not None:
94    return (match.group(1), match.group(2))
95  else:
96    return (pkg, "")
97
98
99def get_version_numbers(version):
100  match = VERSION_MATCHER.match(version)
101  if match is not None:
102    return tuple(int(match.group(i)) for i in range(1, 4))
103  return (0, 0, 0)
104
105
106def is_newer_version(args, prev_version, prev_id, check_version, check_id):
107  """Return true if check_version+id is newer than prev_version+id."""
108  echo(args, "checking version={}  id={}".format(check_version, check_id))
109  return ((get_version_numbers(check_version), check_id) >
110          (get_version_numbers(prev_version), prev_id))
111
112
113def get_max_version(pkg):
114  """Ask crates.io for a pkg's latest version."""
115  url = "https://crates.io/api/v1/crates/" + pkg
116  with urllib.request.urlopen(url) as request:
117    data = json.loads(request.read().decode())
118    return data["crate"]["max_version"]
119
120
121def find_dl_path(args, name):
122  """Ask crates.io for the latest version download path."""
123  base_name, version = pkg_base_name(name)
124  if not version:
125    version = get_max_version(name)
126  url = "https://crates.io/api/v1/crates/{}/{}".format(base_name, version)
127  echo(args, "try to get dl_path from {}".format(url))
128  with urllib.request.urlopen(url) as request:
129    data = json.loads(request.read().decode())
130    if "version" not in data or "dl_path" not in data["version"]:
131      print("ERROR: cannot find version {} of package {}".format(
132          version, base_name))
133      return None
134    echo(args, "found download path for version {}".format(version))
135    return data["version"]["dl_path"]
136
137
138def fetch_pkg(args, pkg, dl_path):
139  """Fetch package from crates.io and untar it into a subdirectory."""
140  if not dl_path:
141    print("ERROR: cannot find download path for '{}'".format(pkg))
142    return False
143  url = "https://crates.io" + dl_path
144  tmp_dir = tempfile.mkdtemp()
145  echo(args, "fetch tar file from {}".format(url))
146  tar_file, _ = urllib.request.urlretrieve(url)
147  with tarfile.open(tar_file, mode="r") as tfile:
148    echo(args, "extract tar file {} into {}".format(tar_file, tmp_dir))
149    tfile.extractall(tmp_dir)
150    files = os.listdir(tmp_dir)
151    # There should be only one directory in the tar file,
152    # but it might not be (name + "-" + version)
153    pkg_tmp_dir = os.path.join(tmp_dir, files[0])
154    echo(args, "untared package in {}".format(pkg_tmp_dir))
155    dest_dir = os.path.join(args.o, files[0])
156    if os.path.exists(dest_dir):
157      print("ERROR: do not overwrite existing {}".format(dest_dir))
158      return False  # leave tar_file and tmp_dir
159    else:
160      echo(args, "move {} to {}".format(pkg_tmp_dir, dest_dir))
161      shutil.move(pkg_tmp_dir, dest_dir)
162  echo(args, "delete downloaded tar file {}".format(tar_file))
163  os.remove(tar_file)
164  echo(args, "delete temp directory {}".format(tmp_dir))
165  shutil.rmtree(tmp_dir)
166  print("SUCCESS: downloaded package to '{}'".format(dest_dir))
167  if args.add3prf:
168    echo(args, "Calling add3prf.py in {}".format(dest_dir))
169    cwd = os.getcwd()
170    os.chdir(dest_dir)
171    os.system("add3prf.py")
172    os.chdir(cwd)
173  return True
174
175
176def get_crate_dependencies(args, pkg):
177  """Ask crates.io for pkg's dependencies."""
178  echo(args, "Ask crates.io for {} ...".format(pkg))
179  try:
180    url = "https://crates.io/api/v1/crates/{}/{}/dependencies".format(
181        pkg, get_max_version(pkg))
182    with urllib.request.urlopen(url) as request:
183      data = json.loads(request.read().decode())
184  except urllib.error.HTTPError:
185    print("ERROR: failed to find {}".format(pkg))
186    return False, None, None
187  build_deps = set()
188  dev_deps = set()
189  for crate in data["dependencies"]:
190    if not crate["optional"]:  # some package has a lot of optional features
191      # dev_deps is a super set of build_deps
192      dev_deps.add(crate["crate_id"])
193      if crate["kind"] != "dev":
194        build_deps.add(crate["crate_id"])
195  return True, build_deps, dev_deps
196
197
198def compare_pkg_deps(pkg1, pkg2):
199  """Compare dependency order of pkg1 and pkg2."""
200  base1, build_deps1, dev_deps1 = pkg1
201  base2, build_deps2, dev_deps2 = pkg2
202  # Some pkg1 can be build-dependent (non-dev-dependent) on pkg2,
203  # when pkg2 is only test-dependent (dev-dependent) on pkg1.
204  # This is not really a build dependency cycle, because pkg2
205  # can be built before pkg1, but tested after pkg1.
206  # So the dependency order is based on build_deps first, and then dev_deps.
207  if base1 in build_deps2:
208    return -1  # pkg2 needs base1
209  if base2 in build_deps1:
210    return 1  # pkg1 needs base2
211  if base1 in dev_deps2:
212    return -1  # pkg2 needs base1
213  if base2 in dev_deps1:
214    return 1  # pkg1 needs base2
215  # If there is no dependency between pkg1 and pkg2,
216  # order them by the size of build_deps or dev_deps, or the name.
217  count1 = (len(build_deps1), len(dev_deps1), base1)
218  count2 = (len(build_deps2), len(dev_deps2), base2)
219  if count1 != count2:
220    return -1 if count1 < count2 else 1
221  return 0
222
223
224def sort_found_pkgs(tuples):
225  """A topological sort of tuples based on build_deps."""
226  # tuples is a list of (base_name, build_deps, dev_deps)
227
228  # Use build_deps as the dependency relation in a topological sort.
229  # The new_tuples list is used in topological sort. It is the input tuples
230  # prefixed with a changing build_deps during the sorting process.
231  # Collect all package base names.
232  # Dependent packages not found in all_base_names will be treated as
233  # "external" and ignored in topological sort.
234  all_base_names = set(map(lambda t: t[0], tuples))
235  new_tuples = []
236  all_names = set()
237  for (base_name, build_deps, dev_deps) in tuples:
238    new_tuples.append((build_deps, (base_name, build_deps, dev_deps)))
239    all_names = all_names.union(build_deps)
240  external_names = all_names.difference(all_base_names)
241  new_tuples = list(
242      map(lambda t: (t[0].difference(external_names), t[1]), new_tuples))
243
244  sorted_tuples = []
245  # A brute force topological sort;
246  # tuples with empty build_deps are put before the others.
247  while new_tuples:
248    first_group = list(filter(lambda t: not t[0], new_tuples))
249    other_group = list(filter(lambda t: t[0], new_tuples))
250    new_tuples = []
251    if first_group:
252      # Remove the extra build_deps in first_group,
253      # then sort it, and add its tuples to the sorted_tuples list.
254      first_group = list(map(lambda t: t[1], first_group))
255      first_group.sort(key=functools.cmp_to_key(compare_pkg_deps))
256      sorted_tuples.extend(first_group)
257      # Copy other_group to new_tuples but remove names in the first_group.
258      base_names = set(map(lambda t: t[0], first_group))
259      new_tuples = list(
260          map(lambda t: (t[0].difference(base_names), t[1]), other_group))
261    else:
262      # There is a bug, or a cycle in the build_deps.
263      # If we include all optional dependent packages into build_deps,
264      # we will see one cycle: futures-util has an optional dependent
265      # on futures, which has a normal dependent on futures-util.
266      print("ERROR: leftover in topological sort: {}".format(
267          list(map(lambda t: t[1][1], other_group))))
268      # Anyway, sort the other_group to include them into final report.
269      other_group = list(map(lambda t: t[1], other_group))
270      other_group.sort(key=functools.cmp_to_key(compare_pkg_deps))
271      sorted_tuples.extend(other_group)
272  return sorted_tuples
273
274
275def show_all_dependencies(args, found_pkgs):
276  """Topological sort found_pkgs and report number of dependent packages."""
277  found_pkgs = sort_found_pkgs(found_pkgs)
278  max_length = functools.reduce(lambda a, t: max(a, len(t[0])), found_pkgs, 1)
279  name_format = "{:" + str(max_length) + "s}"
280  print("\n##### Summary of all dependent package counts #####")
281  pattern = "{:>9s}_deps[k] = # of {}dependent packages of {}"
282  for (prefix, suffix) in [("    ", "pkg[k]"), ("all_", "pkg[1] to pkg[k]")]:
283    for (name, kind) in [("build", "non-dev-"), ("dev", "all ")]:
284      print(pattern.format(prefix + name, kind, suffix))
285  print(("{:>4s} " + name_format + " {:>10s} {:>10s} {:>14s} {:>14s}").format(
286      "k", "pkg", "build_deps", "dev_deps", "all_build_deps", "all_dev_deps"))
287  all_pkgs = set()
288  all_build_deps = set()
289  all_dev_deps = set()
290  k = 0
291  prev_all_build_deps = set()
292  prev_all_dev_deps = set()
293  for (pkg, build_deps, dev_deps) in found_pkgs:
294    all_pkgs.add(pkg)
295    all_build_deps = all_build_deps.union(build_deps).difference(all_pkgs)
296    all_dev_deps = all_dev_deps.union(dev_deps).difference(all_pkgs)
297    k += 1
298    print(("{:4d} " + name_format + " {:10d} {:10d} {:14d} {:14d}").format(
299        k, pkg, len(build_deps), len(dev_deps), len(all_build_deps),
300        len(all_dev_deps)))
301    if prev_all_build_deps != all_build_deps:
302      echo_all_deps(args, "all_build_deps", all_build_deps)
303      prev_all_build_deps = all_build_deps
304    if prev_all_dev_deps != all_dev_deps:
305      echo_all_deps(args, "all_dev_deps", all_dev_deps)
306      prev_all_dev_deps = all_dev_deps
307  print("\nNOTE: from all {} package(s):{}".format(
308      len(all_pkgs), set2list(all_pkgs)))
309  for (kind, deps) in [("non-dev-", all_build_deps), ("", all_dev_deps)]:
310    if deps:
311      print("NOTE: found {:3d} other {}dependent package(s):{}".format(
312          len(deps), kind, set2list(deps)))
313
314
315def crates_io_find_pkgs(args, pkgs, found_pkgs):
316  """Call crates.io api to find direct dependent packages."""
317  success = True
318  for pkg in sorted(pkgs):
319    ok, build_deps, dev_deps = get_crate_dependencies(args, pkg)
320    if not ok:
321      success = False
322    else:
323      found_pkgs.append((pkg, build_deps, dev_deps))
324  return success
325
326
327def add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg):
328  """Add reachable non-dev dependencies to all_deps[pkg]'s dependencies."""
329  if pkg not in all_deps:
330    ok, build_deps, dev_deps = get_crate_dependencies(args, pkg)
331    if not ok:
332      return set()
333    all_deps[pkg] = (pkg, build_deps, dev_deps)
334  else:
335    (_, build_deps, dev_deps) = all_deps[pkg]
336  if pkg in visited:
337    return build_deps
338  visited.add(pkg)
339
340  for p in sorted(build_deps):
341    if p not in core_pkgs and pkg in core_pkgs:
342      core_pkgs.add(p)
343    deps = add_non_dev_dependencies(args, all_deps, core_pkgs, visited, p)
344    build_deps = build_deps.union(deps)
345  if pkg in core_pkgs:
346    for p in sorted(dev_deps):
347      deps = add_non_dev_dependencies(args, all_deps, core_pkgs, visited, p)
348      dev_deps = dev_deps.union(deps)
349  all_deps[pkg] = (pkg, build_deps, dev_deps)
350  return build_deps
351
352
353def add_indirect_build_deps(args, found_pkgs):
354  """Add all indirect dependencies and return a new found_pkgs."""
355  all_deps = dict(map(lambda t: (t[0], t), found_pkgs))
356  core_pkgs = set(map(lambda t: t[0], found_pkgs))
357  dev_pkgs = set()
358  dump_pkgs(args, "BEFORE", all_deps, core_pkgs, dev_pkgs)
359  visited = set()
360  for pkg in sorted(core_pkgs):
361    add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg)
362  # Revisit core packages to add build_deps into core_pkgs and other deps.
363  check_again = True
364  while check_again:
365    pattern = "Revisiting {} core packages:{}"
366    echo(args, pattern.format(len(core_pkgs), set2list(core_pkgs)))
367    check_again = False
368    visited = set()
369    num_core_pkgs = len(core_pkgs)
370    for pkg in sorted(core_pkgs):
371      (_, build_deps, dev_deps) = all_deps[pkg]
372      (num_build_deps, num_dev_deps) = (len(build_deps), len(dev_deps))
373      add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg)
374      (_, build_deps, dev_deps) = all_deps[pkg]
375      (new_num_build_deps, new_num_dev_deps) = (len(build_deps), len(dev_deps))
376      # If build_deps, dev_deps, or core_pkgs was changed, check again.
377      if (num_build_deps != new_num_build_deps or
378          num_dev_deps != new_num_dev_deps or num_core_pkgs != len(core_pkgs)):
379        check_again = True
380  dev_pkgs = visited.difference(core_pkgs)
381  dump_pkgs(args, "AFTER", all_deps, core_pkgs, dev_pkgs)
382  found_pkgs = list(map(lambda p: all_deps[p], core_pkgs))
383  found_pkgs.sort(key=lambda t: t[0])
384  return found_pkgs
385
386
387def echo_dump_found_pkgs(args, msg, all_deps, pkgs):
388  if not args.v or not pkgs:
389    return
390  echo(args, msg)
391  for pkg in sorted(pkgs):
392    (_, build_deps, dev_deps) = all_deps[pkg]
393    for (name, deps) in [("normal", build_deps), ("dev", dev_deps)]:
394      pattern = "  {} has {} " + name + " deps:{}"
395      echo(args, pattern.format(pkg, len(deps), set2list(deps)))
396
397
398def dump_pkgs(args, msg, all_deps, core_pkgs, dev_pkgs):
399  echo_dump_found_pkgs(args, msg + " core_pkgs:", all_deps, core_pkgs)
400  echo_dump_found_pkgs(args, msg + " other_dev_pkgs:", all_deps, dev_pkgs)
401
402
403def show_dependencies(args):
404  """Calls crates.io api to find dependent packages; returns True on success."""
405  all_pkgs = set(map(lambda p: pkg_base_name(p)[0], args.pkgs))
406  if "" in all_pkgs:
407    # TODO(chh): detect and report ill formed names in args.pkgs
408    print("WARNING: skip some ill formatted pkg arguments.")
409    all_pkgs = all_pkgs.remove("")
410  if not all_pkgs:
411    print("ERROR: no valid pkg names to show dependencies.")
412    return False
413  pkgs = sorted(all_pkgs)
414  found_pkgs = []
415  success = crates_io_find_pkgs(args, pkgs, found_pkgs)
416  if not found_pkgs:
417    return False
418  # All normal (non-dev) dependent packages will be added into found_pkgs.
419  found_pkgs = add_indirect_build_deps(args, found_pkgs)
420  show_all_dependencies(args, found_pkgs)
421  return success
422
423
424def main():
425  args = parse_args()
426  if args.show:
427    # only show dependencies, not to fetch any package
428    if not show_dependencies(args):
429      sys.exit(2)
430    return
431  echo(args, "to fetch packages = {}".format(args.pkgs))
432  success = True
433  for pkg in args.pkgs:
434    echo(args, "trying to fetch package '{}'".format(pkg))
435    try:
436      if not fetch_pkg(args, pkg, find_dl_path(args, pkg)):
437        success = False
438    except urllib.error.HTTPError:
439      print("ERROR: unknown package '{}'".format(pkg))
440      success = False
441  if not success:
442    sys.exit(1)
443
444
445if __name__ == "__main__":
446  main()
447