1#!/usr/bin/env python3 2# 3# Copyright (C) 2020 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16"""Fetch a Rust package from crates.io. 17 18Usage: get_rust_pkg.py -v syn-1.0.7 19Get the package syn-1.0.7 from crates.io and untar it into ./syn-1.0.7. 20 21Usage: get_rust_pkg.py -v -o tmp syn 22Get the latest version of package syn, say 1.0.17, 23and untar it into tmp/syn-1.0.17. 24 25Usage: get_rust_pkg.py -show bindgen cxx 26Count dependent packages of bindgen and cxx. 27 28When downloading a package, its target directory should not exist, 29or the download will be skipped. 30""" 31 32import argparse 33import functools 34import json 35import os 36import re 37import shutil 38import sys 39import tarfile 40import tempfile 41import urllib.request 42 43PKG_VERSION_PATTERN = r"(.*)-([0-9]+\.[0-9]+\.[0-9]+.*)" 44 45PKG_VERSION_MATCHER = re.compile(PKG_VERSION_PATTERN) 46 47VERSION_PATTERN = r"([0-9]+)\.([0-9]+)\.([0-9]+)" 48 49VERSION_MATCHER = re.compile(VERSION_PATTERN) 50 51 52def parse_args(): 53 """Parse main arguments.""" 54 parser = argparse.ArgumentParser("get_rust_pkg") 55 parser.add_argument( 56 "-v", action="store_true", default=False, help="echo executed commands") 57 parser.add_argument( 58 "-o", metavar="out_dir", default=".", help="output directory") 59 parser.add_argument( 60 "-add3prf", "--add3prf", 61 action="store_true", 62 default=False, 63 help="call add3prf.py to add third party review files") 64 parser.add_argument( 65 "-show", "--show", 66 action="store_true", 67 default=False, 68 help="show all default dependent packages, using crates.io api") 69 parser.add_argument( 70 dest="pkgs", 71 metavar="pkg_name", 72 nargs="+", 73 help="name of Rust package to be fetched from crates.io") 74 return parser.parse_args() 75 76 77def set2list(a_set): 78 return (" " + " ".join(sorted(a_set))) if a_set else "" 79 80 81def echo(args, msg): 82 if args.v: 83 print("INFO: {}".format(msg), flush=True) 84 85 86def echo_all_deps(args, kind, deps): 87 if args.v and deps: 88 print("INFO: now {} in {}:{}".format(len(deps), kind, set2list(deps))) 89 90 91def pkg_base_name(pkg): 92 match = PKG_VERSION_MATCHER.match(pkg) 93 if match is not None: 94 return (match.group(1), match.group(2)) 95 else: 96 return (pkg, "") 97 98 99def get_version_numbers(version): 100 match = VERSION_MATCHER.match(version) 101 if match is not None: 102 return tuple(int(match.group(i)) for i in range(1, 4)) 103 return (0, 0, 0) 104 105 106def is_newer_version(args, prev_version, prev_id, check_version, check_id): 107 """Return true if check_version+id is newer than prev_version+id.""" 108 echo(args, "checking version={} id={}".format(check_version, check_id)) 109 return ((get_version_numbers(check_version), check_id) > 110 (get_version_numbers(prev_version), prev_id)) 111 112 113def get_max_version(pkg): 114 """Ask crates.io for a pkg's latest stable version.""" 115 url = "https://crates.io/api/v1/crates/" + pkg 116 with urllib.request.urlopen(url) as request: 117 data = json.loads(request.read().decode()) 118 return data["crate"]["max_stable_version"] 119 120 121def find_dl_path(args, name): 122 """Ask crates.io for the latest stable version download path.""" 123 base_name, version = pkg_base_name(name) 124 if not version: 125 version = get_max_version(name) 126 url = "https://crates.io/api/v1/crates/{}/{}".format(base_name, version) 127 echo(args, "try to get dl_path from {}".format(url)) 128 with urllib.request.urlopen(url) as request: 129 data = json.loads(request.read().decode()) 130 if "version" not in data or "dl_path" not in data["version"]: 131 print("ERROR: cannot find version {} of package {}".format( 132 version, base_name)) 133 return None 134 echo(args, "found download path for version {}".format(version)) 135 return data["version"]["dl_path"] 136 137 138def fetch_pkg(args, pkg, dl_path): 139 """Fetch package from crates.io and untar it into a subdirectory.""" 140 if not dl_path: 141 print("ERROR: cannot find download path for '{}'".format(pkg)) 142 return False 143 url = "https://crates.io" + dl_path 144 tmp_dir = tempfile.mkdtemp() 145 echo(args, "fetch tar file from {}".format(url)) 146 tar_file, _ = urllib.request.urlretrieve(url) 147 with tarfile.open(tar_file, mode="r") as tfile: 148 echo(args, "extract tar file {} into {}".format(tar_file, tmp_dir)) 149 tfile.extractall(tmp_dir) 150 files = os.listdir(tmp_dir) 151 # There should be only one directory in the tar file, 152 # but it might not be (name + "-" + version) 153 pkg_tmp_dir = os.path.join(tmp_dir, files[0]) 154 echo(args, "untared package in {}".format(pkg_tmp_dir)) 155 dest_dir = os.path.join(args.o, files[0]) 156 if os.path.exists(dest_dir): 157 print("ERROR: do not overwrite existing {}".format(dest_dir)) 158 return False # leave tar_file and tmp_dir 159 else: 160 echo(args, "move {} to {}".format(pkg_tmp_dir, dest_dir)) 161 shutil.move(pkg_tmp_dir, dest_dir) 162 echo(args, "delete downloaded tar file {}".format(tar_file)) 163 os.remove(tar_file) 164 echo(args, "delete temp directory {}".format(tmp_dir)) 165 shutil.rmtree(tmp_dir) 166 print("SUCCESS: downloaded package to '{}'".format(dest_dir)) 167 if args.add3prf: 168 echo(args, "Calling add3prf.py in {}".format(dest_dir)) 169 cwd = os.getcwd() 170 os.chdir(dest_dir) 171 # add3prf.py is in the same directory as this python script 172 os.system(os.path.dirname(__file__) + "/add3prf.py") 173 os.chdir(cwd) 174 return True 175 176def get_crate_dependencies(args, pkg): 177 """Ask crates.io for pkg's dependencies.""" 178 echo(args, "Ask crates.io for {} ...".format(pkg)) 179 try: 180 url = "https://crates.io/api/v1/crates/{}/{}/dependencies".format( 181 pkg, get_max_version(pkg)) 182 with urllib.request.urlopen(url) as request: 183 data = json.loads(request.read().decode()) 184 except urllib.error.HTTPError: 185 print("ERROR: failed to find {}".format(pkg)) 186 return False, None, None 187 build_deps = set() 188 dev_deps = set() 189 for crate in data["dependencies"]: 190 if not crate["optional"]: # some package has a lot of optional features 191 # dev_deps is a super set of build_deps 192 dev_deps.add(crate["crate_id"]) 193 if crate["kind"] != "dev": 194 build_deps.add(crate["crate_id"]) 195 return True, build_deps, dev_deps 196 197 198def compare_pkg_deps(pkg1, pkg2): 199 """Compare dependency order of pkg1 and pkg2.""" 200 base1, build_deps1, dev_deps1 = pkg1 201 base2, build_deps2, dev_deps2 = pkg2 202 # Some pkg1 can be build-dependent (non-dev-dependent) on pkg2, 203 # when pkg2 is only test-dependent (dev-dependent) on pkg1. 204 # This is not really a build dependency cycle, because pkg2 205 # can be built before pkg1, but tested after pkg1. 206 # So the dependency order is based on build_deps first, and then dev_deps. 207 if base1 in build_deps2: 208 return -1 # pkg2 needs base1 209 if base2 in build_deps1: 210 return 1 # pkg1 needs base2 211 if base1 in dev_deps2: 212 return -1 # pkg2 needs base1 213 if base2 in dev_deps1: 214 return 1 # pkg1 needs base2 215 # If there is no dependency between pkg1 and pkg2, 216 # order them by the size of build_deps or dev_deps, or the name. 217 count1 = (len(build_deps1), len(dev_deps1), base1) 218 count2 = (len(build_deps2), len(dev_deps2), base2) 219 if count1 != count2: 220 return -1 if count1 < count2 else 1 221 return 0 222 223 224def sort_found_pkgs(tuples): 225 """A topological sort of tuples based on build_deps.""" 226 # tuples is a list of (base_name, build_deps, dev_deps) 227 228 # Use build_deps as the dependency relation in a topological sort. 229 # The new_tuples list is used in topological sort. It is the input tuples 230 # prefixed with a changing build_deps during the sorting process. 231 # Collect all package base names. 232 # Dependent packages not found in all_base_names will be treated as 233 # "external" and ignored in topological sort. 234 all_base_names = set(map(lambda t: t[0], tuples)) 235 new_tuples = [] 236 all_names = set() 237 for (base_name, build_deps, dev_deps) in tuples: 238 new_tuples.append((build_deps, (base_name, build_deps, dev_deps))) 239 all_names = all_names.union(build_deps) 240 external_names = all_names.difference(all_base_names) 241 new_tuples = list( 242 map(lambda t: (t[0].difference(external_names), t[1]), new_tuples)) 243 244 sorted_tuples = [] 245 # A brute force topological sort; 246 # tuples with empty build_deps are put before the others. 247 while new_tuples: 248 first_group = list(filter(lambda t: not t[0], new_tuples)) 249 other_group = list(filter(lambda t: t[0], new_tuples)) 250 new_tuples = [] 251 if first_group: 252 # Remove the extra build_deps in first_group, 253 # then sort it, and add its tuples to the sorted_tuples list. 254 first_group = list(map(lambda t: t[1], first_group)) 255 first_group.sort(key=functools.cmp_to_key(compare_pkg_deps)) 256 sorted_tuples.extend(first_group) 257 # Copy other_group to new_tuples but remove names in the first_group. 258 base_names = set(map(lambda t: t[0], first_group)) 259 new_tuples = list( 260 map(lambda t: (t[0].difference(base_names), t[1]), other_group)) 261 else: 262 # There is a bug, or a cycle in the build_deps. 263 # If we include all optional dependent packages into build_deps, 264 # we will see one cycle: futures-util has an optional dependent 265 # on futures, which has a normal dependent on futures-util. 266 print("ERROR: leftover in topological sort: {}".format( 267 list(map(lambda t: t[1][1], other_group)))) 268 # Anyway, sort the other_group to include them into final report. 269 other_group = list(map(lambda t: t[1], other_group)) 270 other_group.sort(key=functools.cmp_to_key(compare_pkg_deps)) 271 sorted_tuples.extend(other_group) 272 return sorted_tuples 273 274 275def show_all_dependencies(args, found_pkgs): 276 """Topological sort found_pkgs and report number of dependent packages.""" 277 found_pkgs = sort_found_pkgs(found_pkgs) 278 max_length = functools.reduce(lambda a, t: max(a, len(t[0])), found_pkgs, 1) 279 name_format = "{:" + str(max_length) + "s}" 280 print("\n##### Summary of all dependent package counts #####") 281 pattern = "{:>9s}_deps[k] = # of {}dependent packages of {}" 282 for (prefix, suffix) in [(" ", "pkg[k]"), ("all_", "pkg[1] to pkg[k]")]: 283 for (name, kind) in [("build", "non-dev-"), ("dev", "all ")]: 284 print(pattern.format(prefix + name, kind, suffix)) 285 print(("{:>4s} " + name_format + " {:>10s} {:>10s} {:>14s} {:>14s}").format( 286 "k", "pkg", "build_deps", "dev_deps", "all_build_deps", "all_dev_deps")) 287 all_pkgs = set() 288 all_build_deps = set() 289 all_dev_deps = set() 290 k = 0 291 prev_all_build_deps = set() 292 prev_all_dev_deps = set() 293 for (pkg, build_deps, dev_deps) in found_pkgs: 294 all_pkgs.add(pkg) 295 all_build_deps = all_build_deps.union(build_deps).difference(all_pkgs) 296 all_dev_deps = all_dev_deps.union(dev_deps).difference(all_pkgs) 297 k += 1 298 print(("{:4d} " + name_format + " {:10d} {:10d} {:14d} {:14d}").format( 299 k, pkg, len(build_deps), len(dev_deps), len(all_build_deps), 300 len(all_dev_deps))) 301 if prev_all_build_deps != all_build_deps: 302 echo_all_deps(args, "all_build_deps", all_build_deps) 303 prev_all_build_deps = all_build_deps 304 if prev_all_dev_deps != all_dev_deps: 305 echo_all_deps(args, "all_dev_deps", all_dev_deps) 306 prev_all_dev_deps = all_dev_deps 307 print("\nNOTE: from all {} package(s):{}".format( 308 len(all_pkgs), set2list(all_pkgs))) 309 for (kind, deps) in [("non-dev-", all_build_deps), ("", all_dev_deps)]: 310 if deps: 311 print("NOTE: found {:3d} other {}dependent package(s):{}".format( 312 len(deps), kind, set2list(deps))) 313 314 315def crates_io_find_pkgs(args, pkgs, found_pkgs): 316 """Call crates.io api to find direct dependent packages.""" 317 success = True 318 for pkg in sorted(pkgs): 319 ok, build_deps, dev_deps = get_crate_dependencies(args, pkg) 320 if not ok: 321 success = False 322 else: 323 found_pkgs.append((pkg, build_deps, dev_deps)) 324 return success 325 326 327def add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg): 328 """Add reachable non-dev dependencies to all_deps[pkg]'s dependencies.""" 329 if pkg not in all_deps: 330 ok, build_deps, dev_deps = get_crate_dependencies(args, pkg) 331 if not ok: 332 return set() 333 all_deps[pkg] = (pkg, build_deps, dev_deps) 334 else: 335 (_, build_deps, dev_deps) = all_deps[pkg] 336 if pkg in visited: 337 return build_deps 338 visited.add(pkg) 339 340 for p in sorted(build_deps): 341 if p not in core_pkgs and pkg in core_pkgs: 342 core_pkgs.add(p) 343 deps = add_non_dev_dependencies(args, all_deps, core_pkgs, visited, p) 344 build_deps = build_deps.union(deps) 345 if pkg in core_pkgs: 346 for p in sorted(dev_deps): 347 deps = add_non_dev_dependencies(args, all_deps, core_pkgs, visited, p) 348 dev_deps = dev_deps.union(deps) 349 all_deps[pkg] = (pkg, build_deps, dev_deps) 350 return build_deps 351 352 353def add_indirect_build_deps(args, found_pkgs): 354 """Add all indirect dependencies and return a new found_pkgs.""" 355 all_deps = dict(map(lambda t: (t[0], t), found_pkgs)) 356 core_pkgs = set(map(lambda t: t[0], found_pkgs)) 357 dev_pkgs = set() 358 dump_pkgs(args, "BEFORE", all_deps, core_pkgs, dev_pkgs) 359 visited = set() 360 for pkg in sorted(core_pkgs): 361 add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg) 362 # Revisit core packages to add build_deps into core_pkgs and other deps. 363 check_again = True 364 while check_again: 365 pattern = "Revisiting {} core packages:{}" 366 echo(args, pattern.format(len(core_pkgs), set2list(core_pkgs))) 367 check_again = False 368 visited = set() 369 num_core_pkgs = len(core_pkgs) 370 for pkg in sorted(core_pkgs): 371 (_, build_deps, dev_deps) = all_deps[pkg] 372 (num_build_deps, num_dev_deps) = (len(build_deps), len(dev_deps)) 373 add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg) 374 (_, build_deps, dev_deps) = all_deps[pkg] 375 (new_num_build_deps, new_num_dev_deps) = (len(build_deps), len(dev_deps)) 376 # If build_deps, dev_deps, or core_pkgs was changed, check again. 377 if (num_build_deps != new_num_build_deps or 378 num_dev_deps != new_num_dev_deps or num_core_pkgs != len(core_pkgs)): 379 check_again = True 380 dev_pkgs = visited.difference(core_pkgs) 381 dump_pkgs(args, "AFTER", all_deps, core_pkgs, dev_pkgs) 382 found_pkgs = list(map(lambda p: all_deps[p], core_pkgs)) 383 found_pkgs.sort(key=lambda t: t[0]) 384 return found_pkgs 385 386 387def echo_dump_found_pkgs(args, msg, all_deps, pkgs): 388 if not args.v or not pkgs: 389 return 390 echo(args, msg) 391 for pkg in sorted(pkgs): 392 (_, build_deps, dev_deps) = all_deps[pkg] 393 for (name, deps) in [("normal", build_deps), ("dev", dev_deps)]: 394 pattern = " {} has {} " + name + " deps:{}" 395 echo(args, pattern.format(pkg, len(deps), set2list(deps))) 396 397 398def dump_pkgs(args, msg, all_deps, core_pkgs, dev_pkgs): 399 echo_dump_found_pkgs(args, msg + " core_pkgs:", all_deps, core_pkgs) 400 echo_dump_found_pkgs(args, msg + " other_dev_pkgs:", all_deps, dev_pkgs) 401 402 403def show_dependencies(args): 404 """Calls crates.io api to find dependent packages; returns True on success.""" 405 all_pkgs = set(map(lambda p: pkg_base_name(p)[0], args.pkgs)) 406 if "" in all_pkgs: 407 # TODO(chh): detect and report ill formed names in args.pkgs 408 print("WARNING: skip some ill formatted pkg arguments.") 409 all_pkgs = all_pkgs.remove("") 410 if not all_pkgs: 411 print("ERROR: no valid pkg names to show dependencies.") 412 return False 413 pkgs = sorted(all_pkgs) 414 found_pkgs = [] 415 success = crates_io_find_pkgs(args, pkgs, found_pkgs) 416 if not found_pkgs: 417 return False 418 # All normal (non-dev) dependent packages will be added into found_pkgs. 419 found_pkgs = add_indirect_build_deps(args, found_pkgs) 420 show_all_dependencies(args, found_pkgs) 421 return success 422 423 424def main(): 425 args = parse_args() 426 if args.show: 427 # only show dependencies, not to fetch any package 428 if not show_dependencies(args): 429 sys.exit(2) 430 return 431 echo(args, "to fetch packages = {}".format(args.pkgs)) 432 success = True 433 for pkg in args.pkgs: 434 echo(args, "trying to fetch package '{}'".format(pkg)) 435 try: 436 if not fetch_pkg(args, pkg, find_dl_path(args, pkg)): 437 success = False 438 except urllib.error.HTTPError: 439 print("ERROR: unknown package '{}'".format(pkg)) 440 success = False 441 if not success: 442 sys.exit(1) 443 444 445if __name__ == "__main__": 446 main() 447