1#!/usr/bin/env python3
2#
3# Copyright (C) 2020 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16"""Add files to a Rust package for third party review."""
17
18import collections
19import datetime
20import enum
21import glob
22import json
23import os
24import pathlib
25import re
26
# Patterns to match keys in Cargo.toml.
# Every pattern is anchored at the start of the line, so an indented key
# is not matched.
NAME_PATTERN = r"^name *= *\"(.+)\""
NAME_MATCHER = re.compile(NAME_PATTERN)
VERSION_PATTERN = r"^version *= *\"(.+)\""
VERSION_MATCHER = re.compile(VERSION_PATTERN)
# Unlike the other patterns, the description group keeps the surrounding
# double quotes.
# NOTE: This description one-liner pattern fails to match
# multi-line descriptions in some Rust crates, e.g. shlex.
DESCRIPTION_PATTERN = r"^description *= *(\".+\")"
DESCRIPTION_MATCHER = re.compile(DESCRIPTION_PATTERN)
LICENSE_PATTERN = r"^license *= *\"(.+)\""
LICENSE_MATCHER = re.compile(LICENSE_PATTERN)
38
# Patterns to match year/month/day in METADATA.
# YMD_PATTERN matches a single indented "year:", "month:" or "day:" field.
YMD_PATTERN = r"^ +(year|month|day): (.+)$"
YMD_MATCHER = re.compile(YMD_PATTERN)
# YMD_LINE_PATTERN matches all three fields when they are on one line.
YMD_LINE_PATTERN = r"^.* year: *([^ ]+) +month: *([^ ]+) +day: *([^ ]+).*$"
YMD_LINE_MATCHER = re.compile(YMD_LINE_PATTERN)
44
# Patterns to match different license types in LICENSE*.
# Each matcher is applied with match() to one line of a license file at a
# time, so every pattern starts with "^.*" to allow the keyword anywhere in
# the line and ends with ".*$" to allow any text after it.
APACHE_PATTERN = r"^.*Apache License.*$"
APACHE_MATCHER = re.compile(APACHE_PATTERN)
# "^.*" (not "^.") so that a line starting with "Boost" is matched too.
BOOST_PATTERN = r"^.*Boost Software License.*Version 1.0.*$"
BOOST_MATCHER = re.compile(BOOST_PATTERN)
MIT_PATTERN = r"^.*MIT License.*$"
MIT_MATCHER = re.compile(MIT_PATTERN)
BSD_PATTERN = r"^.*BSD .*License.*$"
BSD_MATCHER = re.compile(BSD_PATTERN)
# "^.*" (not "^.") so that a line starting with "Mozilla" is matched too.
MPL_PATTERN = r"^.*Mozilla Public License.*$"
MPL_MATCHER = re.compile(MPL_PATTERN)
UNLICENSE_PATTERN = r"^.*unlicense\.org.*$"
UNLICENSE_MATCHER = re.compile(UNLICENSE_PATTERN)
ZERO_BSD_PATTERN = r"^.*Zero-Clause BSD.*$"
ZERO_BSD_MATCHER = re.compile(ZERO_BSD_PATTERN)
# ".*$" (not ".$") so that "zlib License" at the end of a line is matched.
ZLIB_PATTERN = r"^.*zlib License.*$"
ZLIB_MATCHER = re.compile(ZLIB_PATTERN)
# Comment put in front of license_type in METADATA for multi-licensed crates.
# It ends with a newline and two spaces so that the text following it starts
# on a new, indented line.
MULTI_LICENSE_COMMENT = ("# Dual-licensed, using the least restrictive "
        "per go/thirdpartylicenses#same.\n  ")
64
# Default owners added to OWNERS. The trailing newline is included because
# this string is written to the OWNERS file as is.
DEFAULT_OWNERS = "include platform/prebuilts/rust:main:/OWNERS\n"
67
# See b/159487435 Official policy for rust imports METADATA URLs.
# "license_type: NOTICE" might be optional,
# but it is already used in most rust crate METADATA.
# This line format should match the output of external_updater.
#
# The template is filled in with str.format(): doubled braces ("{{", "}}")
# produce literal braces and single-brace names are replacement fields.
# {description} is inserted without adding quotes around it.
# {license_comment} is either empty or a comment line that ends with a
# newline and indentation, so license_type stays on its own line.
METADATA_CONTENT = """name: "{name}"
description: {description}
third_party {{
  identifier {{
    type: "crates.io"
    value: "{name}"
  }}
  identifier {{
    type: "Archive"
    value: "https://static.crates.io/crates/{name}/{name}-{version}.crate"
    primary_source: true
  }}
  version: "{version}"
  {license_comment}license_type: {license_type}
  last_upgrade_date {{
    year: {year}
    month: {month}
    day: {day}
  }}
}}
"""
93
94
def get_metadata_date():
  """Return the date to record as last_upgrade_date.

  An existing METADATA file in the current directory is scanned for year,
  month and day values, written either one per line or all on one line.
  When all three are found they are reused, so that normalizing an existing
  METADATA does not change its last_upgrade_date. Otherwise today's date is
  returned.

  Returns:
    A (year, month, day) tuple of integers.
  """
  parts = {"year": "", "month": "", "day": ""}
  if os.path.exists("METADATA"):
    with open("METADATA", "r") as metadata:
      for text in metadata:
        single = YMD_MATCHER.match(text)
        if single:
          # group(1) is the field name, group(2) its value.
          parts[single.group(1)] = single.group(2)
          continue
        combined = YMD_LINE_MATCHER.match(text)
        if combined:
          parts["year"], parts["month"], parts["day"] = combined.group(1, 2, 3)
  year, month, day = parts["year"], parts["month"], parts["day"]
  if not (year and month and day):
    today = datetime.date.today()
    return today.year, today.month, today.day
  print("### Reuse date in METADATA:", year, month, day)
  return int(year), int(month), int(day)
120
121
def add_metadata(name, version, description, license_group, multi_license):
  """Write the METADATA file, replacing an existing one.

  Args:
    name: crate name.
    version: crate version.
    description: crate description, already quoted for METADATA.
    license_group: text written as the license_type value.
    multi_license: if true, a multi-license comment is written in front of
      license_type.
  """
  print("### Updating METADATA" if os.path.exists("METADATA")
        else "### Adding METADATA")
  # The date must be read before METADATA is opened for writing, because
  # opening it truncates the old content.
  year, month, day = get_metadata_date()
  fields = {
      "name": name,
      "description": description,
      "version": version,
      "license_comment": MULTI_LICENSE_COMMENT if multi_license else "",
      "license_type": license_group,
      "year": year,
      "month": month,
      "day": day,
  }
  with open("METADATA", "w") as outf:
    outf.write(METADATA_CONTENT.format(**fields))
136
137
def grep_license_keyword(license_file):
  """Guess the license of a file from familiar phrases in its text.

  The file is read line by line. For each line the known patterns are tried
  in a fixed order, and the first pattern that matches decides the result.

  Args:
    license_file: path of the license file to read.

  Returns:
    A License for license_file. If no pattern matches any line, an error is
    printed and a BSD_LIKE license in the NOTICE group is returned.
  """
  # Order matters: the first matching entry wins for a given line.
  checks = (
      (APACHE_MATCHER, LicenseType.APACHE2, LicenseGroup.NOTICE),
      (BOOST_MATCHER, LicenseType.BOOST, LicenseGroup.NOTICE),
      (MIT_MATCHER, LicenseType.MIT, LicenseGroup.NOTICE),
      (BSD_MATCHER, LicenseType.BSD_LIKE, LicenseGroup.NOTICE),
      (MPL_MATCHER, LicenseType.MPL, LicenseGroup.RECIPROCAL),
      (UNLICENSE_MATCHER, LicenseType.UNLICENSE, LicenseGroup.PERMISSIVE),
      (ZERO_BSD_MATCHER, LicenseType.ZERO_BSD, LicenseGroup.PERMISSIVE),
      (ZLIB_MATCHER, LicenseType.ZLIB, LicenseGroup.NOTICE),
  )
  with open(license_file, "r") as text:
    for current in text:
      for matcher, kind, group in checks:
        if matcher.match(current):
          return License(kind, group, license_file)
  print("ERROR: cannot decide license type in", license_file,
        "assume BSD_LIKE")
  return License(LicenseType.BSD_LIKE, LicenseGroup.NOTICE, license_file)
161
162
class LicenseType(enum.IntEnum):
  """A type of license.

  An IntEnum is used to be able to sort by preference. This is mainly the case
  for dual-licensed Apache/MIT code, for which we prefer the Apache license.
  The enum name is used to generate the corresponding MODULE_LICENSE_* file.
  """
  # Lower values sort first; the first license after sorting is the
  # preferred one.
  APACHE2 = 1
  MIT = 2
  BSD_LIKE = 3
  ISC = 4
  MPL = 5
  ZERO_BSD = 6
  UNLICENSE = 7
  ZLIB = 8
  BOOST = 9
179
class LicenseGroup(enum.Enum):
  """A group of licenses as defined by go/thirdpartylicenses#types.

  Note, go/thirdpartylicenses#types calls them "types". But LicenseType was
  already taken so this script calls them groups.
  """
  # The member name is the text written as license_type in METADATA.
  RESTRICTED = 1
  RESTRICTED_IF_STATICALLY_LINKED = 2
  RECIPROCAL = 3
  NOTICE = 4
  PERMISSIVE = 5
  BY_EXCEPTION_ONLY = 6
192
193
# A license found for the crate: its LicenseType, its LicenseGroup and the
# name of the file it was found in.
License = collections.namedtuple("License", "type group filename")
195
196
def decide_license_type(cargo_license):
  """Check LICENSE*, COPYING* and UNLICENSE* files to find the license type.

  Files with a well-known name (compared in lower case, without extension)
  decide the license by name. If there is none, the license string from
  Cargo.toml is searched for known keywords, and as a last resort the text
  of the last file found is searched.

  Args:
    cargo_license: the license string found in Cargo.toml, may be empty.

  Returns:
    A list of Licenses. The first element is the license we prefer.

  Raises:
    FileNotFoundError: if no license file exists in the current directory.
  """
  # Most crates.io packages have both APACHE and MIT.
  # Some crate like time-macros-impl uses lower case names like LICENSE-Apache.
  by_file_name = {
      "license-apache": (LicenseType.APACHE2, LicenseGroup.NOTICE),
      "license-boost": (LicenseType.BOOST, LicenseGroup.NOTICE),
      "license-bsd": (LicenseType.BSD_LIKE, LicenseGroup.NOTICE),
      "license-mit": (LicenseType.MIT, LicenseGroup.NOTICE),
      "license-0bsd": (LicenseType.ZERO_BSD, LicenseGroup.PERMISSIVE),
      "license-zlib": (LicenseType.ZLIB, LicenseGroup.NOTICE),
      "unlicense": (LicenseType.UNLICENSE, LicenseGroup.PERMISSIVE),
  }
  # Checked in this order; "0BSD" has to come before "BSD".
  by_cargo_keyword = (
      ("Apache", LicenseType.APACHE2, LicenseGroup.NOTICE),
      ("BSL", LicenseType.BOOST, LicenseGroup.NOTICE),
      ("MIT", LicenseType.MIT, LicenseGroup.NOTICE),
      ("0BSD", LicenseType.ZERO_BSD, LicenseGroup.PERMISSIVE),
      ("BSD", LicenseType.BSD_LIKE, LicenseGroup.NOTICE),
      ("ISC", LicenseType.ISC, LicenseGroup.NOTICE),
      ("MPL", LicenseType.MPL, LicenseGroup.RECIPROCAL),
      ("Unlicense", LicenseType.UNLICENSE, LicenseGroup.PERMISSIVE),
      ("Zlib", LicenseType.ZLIB, LicenseGroup.NOTICE),
  )
  candidates = (glob.glob("LICENSE*") + glob.glob("COPYING*")
                + glob.glob("UNLICENSE*"))
  recognized = []
  # After the loop, license_file is the last file found, or None.
  license_file = None
  for license_file in candidates:
    stem, _ = os.path.splitext(license_file.lower())
    entry = by_file_name.get(stem)
    if entry is not None:
      recognized.append(License(entry[0], entry[1], license_file))
  if recognized:
    return sorted(recognized, key=lambda found: found.type)
  if not license_file:
    raise FileNotFoundError("No license file has been found.")
  # There is a LICENSE* or COPYING* file, use cargo_license found in
  # Cargo.toml.
  for keyword, kind, group in by_cargo_keyword:
    if keyword in cargo_license:
      return [License(kind, group, license_file)]
  return [grep_license_keyword(license_file)]
248
249
def add_notice():
  """Create a NOTICE link to LICENSE if there is no NOTICE yet.

  Nothing is changed when NOTICE already exists. When neither NOTICE nor
  LICENSE exists, an error is printed.
  """
  # lexists() is also true for a dangling NOTICE symlink. exists() is false
  # for it, and os.symlink() would then fail with FileExistsError.
  if os.path.lexists("NOTICE"):
    return
  if os.path.exists("LICENSE"):
    os.symlink("LICENSE", "NOTICE")
    print("Created link from NOTICE to LICENSE")
  else:
    print("ERROR: missing NOTICE and LICENSE")
257
258
def check_license_link(target):
  """Check that LICENSE is a link to the given target.

  A link to "LICENSE.txt" is accepted as well. Problems are only printed.

  Args:
    target: the file name the LICENSE link is expected to point to.
  """
  if os.path.islink("LICENSE"):
    actual = os.readlink("LICENSE")
    if actual in (target, "LICENSE.txt"):
      return
    print("ERROR: found LICENSE link to", actual,
          "but expected", target)
  else:
    print("ERROR: LICENSE file is not a link")
268
269
def add_license(target):
  """Add a LICENSE link to the given target, unless LICENSE already exists.

  An existing LICENSE link is only checked, and an existing regular LICENSE
  file is only reported. Neither is modified.

  Args:
    target: the file name the LICENSE link should point to.
  """
  # Test for a link first: os.path.exists() follows links and is false for a
  # dangling LICENSE link, in which case os.symlink() below would fail with
  # FileExistsError.
  if os.path.islink("LICENSE"):
    check_license_link(target)
    return
  if os.path.exists("LICENSE"):
    print("NOTE: found LICENSE and it is not a link.")
    return
  print("### Creating LICENSE link to", target)
  os.symlink(target, "LICENSE")
280
281
def add_module_license(license_type):
  """Touch the MODULE_LICENSE_<type> file for the given license type.

  An existing MODULE_LICENSE_* file is never changed. If one exists for the
  given type, nothing is done.

  Args:
    license_type: an enum member whose name is used as the file suffix.

  Raises:
    Exception: if a MODULE_LICENSE_* file of a different type exists.
  """
  # Do not change existing MODULE_* files.
  # The list has every suffix this script can create (ISC and ZERO_BSD were
  # missing before), plus the older names APACHE and 0BSD.
  known_suffixes = ["MIT", "APACHE", "APACHE2", "BSD_LIKE", "ISC", "MPL",
                    "0BSD", "ZERO_BSD", "UNLICENSE", "ZLIB", "BOOST"]
  wanted_suffix = license_type.name.upper()
  for suffix in known_suffixes:
    module_file = "MODULE_LICENSE_" + suffix
    if os.path.exists(module_file):
      if wanted_suffix != suffix:
        raise Exception("Found unexpected license " + module_file)
      return
  module_file = "MODULE_LICENSE_" + wanted_suffix
  pathlib.Path(module_file).touch()
  print("### Touched", module_file)
294
295
def found_line(file_name, line):
  """Return True if the given line is found in a file.

  Lines are compared without their trailing newline, so the last line of a
  file is found even when the file does not end with a newline.

  Args:
    file_name: path of the text file to search.
    line: the whole line to look for, with or without a trailing newline.
  """
  wanted = line.rstrip("\n")
  with open(file_name, "r") as input_file:
    return any(wanted == current.rstrip("\n") for current in input_file)
300
301
def add_owners():
  """Create OWNERS with the default owner line, or append the line to it.

  An existing OWNERS file that already has the default owner line is left
  unchanged.
  """
  # Existing OWNERS file might contain more than the default owners.
  # Only append missing default owners to existing OWNERS.
  if not os.path.isfile("OWNERS"):
    print("### Creating OWNERS with default owners")
    with open("OWNERS", "w") as outf:
      outf.write(DEFAULT_OWNERS)
    return
  if found_line("OWNERS", DEFAULT_OWNERS):
    print("### No change to OWNERS, which has already default owners.")
    return
  print("### Append default owners to OWNERS")
  with open("OWNERS", "r") as inf:
    existing = inf.read()
  # Without this, the default line would be glued to the last existing line
  # when OWNERS does not end with a newline.
  separator = "\n" if existing and not existing.endswith("\n") else ""
  with open("OWNERS", "a") as outf:
    outf.write(separator + DEFAULT_OWNERS)
318
319
def toml2json(line):
  """Convert a quoted toml string to a json quoted string for METADATA.

  Args:
    line: a toml string value including its surrounding double quotes.

  Returns:
    The text as a double-quoted json string with single quotes escaped, or
    "()" in quotes when the value starts a multi-line string.
  """
  if line[:3] == "\"\"\"":
    return "\"()\""  # cannot handle broken multi-line description
  # TOML string escapes: \b \t \n \f \r \" \\ (no unicode escape)
  # The replacements are applied in order. An escaped backslash is first
  # turned into a newline as a placeholder, so that it is not mistaken for
  # the start of another escape, and is turned into a backslash at the end.
  replacements = (
      ("\\\\", "\n"),
      ("\\b", ""),
      ("\\t", " "),
      ("\\n", " "),
      ("\\f", " "),
      ("\\r", ""),
      ("\\\"", "\""),
      ("\n", "\\"),
      # replace a unicode quotation mark, used in the libloading crate
      ("’", "'"),
  )
  text = line[1:-1]
  for old, new in replacements:
    text = text.replace(old, new)
  # strip and escape single quotes
  quoted = json.dumps(text.strip())
  return quoted.replace("'", "\\'")
332
333
def parse_cargo_toml(cargo):
  """Get name, version, description and license string from Cargo.toml.

  Only the first match of each key is used, and reading stops as soon as all
  four values are found.

  Args:
    cargo: path of the Cargo.toml file.

  Returns:
    A (name, version, description, cargo_license) tuple of strings. A value
    that was not found is an empty string. The description is converted with
    toml2json.
  """
  found = {"name": "", "version": "", "description": "", "license": ""}
  # Tried in this order for every line; at most one key is set per line.
  matchers = (
      ("name", NAME_MATCHER),
      ("version", VERSION_MATCHER),
      ("description", DESCRIPTION_MATCHER),
      ("license", LICENSE_MATCHER),
  )
  with open(cargo, "r") as toml:
    for text in toml:
      for key, matcher in matchers:
        if found[key]:
          continue
        hit = matcher.match(text)
        if hit:
          value = hit.group(1)
          if key == "description":
            value = toml2json(value)
          found[key] = value
          break
      if all(found.values()):
        break
  return (found["name"], found["version"], found["description"],
          found["license"])
353
354
def main():
  """Add 3rd party review files for the crate in the current directory.

  Reads Cargo.toml, decides the license, and then adds or updates METADATA,
  OWNERS, the LICENSE link and the MODULE_LICENSE_* file. Problems with
  Cargo.toml are printed and end the run early.
  """
  cargo = "Cargo.toml"
  problem = None
  if not os.path.isfile(cargo):
    problem = "is not found"
  elif not os.access(cargo, os.R_OK):
    problem = "is not readable"
  if problem:
    print("ERROR: ", cargo, problem)
    return
  name, version, description, cargo_license = parse_cargo_toml(cargo)
  if not (name and version and description):
    print("ERROR: Cannot find name, version, or description in", cargo)
    return
  print("### Cargo.toml license:", cargo_license)
  found_licenses = decide_license_type(cargo_license)
  # The first license in the list is the preferred one.
  chosen = found_licenses[0]
  has_several = len(found_licenses) > 1
  add_metadata(name, version, description, chosen.group.name, has_several)
  add_owners()
  add_license(chosen.filename)
  add_module_license(chosen.type)
  # It is unclear yet if a NOTICE file is required.
  # add_notice()
377
378
# Run main() only when this file is executed as a script.
if __name__ == "__main__":
  main()
381