#
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import collections
import fnmatch
import hashlib
import os
import pathlib
import subprocess
import tempfile
import zipfile


def silent_call(cmd):
  """Runs a command with output suppressed; returns True on exit status 0."""
  return subprocess.call(
      cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) == 0


def sha1sum(f):
  """Returns the SHA-1 hex digest of the whole file."""
  with open(f, "rb") as fin:
    return hashlib.sha1(fin.read()).hexdigest()


def sha1sum_without_signing_key(filepath):
  """Hashes an APK's entries while skipping META-INF/ signature files."""
  digests = []
  with zipfile.ZipFile(filepath) as apk:
    for name in sorted(apk.namelist()):
      if name.startswith("META-INF/"):
        continue
      digests.append(hashlib.sha1(apk.read(name)).hexdigest())
      digests.append(name)
  return hashlib.sha1(",".join(digests).encode()).hexdigest()


def strip_and_sha1sum(filepath):
  """Strips an ELF file, drops its GNU build ID, and returns the SHA-1 hash.

  Falls back to hashing the unmodified file if llvm-strip fails.
  """
  tmp_filepath = filepath + ".tmp.no-build-id"
  llvm_strip = [
      "llvm-strip", "--strip-all", "--keep-section=.ARM.attributes",
      "--remove-section=.note.gnu.build-id", filepath, "-o", tmp_filepath
  ]
  try:
    if silent_call(llvm_strip):
      return sha1sum(tmp_filepath)
    return sha1sum(filepath)
  finally:
    if os.path.exists(tmp_filepath):
      os.remove(tmp_filepath)


def make_filter_from_allowlists(allowlists, all_targets):
  """Creates a callable filter from a list of allowlist files.

  An allowlist can contain pathname patterns or per-file skipped-line
  prefixes; pathnames are case-insensitive. It can also contain
  single-line comments, which begin with #.

  For example, this ignores the file "system/build.prop":
    SYSTEM/build.prop
  This ignores txt files:
    *.txt
  This ignores files in the directory "system/dontcare/":
    SYSTEM/dontcare/*
  This ignores lines prefixed with pat1 or pat2 in the file
  "system/build.prop":
    SYSTEM/build.prop=pat1 pat2

  Args:
    allowlists: A list of allowlist filenames.
    all_targets: A list of targets to compare.

  Returns:
    A callable object that accepts a file pathname and returns True if the
    file is skipped by the allowlists, and False if it is not.
  """
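  # For reference, a hypothetical allowlist file could contain something like
  # the following (the property names below are illustrative examples only):
  #   # ignore odex files and the build timestamp properties
  #   *.odex
  #   SYSTEM/dontcare/*
  #   SYSTEM/build.prop=ro.build.date ro.build.version.incremental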
  skipped_patterns = set()
  skipped_lines = collections.defaultdict(list)
  for allowlist in allowlists:
    if not os.path.isfile(allowlist):
      continue
    with open(allowlist, "rb") as f:
      for line in f:
        pat = line.strip().decode()
        if pat.startswith("#"):
          continue
        if pat and pat[-1] == "\\":
          pat = pat.rstrip("\\")
        if "=" in pat:
          filename, prefixes = pat.split("=", 1)
          prefixes = prefixes.split()
          if prefixes:
            skipped_lines[filename.lower()].extend(prefixes)
        elif pat:
          skipped_patterns.add(pat.lower())

  def diff_with_skipped_lines(filename, prefixes):
    """Compares the SHA-1 digest of a file while ignoring some lines.

    Args:
      filename: File to compare among each target.
      prefixes: A list of prefixes. Lines that start with a prefix are
        skipped.

    Returns:
      True if the file is identical among each target.
    """
    file_digest_respect_ignore = []
    for target in all_targets:
      pathname = os.path.join(target, filename)
      if not os.path.isfile(pathname):
        return False
      sha1 = hashlib.sha1()
      with open(pathname, "rb") as f:
        for line in f:
          line_text = line.decode()
          if not any(line_text.startswith(prefix) for prefix in prefixes):
            sha1.update(line)
      file_digest_respect_ignore.append(sha1.hexdigest())
    return (len(file_digest_respect_ignore) == len(all_targets) and
            len(set(file_digest_respect_ignore)) == 1)

  def allowlist_filter(filename):
    norm_filename = filename.lower()
    for pattern in skipped_patterns:
      if fnmatch.fnmatch(norm_filename, pattern):
        return True
    if norm_filename in skipped_lines:
      skipped_prefixes = skipped_lines[norm_filename]
      return diff_with_skipped_lines(filename, skipped_prefixes)
    return False

  return allowlist_filter


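# A hypothetical use of the filter returned by make_filter_from_allowlists
# (the allowlist and target names below are examples, not real files):
#
#   skip = make_filter_from_allowlists(
#       ["allowlist.txt"], ["out/model1", "out/model2"])
#   skip("SYSTEM/etc/NOTICE.xml.gz")  # True when a skipped pattern matches
#   skip("SYSTEM/build.prop")         # for "=prefix" rules: True only when
#                                     # non-skipped lines match in every target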
def main(all_targets,
         search_paths,
         allowlists,
         ignore_signing_key=False,
         list_only=False):
  """Compares artifacts across targets and writes CSV reports.

  Writes common.csv, diff.csv, and allowlisted_diff.csv to the current
  working directory.
  """

  def run(path):
    is_executable_component = silent_call(["llvm-objdump", "-a", path])
    is_apk = path.endswith(".apk")
    if is_executable_component:
      return strip_and_sha1sum(path)
    elif is_apk and ignore_signing_key:
      return sha1sum_without_signing_key(path)
    else:
      return sha1sum(path)

  # artifact_sha1_target_map[filename][sha1] = list of targets
  artifact_sha1_target_map = collections.defaultdict(
      lambda: collections.defaultdict(list))
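  # For illustration (with made-up digests), the map could end up like:
  #   {"system/lib64/libfoo.so": {"3f786850e3...": ["model1"],
  #                               "89e6c98d92...": ["model2"]}}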
  for target in all_targets:
    paths = []
    for search_path in search_paths:
      for path in pathlib.Path(target, search_path).glob("**/*"):
        if path.exists() and not path.is_dir():
          paths.append((str(path), str(path.relative_to(target))))
    target_basename = os.path.basename(os.path.normpath(target))
    for path, filename in paths:
      sha1 = 0
      if not list_only:
        sha1 = run(path)
      artifact_sha1_target_map[filename][sha1].append(target_basename)

  def pretty_print(sha1, filename, targets, exclude_sha1):
    """Formats one CSV row, e.g. 'foo.so, 1a2b3c4d5e, model1;model2'."""
    if exclude_sha1:
      return "{}, {}\n".format(filename, ";".join(targets))
    return "{}, {}, {}\n".format(filename, sha1[:10], ";".join(targets))

  def is_common(sha1_target_map):
    # A file is common when it has exactly one digest shared by every target.
    if len(sha1_target_map) != 1:
      return False
    targets = next(iter(sha1_target_map.values()))
    return len(targets) == len(all_targets)

  allowlist_filter = make_filter_from_allowlists(allowlists, all_targets)
  common = []
  diff = []
  allowlisted_diff = []
  for filename, sha1_target_map in artifact_sha1_target_map.items():
    if is_common(sha1_target_map):
      for sha1, targets in sha1_target_map.items():
        common.append(pretty_print(sha1, filename, targets, list_only))
    else:
      if allowlist_filter(filename):
        for sha1, targets in sha1_target_map.items():
          allowlisted_diff.append(
              pretty_print(sha1, filename, targets, list_only))
      else:
        for sha1, targets in sha1_target_map.items():
          diff.append(pretty_print(sha1, filename, targets, list_only))
  common = sorted(common)
  diff = sorted(diff)
  allowlisted_diff = sorted(allowlisted_diff)
  header = "filename, sha1sum, targets\n"
  if list_only:
    header = "filename, targets\n"
  with open("common.csv", "w") as fout:
    fout.write(header)
    fout.writelines(common)
  with open("diff.csv", "w") as fout:
    fout.write(header)
    fout.writelines(diff)
  with open("allowlisted_diff.csv", "w") as fout:
    fout.write(header)
    fout.writelines(allowlisted_diff)


def main_with_zip(extracted_paths, main_args):
  """Extracts each target's zip archives, then compares the extracted files."""
  for origin_path, tmp_path in zip(main_args.target, extracted_paths):
    unzip_cmd = ["unzip", "-qd", tmp_path, os.path.join(origin_path, "*.zip")]
    unzip_cmd.extend([os.path.join(s, "*") for s in main_args.search_path])
    subprocess.call(unzip_cmd)
  main(
      extracted_paths,
      main_args.search_path,
      main_args.allowlist,
      main_args.ignore_signing_key,
      list_only=main_args.list_only)


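# Example invocation (the script name and paths below are hypothetical):
#   python3 compare_images.py -t out/dist/model1 out/dist/model2 \
#       -s system vendor -w allowlist.txt -i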
if __name__ == "__main__":
  parser = argparse.ArgumentParser(
      prog="compare_images",
      usage="compare_images -t model1 model2 [model...] -s dir1 [dir...] "
      "[-i] [-l] [-u] [-p] [-w allowlist1] [-w allowlist2]")
  parser.add_argument(
      "-t", "--target", nargs="+", required=True,
      help="Targets to compare (at least two)")
  parser.add_argument(
      "-s", "--search_path", nargs="+", required=True,
      help="Search paths relative to each target")
  parser.add_argument(
      "-i", "--ignore_signing_key", action="store_true",
      help="Ignore APK signing keys when comparing")
  parser.add_argument(
      "-l",
      "--list_only",
      action="store_true",
      help="Compare the file list only and ignore SHA-1 diffs")
  parser.add_argument(
      "-u", "--unzip", action="store_true",
      help="Unzip each target's zip files before comparing")
  parser.add_argument(
      "-p", "--preserve_extracted_files", action="store_true",
      help="Keep extracted files instead of using a temporary directory")
  parser.add_argument(
      "-w", "--allowlist", action="append", default=[],
      help="Allowlist file; can be repeated")
  args = parser.parse_args()
  if len(args.target) < 2:
    parser.error("The number of targets has to be at least two.")
  if args.unzip:
    if args.preserve_extracted_files:
      main_with_zip(args.target, args)
    else:
      with tempfile.TemporaryDirectory() as tmpdir:
        target_in_tmp = [os.path.join(tmpdir, t) for t in args.target]
        for p in target_in_tmp:
          os.makedirs(p)
        main_with_zip(target_in_tmp, args)
  else:
    main(
        args.target,
        args.search_path,
        args.allowlist,
        args.ignore_signing_key,
        list_only=args.list_only)