1#
2# Copyright (C) 2019 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16import os
17import subprocess
18import sys
19from collections import defaultdict
20from pathlib import Path
21import hashlib
22import argparse
23import zipfile
24import fnmatch
25import tempfile
26
27def silent_call(cmd):
28  return subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) == 0
29
30def sha1sum(f):
31  with open(f, 'rb') as fin:
32    return hashlib.sha1(fin.read()).hexdigest()
33
34def sha1sum_without_signing_key(filepath):
35  apk = zipfile.ZipFile(filepath)
36  l = []
37  for f in sorted(apk.namelist()):
38    if f.startswith('META-INF/'):
39      continue
40    l.append(hashlib.sha1(apk.read(f)).hexdigest())
41    l.append(f)
42  return hashlib.sha1(",".join(l).encode()).hexdigest()
43
44def strip_and_sha1sum(filepath):
45  # TODO: save striped file in tmp directory to support readonly directory.
46  tmp_filepath = filepath + '.tmp.no-build-id'
47  strip_all_and_remove_build_id = lambda: silent_call(
48      ["llvm-strip", "--strip-all", "--keep-section=.ARM.attributes",
49       "--remove-section=.note.gnu.build-id", filepath, "-o", tmp_filepath])
50  try:
51    if strip_all_and_remove_build_id():
52      return sha1sum(tmp_filepath)
53    else:
54      return sha1sum(filepath)
55  finally:
56    if os.path.exists(tmp_filepath):
57      os.remove(tmp_filepath)
58
59  return sha1sum(filepath)
60
61
62def make_filter_from_whitelists(whitelists, all_targets):
63  """Creates a callable filter from a list of whitelist files.
64
65  Whitelist can contain pathname patterns or ignored lines. Pathnames are case
66  insensitive.
67
68  For example, this ignores the file "system/build.prop":
69    SYSTEM/build.prop
70
71  This ignores txt files:
72    *.txt
73
74  This ignores files in directory "system/dontcare/"
75    SYSTEM/dontcare/*
76
77  This ignores lines prefixed with pat1 or pat2 in file "system/build.prop":
78    SYSTEM/build.prop=pat1 pat2
79
80  Args:
81    whitelists: A list of whitelist filenames.
82    all_targets: A list of targets to compare.
83
84  Returns:
85    A callable object that accepts a file pathname and returns True if the file
86    is ignored by the whitelists and False when it is not.
87  """
88  ignored_patterns = set()
89  ignored_lines = defaultdict(list)
90  for whitelist in whitelists:
91    if not os.path.isfile(whitelist):
92      continue
93    with open(whitelist, 'rb') as f:
94      for line in f:
95        pat = line.strip().decode()
96        if pat and pat[-1] == '\\':
97          pat = pat.rstrip('\\')
98        if '=' in pat:
99          filename, prefixes = pat.split('=', 1)
100          prefixes = prefixes.split()
101          if prefixes:
102            ignored_lines[filename.lower()].extend(prefixes)
103        elif pat:
104          ignored_patterns.add(pat.lower())
105
106  def diff_with_ignored_lines(filename, prefixes):
107    """Compares sha1 digest of file while ignoring lines.
108
109    Args:
110      filename: File to compare among each target.
111      prefixes: A list of prefixes. Lines that start with prefix are ignored.
112
113    Returns:
114      True if file is identical among each target.
115    """
116    file_digest_respect_ignore = []
117    for target in all_targets:
118      pathname = os.path.join(target, filename)
119      if not os.path.isfile(pathname):
120        return False
121      sha1 = hashlib.sha1()
122      with open(pathname, 'rb') as f:
123        for line in f:
124          line_text = line.decode()
125          if not any(line_text.startswith(prefix) for prefix in prefixes):
126            sha1.update(line)
127      file_digest_respect_ignore.append(sha1.hexdigest())
128    return (len(file_digest_respect_ignore) == len(all_targets) and
129            len(set(file_digest_respect_ignore)) == 1)
130
131  def whitelist_filter(filename):
132    norm_filename = filename.lower()
133    for pattern in ignored_patterns:
134      if fnmatch.fnmatch(norm_filename, pattern):
135        return True
136    if norm_filename in ignored_lines:
137      ignored_prefixes = ignored_lines[norm_filename]
138      return diff_with_ignored_lines(filename, ignored_prefixes)
139    return False
140
141  return whitelist_filter
142
143
144def main(all_targets, search_paths, whitelists, ignore_signing_key=False):
145  def run(path):
146    is_native_component = silent_call(["llvm-objdump", "-a", path])
147    is_apk = path.endswith('.apk')
148    if is_native_component:
149      return strip_and_sha1sum(path)
150    elif is_apk and ignore_signing_key:
151      return sha1sum_without_signing_key(path)
152    else:
153      return sha1sum(path)
154
155  # artifact_sha1_target_map[filename][sha1] = list of targets
156  artifact_sha1_target_map = defaultdict(lambda: defaultdict(list))
157  for target in all_targets:
158    paths = []
159    for search_path in search_paths:
160      for path in Path(target, search_path).glob('**/*'):
161        if path.exists() and not path.is_dir():
162          paths.append((str(path), str(path.relative_to(target))))
163
164    target_basename = os.path.basename(os.path.normpath(target))
165    for path, filename in paths:
166      sha1 = run(path)
167      artifact_sha1_target_map[filename][sha1].append(target_basename)
168
169  def pretty_print(sha1, filename, targets):
170    return '{}, {}, {}\n'.format(filename, sha1[:10], ';'.join(targets))
171
172  def is_common(sha1_target_map):
173    for sha1, targets in sha1_target_map.items():
174      return len(sha1_target_map) == 1 and len(targets) == len(all_targets)
175    return False
176
177  whitelist_filter = make_filter_from_whitelists(whitelists, all_targets)
178
179  common = []
180  diff = []
181  whitelisted_diff = []
182  for filename, sha1_target_map in artifact_sha1_target_map.items():
183    if is_common(sha1_target_map):
184      for sha1, targets in sha1_target_map.items():
185        common.append(pretty_print(sha1, filename, targets))
186    else:
187      if whitelist_filter(filename):
188        for sha1, targets in sha1_target_map.items():
189          whitelisted_diff.append(pretty_print(sha1, filename, targets))
190      else:
191        for sha1, targets in sha1_target_map.items():
192          diff.append(pretty_print(sha1, filename, targets))
193
194  common = sorted(common)
195  diff = sorted(diff)
196  whitelisted_diff = sorted(whitelisted_diff)
197
198  header = "filename, sha1sum, targets\n"
199
200  with open("common.csv", 'w') as fout:
201    fout.write(header)
202    fout.writelines(common)
203  with open("diff.csv", 'w') as fout:
204    fout.write(header)
205    fout.writelines(diff)
206  with open("whitelisted_diff.csv", 'w') as fout:
207    fout.write(header)
208    fout.writelines(whitelisted_diff)
209
210def main_with_zip(extracted_paths, args):
211  for origin_path, tmp_path in zip(args.target, extracted_paths):
212    unzip_cmd = ["unzip", "-qd", tmp_path, os.path.join(origin_path, "*.zip")]
213    unzip_cmd.extend([os.path.join(s, "*") for s in args.search_path])
214    subprocess.call(unzip_cmd)
215  main(extracted_paths, args.search_path, args.whitelist, args.ignore_signing_key)
216
217if __name__ == "__main__":
218  parser = argparse.ArgumentParser(prog="compare_images", usage="compare_images -t model1 model2 [model...] -s dir1 [dir...] [-i] [-u] [-p] [-w whitelist1] [-w whitelist2]")
219  parser.add_argument("-t", "--target", nargs='+', required=True)
220  parser.add_argument("-s", "--search_path", nargs='+', required=True)
221  parser.add_argument("-i", "--ignore_signing_key", action='store_true')
222  parser.add_argument("-u", "--unzip", action='store_true')
223  parser.add_argument("-p", "--preserve_extracted_files", action='store_true')
224  parser.add_argument("-w", "--whitelist", action="append", default=[])
225  args = parser.parse_args()
226  if len(args.target) < 2:
227    parser.error("The number of targets has to be at least two.")
228  if args.unzip:
229    if args.preserve_extracted_files:
230      main_with_zip(args.target, args)
231    else:
232      with tempfile.TemporaryDirectory() as tmpdir:
233        target_in_tmp = [os.path.join(tmpdir, t) for t in args.target]
234        for p in target_in_tmp:
235          os.makedirs(p)
236        main_with_zip(target_in_tmp, args)
237  else:
238    main(args.target, args.search_path, args.whitelist, args.ignore_signing_key)
239