# # Copyright (C) 2019 The Android Open Source Project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import subprocess import sys from collections import defaultdict from pathlib import Path import hashlib import argparse import zipfile import fnmatch import tempfile def silent_call(cmd): return subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) == 0 def sha1sum(f): with open(f, 'rb') as fin: return hashlib.sha1(fin.read()).hexdigest() def sha1sum_without_signing_key(filepath): apk = zipfile.ZipFile(filepath) l = [] for f in sorted(apk.namelist()): if f.startswith('META-INF/'): continue l.append(hashlib.sha1(apk.read(f)).hexdigest()) l.append(f) return hashlib.sha1(",".join(l).encode()).hexdigest() def strip_and_sha1sum(filepath): # TODO: save striped file in tmp directory to support readonly directory. tmp_filepath = filepath + '.tmp.no-build-id' strip_all_and_remove_build_id = lambda: silent_call( ["llvm-strip", "--strip-all", "--keep-section=.ARM.attributes", "--remove-section=.note.gnu.build-id", filepath, "-o", tmp_filepath]) try: if strip_all_and_remove_build_id(): return sha1sum(tmp_filepath) else: return sha1sum(filepath) finally: if os.path.exists(tmp_filepath): os.remove(tmp_filepath) return sha1sum(filepath) def make_filter_from_whitelists(whitelists, all_targets): """Creates a callable filter from a list of whitelist files. Whitelist can contain pathname patterns or ignored lines. Pathnames are case insensitive. For example, this ignores the file "system/build.prop": SYSTEM/build.prop This ignores txt files: *.txt This ignores files in directory "system/dontcare/" SYSTEM/dontcare/* This ignores lines prefixed with pat1 or pat2 in file "system/build.prop": SYSTEM/build.prop=pat1 pat2 Args: whitelists: A list of whitelist filenames. all_targets: A list of targets to compare. Returns: A callable object that accepts a file pathname and returns True if the file is ignored by the whitelists and False when it is not. """ ignored_patterns = set() ignored_lines = defaultdict(list) for whitelist in whitelists: if not os.path.isfile(whitelist): continue with open(whitelist, 'rb') as f: for line in f: pat = line.strip().decode() if pat and pat[-1] == '\\': pat = pat.rstrip('\\') if '=' in pat: filename, prefixes = pat.split('=', 1) prefixes = prefixes.split() if prefixes: ignored_lines[filename.lower()].extend(prefixes) elif pat: ignored_patterns.add(pat.lower()) def diff_with_ignored_lines(filename, prefixes): """Compares sha1 digest of file while ignoring lines. Args: filename: File to compare among each target. prefixes: A list of prefixes. Lines that start with prefix are ignored. Returns: True if file is identical among each target. """ file_digest_respect_ignore = [] for target in all_targets: pathname = os.path.join(target, filename) if not os.path.isfile(pathname): return False sha1 = hashlib.sha1() with open(pathname, 'rb') as f: for line in f: line_text = line.decode() if not any(line_text.startswith(prefix) for prefix in prefixes): sha1.update(line) file_digest_respect_ignore.append(sha1.hexdigest()) return (len(file_digest_respect_ignore) == len(all_targets) and len(set(file_digest_respect_ignore)) == 1) def whitelist_filter(filename): norm_filename = filename.lower() for pattern in ignored_patterns: if fnmatch.fnmatch(norm_filename, pattern): return True if norm_filename in ignored_lines: ignored_prefixes = ignored_lines[norm_filename] return diff_with_ignored_lines(filename, ignored_prefixes) return False return whitelist_filter def main(all_targets, search_paths, whitelists, ignore_signing_key=False): def run(path): is_native_component = silent_call(["llvm-objdump", "-a", path]) is_apk = path.endswith('.apk') if is_native_component: return strip_and_sha1sum(path) elif is_apk and ignore_signing_key: return sha1sum_without_signing_key(path) else: return sha1sum(path) # artifact_sha1_target_map[filename][sha1] = list of targets artifact_sha1_target_map = defaultdict(lambda: defaultdict(list)) for target in all_targets: paths = [] for search_path in search_paths: for path in Path(target, search_path).glob('**/*'): if path.exists() and not path.is_dir(): paths.append((str(path), str(path.relative_to(target)))) target_basename = os.path.basename(os.path.normpath(target)) for path, filename in paths: sha1 = run(path) artifact_sha1_target_map[filename][sha1].append(target_basename) def pretty_print(sha1, filename, targets): return '{}, {}, {}\n'.format(filename, sha1[:10], ';'.join(targets)) def is_common(sha1_target_map): for sha1, targets in sha1_target_map.items(): return len(sha1_target_map) == 1 and len(targets) == len(all_targets) return False whitelist_filter = make_filter_from_whitelists(whitelists, all_targets) common = [] diff = [] whitelisted_diff = [] for filename, sha1_target_map in artifact_sha1_target_map.items(): if is_common(sha1_target_map): for sha1, targets in sha1_target_map.items(): common.append(pretty_print(sha1, filename, targets)) else: if whitelist_filter(filename): for sha1, targets in sha1_target_map.items(): whitelisted_diff.append(pretty_print(sha1, filename, targets)) else: for sha1, targets in sha1_target_map.items(): diff.append(pretty_print(sha1, filename, targets)) common = sorted(common) diff = sorted(diff) whitelisted_diff = sorted(whitelisted_diff) header = "filename, sha1sum, targets\n" with open("common.csv", 'w') as fout: fout.write(header) fout.writelines(common) with open("diff.csv", 'w') as fout: fout.write(header) fout.writelines(diff) with open("whitelisted_diff.csv", 'w') as fout: fout.write(header) fout.writelines(whitelisted_diff) def main_with_zip(extracted_paths, args): for origin_path, tmp_path in zip(args.target, extracted_paths): unzip_cmd = ["unzip", "-qd", tmp_path, os.path.join(origin_path, "*.zip")] unzip_cmd.extend([os.path.join(s, "*") for s in args.search_path]) subprocess.call(unzip_cmd) main(extracted_paths, args.search_path, args.whitelist, args.ignore_signing_key) if __name__ == "__main__": parser = argparse.ArgumentParser(prog="compare_images", usage="compare_images -t model1 model2 [model...] -s dir1 [dir...] [-i] [-u] [-p] [-w whitelist1] [-w whitelist2]") parser.add_argument("-t", "--target", nargs='+', required=True) parser.add_argument("-s", "--search_path", nargs='+', required=True) parser.add_argument("-i", "--ignore_signing_key", action='store_true') parser.add_argument("-u", "--unzip", action='store_true') parser.add_argument("-p", "--preserve_extracted_files", action='store_true') parser.add_argument("-w", "--whitelist", action="append", default=[]) args = parser.parse_args() if len(args.target) < 2: parser.error("The number of targets has to be at least two.") if args.unzip: if args.preserve_extracted_files: main_with_zip(args.target, args) else: with tempfile.TemporaryDirectory() as tmpdir: target_in_tmp = [os.path.join(tmpdir, t) for t in args.target] for p in target_in_tmp: os.makedirs(p) main_with_zip(target_in_tmp, args) else: main(args.target, args.search_path, args.whitelist, args.ignore_signing_key)