#
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
import hashlib
import argparse
import zipfile
import fnmatch
import tempfile

# Number of bytes hashed per read in sha1sum().
_READ_CHUNK_SIZE = 1 << 20


def silent_call(cmd):
    """Runs `cmd` with stdout/stderr discarded.

    Args:
      cmd: The command as an argument list.

    Returns:
      True if the command exited with status 0, False otherwise.
    """
    return subprocess.call(
        cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) == 0


def sha1sum(f):
    """Returns the hex SHA-1 digest of the content of file `f`."""
    digest = hashlib.sha1()
    with open(f, 'rb') as fin:
        # Hash in chunks so large files are not loaded into memory at once.
        for chunk in iter(lambda: fin.read(_READ_CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()


def sha1sum_without_signing_key(filepath):
    """Returns a SHA-1 digest of a zip/APK that ignores 'META-INF/' entries.

    The digest is computed over the comma-joined sequence of
    (entry content sha1, entry name) for every entry, in sorted name order,
    whose name does not start with 'META-INF/'.

    Args:
      filepath: Path of the zip/APK file.

    Returns:
      The hex SHA-1 digest described above.
    """
    parts = []
    # The context manager closes the archive even if reading an entry fails.
    with zipfile.ZipFile(filepath) as apk:
        for name in sorted(apk.namelist()):
            if name.startswith('META-INF/'):
                continue
            parts.append(hashlib.sha1(apk.read(name)).hexdigest())
            parts.append(name)
    return hashlib.sha1(",".join(parts).encode()).hexdigest()


def strip_and_sha1sum(filepath):
    """Returns the SHA-1 of `filepath` after stripping it with llvm-strip.

    llvm-strip is asked to strip all symbols and remove the
    .note.gnu.build-id section. The stripped copy is written to a temporary
    directory so that the input directory may be read-only. If llvm-strip
    fails, the digest of the unmodified file is returned instead.

    Args:
      filepath: Path of the file to hash.

    Returns:
      The hex SHA-1 digest of the stripped file, or of the original file when
      stripping fails.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        stripped_path = os.path.join(tmp_dir, 'stripped.no-build-id')
        stripped_ok = silent_call(
            ["llvm-strip", "--strip-all", "--keep-section=.ARM.attributes",
             "--remove-section=.note.gnu.build-id", filepath,
             "-o", stripped_path])
        if stripped_ok:
            return sha1sum(stripped_path)
    return sha1sum(filepath)


def make_filter_from_whitelists(whitelists, all_targets):
    """Creates a callable filter from a list of whitelist files.

    Whitelist can contain pathname patterns or ignored lines. Pathnames are
    case insensitive.

    For example, this ignores the file "system/build.prop":
      SYSTEM/build.prop

    This ignores txt files:
      *.txt

    This ignores files in directory "system/dontcare/"
      SYSTEM/dontcare/*

    This ignores lines prefixed with pat1 or pat2 in file "system/build.prop":
      SYSTEM/build.prop=pat1 pat2

    Whitelist paths that are not existing files are skipped.

    Args:
      whitelists: A list of whitelist filenames.
      all_targets: A list of targets to compare.

    Returns:
      A callable object that accepts a file pathname and returns True if the
      file is ignored by the whitelists and False when it is not.
    """
    ignored_patterns = set()
    ignored_lines = defaultdict(list)
    for whitelist in whitelists:
        if not os.path.isfile(whitelist):
            continue
        with open(whitelist, 'rb') as f:
            for line in f:
                pat = line.strip().decode()
                # Drop trailing backslashes (line-continuation markers).
                if pat and pat[-1] == '\\':
                    pat = pat.rstrip('\\')
                if '=' in pat:
                    filename, prefixes = pat.split('=', 1)
                    prefixes = prefixes.split()
                    if prefixes:
                        ignored_lines[filename.lower()].extend(prefixes)
                elif pat:
                    ignored_patterns.add(pat.lower())

    def diff_with_ignored_lines(filename, prefixes):
        """Compares sha1 digest of file while ignoring lines.

        Args:
          filename: File to compare among each target.
          prefixes: A list of prefixes. Lines that start with prefix are
            ignored.

        Returns:
          True if file is identical among each target.
        """
        # Compare raw bytes so that lines which are not valid UTF-8 do not
        # abort the comparison; for UTF-8 text this matches str.startswith.
        byte_prefixes = tuple(prefix.encode() for prefix in prefixes)
        digests = set()
        for target in all_targets:
            pathname = os.path.join(target, filename)
            if not os.path.isfile(pathname):
                return False
            sha1 = hashlib.sha1()
            with open(pathname, 'rb') as f:
                for line in f:
                    if not line.startswith(byte_prefixes):
                        sha1.update(line)
            digests.add(sha1.hexdigest())
        # Every target contributed a digest; identical means exactly one.
        return len(digests) == 1

    def whitelist_filter(filename):
        norm_filename = filename.lower()
        for pattern in ignored_patterns:
            if fnmatch.fnmatch(norm_filename, pattern):
                return True
        if norm_filename in ignored_lines:
            ignored_prefixes = ignored_lines[norm_filename]
            return diff_with_ignored_lines(filename, ignored_prefixes)
        return False

    return whitelist_filter


def _write_report(report_path, rows):
    """Writes the CSV header followed by the pre-formatted `rows`."""
    with open(report_path, 'w') as fout:
        fout.write("filename, sha1sum, targets\n")
        fout.writelines(rows)


def main(all_targets, search_paths, whitelists, ignore_signing_key=False):
    """Compares files among targets and writes three CSV reports.

    Every regular file found under each search path of each target is hashed.
    Files with a single digest shared by all targets go to "common.csv";
    other files go to "whitelisted_diff.csv" when the whitelist filter accepts
    them and to "diff.csv" otherwise. Reports are written to the current
    working directory.

    Args:
      all_targets: A list of target directories to compare.
      search_paths: A list of sub-directories, relative to each target, to
        search recursively.
      whitelists: A list of whitelist filenames.
      ignore_signing_key: If True, files ending in '.apk' are hashed without
        their 'META-INF/' entries.
    """
    def run(path):
        # llvm-objdump succeeding is used as the "is a native binary" test.
        is_native_component = silent_call(["llvm-objdump", "-a", path])
        is_apk = path.endswith('.apk')
        if is_native_component:
            return strip_and_sha1sum(path)
        elif is_apk and ignore_signing_key:
            return sha1sum_without_signing_key(path)
        else:
            return sha1sum(path)

    # artifact_sha1_target_map[filename][sha1] = list of targets
    artifact_sha1_target_map = defaultdict(lambda: defaultdict(list))
    for target in all_targets:
        paths = []
        for search_path in search_paths:
            for path in Path(target, search_path).glob('**/*'):
                if path.exists() and not path.is_dir():
                    paths.append((str(path), str(path.relative_to(target))))

        target_basename = os.path.basename(os.path.normpath(target))
        for path, filename in paths:
            sha1 = run(path)
            artifact_sha1_target_map[filename][sha1].append(target_basename)

    def pretty_print(sha1, filename, targets):
        return '{}, {}, {}\n'.format(filename, sha1[:10], ';'.join(targets))

    def is_common(sha1_target_map):
        # Common means: exactly one digest, and it was seen in every target.
        if len(sha1_target_map) != 1:
            return False
        (targets,) = sha1_target_map.values()
        return len(targets) == len(all_targets)

    whitelist_filter = make_filter_from_whitelists(whitelists, all_targets)

    common = []
    diff = []
    whitelisted_diff = []
    for filename, sha1_target_map in artifact_sha1_target_map.items():
        if is_common(sha1_target_map):
            bucket = common
        elif whitelist_filter(filename):
            bucket = whitelisted_diff
        else:
            bucket = diff
        for sha1, targets in sha1_target_map.items():
            bucket.append(pretty_print(sha1, filename, targets))

    _write_report("common.csv", sorted(common))
    _write_report("diff.csv", sorted(diff))
    _write_report("whitelisted_diff.csv", sorted(whitelisted_diff))


def main_with_zip(extracted_paths, args):
    """Extracts the search paths from each target's zip files, then compares.

    For every (target, extraction directory) pair, `unzip` is invoked on
    "<target>/*.zip", restricted to members under each search path.

    Args:
      extracted_paths: Directories to extract into, one per entry of
        `args.target` and in the same order.
      args: Parsed command line arguments (target, search_path, whitelist,
        ignore_signing_key).
    """
    for origin_path, tmp_path in zip(args.target, extracted_paths):
        unzip_cmd = ["unzip", "-qd", tmp_path,
                     os.path.join(origin_path, "*.zip")]
        unzip_cmd.extend([os.path.join(s, "*") for s in args.search_path])
        subprocess.call(unzip_cmd)
    main(extracted_paths, args.search_path, args.whitelist,
         args.ignore_signing_key)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="compare_images",
        usage="compare_images -t model1 model2 [model...] -s dir1 [dir...] "
              "[-i] [-u] [-p] [-w whitelist1] [-w whitelist2]")
    parser.add_argument("-t", "--target", nargs='+', required=True)
    parser.add_argument("-s", "--search_path", nargs='+', required=True)
    parser.add_argument("-i", "--ignore_signing_key", action='store_true')
    parser.add_argument("-u", "--unzip", action='store_true')
    parser.add_argument("-p", "--preserve_extracted_files",
                        action='store_true')
    parser.add_argument("-w", "--whitelist", action="append", default=[])
    args = parser.parse_args()
    if len(args.target) < 2:
        parser.error("The number of targets has to be at least two.")
    if args.unzip:
        if args.preserve_extracted_files:
            main_with_zip(args.target, args)
        else:
            with tempfile.TemporaryDirectory() as tmpdir:
                # Joining tmpdir with the raw target would discard tmpdir for
                # absolute targets and could escape it for '..' components.
                # Use "<index>/<basename>" instead: always inside tmpdir,
                # unique per target, and the basename main() uses as the
                # target label is unchanged.
                target_in_tmp = [
                    os.path.join(tmpdir, str(index),
                                 os.path.basename(os.path.normpath(t)))
                    for index, t in enumerate(args.target)]
                for p in target_in_tmp:
                    os.makedirs(p)
                main_with_zip(target_in_tmp, args)
    else:
        main(args.target, args.search_path, args.whitelist,
             args.ignore_signing_key)