#!/usr/bin/env python
"""A small program to compute checksums of LLVM checkout.

Run as a script it either prints the checksums of every project found under
llvm_path, or (with --check) compares them against a reference file that was
produced by a previous run.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import hashlib
import logging
import os
import re
import sys
from argparse import ArgumentParser
from project_tree import *

# Matches expanded svn keywords such as "$Date: ... $". It is a bytes pattern
# because file contents are read in binary mode before being hashed.
SVN_DATES_REGEX = re.compile(br"\$(Date|LastChangedDate)[^\$]+\$")

# Replacement for every SVN_DATES_REGEX match.
# NOTE: this is "$", the byte 0x01, "$" -- exactly what the historical
# non-raw literal "$\1$" evaluated to (it never was a regex backreference).
# It is kept byte-for-byte so that reference checksums written by earlier
# runs still match.
_SVN_DATES_REPLACEMENT = b"$\x01$"


def main():
    """Parse the command line, compute checksums and print or check them.

    Without --check the checksums are written to stdout and the process
    exits with status 0. With --check the process exits with status 1 when
    the computed checksums do not match the reference file.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="enable debug logging")
    parser.add_argument(
        "-c",
        "--check",
        metavar="reference_file",
        help="read checksums from reference_file and " +
        "check they match checksums of llvm_path.")
    parser.add_argument(
        "--partial",
        action="store_true",
        help="ignore projects from reference_file " +
        "that are not checked out in llvm_path.")
    parser.add_argument(
        "--multi_dir",
        action="store_true",
        help="indicates llvm_path contains llvm, checked out " +
        "into multiple directories, as opposed to a " +
        "typical single source tree checkout.")
    parser.add_argument("llvm_path")

    args = parser.parse_args()

    # Read the reference file first so that a bad path fails before any
    # hashing work is done.
    reference_checksums = None
    if args.check is not None:
        with open(args.check, "r") as f:
            reference_checksums = ReadLLVMChecksums(f)

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)

    # CreateLLVMProjects comes from project_tree; it receives True for a
    # single source tree checkout and False when --multi_dir was given.
    llvm_projects = CreateLLVMProjects(not args.multi_dir)
    checksums = ComputeLLVMChecksums(args.llvm_path, llvm_projects)

    if reference_checksums is None:
        WriteLLVMChecksums(checksums, sys.stdout)
        sys.exit(0)

    if not ValidateChecksums(reference_checksums, checksums, args.partial):
        sys.stdout.write("Checksums differ.\nNew checksums:\n")
        WriteLLVMChecksums(checksums, sys.stdout)
        sys.stdout.write("Reference checksums:\n")
        WriteLLVMChecksums(reference_checksums, sys.stdout)
        sys.exit(1)
    else:
        sys.stdout.write("Checksums match.")


def _to_bytes(value):
    """Return value as bytes so that it can be fed to a hashlib hasher.

    Bytes are returned unchanged. Text is encoded with os.fsencode when it is
    available and with the filesystem encoding otherwise.
    """
    if isinstance(value, bytes):
        return value
    if hasattr(os, "fsencode"):
        return os.fsencode(value)
    return value.encode(sys.getfilesystemencoding() or "utf-8")


def _read_and_collapse_svn_substitutions(file_path):
    """Read file_path as bytes and collapse its svn date keywords.

    The expanded $Date$ and $LastChangedDate$ keywords are locale-specific,
    so they are replaced with a fixed marker before hashing.
    """
    with open(file_path, "rb") as f:
        contents = f.read()
    new_contents = SVN_DATES_REGEX.sub(_SVN_DATES_REPLACEMENT, contents)
    if contents != new_contents:
        logging.debug("Replaced svn keyword substitutions in %s", file_path)
        logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents)
    return new_contents


def _compute_file_digest(file_path, hash_algo):
    """Return the hex digest of a single file.

    For a symlink whose target does not exist the link target path is hashed
    instead of the (unreadable) file contents.
    """
    if os.path.islink(file_path) and not os.path.exists(file_path):
        content = _to_bytes(os.readlink(file_path))
    else:
        content = _read_and_collapse_svn_substitutions(file_path)
    hasher = hash_algo()
    hasher.update(content)
    return hasher.hexdigest()


def ComputeLLVMChecksums(root_path, projects):
    """Compute checksums for LLVM sources checked out using svn.

    Args:
      root_path: a directory of llvm checkout.
      projects: a list of LLVMProject instances, which describe checkout
        paths, relative to root_path.

    Returns:
      A dict mapping from project name to project checksum. Projects whose
      directory does not exist under root_path are left out.
    """
    hash_algo = hashlib.sha256

    project_checksums = dict()
    # Hash each project.
    for proj in projects:
        project_root = os.path.join(root_path, proj.relpath)
        if not os.path.exists(project_root):
            logging.info("Folder %s doesn't exist, skipping project %s",
                         proj.relpath, proj.name)
            continue

        files = list()

        # Callback handed to WalkProjectFiles (from project_tree); it is
        # called with the path of a file and records (path, digest).
        def add_file_hash(file_path, files=files):
            file_digest = _compute_file_digest(file_path, hash_algo)
            logging.debug("Checksum %s for file %s", file_digest, file_path)
            files.append((file_path, file_digest))

        logging.info("Computing checksum for %s", proj.name)
        WalkProjectFiles(root_path, projects, proj, add_file_hash)

        # Compute final checksum. Files are sorted so that the result does
        # not depend on the order in which they were visited.
        files.sort(key=lambda entry: _to_bytes(entry[0]))
        hasher = hash_algo()
        for file_path, file_digest in files:
            # Hash the path relative to the project so that the checksum
            # does not depend on where the checkout lives.
            relative_path = os.path.relpath(file_path, project_root)
            hasher.update(_to_bytes(relative_path))
            hasher.update(_to_bytes(file_digest))
        project_checksums[proj.name] = hasher.hexdigest()
    return project_checksums


def WriteLLVMChecksums(checksums, f):
    """Writes checksums to a text file.

    One "<checksum> <project>" line is written per project, ordered by
    project name.

    Args:
      checksums: a dict mapping from project name to project checksum (result
        of ComputeLLVMChecksums).
      f: a file object to write into.
    """
    for proj in sorted(checksums):
        f.write("{} {}\n".format(checksums[proj], proj))


def ReadLLVMChecksums(f):
    """Reads checksums from a text file, produced by WriteLLVMChecksums.

    Blank lines are ignored.

    Args:
      f: a file object to read from; only its readline method is used.

    Returns:
      A dict, mapping from project name to project checksum.

    Raises:
      ValueError: if a non-blank line does not consist of exactly two
        whitespace-separated fields.
    """
    checksums = {}
    # readline returns "" only at end of file.
    for line_number, line in enumerate(iter(f.readline, ""), 1):
        fields = line.split()
        if not fields:
            continue
        if len(fields) != 2:
            raise ValueError(
                "malformed checksum line {}: {!r}".format(line_number, line))
        checksum, proj = fields
        checksums[proj] = checksum
    return checksums


def ValidateChecksums(reference_checksums,
                      new_checksums,
                      allow_missing_projects=False):
    """Validates that reference_checksums and new_checksums match.

    Args:
      reference_checksums: a dict of reference checksums, mapping from a
        project name to a project checksum.
      new_checksums: a dict of checksums to be checked, mapping from a project
        name to a project checksum.
      allow_missing_projects:
        When True, reference_checksums may contain more projects than
          new_checksums. Projects missing from new_checksums are ignored.
        When False, new_checksums and reference_checksums must contain
          checksums for the same set of projects. If there is a project in
          reference_checksums, missing from new_checksums, ValidateChecksums
          will return False.

    Returns:
      True, if checksums match with regards to allow_missing_projects flag
      value. False, otherwise.
    """
    if not allow_missing_projects:
        if len(new_checksums) != len(reference_checksums):
            return False

    for proj, checksum in new_checksums.items():
        # We never computed a checksum for this project.
        if proj not in reference_checksums:
            return False
        # Checksum did not match.
        if reference_checksums[proj] != checksum:
            return False

    return True


if __name__ == "__main__":
    main()