1#!/usr/bin/env python
2""" A small program to compute checksums of LLVM checkout.
3"""
4from __future__ import absolute_import
5from __future__ import division
6from __future__ import print_function
7
8import hashlib
9import logging
10import re
11import sys
12from argparse import ArgumentParser
13from project_tree import *
14
# Matches an expanded svn keyword substitution, e.g. "$Date: ... $" or
# "$LastChangedDate: ... $". Group 1 captures the keyword name. A bare,
# unexpanded "$Date$" does not match, since at least one character is required
# between the keyword and the closing "$".
SVN_DATES_REGEX = re.compile(
    r"\$(Date|LastChangedDate)[^\$]+\$")
16
17
def main():
  """Command-line entry point.

  Computes the checksums of the checkout at llvm_path. Without --check the
  checksums are printed to stdout and the process exits with status 0. With
  --check they are compared against the given reference file: on a mismatch
  both sets of checksums are printed and the process exits with status 1.
  """
  parser = ArgumentParser()
  parser.add_argument(
      "-v", "--verbose", action="store_true", help="enable debug logging")
  parser.add_argument(
      "-c",
      "--check",
      metavar="reference_file",
      help=("read checksums from reference_file and "
            "check they match checksums of llvm_path."))
  parser.add_argument(
      "--partial",
      action="store_true",
      help=("ignore projects from reference_file "
            "that are not checked out in llvm_path."))
  parser.add_argument(
      "--multi_dir",
      action="store_true",
      help=("indicates llvm_path contains llvm, checked out "
            "into multiple directories, as opposed to a "
            "typical single source tree checkout."))
  parser.add_argument("llvm_path")
  args = parser.parse_args()

  # Load the reference checksums first, when a reference file was given.
  reference_checksums = None
  if args.check is not None:
    with open(args.check, "r") as reference_file:
      reference_checksums = ReadLLVMChecksums(reference_file)

  if args.verbose:
    logging.basicConfig(level=logging.DEBUG)

  llvm_projects = CreateLLVMProjects(not args.multi_dir)
  checksums = ComputeLLVMChecksums(args.llvm_path, llvm_projects)

  out = sys.stdout

  # Nothing to compare against: just report what was computed.
  if reference_checksums is None:
    WriteLLVMChecksums(checksums, out)
    sys.exit(0)

  if ValidateChecksums(reference_checksums, checksums, args.partial):
    out.write("Checksums match.")
    return

  out.write("Checksums differ.\nNew checksums:\n")
  WriteLLVMChecksums(checksums, out)
  out.write("Reference checksums:\n")
  WriteLLVMChecksums(reference_checksums, out)
  sys.exit(1)
66
67
def ComputeLLVMChecksums(root_path, projects):
  """Compute checksums for LLVM sources checked out using svn.

  Each file of a project is hashed with SHA-256 after expanded svn $Date$ and
  $LastChangedDate$ keywords have been collapsed; a dangling symlink is hashed
  by its link target instead. The project checksum is the SHA-256 over the
  (path relative to the project root, file digest) pairs, visited in sorted
  path order.

  Only bytes are fed to the regex and to the hashers, so the function runs on
  Python 3 as well as on Python 2.

  Args:
    root_path: a directory of llvm checkout.
    projects: a list of LLVMProject instances, which describe checkout paths,
      relative to root_path.

  Returns:
    A dict mapping from project name to project checksum. Projects whose
    checkout path does not exist under root_path are left out.
  """
  hash_algo = hashlib.sha256

  def path_to_bytes(path):
    # Python 2 paths already are byte strings. On Python 3 os.fsencode()
    # gives back the raw file system bytes, i.e. what Python 2 would hash.
    if isinstance(path, bytes):
      return path
    if hasattr(os, "fsencode"):
      return os.fsencode(path)
    return path.encode("utf-8")

  def collapse_svn_substitutions(contents):
    # Replace svn substitutions for $Date$ and $LastChangedDate$.
    # Unfortunately, these are locale-specific.
    pattern = SVN_DATES_REGEX.pattern
    if not isinstance(pattern, bytes):
      # contents is bytes; Python 3 refuses to apply a str pattern to it.
      pattern = pattern.encode("ascii")
    # NOTE: the replacement is "$", byte 0x01, "$". The historical "$\1$" was
    # a non-raw string, so "\1" was an octal escape and not a backreference.
    # It is kept byte-for-byte: changing it would alter the checksum of every
    # file containing these keywords and invalidate existing reference files.
    return re.sub(pattern, b"$\x01$", contents)

  def read_and_collapse_svn_subsitutions(file_path):
    with open(file_path, "rb") as f:
      contents = f.read()
    new_contents = collapse_svn_substitutions(contents)
    if contents != new_contents:
      logging.debug("Replaced svn keyword substitutions in %s", file_path)
      logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents)
    return new_contents

  def hash_file(file_path):
    # A dangling symlink has no contents to read; hash its target path.
    if os.path.islink(file_path) and not os.path.exists(file_path):
      content = path_to_bytes(os.readlink(file_path))
    else:
      content = read_and_collapse_svn_subsitutions(file_path)
    return hash_algo(content).hexdigest()

  project_checksums = dict()
  # Hash each project.
  for proj in projects:
    project_root = os.path.join(root_path, proj.relpath)
    if not os.path.exists(project_root):
      logging.info("Folder %s doesn't exist, skipping project %s", proj.relpath,
                   proj.name)
      continue

    files = list()

    def add_file_hash(file_path):
      file_digest = hash_file(file_path)
      logging.debug("Checksum %s for file %s", file_digest, file_path)
      files.append((file_path, file_digest))

    logging.info("Computing checksum for %s", proj.name)
    WalkProjectFiles(root_path, projects, proj, add_file_hash)

    # Compute final checksum. Sorting makes the result independent of the
    # order in which the files were visited.
    files.sort(key=lambda x: x[0])
    hasher = hash_algo()
    for file_path, file_digest in files:
      file_path = os.path.relpath(file_path, project_root)
      # hashlib only accepts bytes on Python 3.
      hasher.update(path_to_bytes(file_path))
      hasher.update(file_digest.encode("ascii"))
    project_checksums[proj.name] = hasher.hexdigest()
  return project_checksums
129
130
def WriteLLVMChecksums(checksums, f):
  """Writes one "<checksum> <project>" line per project, ordered by name.

  Args:
    checksums: a dict mapping from project name to project checksum (result of
      ComputeLLVMChecksums).
    f: a file object to write into.
  """
  # Project names are unique dict keys, so sorting the items orders the
  # output by project name.
  for name, digest in sorted(checksums.items()):
    f.write("{} {}\n".format(digest, name))
142
143
def ReadLLVMChecksums(f):
  """Reads checksums from a text file, produced by WriteLLVMChecksums.

  Every non-blank line must consist of exactly two whitespace-separated
  fields: the checksum followed by the project name. Blank lines (for example
  a trailing empty line) are skipped.

  Args:
    f: a file object, or any other iterable of lines, to read from.

  Returns:
    A dict, mapping from project name to project checksum.

  Raises:
    ValueError: if a non-blank line does not have exactly two fields.
  """
  checksums = {}
  for line in f:
    fields = line.split()
    if not fields:
      # Blank or whitespace-only line: carries no checksum, ignore it.
      continue
    if len(fields) != 2:
      raise ValueError(
          "Malformed checksum line, expected '<checksum> <project>': "
          "%r" % (line,))
    checksum, proj = fields
    checksums[proj] = checksum
  return checksums
158
159
def ValidateChecksums(reference_checksums,
                      new_checksums,
                      allow_missing_projects=False):
  """Checks new_checksums against reference_checksums.

  Args:
    reference_checksums: a dict of reference checksums, mapping from a project
      name to a project checksum.
    new_checksums: a dict of checksums to be checked, mapping from a project
      name to a project checksum.
    allow_missing_projects:
      When True, reference_checksums may contain more projects than
        new_checksums. Projects missing from new_checksums are ignored.
      When False, new_checksums and reference_checksums must contain checksums
        for the same set of projects. If there is a project in
        reference_checksums, missing from new_checksums, ValidateChecksums
        will return False.

  Returns:
    True, if checksums match with regards to allow_missing_projects flag value.
    False, otherwise.
  """
  same_size = len(new_checksums) == len(reference_checksums)
  if not (allow_missing_projects or same_size):
    return False

  # Every new checksum needs a reference entry carrying the same value; a
  # project unknown to the reference counts as a mismatch.
  return all(
      name in reference_checksums and reference_checksums[name] == digest
      for name, digest in new_checksums.items())
195
196
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
  main()
199