1#!/usr/bin/env -S python3 -u
2
3"""
4This script helps find various build behaviors that make builds less hermetic
5and repeatable. Depending on the flags, it runs a sequence of builds and looks
6for files that have changed or have been improperly regenerated, updating
7their timestamps incorrectly. It also looks for changes that the build has
8done to the source tree, and for files whose contents are dependent on the
9location of the out directory.
10
11This utility has two major modes, full and incremental. By default, this tool
12runs in full mode. To run in incremental mode, pass the --incremental flag.
13
14
15FULL MODE
16
17In full mode, this tool helps verify BUILD CORRECTNESS by examining its
18REPEATABILITY. In full mode, this tool runs two complete builds in different
19directories and compares the CONTENTS of the two directories. Lists of any
20files that are added, removed or changed are printed, sorted by the timestamp
21of that file, to aid finding which dependencies trigger the rebuilding of
22other files.
23
24
25INCREMENTAL MODE
26
In incremental mode, this tool helps verify the SPEED of the build. It runs two
28builds and looks at the TIMESTAMPS of the generated files, and reports files
29that were changed by the second build. In theory, an incremental build with no
30source files touched should not have any generated targets changed. As in full
31builds, the file list is returned sorted by timestamp.
32
33
34OTHER CHECKS
35
36In both full and incremental mode, this tool looks at the timestamps of all
37source files in the tree, and reports on files that have been touched. In the
38output, these are labeled with the header "Source files touched after start of
39build."
40
41In addition, by default, this tool sets the OUT_DIR environment variable to
42something other than "out" in order to find build rules that are not respecting
43the OUT_DIR. If you see these, you should fix them, but if your build can not
44complete for some reason because of this, you can pass the --no-check-out-dir
45flag to suppress this check.
46
47
48OTHER FLAGS
49
50In full mode, the --detect-embedded-paths flag does the two builds in different
51directories, to help in finding rules that embed the out directory path into
52the targets.
53
The --hide-build-output flag hides the output of successful builds, to make
55script output cleaner. The output of builds that fail is still shown.
56
57The --no-build flag is useful if you have already done a build and would
58just like to re-run the analysis.
59
60The --target flag lets you specify a build target other than the default
61full build (droid). You can pass "nothing" as in the example below, or a
62specific target, to reduce the scope of the checks performed.
63
64The --touch flag lets you specify a list of source files to touch between
65the builds, to examine the consequences of editing a particular file.
66
67
68EXAMPLE COMMANDLINES
69
70Please run build/make/tools/compare_builds.py --help for a full listing
of the commandline flags. Here is a sampling of useful combinations.
72
73  1. Find files changed during an incremental build that doesn't build
74     any targets.
75
76       build/make/tools/compare_builds.py --incremental --target nothing
77
78     Long incremental build times, or consecutive builds that re-run build actions
79     are usually caused by files being touched as part of loading the makefiles.
80
81     The nothing build (m nothing) loads the make and blueprint files, generates
82     the dependency graph, but then doesn't actually build any targets. Checking
83     against this build is the fastest and easiest way to find files that are
84     modified while makefiles are read, for example with $(shell) invocations.
85
86  2. Find packaging targets that are different, ignoring intermediate files.
87
88       build/make/tools/compare_builds.py --subdirs --detect-embedded-paths
89
90     These flags will compare the final staging directories for partitions,
91     as well as the APKs, apexes, testcases, and the like (the full directory
92     list is in the DEFAULT_DIRS variable below). Since these are the files
93     that are ultimately released, it is more important that these files be
94     replicable, even if the intermediates that went into them are not (for
95     example, when debugging symbols are stripped).
96
97  3. Check that all targets are repeatable.
98
99       build/make/tools/compare_builds.py --detect-embedded-paths
100
101     This check will list all of the differences in built targets that it can
102     find. Be aware that the AOSP tree still has quite a few targets that
103     are flagged by this check, so OEM changes might be lost in that list.
104     That said, each file shown here is a potential blocker for a repeatable
105     build.
106
107  4. See what targets are rebuilt when a file is touched between builds.
108
109       build/make/tools/compare_builds.py --incremental \
110            --touch frameworks/base/core/java/android/app/Activity.java
111
112     This check simulates the common engineer workflow of touching a single
113     file and rebuilding the whole system. To see a restricted view, consider
114     also passing a --target option for a common use case. For example:
115
116       build/make/tools/compare_builds.py --incremental --target framework \
117            --touch frameworks/base/core/java/android/app/Activity.java
118"""
119
120import argparse
121import itertools
122import os
123import shutil
124import stat
125import subprocess
126import sys
127
128
# Path of the soong_ui script, relative to the root of the source tree. It is
# used both to run builds and to check that we are at the top of the tree.
SOONG_UI = "build/soong/soong_ui.bash"


# Which directories to use if --subdirs is supplied without explicit directories.
DEFAULT_DIRS = (
    "apex",
    "data",
    "product",
    "ramdisk",
    "recovery",
    "root",
    "system",
    "system_ext",
    "system_other",
    "testcases",
    "vendor",
)


# Files to skip when scanning an out directory: any file whose path, relative
# to the out directory, starts with one of these prefixes is ignored.
BUILD_INTERNALS_PREFIX_SKIP = (
    "soong/.glob/",
    ".path/",
)


# Files to skip when scanning an out directory: any file whose path, relative
# to the out directory, ends with one of these suffixes is ignored.
BUILD_INTERNALS_SUFFIX_SKIP = (
    "/soong/soong_build_metrics.pb",
    "/.installable_test_files",
    "/files.db",
    "/.blueprint.bootstrap",
    "/build_number.txt",
    "/build.ninja",
    "/.out-dir",
    "/build_fingerprint.txt",
    "/build_thumbprint.txt",
    "/.copied_headers_list",
    "/.installable_files",
)
169
170
class DiffType(object):
  """A kind of difference that can be reported between two files.

  Instances do not define equality, so they are compared by identity. Use the
  module-level DIFF_* singletons below rather than constructing new ones.

  Attributes:
    code - short identifier for the kind of difference, e.g. "DIFF_SIZE"
    message - human readable description of the difference
  """
  def __init__(self, code, message):
    self.code = code
    self.message = message

  def __repr__(self):
    # Makes values readable in debuggers and assertion failures.
    return "DiffType(%r, %r)" % (self.code, self.message)

DIFF_NONE = DiffType("DIFF_NONE", "Files are the same")
DIFF_MODE = DiffType("DIFF_MODE", "Stat mode bits differ")
DIFF_SIZE = DiffType("DIFF_SIZE", "File size differs")
DIFF_SYMLINK = DiffType("DIFF_SYMLINK", "Symlinks point to different locations")
DIFF_CONTENTS = DiffType("DIFF_CONTENTS", "File contents differ")
181
182
def main():
  """Parses the command line, runs the builds (unless --no-build) and prints the findings.

  Exits with status 1 if the flags are inconsistent, if the script is not run
  from the root of the tree, if the lunch environment is missing, or if a
  build fails.
  """
  argparser = argparse.ArgumentParser(description="Diff build outputs from two builds.",
                                      epilog="Run this command from the root of the tree."
                                        + " Before running this command, the build environment"
                                        + " must be set up, including sourcing build/envsetup.sh"
                                        + " and running lunch.")
  argparser.add_argument("--detect-embedded-paths", action="store_true",
      help="Use unique out dirs to detect paths embedded in binaries.")
  argparser.add_argument("--incremental", action="store_true",
      help="Compare which files are touched in two consecutive builds without a clean in between.")
  argparser.add_argument("--hide-build-output", action="store_true",
      help="Don't print the build output for successful builds")
  argparser.add_argument("--no-build", dest="run_build", action="store_false",
      help="Don't build or clean, but do everything else.")
  argparser.add_argument("--no-check-out-dir", dest="check_out_dir", action="store_false",
      help="Don't check for rules not honoring movable out directories.")
  argparser.add_argument("--subdirs", nargs="*",
      help="Only scan these subdirs of $PRODUCT_OUT instead of the whole out directory."
           + " The --subdirs argument with no listed directories will give a default list.")
  argparser.add_argument("--target", default="droid",
      help="Make target to run. The default is droid")
  argparser.add_argument("--touch", nargs="+", default=[],
      help="Files to touch between builds. Must pair with --incremental.")
  args = argparser.parse_args(sys.argv[1:])

  # Reject flag combinations that don't make sense before doing any work.
  if args.detect_embedded_paths and args.incremental:
    sys.stderr.write("Can't pass --detect-embedded-paths and --incremental together.\n")
    sys.exit(1)
  if args.detect_embedded_paths and not args.check_out_dir:
    sys.stderr.write("Can't pass --detect-embedded-paths and --no-check-out-dir together.\n")
    sys.exit(1)
  if args.touch and not args.incremental:
    sys.stderr.write("The --incremental flag is required if the --touch flag is passed.\n")
    sys.exit(1)

  AssertAtTop()
  RequireEnvVar("TARGET_PRODUCT")
  RequireEnvVar("TARGET_BUILD_VARIANT")

  # Out dir file names:
  #   - dir_prefix - The directory we'll put everything in (except for maybe the top level
  #     out/ dir).
  #   - *work_dir - The directory that we will build directly into. This is in dir_prefix
  #     unless --no-check-out-dir is set.
  #   - *out_dir - After building, if work_dir is different from out_dir, we move the out
  #     directory to here so we can do the comparisons.
  #   - timestamp_* - Files we touch so we know the various phases between the builds, so we
  #     can compare timestamps of files.
  if args.incremental:
    dir_prefix = "out_incremental"
    if args.check_out_dir:
      first_work_dir = first_out_dir = dir_prefix + "/out"
      second_work_dir = second_out_dir = dir_prefix + "/out"
    else:
      first_work_dir = first_out_dir = "out"
      second_work_dir = second_out_dir = "out"
  else:
    dir_prefix = "out_full"
    first_out_dir = dir_prefix + "/out_1"
    second_out_dir = dir_prefix + "/out_2"
    if not args.check_out_dir:
      first_work_dir = second_work_dir = "out"
    elif args.detect_embedded_paths:
      first_work_dir = first_out_dir
      second_work_dir = second_out_dir
    else:
      first_work_dir = dir_prefix + "/work"
      second_work_dir = dir_prefix + "/work"
  timestamp_start = dir_prefix + "/timestamp_start"
  timestamp_between = dir_prefix + "/timestamp_between"
  timestamp_end = dir_prefix + "/timestamp_end"

  if args.run_build:
    # Initial clean, if necessary
    print("Cleaning " + dir_prefix + "/")
    Clean(dir_prefix)
    print("Cleaning out/")
    Clean("out")
    CreateEmptyFile(timestamp_start)
    print("Running the first build in " + first_work_dir)
    RunBuild(first_work_dir, first_out_dir, args.target, args.hide_build_output)
    for f in args.touch:
      print("Touching " + f)
      TouchFile(f)
    CreateEmptyFile(timestamp_between)
    print("Running the second build in " + second_work_dir)
    RunBuild(second_work_dir, second_out_dir, args.target, args.hide_build_output)
    CreateEmptyFile(timestamp_end)
    print("Done building")
    print()

  # Which out directories to scan
  if args.subdirs is not None:
    # --subdirs with no values yields an empty list, which selects the defaults.
    if args.subdirs:
      subdirs = args.subdirs
    else:
      subdirs = DEFAULT_DIRS
    first_files = ProductFiles(RequireBuildVar(first_out_dir, "PRODUCT_OUT"), subdirs)
    second_files = ProductFiles(RequireBuildVar(second_out_dir, "PRODUCT_OUT"), subdirs)
  else:
    first_files = OutFiles(first_out_dir)
    second_files = OutFiles(second_out_dir)

  printer = Printer()

  # The timestamp files read below are only created when the builds are run, so with
  # --no-build they must still be present from an earlier run.
  if args.incremental:
    # Find files that were rebuilt unnecessarily
    touched_incrementally = FindOutFilesTouchedAfter(first_files,
                                                     GetFileTimestamp(timestamp_between))
    printer.PrintList("Touched in incremental build", touched_incrementally)
  else:
    # Compare the two out dirs
    added, removed, changed = DiffFileList(first_files, second_files)
    printer.PrintList("Added", added)
    printer.PrintList("Removed", removed)
    printer.PrintList("Changed", changed, "%s %s")

  # Find files in the source tree that were touched
  touched_during = FindSourceFilesTouchedAfter(GetFileTimestamp(timestamp_start))
  printer.PrintList("Source files touched after start of build", touched_during)

  # Find files and dirs that were output to "out" and didn't respect $OUT_DIR
  if args.check_out_dir:
    bad_out_dir_contents = FindFilesAndDirectories("out")
    printer.PrintList("Files and directories created by rules that didn't respect $OUT_DIR",
                      bad_out_dir_contents)

  # If we didn't find anything, print success message
  if not printer.printed_anything:
    print("No bad behaviors found.")
313
314
def AssertAtTop():
  """Exits with an error message unless the soong_ui script is executable at its
     expected path relative to the current directory, i.e. unless we are running
     from the top of an android source tree."""
  at_top = os.access(SOONG_UI, os.X_OK)
  if at_top:
    return
  sys.stderr.write("FAILED: Please run from the root of the tree.\n")
  sys.exit(1)
321
322
def RequireEnvVar(name):
  """Returns the value of the environment variable called name. If it is unset or
     empty, prints an error message and exits with status 1."""
  value = os.environ.get(name)
  if value:
    return value
  sys.stderr.write("error: Can't determine %s. Please run lunch first.\n" % name)
  sys.exit(1)
330
331
def RunSoong(out_dir, args, capture_output):
  """Runs soong_ui with OUT_DIR set to out_dir in its environment.

  Args:
    out_dir - value for the OUT_DIR environment variable of the child process
    args - list of command line arguments to pass to soong_ui
    capture_output - if true, collect the child's stdout and stderr instead of
      letting them go to this process's stdout and stderr

  Returns:
    A (returncode, output) tuple. output is the combined stdout and stderr as
    bytes if capture_output is true, otherwise None.
  """
  soong_env = dict(os.environ, OUT_DIR=out_dir)
  command = [SOONG_UI] + args
  if not capture_output:
    return subprocess.run(command, env=soong_env).returncode, None
  # stderr is folded into stdout so the output stays in the order it was written.
  proc = subprocess.Popen(command, env=soong_env,
                          stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  combined_output = proc.communicate()[0]
  return proc.returncode, combined_output
343
344
def GetBuildVar(out_dir, name):
  """Asks soong_ui (via --dumpvar-mode) for the value of a build variable.

  Returns the value decoded as UTF-8 with surrounding whitespace stripped, or
  None if soong_ui exits with a non-zero status."""
  returncode, output = RunSoong(out_dir, ["--dumpvar-mode", name], True)
  if returncode == 0:
    return output.decode("utf-8").strip()
  return None
352
353
def RequireBuildVar(out_dir, name):
  """Returns the value of a build system variable. If it can't be determined or
     is empty, prints an error message and exits with status 1."""
  value = GetBuildVar(out_dir, name)
  if value:
    return value
  sys.stderr.write("error: Can't determine %s. Please run lunch first.\n" % name)
  sys.exit(1)
362
363
def Clean(directory):
  """Recursively removes the given directory. A directory that does not exist
     is silently ignored."""
  try:
    shutil.rmtree(directory)
  except FileNotFoundError:
    # Nothing to clean.
    return
370
371
def RunBuild(work_dir, out_dir, target, hide_build_output):
  """Runs a build. If the build fails, prints a message and exits.

  Args:
    work_dir - the directory to build into (passed to the build as OUT_DIR)
    out_dir - where the build results are moved to after the build, if
      different from work_dir
    target - the build target to pass to soong_ui
    hide_build_output - if true, the build output is captured and only
      printed (to stderr) if the build fails
  """
  returncode, output = RunSoong(work_dir,
                    ["--build-mode", "--all-modules", "--dir=" + os.getcwd(), target],
                    hide_build_output)
  if work_dir != out_dir:
    # The results are moved even when the build failed, so that whatever was
    # produced ends up in out_dir.
    try:
      os.replace(work_dir, out_dir)
    except FileNotFoundError:
      # A build that failed may not have created anything to move. Don't let that
      # hide the real failure, which is reported below.
      if returncode == 0:
        raise
  if returncode != 0:
    if hide_build_output:
      # The build output was hidden, so print it now for debugging
      sys.stderr.buffer.write(output)
    sys.stderr.write("FAILED: Build failed. Stopping.\n")
    sys.exit(1)
385
386
def DiffFileList(first_files, second_files):
  """Compares two sets of files, matching them up by their relative filenames.

  Args:
    first_files - iterable of (full filename, relative filename) tuples
    second_files - iterable of (full filename, relative filename) tuples

  Returns:
    A 3-tuple of lists, each sorted by file timestamp:
      Full filenames of files in second_files but not first_files (added files)
      Full filenames of files in first_files but not second_files (removed files)
      2-Tuples of (first, second) full filenames for the files that are in both
        but are different (changed files), sorted by the timestamp of the second
  """
  # List of files, sorted by the path relative to their respective base directories
  first_filelist = sorted(first_files, key=lambda x: x[1])
  second_filelist = sorted(second_files, key=lambda x: x[1])

  added = []
  removed = []
  changed = []

  first_index = 0
  second_index = 0

  # Walk both sorted lists in lockstep, like the merge step of a merge sort.
  while first_index < len(first_filelist) and second_index < len(second_filelist):
    # Path relative to source root and path relative to the base directory
    first_full_filename, first_relative_filename = first_filelist[first_index]
    second_full_filename, second_relative_filename = second_filelist[second_index]

    if first_relative_filename < second_relative_filename:
      # Removed
      removed.append(first_full_filename)
      first_index += 1
    elif first_relative_filename > second_relative_filename:
      # Added
      added.append(second_full_filename)
      second_index += 1
    else:
      # Both present
      if DiffFiles(first_full_filename, second_full_filename) != DIFF_NONE:
        changed.append((first_full_filename, second_full_filename))
      first_index += 1
      second_index += 1

  # Whatever is left over in either list has no counterpart in the other.
  removed.extend(full for full, _ in first_filelist[first_index:])
  added.extend(full for full, _ in second_filelist[second_index:])

  return (SortByTimestamp(added),
          SortByTimestamp(removed),
          SortByTimestamp(changed, key=lambda item: item[1]))
440
441
def FindOutFilesTouchedAfter(files, timestamp):
  """Returns the full filenames, ordered by timestamp, of the files whose
  timestamp is later than the given timestamp.

  Args:
    files - iterable of (full filename, relative filename) tuples
    timestamp - the timestamp to compare against
  """
  touched = []
  for full, _relative in files:
    mtime = GetFileTimestamp(full)
    if mtime > timestamp:
      touched.append(TouchedFile(full, mtime))
  touched.sort(key=lambda entry: entry.timestamp)
  return [entry.filename for entry in touched]
450
451
def GetFileTimestamp(filename):
  """Returns the modification time of a file. Symlinks are not followed, so for a
  symlink this is the timestamp of the link itself."""
  return os.lstat(filename).st_mtime
456
457
def SortByTimestamp(items, key=lambda item: item):
  """Returns a new list with the items ordered by the timestamp of their files.
  Args:
    items - the list of items to sort
    key - a function to extract a filename from each element in items
  """
  def TimestampOf(item):
    return GetFileTimestamp(key(item))

  return sorted(items, key=TimestampOf)
466
467
def FindSourceFilesTouchedAfter(timestamp):
  """Walks the current directory and returns the files, ordered by timestamp,
  whose timestamp is later than the given one. The top level .repo, out,
  out_full and out_incremental directories are not searched."""
  skipped_top_level_dirs = (".repo", "out", "out_full", "out_incremental")
  touched = []
  for root, dirs, files in os.walk(".", followlinks=False):
    if root == ".":
      # Pruning dirs in place keeps os.walk from descending into them.
      RemoveItemsFromList(dirs, skipped_top_level_dirs)
    for name in files:
      # Every root starts with "./"; strip it from the reported name.
      full = os.path.sep.join((root, name))[2:]
      mtime = GetFileTimestamp(full)
      if mtime > timestamp:
        touched.append(TouchedFile(full, mtime))
  touched.sort(key=lambda entry: entry.timestamp)
  return [entry.filename for entry in touched]
481
482
def FindFilesAndDirectories(directory):
  """Returns the paths of everything found underneath a directory. Directory
  paths are returned with a trailing path separator, file paths without."""
  sep = os.path.sep
  found = []
  for root, dirs, files in os.walk(directory, followlinks=False):
    found.extend(root + sep + name + sep for name in dirs)
    found.extend(root + sep + name for name in files)
  return found
490
491
def CreateEmptyFile(filename):
  """Create an empty file with now as the timestamp at filename.

  Missing parent directories are created. If the file already exists, it is
  truncated."""
  parent = os.path.dirname(filename)
  # A bare filename has no directory part, and os.makedirs("") would raise.
  if parent:
    os.makedirs(parent, exist_ok=True)
  with open(filename, "w"):
    pass
  # Set the timestamp explicitly rather than relying on the truncation above.
  os.utime(filename)
500
501
def TouchFile(filename):
  """Sets the access and modification times of an existing file to now."""
  os.utime(filename, None)
504
505
def DiffFiles(first_filename, second_filename):
  """Compares two files, without following symlinks.

  The checks are done in order from cheapest to most expensive: stat mode
  bits, then size, then the symlink target (for symlinks) or the file
  contents (for regular files).

  Returns:
    DIFF_NONE if no difference was found, otherwise the DIFF_* value for the
    first difference that was found.
  """
  def AreFileContentsSame(remaining, first_filename, second_filename):
    """Compare the file contents. They must be known to be the same size."""
    CHUNK_SIZE = 32*1024
    with open(first_filename, "rb") as first_file, open(second_filename, "rb") as second_file:
      while remaining > 0:
        size = min(CHUNK_SIZE, remaining)
        if first_file.read(size) != second_file.read(size):
          return False
        remaining -= size
    return True

  first_stat = os.stat(first_filename, follow_symlinks=False)
  second_stat = os.stat(second_filename, follow_symlinks=False)

  # Mode bits (these include the file type, so from here on both are the same type)
  if first_stat.st_mode != second_stat.st_mode:
    return DIFF_MODE

  # File size
  if first_stat.st_size != second_stat.st_size:
    return DIFF_SIZE

  # Contents
  if stat.S_ISLNK(first_stat.st_mode):
    if os.readlink(first_filename) != os.readlink(second_filename):
      return DIFF_SYMLINK
  elif stat.S_ISREG(first_stat.st_mode):
    if not AreFileContentsSame(first_stat.st_size, first_filename, second_filename):
      return DIFF_CONTENTS

  return DIFF_NONE
539
540
class FileIterator(object):
  """Iterable over the files found underneath a base directory.

  Every file is offered to ShouldIncludeFile, and only the files it accepts are
  produced. The implementation here rejects everything, so subclasses override
  it to choose the files they want.

  Each iteration yields a tuple containing:

  [0] (full) The base directory given in the constructor joined with the path
      of the file underneath it.
  [1] (relative) Path to the file relative to the base directory given in the
      constructor.
  """

  def __init__(self, base_dir):
    self._base_dir = base_dir

  def __iter__(self):
    base_dir = self._base_dir
    # Length of the base directory plus the path separator that follows it.
    prefix_length = len(base_dir) + 1
    for root, _dirs, files in os.walk(base_dir, followlinks=False):
      for name in files:
        full = os.path.sep.join((root, name))
        relative = full[prefix_length:]
        if self.ShouldIncludeFile(root, relative):
          yield (full, relative)

  def ShouldIncludeFile(self, root, path):
    """Returns whether the file should be produced by the iterator.

    Args:
      root - the directory containing the file, as reported by os.walk
      path - path of the file relative to the base directory
    """
    return False
586
587
class OutFiles(FileIterator):
  """Iterable over the files in an out directory, leaving out the files sitting
  directly in the out directory and the files on the build internals skip lists,
  which are known to be touched as part of build setup.
  """
  def __init__(self, out_dir):
    super().__init__(out_dir)
    self._out_dir = out_dir

  def ShouldIncludeFile(self, root, relative):
    # Files directly in the out directory are skipped, which sadly also hides
    # any file that a rule generates straight into that directory.
    if root == self._out_dir:
      return False
    # Skiplist. startswith and endswith accept a tuple of alternatives.
    is_build_internal = (relative.startswith(BUILD_INTERNALS_PREFIX_SKIP)
                         or relative.endswith(BUILD_INTERNALS_SUFFIX_SKIP))
    return not is_build_internal
609
610
class ProductFiles(FileIterator):
  """Object that produces an iterator containing files in listed subdirectories of $PRODUCT_OUT.

  Args:
    product_out - the $PRODUCT_OUT directory to scan
    subdirs - paths, relative to product_out, of the subdirectories to include
  """
  def __init__(self, product_out, subdirs):
    super().__init__(product_out)
    self._subdirs = subdirs

  def ShouldIncludeFile(self, root, relative):
    """Returns whether relative is one of the listed subdirs or a file inside one."""
    for subdir in self._subdirs:
      subdir = subdir.rstrip(os.path.sep)
      # Match whole path components only, so that "system" does not also pick
      # up "system_ext/..." or "system_other/...".
      if relative == subdir or relative.startswith(subdir + os.path.sep):
        return True
    return False
623
624
class TouchedFile(object):
  """Pairs a filename with the timestamp that was read for that file.

  Attributes:
    filename - the name of the file
    timestamp - the timestamp of the file
  """
  def __init__(self, filename, timestamp):
    self.filename, self.timestamp = filename, timestamp
630
631
def RemoveItemsFromList(haystack, needles):
  """Removes, in place, the first occurrence of each needle from haystack.
  Needles that are not in haystack are ignored."""
  for needle in needles:
    if needle in haystack:
      haystack.remove(needle)
638
639
class Printer(object):
  """Writes titled lists to stdout and remembers whether anything was written.

  Attributes:
    printed_anything - True once a non-empty list has been printed
  """
  def __init__(self):
    self.printed_anything = False

  def PrintList(self, title, items, fmt="%s"):
    """Prints the title followed by one indented line per item. Prints nothing
    if items is empty.

    Args:
      title - heading for the list
      items - the items to print
      fmt - the % format used to render each item
    """
    if not items:
      return
    out = sys.stdout
    # Separate this list from the one printed before it with a blank line.
    if self.printed_anything:
      out.write("\n")
    out.write("%s:\n" % title)
    line_format = "  %s\n" % fmt
    for item in items:
      out.write(line_format % item)
    self.printed_anything = True
652
653
# Only run the tool when this file is executed as a script.
if __name__ == "__main__":
  try:
    main()
  except KeyboardInterrupt:
    # Exit quietly, without a traceback, when the user interrupts the script.
    pass
659
660
661# vim: ts=2 sw=2 sts=2 nocindent
662