1#!/usr/bin/python2
2#
3# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Block diff utility."""
8
9from __future__ import print_function
10
11import optparse
12import sys
13
14
class BlockDiffError(Exception):
  """Error raised when two files cannot be block-diffed."""
17
18
def BlockDiff(block_size, file1, file2, name1, name2, max_length=-1):
  """Performs a binary diff of two files by blocks.

  Both files are read sequentially from their current positions, one block at
  a time, and corresponding blocks are compared for equality. Consecutive
  differing blocks are coalesced into a single extent.

  Args:
    block_size: the size of a block to diff by, in bytes; must be positive
    file1: first file object
    file2: second file object
    name1: name of first file (for error reporting)
    name2: name of second file (for error reporting)
    max_length: the maximum length to read/diff in bytes (optional); a
      negative value means no limit
  Returns:
    A list of (start, length) pairs representing block extents that differ
    between the two files. Both values are counted in blocks, and start is
    the zero-based index of the first differing block of the extent.
  Raises:
    BlockDiffError if the block size is not positive, or if a read returned a
    different number of bytes from each of the two files.

  """
  # A zero block size would read nothing and report the files as identical; a
  # negative one would make read() consume the whole file as a single block.
  if block_size <= 0:
    raise BlockDiffError('invalid block size: %s' % block_size)

  if max_length < 0:
    # sys.maxsize is available on both Python 2.6+ and Python 3, whereas
    # sys.maxint exists on Python 2 only.
    max_length = sys.maxsize

  diff_list = []
  num_blocks = extent_start = extent_length = 0
  # Keep iterating while there is still budget left to read, or while an open
  # extent has yet to be recorded (it is flushed by a final empty read).
  while max_length or extent_length:
    read_length = min(max_length, block_size)
    data1 = file1.read(read_length)
    data2 = file2.read(read_length)
    if len(data1) != len(data2):
      raise BlockDiffError('read %d bytes from %s but %d bytes from %s' %
                           (len(data1), name1, len(data2), name2))

    if data1 != data2:
      # Data is different, mark it down.
      if extent_length:
        # Stretch the current diff extent.
        extent_length += 1
      else:
        # Start a new diff extent.
        extent_start = num_blocks
        extent_length = 1
    elif extent_length:
      # Blocks match again (or both reads were empty): record the previous
      # extent.
      diff_list.append((extent_start, extent_length))
      extent_length = 0

    # Are we done reading? An empty read means end of file or that the
    # max_length budget was exhausted.
    if not data1:
      break

    max_length -= len(data1)
    num_blocks += 1

  return diff_list
70
71
def main(argv):
  """Command-line entry point: block-diffs two files and prints the result.

  Args:
    argv: the full argument vector, including the program name at index 0
  Returns:
    0 if no differing blocks were found, 1 if differences were found (the
    differing extents are printed to standard output), 2 if the files could
    not be read or diffed (the error is printed to standard error).

  Invalid command-line usage is reported through the option parser's error()
  method, which terminates the program rather than returning.

  """
  # Parse command-line arguments.
  parser = optparse.OptionParser(
      usage='Usage: %prog FILE1 FILE2',
      description='Compare FILE1 and FILE2 by blocks.')

  parser.add_option('-b', '--block-size', metavar='NUM', type=int, default=4096,
                    help='the block size to use (default: %default)')
  parser.add_option('-m', '--max-length', metavar='NUM', type=int, default=-1,
                    help='maximum number of bytes to compare')

  opts, args = parser.parse_args(argv[1:])

  try:
    name1, name2 = args
  except ValueError:
    parser.error('unexpected number of arguments')

  # Perform the block diff. The files are opened in binary mode so that the
  # comparison operates on raw bytes regardless of platform or Python version.
  try:
    with open(name1, 'rb') as file1:
      with open(name2, 'rb') as file2:
        diff_list = BlockDiff(opts.block_size, file1, file2, name1, name2,
                              opts.max_length)
  except (IOError, OSError) as e:
    # Report unreadable/missing files as an error (status 2) instead of an
    # uncaught traceback, whose exit status of 1 would be indistinguishable
    # from "differences found".
    print('Error: %s' % e, file=sys.stderr)
    return 2
  except BlockDiffError as e:
    print('Error: %s' % e, file=sys.stderr)
    return 2

  # Print the diff, if such was found.
  if diff_list:
    total_diff_blocks = 0
    for extent_start, extent_length in diff_list:
      total_diff_blocks += extent_length
      print('%d->%d (%d)' %
            (extent_start, extent_start + extent_length, extent_length))

    print('total diff: %d blocks' % total_diff_blocks)
    return 1

  return 0
112
113
if __name__ == '__main__':
  # Run the tool and hand main()'s return value to the shell as exit status.
  exit_status = main(sys.argv)
  sys.exit(exit_status)
116