1# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
2# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
3"""
4Module to find differences over time in a filesystem
5
6Basically this takes a snapshot of a directory, then sees what changes
7were made.  The contents of the files are not checked, so you can
8detect that the content was changed, but not what the old version of
9the file was.
10"""
11
12import os
13from fnmatch import fnmatch
14from datetime import datetime
15
try:
    # Python 3: ``UserDict`` is a class living inside the ``collections``
    # module, not a submodule, so it must be pulled in with
    # ``from ... import`` (``import collections.UserDict`` raises
    # ImportError).
    from collections import UserDict as IterableUserDict
except ImportError:
    # Python 2.5-2.7
    from UserDict import IterableUserDict
22import operator
23import re
24
# Names exported by ``from <this module> import *``.
__all__ = ['Diff', 'Snapshot', 'File', 'Dir', 'report_expected_diffs',
           'show_diff']
27
class Diff(object):

    """
    Represents the difference between two snapshots.

    After construction three dictionaries, keyed by path, are available:

    ``created``:
        Entries present only in ``after``.

    ``deleted``:
        Entries present only in ``before`` (values taken from ``before``).

    ``updated``:
        Entries present in both whose ``mtime`` is larger in ``after``,
        or whose two ``mtime`` values cannot be compared at all
        (values taken from ``after``).
    """

    def __init__(self, before, after):
        """
        Store the two snapshots and compute the difference immediately.

        Both arguments must expose ``data`` (a path -> entry dictionary),
        ``base_path`` and ``calculated``, plus ``get()`` and item access
        for the ``dates`` option of ``report()``.
        """
        self.before = before
        self.after = after
        self._calculate()

    def _calculate(self):
        """
        Fill in ``created``, ``deleted`` and ``updated`` by comparing the
        ``data`` dictionaries of the two snapshots.
        """
        before = self.before.data
        after = self.after.data
        self.deleted = {}
        self.updated = {}
        # Start from everything in ``after`` and strike out whatever
        # already existed in ``before``.
        self.created = after.copy()
        for path, f in before.items():
            if path not in after:
                self.deleted[path] = f
                continue
            del self.created[path]
            try:
                changed = f.mtime < after[path].mtime
            except TypeError:
                # The two mtimes are of unorderable types (a number on one
                # side and a non-numeric placeholder such as 'N/A' on the
                # other), which Python 3 refuses to compare.  The entry
                # changed kind, so it counts as updated.
                changed = True
            if changed:
                self.updated[path] = after[path]

    def __str__(self):
        return self.report()

    def report(self, header=True, dates=False):
        """
        Return a multi-line, human readable description of the difference.

        ``header``:
            When true, start with a line naming the base path and the two
            snapshot times.

        ``dates``:
            When true, append the before/after mtimes to every entry.
        """
        s = []
        if header:
            s.append('Difference in %s from %s to %s:' %
                     (self.before.base_path,
                      self.before.calculated,
                      self.after.calculated))
        for name, files, show_size in [
            ('created', self.created, True),
            ('deleted', self.deleted, True),
            ('updated', self.updated, True)]:
            if not files:
                continue
            s.append('-- %s: -------------------' % name)
            last = ''
            # sorted() works on both Python 2 lists and Python 3 dict
            # views; keys are unique so ordering by path is total.
            for path, f in sorted(files.items(), key=lambda item: item[0]):
                t = '  %s' % _space_prefix(last, path, indent=4,
                                           include_sep=False)
                last = path
                if show_size and f.size != 'N/A':
                    t += '  (%s bytes)' % f.size
                if dates:
                    parts = []
                    if self.before.get(path):
                        parts.append(self.before[path].mtime)
                    if self.after.get(path):
                        parts.append(self.after[path].mtime)
                    t += ' (mtime: %s)' % ('->'.join(map(repr, parts)))
                s.append(t)
        # Exactly one line means only the header was emitted, i.e. nothing
        # changed.  (Without a header an unchanged diff yields '' instead.)
        if len(s) == 1:
            s.append('  (no changes)')
        return '\n'.join(s)
89
class Snapshot(IterableUserDict):

    """
    A listing of the files and directories below ``base_path``.

    Behaves like a dictionary whose keys are paths relative to
    ``base_path`` and whose values are the entries found there.
    """

    def __init__(self, base_path, files=None, ignore_wildcards=(),
                 ignore_paths=(), ignore_hidden=True):
        """
        ``files`` may be a ready-made ``path: entry`` dictionary; only
        when it is ``None`` is the filesystem actually scanned.
        """
        self.base_path = base_path
        self.ignore_paths = ignore_paths
        self.ignore_wildcards = ignore_wildcards
        self.ignore_hidden = ignore_hidden
        self.calculated = None
        self.data = files or {}
        if files is None:
            self.find_files()

    ############################################################
    ## File finding
    ############################################################

    def find_files(self):
        """
        Walk the tree below the base path, storing every entry in
        ``self.data``, then stamp ``self.calculated`` with the current
        time.
        """
        self._find_traverse('', self.data)
        self.calculated = datetime.now()

    def _ignore_file(self, fn):
        """
        Tell whether the relative path ``fn`` is excluded, either by
        ``ignore_paths``, by being hidden (when ``ignore_hidden`` is
        set), or by matching one of ``ignore_wildcards``.
        """
        if fn in self.ignore_paths:
            return True
        if self.ignore_hidden and os.path.basename(fn).startswith('.'):
            return True
        return any(fnmatch(fn, pat) for pat in self.ignore_wildcards)

    def _find_traverse(self, path, result):
        """
        Record ``path`` (relative to the base path) in ``result`` and,
        when it is a directory, recurse into its non-ignored children.
        """
        full = os.path.join(self.base_path, path)
        if not os.path.isdir(full):
            result[path] = File(self.base_path, path)
            return
        if path:
            # The base path itself (empty relative path) is not recorded
            result[path] = Dir(self.base_path, path)
        for entry in os.listdir(full):
            child = os.path.join(path, entry)
            if not self._ignore_file(child):
                self._find_traverse(child, result)

    def __repr__(self):
        when = self.calculated or '(no calculation done)'
        return '<%s in %r from %r>' % (
            self.__class__.__name__, self.base_path, when)

    def compare_expected(self, expected, comparison=operator.eq,
                         differ=None, not_found=None,
                         include_success=False):
        """
        Check the found files against ``expected``, a dictionary of
        ``path: content``.  Contents match when
        ``comparison(actual_content, expected_content)`` is true
        (equality by default).

        The return value is a dictionary keyed by path (with leading
        and trailing slashes removed).  For a mismatch the value is
        ``differ(actual_content, expected_content)`` when ``differ``
        is given, otherwise a short note about the size difference.

        For a path that was not found the value is
        ``not_found(path)`` when ``not_found`` is given, otherwise
        ``'not found'``.  Matching files are only listed (as
        ``'same!'``) when ``include_success`` is true.
        """
        result = {}
        for orig_path in expected:
            path = orig_path.strip('/')
            if path not in self.data:
                if not_found:
                    result[path] = not_found(path)
                else:
                    result[path] = 'not found'
                continue
            expected_content = expected[orig_path]
            actual_content = self.data[path].bytes
            if comparison(actual_content, expected_content):
                if include_success:
                    result[path] = 'same!'
                continue
            if differ:
                result[path] = differ(actual_content, expected_content)
                continue
            delta = len(actual_content) - len(expected_content)
            if delta < 0:
                result[path] = 'differ (%i bytes smaller)' % (-delta)
            elif delta > 0:
                result[path] = 'differ (%i bytes larger)' % delta
            else:
                result[path] = 'diff (same size)'
        return result

    def diff_to_now(self):
        """
        Return a ``Diff`` between this snapshot and a fresh one taken
        with the same settings.
        """
        current = self.clone()
        return Diff(self, current)

    def clone(self):
        """
        Build a new snapshot of the same base path with the same ignore
        settings; the filesystem is scanned again.
        """
        settings = dict(base_path=self.base_path,
                        ignore_wildcards=self.ignore_wildcards,
                        ignore_paths=self.ignore_paths,
                        ignore_hidden=self.ignore_hidden)
        return self.__class__(**settings)
203
class File(object):

    """
    Represents a single file found as the result of a command.

    Has attributes:

    ``path``:
        The path of the file, relative to the ``base_path``

    ``full``:
        The full path

    ``stat``:
        The results of ``os.stat``.  Also ``mtime`` and ``size``
        contain the ``.st_mtime`` and ``st_size`` of the stat.

    ``bytes``:
        The contents of the file, read in binary mode on first access
        and cached afterwards.

    You may use the ``in`` operator with these objects (tested against
    the contents of the file), and the ``.mustcontain()`` method.
    """

    file = True
    dir = False

    def __init__(self, base_path, path):
        """
        Stat ``path`` (relative to ``base_path``); the content is not
        read until ``bytes`` is first accessed.
        """
        self.base_path = base_path
        self.path = path
        self.full = os.path.join(base_path, path)
        self.stat = os.stat(self.full)
        self.mtime = self.stat.st_mtime
        self.size = self.stat.st_size
        self._bytes = None

    def bytes__get(self):
        """
        Return the file content, reading it from disk only once.
        """
        if self._bytes is None:
            f = open(self.full, 'rb')
            # try/finally guarantees the handle is released even when
            # read() fails (kept instead of ``with`` for old-Python
            # compatibility).
            try:
                self._bytes = f.read()
            finally:
                f.close()
        return self._bytes
    bytes = property(bytes__get)

    def __contains__(self, s):
        return s in self.bytes

    def mustcontain(self, s):
        """
        Raise ``AssertionError`` (after printing the content) unless
        ``s`` occurs in the file content.
        """
        __tracebackhide__ = True
        bytes = self.bytes
        if s not in bytes:
            print('Could not find %r in:' % s)
            print(bytes)
            # Raised explicitly so the check survives ``python -O``,
            # which strips ``assert`` statements.
            raise AssertionError('Could not find %r in file content' % (s,))

    def __repr__(self):
        return '<%s %s:%s>' % (
            self.__class__.__name__,
            self.base_path, self.path)
263
class Dir(File):

    """
    Represents a directory created by a command.

    No ``os.stat`` call is made for a directory: ``size`` and ``mtime``
    both hold the placeholder string ``'N/A'``, and reading ``bytes``
    raises ``NotImplementedError``.
    """

    dir = True
    file = False

    def __init__(self, base_path, path):
        self.base_path, self.path = base_path, path
        self.full = os.path.join(base_path, path)
        # Placeholders; directories carry no size or modification time here
        self.mtime = self.size = 'N/A'

    def bytes__get(self):
        message = "Directory %r doesn't have content" % self
        raise NotImplementedError(message)

    bytes = property(bytes__get)

    def __repr__(self):
        kind = self.__class__.__name__
        return '<%s %s:%s>' % (kind, self.base_path, self.path)
290
291
292def _space_prefix(pref, full, sep=None, indent=None, include_sep=True):
293    """
294    Anything shared by pref and full will be replaced with spaces
295    in full, and full returned.
296
297    Example::
298
299        >>> _space_prefix('/foo/bar', '/foo')
300        '    /bar'
301    """
302    if sep is None:
303        sep = os.path.sep
304    pref = pref.split(sep)
305    full = full.split(sep)
306    padding = []
307    while pref and full and pref[0] == full[0]:
308        if indent is None:
309            padding.append(' ' * (len(full[0]) + len(sep)))
310        else:
311            padding.append(' ' * indent)
312        full.pop(0)
313        pref.pop(0)
314    if padding:
315        if include_sep:
316            return ''.join(padding) + sep + sep.join(full)
317        else:
318            return ''.join(padding) + sep.join(full)
319    else:
320        return sep.join(full)
321
def report_expected_diffs(diffs, colorize=False):
    """
    Takes the output of compare_expected, and returns a string
    description of the differences.

    ``diffs`` is a ``path: description`` dictionary; entries are listed
    in path order, with the part of each path shared with the previous
    entry blanked out.  Multi-line descriptions go on the following
    lines, indented below their path.  When ``colorize`` is true, ANSI
    colour codes are added: lines starting with ``+`` or ``-`` get
    their own colours.

    Returns ``'No differences'`` when ``diffs`` is empty.
    """
    if not diffs:
        return 'No differences'
    # sorted() rather than .items() + .sort(): on Python 3 ``items()``
    # returns a view object that has no ``sort`` method.
    diffs = sorted(diffs.items())
    s = []
    last = ''
    for path, desc in diffs:
        t = _space_prefix(last, path, indent=4, include_sep=False)
        if colorize:
            t = color_line(t, 11)
        last = path
        if len(desc.splitlines()) > 1:
            # Indent the description two columns deeper than its path
            cur_indent = len(re.search(r'^[ ]*', t).group(0))
            desc = indent(cur_indent+2, desc)
            if colorize:
                t += '\n'
                for line in desc.splitlines():
                    if line.strip().startswith('+'):
                        line = color_line(line, 10)
                    elif line.strip().startswith('-'):
                        line = color_line(line, 9)
                    else:
                        line = color_line(line, 14)
                    t += line+'\n'
            else:
                t += '\n' + desc
        else:
            t += ' '+desc
        s.append(t)
    s.append('Files with differences: %s' % len(diffs))
    return '\n'.join(s)
358
def color_code(foreground=None, background=None):
    """
    Build the ANSI escape sequence selecting the given colours.

    Colour numbers:

    0  black
    1  red
    2  green
    3  yellow
    4  blue
    5  magenta (purple)
    6  cyan
    7  white (gray)

    Add 8 to get high-intensity

    With neither colour given, the reset sequence is returned.  A
    colour left as None selects the default for that layer.
    """
    if foreground is None and background is None:
        # Reset
        return '\x1b[0m'

    if foreground is None:
        fg_codes = ['[39m']
    elif foreground > 7:
        # High intensity: bold attribute plus the base colour
        fg_codes = ['[1m', '[%im' % (22+foreground)]
    else:
        fg_codes = ['[%im' % (30+foreground)]

    if background is None:
        bg_code = '[49m'
    else:
        bg_code = '[%im' % (40+background)

    return ''.join(['\x1b' + code for code in fg_codes + [bg_code]])
388
def color_line(line, foreground=None, background=None):
    """
    Wrap ``line`` in colour codes, leaving its leading whitespace
    outside the coloured region; a reset code is appended at the end.
    """
    leading = re.search(r'^(\s*)', line)
    margin = leading.group(1)
    start = color_code(foreground, background)
    text = line[leading.end():]
    return margin + start + text + color_code()
393
def indent(indent, text):
    """
    Prefix every line of ``text`` with ``indent`` spaces and rejoin the
    lines with newlines (a trailing newline is not kept).
    """
    shifted = []
    for line in text.splitlines():
        shifted.append(' '*indent + line)
    return '\n'.join(shifted)
397
def show_diff(actual_content, expected_content):
    """
    Describe how ``actual_content`` differs from ``expected_content``.

    Lines are stripped and blank lines dropped before comparing.  Two
    single-line contents give ``'<actual> not <expected>'`` (as reprs),
    an empty actual content gives the expected content preceded by a
    note, and anything else gives a ``difflib.ndiff`` listing.
    """
    def meaningful_lines(content):
        stripped = [line.strip() for line in content.splitlines()]
        return [line for line in stripped if line]

    actual_lines = meaningful_lines(actual_content)
    expected_lines = meaningful_lines(expected_content)

    if not actual_lines:
        return 'Empty; should have:\n'+expected_content
    if len(actual_lines) == 1 and len(expected_lines) == 1:
        return '%r not %r' % (actual_lines[0], expected_lines[0])

    import difflib
    delta = difflib.ndiff(actual_lines, expected_lines)
    return '\n'.join(delta)
409