1#!/usr/bin/env python2
2##########################################################################
3#
4# Copyright 2011 Jose Fonseca
5# All Rights Reserved.
6#
7# Permission is hereby granted, free of charge, to any person obtaining a copy
8# of this software and associated documentation files (the "Software"), to deal
9# in the Software without restriction, including without limitation the rights
10# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11# copies of the Software, and to permit persons to whom the Software is
12# furnished to do so, subject to the following conditions:
13#
14# The above copyright notice and this permission notice shall be included in
15# all copies or substantial portions of the Software.
16#
17# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23# THE SOFTWARE.
24#
25##########################################################################/
26
27
28import json
29import optparse
30import re
31import difflib
32import sys
33
34
def strip_object_hook(obj):
    '''JSON ``object_hook`` that strips specially-named data from an object.

    An object that has a ``__class__`` member is replaced by ``None``
    altogether.  For any other object, every member whose name both starts
    and ends with a double underscore is removed in place.

    Returns ``None`` or the (mutated) ``obj`` dictionary.
    '''
    if '__class__' in obj:
        return None
    # Iterate over a snapshot of the names: deleting from a dictionary while
    # iterating over its live key view raises RuntimeError on Python 3.
    for name in list(obj):
        if name.startswith('__') and name.endswith('__'):
            del obj[name]
    return obj
42
43
class Visitor:
    '''Base class that dispatches on the kind of a decoded JSON node.

    ``visit`` forwards dictionaries to ``visitObject``, lists to
    ``visitArray`` and anything else to ``visitValue``, passing any extra
    positional and keyword arguments through and returning the handler's
    result.  The handlers defined here do nothing and return ``None``.
    '''

    def visit(self, node, *args, **kwargs):
        # Select the handler first, then make a single forwarding call.
        if isinstance(node, dict):
            handler = self.visitObject
        elif isinstance(node, list):
            handler = self.visitArray
        else:
            handler = self.visitValue
        return handler(node, *args, **kwargs)

    def visitObject(self, node, *args, **kwargs):
        '''Handle a dictionary node; no-op here.'''
        return None

    def visitArray(self, node, *args, **kwargs):
        '''Handle a list node; no-op here.'''
        return None

    def visitValue(self, node, *args, **kwargs):
        '''Handle any node that is neither a dictionary nor a list; no-op here.'''
        return None
62
63
class Dumper(Visitor):
    '''Visitor that writes an indented rendition of a JSON tree to a stream.

    Object members are written in sorted name order, one per line, with the
    member name unquoted.  Array elements are written one per line.  Scalar
    values are serialized with ``json.dumps``.

    The ``enter_*``/``leave_*`` and ``_write``/``_indent``/``_newline``
    methods are also called directly by ``Differ``.
    '''

    def __init__(self, stream = sys.stdout):
        self.stream = stream
        # Current nesting depth; each level is indented by two spaces.
        self.level = 0

    def _write(self, s):
        self.stream.write(s)

    def _indent(self):
        self._write('  '*self.level)

    def _newline(self):
        self._write('\n')

    def visitObject(self, node):
        '''Write all members of the dictionary ``node`` in sorted name order.'''
        self.enter_object()

        # sorted() works on both Python 2 and 3; on Python 3 dict.keys() is a
        # view without a sort() method.
        members = sorted(node)
        last_index = len(members) - 1
        for i, name in enumerate(members):
            self.enter_member(name)
            self.visit(node[name])
            self.leave_member(i == last_index)
        self.leave_object()

    def enter_object(self):
        '''Write the opening brace and increase the nesting level.'''
        self._write('{')
        self._newline()
        self.level += 1

    def enter_member(self, name):
        '''Write the indentation and the ``name: `` prefix of a member.'''
        self._indent()
        self._write('%s: ' % name)

    def leave_member(self, last):
        '''Terminate a member line; a comma is written unless ``last`` is true.'''
        if not last:
            self._write(',')
        self._newline()

    def leave_object(self):
        '''Decrease the nesting level and write the closing brace.'''
        self.level -= 1
        self._indent()
        self._write('}')
        # Only the outermost object is followed by a newline.
        if self.level <= 0:
            self._newline()

    def visitArray(self, node):
        '''Write every element of the list ``node`` on its own line.'''
        self.enter_array()
        last_index = len(node) - 1
        for i, value in enumerate(node):
            self._indent()
            self.visit(value)
            if i != last_index:
                self._write(',')
            self._newline()
        self.leave_array()

    def enter_array(self):
        '''Write the opening bracket and increase the nesting level.'''
        self._write('[')
        self._newline()
        self.level += 1

    def leave_array(self):
        '''Decrease the nesting level and write the closing bracket.'''
        self.level -= 1
        self._indent()
        self._write(']')

    def visitValue(self, node):
        '''Write a scalar using its JSON serialization.'''
        self._write(json.dumps(node, allow_nan=True))
136
137
138
class Comparer(Visitor):
    '''Visitor that decides whether two JSON trees are equivalent.

    ``visit(a, b)`` returns a true value when ``b`` matches ``a``.  Values
    involving a float are compared using ``tolerance`` (relative to ``a``, or
    absolute when ``a`` is zero).  When ``ignore_added`` is true, members that
    exist only in the second object are not considered a difference.
    '''

    def __init__(self, ignore_added = False, tolerance = 2.0 ** -24):
        self.ignore_added = ignore_added
        self.tolerance = tolerance

    def visitObject(self, a, b):
        '''Compare dictionary ``a`` against ``b`` member by member.'''
        if not isinstance(b, dict):
            return False
        # sorted() instead of keys().sort(): on Python 3 dict.keys() returns
        # a view that has no sort() method.
        a_names = sorted(a)
        if not self.ignore_added:
            if len(a) != len(b):
                return False
            if a_names != sorted(b):
                return False
        for name in a_names:
            # A member of ``a`` missing from ``b`` is always a difference,
            # even when additions in ``b`` are being ignored.
            if name not in b:
                return False
            if not self.visit(a[name], b[name]):
                return False
        return True

    def visitArray(self, a, b):
        '''Compare list ``a`` against ``b`` element by element.'''
        if not isinstance(b, list):
            return False
        if len(a) != len(b):
            return False
        for ae, be in zip(a, b):
            if not self.visit(ae, be):
                return False
        return True

    def visitValue(self, a, b):
        '''Compare two scalars, tolerating small differences between floats.'''
        if not (isinstance(a, float) or isinstance(b, float)):
            return a == b
        # Identical values always match; this also covers equal infinities,
        # for which the relative difference below would evaluate to NaN.
        if a == b:
            return True
        try:
            if a == 0:
                return abs(b) < self.tolerance
            return abs((b - a)/a) < self.tolerance
        except TypeError:
            # One side is not a number (e.g. None, a string or a container),
            # so the two values cannot match.
            return False
184
185
class Differ(Visitor):
    '''Visitor that writes the differences between two JSON trees.

    Sub-trees that the internal ``Comparer`` considers equivalent produce no
    output of their own.  Differing scalars are written as ``old -> new``;
    differing strings that contain newlines are written as a line-by-line
    ``difflib`` comparison.  All output goes through an internal ``Dumper``
    bound to ``stream``.
    '''

    # Types treated as text.  This is (str, unicode) on Python 2 and
    # (str, str) on Python 3, where the ``basestring`` builtin does not exist.
    _string_types = (str, type(u''))

    def __init__(self, stream = sys.stdout, ignore_added = False):
        self.dumper = Dumper(stream)
        self.comparer = Comparer(ignore_added = ignore_added)

    def visit(self, a, b):
        '''Write the differences between ``a`` and ``b``, if there are any.'''
        if self.comparer.visit(a, b):
            return
        Visitor.visit(self, a, b)

    def visitObject(self, a, b):
        '''Write the differing members of dictionary ``a`` versus ``b``.'''
        if not isinstance(b, dict):
            self.replace(a, b)
            return

        self.dumper.enter_object()
        names = set(a)
        if not self.comparer.ignore_added:
            names.update(b)
        names = sorted(names)

        last_index = len(names) - 1
        for i, name in enumerate(names):
            # A member missing on either side is represented by None.
            ae = a.get(name, None)
            be = b.get(name, None)
            if not self.comparer.visit(ae, be):
                self.dumper.enter_member(name)
                self.visit(ae, be)
                # NOTE: "last" refers to the position among all names, so a
                # comma is still written after the final differing member
                # when equivalent members follow it.
                self.dumper.leave_member(i == last_index)

        self.dumper.leave_object()

    def visitArray(self, a, b):
        '''Write list ``a`` versus ``b``; matching elements are dumped as-is.'''
        if not isinstance(b, list):
            self.replace(a, b)
            return

        self.dumper.enter_array()
        max_len = max(len(a), len(b))
        for i in range(max_len):
            # Elements past the end of the shorter list are represented by None.
            ae = a[i] if i < len(a) else None
            be = b[i] if i < len(b) else None
            self.dumper._indent()
            if self.comparer.visit(ae, be):
                self.dumper.visit(ae)
            else:
                self.visit(ae, be)
            if i != max_len - 1:
                self.dumper._write(',')
            self.dumper._newline()

        self.dumper.leave_array()

    def visitValue(self, a, b):
        '''Write scalar ``a`` versus ``b`` when they are not equal.'''
        if a != b:
            self.replace(a, b)

    def replace(self, a, b):
        '''Write the replacement of ``a`` by ``b``.

        When both are strings and at least one contains a newline, a
        line-by-line comparison is written, one indented line per entry.
        Otherwise both values are dumped, separated by `` -> ``.
        '''
        string_types = self._string_types
        if isinstance(a, string_types) and isinstance(b, string_types):
            if '\n' in a or '\n' in b:
                a = a.splitlines()
                b = b.splitlines()
                differ = difflib.Differ()
                result = differ.compare(a, b)
                self.dumper.level += 1
                for entry in result:
                    self.dumper._newline()
                    self.dumper._indent()
                    # Each entry starts with a two-character difflib tag.
                    tag = entry[:2]
                    text = entry[2:]
                    if tag == '? ':
                        # Hint lines are written unquoted, padded with one
                        # space so they line up under the quoted text above.
                        tag = '  '
                        prefix = ' '
                        text = text.rstrip()
                        suffix = ''
                    else:
                        prefix = '"'
                        suffix = '\\n"'
                    line = tag + prefix + text + suffix
                    self.dumper._write(line)
                self.dumper.level -= 1
                return
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)

    def isMultilineString(self, value):
        '''Return whether ``value`` is a string that contains a newline.'''
        return isinstance(value, self._string_types) and '\n' in value

    def replaceMultilineString(self, a, b):
        '''Dump ``a`` and ``b`` separated by `` -> ``.'''
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)
285
286
287#
# Unfortunately the JSON standard does not include comments, but they are quite
# a useful feature to have in regression tests.
290#
291
# Token patterns, in match-group order.  Strings are tokenized as well so
# that a "//" inside a string literal is not mistaken for a comment.
_token_res = [
    # comment: from "//" up to (not including) the end of the line
    r'//[^\r\n]*',
    # string: double-quoted, may contain backslash escapes
    r'"[^"\\]*(\\.[^"\\]*)*"',
]

# One alternative per token, each wrapped in its own group.  DOTALL lets the
# "\\." escape pattern also match a backslash followed by a newline.
_tokens_re = re.compile(
    '|'.join(['(%s)' % token_re for token_re in _token_res]),
    re.DOTALL
)
298
299
def _strip_comment(mo):
    '''Substitution callback: drop the match if its first group matched.

    Returns the empty string when group 1 of the match object ``mo`` is
    non-empty, and the whole matched text unchanged otherwise.
    '''
    return '' if mo.group(1) else mo.group(0)
305
306
def _strip_comments(data):
    '''Return ``data`` with the (non-standard) JSON comments removed.

    Every token matched by ``_tokens_re`` is passed through
    ``_strip_comment``; all other text is left untouched.
    '''
    stripped = _tokens_re.sub(_strip_comment, data)
    return stripped
310
311
# Import-time sanity check: a comment is removed (its line ending is kept),
# while a "//" inside a string literal that spans lines is left alone.
assert _strip_comments(
    '// a comment\n"// a comment in a string\n"'
) == '\n"// a comment in a string\n"'
317
318
def load(stream, strip_images = True, strip_comments = True):
    '''Decode the JSON document read from ``stream``.

    When ``strip_images`` is true, objects are filtered through
    ``strip_object_hook`` while decoding.  When ``strip_comments`` is true,
    the whole stream is read and passed through ``_strip_comments`` before
    decoding.  Decoding is always done with ``strict=False``.

    Returns the decoded document.
    '''
    object_hook = strip_object_hook if strip_images else None
    if not strip_comments:
        return json.load(stream, strict=False, object_hook = object_hook)
    text = _strip_comments(stream.read())
    return json.loads(text, strict=False, object_hook = object_hook)
330
331
def main():
    '''Command-line entry point: write the differences between two JSON files.

    Expects exactly two positional arguments, the reference and the source
    JSON file.  ``--keep-images`` disables the stripping done by
    ``strip_object_hook`` while loading.
    '''
    optparser = optparse.OptionParser(
        usage="\n\t%prog [options] <ref_json> <src_json>")
    optparser.add_option(
        '--keep-images',
        action="store_false", dest="strip_images", default=True,
        help="compare images")

    (options, args) = optparser.parse_args(sys.argv[1:])

    if len(args) != 2:
        optparser.error('incorrect number of arguments')

    # Use the parsed positional arguments rather than sys.argv[1:3], which
    # would pick up an option (e.g. --keep-images) as a file name.
    ref_path, src_path = args

    with open(ref_path, 'rt') as stream:
        a = load(stream, options.strip_images)
    with open(src_path, 'rt') as stream:
        b = load(stream, options.strip_images)

    differ = Differ()
    differ.visit(a, b)


if __name__ == '__main__':
    main()
358