1import difflib
2from test.test_support import run_unittest, findfile
3import unittest
4import doctest
5import sys
6
7
class TestWithAscii(unittest.TestCase):
    """SequenceMatcher checks on plain ASCII strings differing by one character."""

    def test_one_insert(self):
        """A single extra character is reported as exactly one 'insert' opcode."""
        original = 'b' * 100

        # Extra character at the very front of the second sequence.
        matcher = difflib.SequenceMatcher(None, original, 'a' + original)
        self.assertAlmostEqual(matcher.ratio(), 0.995, places=3)
        expected = [
            ('insert', 0, 0, 0, 1),
            ('equal', 0, 100, 1, 101),
        ]
        self.assertEqual(list(matcher.get_opcodes()), expected)

        # Extra character in the middle of the second sequence.
        half = 'b' * 50
        matcher = difflib.SequenceMatcher(None, original, half + 'a' + half)
        self.assertAlmostEqual(matcher.ratio(), 0.995, places=3)
        expected = [
            ('equal', 0, 50, 0, 50),
            ('insert', 50, 50, 50, 51),
            ('equal', 50, 100, 51, 101),
        ]
        self.assertEqual(list(matcher.get_opcodes()), expected)

    def test_one_delete(self):
        """A single missing character is reported as exactly one 'delete' opcode."""
        head = 'a' * 40
        tail = 'b' * 40
        matcher = difflib.SequenceMatcher(None, head + 'c' + tail, head + tail)
        self.assertAlmostEqual(matcher.ratio(), 0.994, places=3)
        expected = [
            ('equal', 0, 40, 0, 40),
            ('delete', 40, 41, 40, 40),
            ('equal', 41, 81, 40, 80),
        ]
        self.assertEqual(list(matcher.get_opcodes()), expected)
29
30
class TestAutojunk(unittest.TestCase):
    """Tests for the autojunk parameter added in 2.7"""

    def test_one_insert_homogenous_sequence(self):
        """Compare ratio() for two near-identical sequences, autojunk on and off."""
        shorter = 'b' * 200
        longer = 'a' + shorter

        # autojunk defaults to True, and the heuristic kicks in for a
        # sequence of length 200+, so the reported ratio drops to 0.
        with_heuristic = difflib.SequenceMatcher(None, shorter, longer)
        self.assertAlmostEqual(with_heuristic.ratio(), 0, places=3)

        # With the heuristic switched off the sequences compare as nearly equal.
        without_heuristic = difflib.SequenceMatcher(
            None, shorter, longer, autojunk=False)
        self.assertAlmostEqual(without_heuristic.ratio(), 0.9975, places=3)
45
46
47class TestSFbugs(unittest.TestCase):
48    def test_ratio_for_null_seqn(self):
49        # Check clearing of SF bug 763023
50        s = difflib.SequenceMatcher(None, [], [])
51        self.assertEqual(s.ratio(), 1)
52        self.assertEqual(s.quick_ratio(), 1)
53        self.assertEqual(s.real_quick_ratio(), 1)
54
55    def test_comparing_empty_lists(self):
56        # Check fix for bug #979794
57        group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
58        self.assertRaises(StopIteration, group_gen.next)
59        diff_gen = difflib.unified_diff([], [])
60        self.assertRaises(StopIteration, diff_gen.next)
61
62    def test_matching_blocks_cache(self):
63        # Issue #21635
64        s = difflib.SequenceMatcher(None, "abxcd", "abcd")
65        first = s.get_matching_blocks()
66        second = s.get_matching_blocks()
67        self.assertEqual(second[0].size, 2)
68        self.assertEqual(second[1].size, 2)
69        self.assertEqual(second[2].size, 0)
70
71    def test_added_tab_hint(self):
72        # Check fix for bug #1488943
73        diff = list(difflib.Differ().compare(["\tI am a buggy"],["\t\tI am a bug"]))
74        self.assertEqual("- \tI am a buggy", diff[0])
75        self.assertEqual("?            --\n", diff[1])
76        self.assertEqual("+ \t\tI am a bug", diff[2])
77        self.assertEqual("? +\n", diff[3])
78
# Fixture pair used by TestSFpatches.test_html_diff (SF patch 914575).
# "from" side: four short numbered lines.  The text is compared against a
# stored HTML baseline, so every character (including the odd capitalisation
# in line 1) is significant and must not be "corrected".
patch914575_from1 = """
   1. Beautiful is beTTer than ugly.
   2. Explicit is better than implicit.
   3. Simple is better than complex.
   4. Complex is better than complicated.
"""

# "to" side of the pair above: line 1 changed, line 2 missing, line 3 with
# extra spacing, line 4 reworded and a new line 5.
patch914575_to1 = """
   1. Beautiful is better than ugly.
   3.   Simple is better than complex.
   4. Complicated is better than complex.
   5. Flat is better than nested.
"""
92
# Whitespace fixture pair used by TestSFpatches.test_html_diff, which renders
# it once with tabsize=2 and once with the default tab size.
# "from" side: tab characters mixed with spaces.  The bracketed tags inside
# each line spell out the whitespace on both sides (t = tab, s = space).
# The text is runtime data matched against a stored baseline -- do not edit
# it, spelling included.
patch914575_from2 = """
\t\tLine 1: preceeded by from:[tt] to:[ssss]
  \t\tLine 2: preceeded by from:[sstt] to:[sssst]
  \t \tLine 3: preceeded by from:[sstst] to:[ssssss]
Line 4:  \thas from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\t
"""

# "to" side of the pair above: the same lines with the whitespace described
# by the to:[...] tags.
patch914575_to2 = """
    Line 1: preceeded by from:[tt] to:[ssss]
    \tLine 2: preceeded by from:[sstt] to:[sssst]
      Line 3: preceeded by from:[sstst] to:[ssssss]
Line 4:   has from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end
"""
108
# Line-wrapping fixture pair used by TestSFpatches.test_html_diff with an
# HtmlDiff(wrapcolumn=14).  The strings are kept unsplit here because the
# test splits them itself, both with splitlines() and with splitlines(True).
# "from" side: a mix of short lines and lines longer than the wrap column.
patch914575_from3 = """line 0
1234567890123456789012345689012345
line 1
line 2
line 3
line 4   changed
line 5   changed
line 6   changed
line 7
line 8  subtracted
line 9
1234567890123456789012345689012345
short line
just fits in!!
just fits in two lines yup!!
the end"""

# "to" side of the pair above: contains added, changed and shortened lines
# relative to patch914575_from3.
patch914575_to3 = """line 0
1234567890123456789012345689012345
line 1
line 2    added
line 3
line 4   chanGEd
line 5a  chanGed
line 6a  changEd
line 7
line 8
line 9
1234567890
another long line that needs to be wrapped
just fitS in!!
just fits in two lineS yup!!
the end"""
142
class TestSFpatches(unittest.TestCase):
    """Regression tests for behaviour introduced by SF-tracker patches."""

    def test_html_diff(self):
        """Render a page of HtmlDiff tables and compare it with the stored baseline."""
        # Check SF patch 914575 for generating HTML differences
        filler = '123\n' * 10
        lead_in = '456\n' * 10
        from_text = (patch914575_from1 + filler) * 3
        to_text = (patch914575_to1 + filler) * 3

        from_near = from_text.splitlines()
        to_near = to_text.splitlines()
        # Same texts, pushed down by ten extra leading lines.
        from_far = (lead_in + from_text).splitlines()
        to_far = (lead_in + to_text).splitlines()
        from_tabs = patch914575_from2.splitlines()
        to_tabs = patch914575_to2.splitlines()
        from_wrap = patch914575_from3
        to_wrap = patch914575_to3

        default_diff = difflib.HtmlDiff()
        tab2_diff = difflib.HtmlDiff(tabsize=2)
        wrap_diff = difflib.HtmlDiff(wrapcolumn=14)

        # NOTE(review): the page and every table are generated in the same
        # order as the baseline was produced; the output may depend on that
        # order -- confirm before reordering any of these calls.
        page = default_diff.make_file(from_near, to_near, 'from', 'to',
                                      context=False, numlines=5)
        sections = [
            ('<h2>Context (first diff within numlines=5(default))</h2>',
             default_diff.make_table(from_near, to_near, 'from', 'to',
                                     context=True)),
            ('<h2>Context (first diff after numlines=5(default))</h2>',
             default_diff.make_table(from_far, to_far, 'from', 'to',
                                     context=True)),
            ('<h2>Context (numlines=6)</h2>',
             default_diff.make_table(from_near, to_near, 'from', 'to',
                                     context=True, numlines=6)),
            ('<h2>Context (numlines=0)</h2>',
             default_diff.make_table(from_near, to_near, 'from', 'to',
                                     context=True, numlines=0)),
            ('<h2>Same Context</h2>',
             default_diff.make_table(from_near, from_near, 'from', 'to',
                                     context=True)),
            ('<h2>Same Full</h2>',
             default_diff.make_table(from_near, from_near, 'from', 'to',
                                     context=False)),
            ('<h2>Empty Context</h2>',
             default_diff.make_table([], [], 'from', 'to', context=True)),
            ('<h2>Empty Full</h2>',
             default_diff.make_table([], [], 'from', 'to', context=False)),
            ('<h2>tabsize=2</h2>',
             tab2_diff.make_table(from_tabs, to_tabs)),
            ('<h2>tabsize=default</h2>',
             default_diff.make_table(from_tabs, to_tabs)),
            ('<h2>Context (wrapcolumn=14,numlines=0)</h2>',
             wrap_diff.make_table(from_wrap.splitlines(), to_wrap.splitlines(),
                                  context=True, numlines=0)),
            ('<h2>wrapcolumn=14,splitlines()</h2>',
             wrap_diff.make_table(from_wrap.splitlines(),
                                  to_wrap.splitlines())),
            ('<h2>wrapcolumn=14,splitlines(True)</h2>',
             wrap_diff.make_table(from_wrap.splitlines(True),
                                  to_wrap.splitlines(True))),
        ]
        # Flatten the (heading, table) pairs and splice them in before </body>.
        tables = '\n'.join(part for section in sections for part in section)
        actual = page.replace('</body>', '\n%s\n</body>' % tables)

        # temporarily uncomment next two lines to baseline this test
        #with open('test_difflib_expect.html','w') as fp:
        #    fp.write(actual)

        with open(findfile('test_difflib_expect.html')) as baseline:
            expected = baseline.read()
        self.assertEqual(actual, expected)

    def test_recursion_limit(self):
        """get_opcodes() must cope with inputs twice as long as the recursion limit."""
        # Check if the problem described in patch #1413711 exists.
        length = sys.getrecursionlimit() * 2
        # Odd positions are identical in both lists, even positions differ.
        old = [("K:%d" if n % 2 else "V:A:%d") % n for n in range(length)]
        new = [("K:%d" if n % 2 else "V:B:%d") % n for n in range(length)]
        difflib.SequenceMatcher(None, old, new).get_opcodes()
208
209
class TestOutputFormat(unittest.TestCase):
    """Checks on the header lines and range fields of unified/context diffs."""

    def test_tab_delimiter(self):
        """File name and file date in the header lines are separated by a tab."""
        a, b = 'one', 'two'
        names = ('Original', 'Current')
        dates = ('2005-01-26 23:30:50', '2010-04-02 10:20:52')

        unified = difflib.unified_diff(a, b, *(names + dates), lineterm='')
        unified_header = list(unified)[0:2]
        self.assertEqual(unified_header, [
            "--- Original\t2005-01-26 23:30:50",
            "+++ Current\t2010-04-02 10:20:52",
        ])

        context = difflib.context_diff(a, b, *(names + dates), lineterm='')
        context_header = list(context)[0:2]
        self.assertEqual(context_header, [
            "*** Original\t2005-01-26 23:30:50",
            "--- Current\t2010-04-02 10:20:52",
        ])

    def test_no_trailing_tab_on_empty_filedate(self):
        """Without file dates the header lines carry no trailing tab."""
        a, b, fromfile, tofile = 'one', 'two', 'Original', 'Current'

        unified = difflib.unified_diff(a, b, fromfile, tofile, lineterm='')
        self.assertEqual(list(unified)[0:2], ["--- Original", "+++ Current"])

        context = difflib.context_diff(a, b, fromfile, tofile, lineterm='')
        self.assertEqual(list(context)[0:2], ["*** Original", "--- Current"])

    def test_range_format_unified(self):
        """Range fields of unified diffs follow the format quoted in ``spec``."""
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        spec = '''\
           Each <range> field shall be of the form:
             %1d", <beginning line number>  if the range contains exactly one line,
           and:
            "%1d,%1d", <beginning line number>, <number of lines> otherwise.
           If a range is empty, its beginning line number shall be the number of
           the line just before the range, or 0 if the empty range starts the file.
        '''
        fmt = difflib._format_range_unified
        # (start, stop) arguments paired with the expected range field.
        cases = [
            ((3, 3), '3,0'),
            ((3, 4), '4'),
            ((3, 5), '4,2'),
            ((3, 6), '4,3'),
            ((0, 0), '0,0'),
        ]
        for (start, stop), expected in cases:
            self.assertEqual(fmt(start, stop), expected)

    def test_range_format_context(self):
        """Range fields of context diffs follow the format quoted in ``spec``."""
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        spec = '''\
           The range of lines in file1 shall be written in the following format
           if the range contains two or more lines:
               "*** %d,%d ****\n", <beginning line number>, <ending line number>
           and the following format otherwise:
               "*** %d ****\n", <ending line number>
           The ending line number of an empty range shall be the number of the preceding line,
           or 0 if the range is at the start of the file.

           Next, the range of lines in file2 shall be written in the following format
           if the range contains two or more lines:
               "--- %d,%d ----\n", <beginning line number>, <ending line number>
           and the following format otherwise:
               "--- %d ----\n", <ending line number>
        '''
        fmt = difflib._format_range_context
        # (start, stop) arguments paired with the expected range field.
        cases = [
            ((3, 3), '3'),
            ((3, 4), '4'),
            ((3, 5), '4,5'),
            ((3, 6), '4,6'),
            ((0, 0), '0'),
        ]
        for (start, stop), expected in cases:
            self.assertEqual(fmt(start, stop), expected)
271
class TestJunkAPIs(unittest.TestCase):
    """Checks for the IS_LINE_JUNK and IS_CHARACTER_JUNK predicates."""

    def test_is_line_junk_true(self):
        """Blank lines and lines holding a single '#' are reported as junk."""
        junk_lines = ('#', '  ', ' #', '# ', ' # ', '')
        for candidate in junk_lines:
            verdict = difflib.IS_LINE_JUNK(candidate)
            self.assertTrue(verdict, repr(candidate))

    def test_is_line_junk_false(self):
        """Lines with any other content are not reported as junk."""
        real_lines = ('##', ' ##', '## ', 'abc ', 'abc #', 'Mr. Moose is up!')
        for candidate in real_lines:
            verdict = difflib.IS_LINE_JUNK(candidate)
            self.assertFalse(verdict, repr(candidate))

    def test_is_line_junk_REDOS(self):
        """A million leading tabs followed by '##' is still classified as not junk."""
        padding = '\t' * 1000000
        self.assertFalse(difflib.IS_LINE_JUNK(padding + '##'))

    def test_is_character_junk_true(self):
        """Space and tab are junk characters."""
        for candidate in (' ', '\t'):
            verdict = difflib.IS_CHARACTER_JUNK(candidate)
            self.assertTrue(verdict, repr(candidate))

    def test_is_character_junk_false(self):
        """Letters, '#', and the other whitespace characters are not junk."""
        for candidate in ('a', '#', '\n', '\f', '\r', '\v'):
            verdict = difflib.IS_CHARACTER_JUNK(candidate)
            self.assertFalse(verdict, repr(candidate))
292
def test_main():
    """Run every test case in this module together with difflib's doctests.

    The doctest suite built from the ``difflib`` module is handed to
    ``run_unittest`` alongside the TestCase classes; previously it was
    constructed but never run.
    """
    # Reset the class-level prefix counter before any HTML is generated.
    # NOTE(review): presumably this keeps the generated markup identical to
    # the stored baseline used by TestSFpatches.test_html_diff -- confirm.
    difflib.HtmlDiff._default_prefix = 0
    doctests = doctest.DocTestSuite(difflib)
    run_unittest(
        TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
        TestOutputFormat, TestJunkAPIs, doctests)


if __name__ == '__main__':
    test_main()
302