1import sys
2from json.tests import PyTest, CTest
3
4
class TestScanstring(object):
    """Shared checks for ``self.json.decoder.scanstring``.

    This class defines only the test bodies.  It reads ``self.json`` and
    calls ``assertEqual``/``assertRaises``, none of which it defines itself;
    the concrete subclasses are expected to mix those in.
    """

    def test_scanstring(self):
        # Table-driven: each row is (text, start, expected).  ``start`` is
        # the index just past the opening quote, and ``expected`` is the
        # (decoded string, end index) pair scanstring has to return.
        scan = self.json.decoder.scanstring

        # The expected end index for the non-BMP sample is one larger when
        # sys.maxunicode is 65535.
        astral_end = 6 if sys.maxunicode == 65535 else 5

        # Most rows are deliberately malformed JSON documents; only the
        # first quoted string in each of them is scanned.
        cases = [
            (u'"z\U0001d120x"', 1, (u'z\U0001d120x', astral_end)),
            ('"\\u007b"', 1, (u'{', 8)),
            ('"A JSON payload should be an object or array, not a string."',
             1,
             (u'A JSON payload should be an object or array, not a string.',
              60)),
            ('["Unclosed array"', 2, (u'Unclosed array', 17)),
            ('["extra comma",]', 2, (u'extra comma', 14)),
            ('["double extra comma",,]', 2, (u'double extra comma', 21)),
            ('["Comma after the close"],', 2,
             (u'Comma after the close', 24)),
            ('["Extra close"]]', 2, (u'Extra close', 14)),
            ('{"Extra comma": true,}', 2, (u'Extra comma', 14)),
            ('{"Extra value after close": true} "misplaced quoted value"',
             2,
             (u'Extra value after close', 26)),
            ('{"Illegal expression": 1 + 2}', 2,
             (u'Illegal expression', 21)),
            ('{"Illegal invocation": alert()}', 2,
             (u'Illegal invocation', 21)),
            ('{"Numbers cannot have leading zeroes": 013}', 2,
             (u'Numbers cannot have leading zeroes', 37)),
            ('{"Numbers cannot be hex": 0x14}', 2,
             (u'Numbers cannot be hex', 24)),
            ('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21,
             (u'Too deep', 30)),
            ('{"Missing colon" null}', 2, (u'Missing colon', 16)),
            ('{"Double colon":: null}', 2, (u'Double colon', 15)),
            ('{"Comma instead of colon", null}', 2,
             (u'Comma instead of colon', 25)),
            ('["Colon instead of comma": false]', 2,
             (u'Colon instead of comma', 25)),
            ('["Bad value", truth]', 2, (u'Bad value', 12)),
        ]
        for text, start, expected in cases:
            self.assertEqual(scan(text, start, None, True), expected)

    def test_surrogates(self):
        # Surrogate handling: escaped and literal halves, paired and lone.
        scan = self.json.decoder.scanstring

        def check(source, decoded):
            # The whole input must be consumed, so the end index is
            # len(source).
            self.assertEqual(scan(source, 1, None, True),
                             (decoded, len(source)))
            if isinstance(source, unicode):
                return
            # Byte-string input: run the same check on its unicode form.
            promoted = unicode(source)
            self.assertEqual(scan(promoted, 1, None, True),
                             (decoded, len(promoted)))

        # High and low surrogate built separately and concatenated.
        pair = u''.join([unichr(0xd834), unichr(0xdd20)])

        check('"z\\ud834\\u0079x"', u'z\ud834yx')
        check('"z\\ud834\\udd20x"', u'z\U0001d120x')
        check('"z\\ud834\\ud834\\udd20x"', u'z\ud834\U0001d120x')
        check('"z\\ud834x"', u'z\ud834x')
        check(u'"z\\ud834\udd20x12345"', u'z%sx12345' % pair)
        check('"z\\udd20x"', u'z\udd20x')
        check(u'"z\ud834\udd20x"', u'z\ud834\udd20x')
        check(u'"z\ud834\\udd20x"', u'z%sx' % pair)
        check(u'"z\ud834x"', u'z\ud834x')

    def test_bad_escapes(self):
        # Every entry holds a truncated or otherwise invalid escape
        # sequence, and scanning each one must raise ValueError.
        scan = self.json.decoder.scanstring
        malformed = [
            '"\\"',
            '"\\x"',
            '"\\u"',
            '"\\u0"',
            '"\\u01"',
            '"\\u012"',
            '"\\uz012"',
            '"\\u0z12"',
            '"\\u01z2"',
            '"\\u012z"',
            '"\\u0x12"',
            '"\\u0X12"',
            '"\\ud834\\"',
            '"\\ud834\\u"',
            '"\\ud834\\ud"',
            '"\\ud834\\udd"',
            '"\\ud834\\udd2"',
            '"\\ud834\\uzdd2"',
            '"\\ud834\\udzd2"',
            '"\\ud834\\uddz2"',
            '"\\ud834\\udd2z"',
            '"\\ud834\\u0x20"',
            '"\\ud834\\u0X20"',
        ]
        for candidate in malformed:
            self.assertRaises(ValueError, scan, candidate, 1, None, True)

    def test_issue3623(self):
        # Presumably regression checks for tracker issue 3623 (going by the
        # method name): both calls must fail with the stated exception type.
        scan = self.json.decoder.scanstring
        encode_ascii = self.json.encoder.encode_basestring_ascii
        with self.assertRaises(ValueError):
            scan(b"xxx", 1, "xxx")
        with self.assertRaises(UnicodeDecodeError):
            encode_ascii(b"xx\xff")

    def test_overflow(self):
        # A start index of sys.maxsize + 1 must be rejected with
        # OverflowError.
        scan = self.json.decoder.scanstring
        too_large = sys.maxsize+1
        self.assertRaises(OverflowError, scan, b"xxx", too_large)
154
155
# Concrete test case: the shared scanstring checks combined with the PyTest
# mixin (presumably the pure-Python json implementation -- confirm in
# json.tests).
class TestPyScanstring(TestScanstring, PyTest):
    pass
# Concrete test case: the shared scanstring checks combined with the CTest
# mixin (presumably the C-accelerated json implementation -- confirm in
# json.tests).
class TestCScanstring(TestScanstring, CTest):
    pass
158