1import sys
2from test.test_json import PyTest, CTest
3
4
5class TestScanstring:
6    def test_scanstring(self):
7        scanstring = self.json.decoder.scanstring
8        self.assertEqual(
9            scanstring('"z\U0001d120x"', 1, True),
10            ('z\U0001d120x', 5))
11
12        self.assertEqual(
13            scanstring('"\\u007b"', 1, True),
14            ('{', 8))
15
16        self.assertEqual(
17            scanstring('"A JSON payload should be an object or array, not a string."', 1, True),
18            ('A JSON payload should be an object or array, not a string.', 60))
19
20        self.assertEqual(
21            scanstring('["Unclosed array"', 2, True),
22            ('Unclosed array', 17))
23
24        self.assertEqual(
25            scanstring('["extra comma",]', 2, True),
26            ('extra comma', 14))
27
28        self.assertEqual(
29            scanstring('["double extra comma",,]', 2, True),
30            ('double extra comma', 21))
31
32        self.assertEqual(
33            scanstring('["Comma after the close"],', 2, True),
34            ('Comma after the close', 24))
35
36        self.assertEqual(
37            scanstring('["Extra close"]]', 2, True),
38            ('Extra close', 14))
39
40        self.assertEqual(
41            scanstring('{"Extra comma": true,}', 2, True),
42            ('Extra comma', 14))
43
44        self.assertEqual(
45            scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, True),
46            ('Extra value after close', 26))
47
48        self.assertEqual(
49            scanstring('{"Illegal expression": 1 + 2}', 2, True),
50            ('Illegal expression', 21))
51
52        self.assertEqual(
53            scanstring('{"Illegal invocation": alert()}', 2, True),
54            ('Illegal invocation', 21))
55
56        self.assertEqual(
57            scanstring('{"Numbers cannot have leading zeroes": 013}', 2, True),
58            ('Numbers cannot have leading zeroes', 37))
59
60        self.assertEqual(
61            scanstring('{"Numbers cannot be hex": 0x14}', 2, True),
62            ('Numbers cannot be hex', 24))
63
64        self.assertEqual(
65            scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, True),
66            ('Too deep', 30))
67
68        self.assertEqual(
69            scanstring('{"Missing colon" null}', 2, True),
70            ('Missing colon', 16))
71
72        self.assertEqual(
73            scanstring('{"Double colon":: null}', 2, True),
74            ('Double colon', 15))
75
76        self.assertEqual(
77            scanstring('{"Comma instead of colon", null}', 2, True),
78            ('Comma instead of colon', 25))
79
80        self.assertEqual(
81            scanstring('["Colon instead of comma": false]', 2, True),
82            ('Colon instead of comma', 25))
83
84        self.assertEqual(
85            scanstring('["Bad value", truth]', 2, True),
86            ('Bad value', 12))
87
88    def test_surrogates(self):
89        scanstring = self.json.decoder.scanstring
90        def assertScan(given, expect):
91            self.assertEqual(scanstring(given, 1, True),
92                             (expect, len(given)))
93
94        assertScan('"z\\ud834\\u0079x"', 'z\ud834yx')
95        assertScan('"z\\ud834\\udd20x"', 'z\U0001d120x')
96        assertScan('"z\\ud834\\ud834\\udd20x"', 'z\ud834\U0001d120x')
97        assertScan('"z\\ud834x"', 'z\ud834x')
98        assertScan('"z\\ud834\udd20x12345"', 'z\ud834\udd20x12345')
99        assertScan('"z\\udd20x"', 'z\udd20x')
100        assertScan('"z\ud834\udd20x"', 'z\ud834\udd20x')
101        assertScan('"z\ud834\\udd20x"', 'z\ud834\udd20x')
102        assertScan('"z\ud834x"', 'z\ud834x')
103
104    def test_bad_escapes(self):
105        scanstring = self.json.decoder.scanstring
106        bad_escapes = [
107            '"\\"',
108            '"\\x"',
109            '"\\u"',
110            '"\\u0"',
111            '"\\u01"',
112            '"\\u012"',
113            '"\\uz012"',
114            '"\\u0z12"',
115            '"\\u01z2"',
116            '"\\u012z"',
117            '"\\u0x12"',
118            '"\\u0X12"',
119            '"\\ud834\\"',
120            '"\\ud834\\u"',
121            '"\\ud834\\ud"',
122            '"\\ud834\\udd"',
123            '"\\ud834\\udd2"',
124            '"\\ud834\\uzdd2"',
125            '"\\ud834\\udzd2"',
126            '"\\ud834\\uddz2"',
127            '"\\ud834\\udd2z"',
128            '"\\ud834\\u0x20"',
129            '"\\ud834\\u0X20"',
130        ]
131        for s in bad_escapes:
132            with self.assertRaises(self.JSONDecodeError, msg=s):
133                scanstring(s, 1, True)
134
135    def test_overflow(self):
136        with self.assertRaises(OverflowError):
137            self.json.decoder.scanstring(b"xxx", sys.maxsize+1)
138
139
class TestPyScanstring(TestScanstring, PyTest):
    """Run the shared ``TestScanstring`` checks on top of the ``PyTest`` base.

    NOTE(review): by its name ``PyTest`` targets the pure-Python json
    implementation -- confirm in ``test.test_json``.
    """


class TestCScanstring(TestScanstring, CTest):
    """Run the shared ``TestScanstring`` checks on top of the ``CTest`` base.

    NOTE(review): by its name ``CTest`` targets the C-accelerated json
    implementation -- confirm in ``test.test_json``.
    """
142