r"""Test correct treatment of various string literals by the parser.

There are four types of string literals:

    'abc'             -- normal str
    r'abc'            -- raw str
    b'xyz'            -- normal bytes
    br'xyz' | rb'xyz' -- raw bytes

(A u'abc' or U'abc' prefix is also accepted and yields a normal str; it
cannot be combined with the r or b prefixes.)

The difference between normal and raw strings is of course that in a
raw string, \ escapes (while still used to determine the end of the
literal) are not interpreted, so that r'\x00' contains four
characters: a backslash, an x, and two zeros; while '\x00' contains a
single character (code point zero).

The tricky thing is what should happen when non-ASCII bytes are used
inside literals.  For bytes literals, this is considered illegal.  But
for str literals, those bytes are supposed to be decoded using the
encoding declared for the file (UTF-8 by default).

We have to test this with various file encodings.  We also test it with
exec()/eval(), which uses a different code path.

This file is really about correct treatment of encodings and
backslashes.  It doesn't concern itself with issues like single
vs. double quotes or singly- vs. triply-quoted strings: that's dealt
with elsewhere (I assume).
"""
29
import importlib
import os
import shutil
import sys
import tempfile
import unittest
import warnings
from test.support import use_old_parser
37
38
# Source text of a small module that checks its own literals at import time.
# check_encoding() fills the single %s with an encoding name (producing the
# "# coding:" cookie on the first line), writes the result to a file in that
# encoding and imports it.  The text is deliberately limited to printable
# ASCII plus newlines (verified by test_template) so that it can be written
# unchanged in every encoding under test; all non-ASCII code points are
# spelled as backslash escapes inside the module's own literals.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
59
60
def byte(i):
    """Return a bytes object of length one whose only byte has value *i*."""
    return bytes((i,))
63
64
class TestLiterals(unittest.TestCase):
    """Exercise the parser's handling of str and bytes literals.

    The ``test_eval_*`` methods pass literal source text to eval(); the
    ``test_file_*`` methods go through check_encoding(), which writes
    TEMPLATE to a module file in a given encoding and imports it.

    Throughout the eval tests, pay attention to whether the *outer*
    literal is raw: with r\"\"\"...\"\"\" the backslash sequence reaches
    eval() verbatim and is interpreted by the parser under test, while
    with a plain \"\"\"...\"\"\" the escape is already converted when this
    file is compiled, so eval() sees the resulting character itself.
    """

    def setUp(self):
        # Give each test a private scratch directory at the front of
        # sys.path so modules written by check_encoding() are importable.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Restore sys.path in place and remove the scratch directory.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.
        # An assert* method is used instead of a bare ``assert`` so the
        # check is not compiled away when running under ``python -O``.
        for c in TEMPLATE:
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        # Each escape is checked twice: once interpreted by eval() (raw
        # outer literal) and once already converted before eval() runs.
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # Truncated \x, \u and \U escapes must be rejected.
        self.assertRaises(SyntaxError, eval, r""" '\x' """)
        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u' """)
        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

    def test_eval_str_invalid_escape(self):
        for b in range(1, 128):
            # Skip the characters that do form a recognised escape (or a
            # line continuation) when they follow a backslash.
            if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
                continue
            # Anything else keeps the backslash but is deprecated.
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))

        # Exactly one warning is expected, attributed to '<string>' line 1.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # With the warning escalated to an error it must surface as a
        # SyntaxError and nothing may be left in the recorded list.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)
        self.assertEqual(exc.offset, 1)

    def test_eval_str_raw(self):
        # In a raw str the backslash sequence is kept verbatim; when the
        # outer literal is not raw, eval() only ever sees the character.
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        # Non-ASCII characters placed directly inside a bytes literal are
        # a SyntaxError.  The \u and \U cases use br'' because those are
        # not escapes in a bytes literal (see the skip list in
        # test_eval_bytes_invalid_escape).
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        # Truncated \x escapes must be rejected in bytes literals too.
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_invalid_escape(self):
        for b in range(1, 128):
            # Same idea as the str variant, but N, U and u are not in the
            # skip list here, so they are expected to warn as well.
            if b in b"""\n\r"'01234567\\abfnrtvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("b'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        # The reported line number is only pinned down for the old parser.
        if use_old_parser():
            self.assertEqual(w[0].lineno, 1)

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("b'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        # The reported line number is only pinned down for the old parser.
        if use_old_parser():
            self.assertEqual(exc.lineno, 1)

    def test_eval_bytes_raw(self):
        # Both prefix orders (br / rb) must behave identically.
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Repeated prefix letters are not valid prefixes.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        # The u/U prefix yields an ordinary str and cannot be combined
        # with the r or b prefixes.
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def check_encoding(self, encoding, extra=""):
        """Write TEMPLATE (plus *extra*) as a module in *encoding*, then import it.

        The module is named after the encoding and created in the test's
        scratch directory.  Its own assert statements run at import time,
        so a mis-handled literal surfaces as an exception from this call.
        """
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        # The file was created after interpreter start-up; make sure the
        # import system's directory caches cannot hide it.
        importlib.invalidate_caches()
        __import__(modname)
        # Drop the module again so it does not linger in sys.modules.
        del sys.modules[modname]

    def test_file_utf_8(self):
        # A non-ASCII character written directly into the file must be
        # decoded according to the declared encoding.
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # A non-ASCII character inside a bytes literal is a SyntaxError.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        # Use the hyphen-less alias so this is not a duplicate of
        # test_file_utf_8 and the "coding: utf8" spelling is covered too.
        self.check_encoding("utf8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")
250
251
# Allow the tests to be run by executing this file directly as a script.
if __name__ == "__main__":
    unittest.main()
254