r"""Test correct treatment of various string literals by the parser.

There are four types of string literals:

    'abc'             -- normal str
    r'abc'            -- raw str
    b'xyz'            -- normal bytes
    br'xyz' | rb'xyz' -- raw bytes

The difference between normal and raw strings is of course that in a
raw string, \ escapes (while still used to determine the end of the
literal) are not interpreted, so that r'\x00' contains four
characters: a backslash, an x, and two zeros; while '\x00' contains a
single character (code point zero).

The tricky thing is what should happen when non-ASCII bytes are used
inside literals.  For bytes literals, this is considered illegal.  But
for str literals, those bytes are supposed to be decoded using the
encoding declared for the file (UTF-8 by default).

We have to test this with various file encodings.  We also test it with
exec()/eval(), which uses a different code path.

This file is really about correct treatment of encodings and
backslashes.  It doesn't concern itself with issues like single
vs. double quotes or singly- vs. triply-quoted strings: that's dealt
with elsewhere (I assume).
"""

import os
import sys
import shutil
import tempfile
import unittest
import warnings

try:
    from test.support import use_old_parser
except ImportError:
    # NOTE(review): use_old_parser() is a transitional test.support helper
    # and the "test" package itself is not installed everywhere.  When it is
    # unavailable, fall back to reporting "not the old parser" so the module
    # still imports; the line-number checks guarded by it are then skipped.
    def use_old_parser():
        return False


# Source of the module written to disk by TestLiterals.check_encoding().
# "%s" is replaced by the encoding name for the coding cookie.  The text is
# a raw string, so the backslash escapes below reach the generated file
# verbatim and are interpreted (or not) by the parser under test.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""


def byte(i):
    """Return a bytes object of length one holding the byte value *i*."""
    return bytes([i])


class TestLiterals(unittest.TestCase):
    """Checks for str/bytes literal handling via eval() and via imported files."""

    def setUp(self):
        # Each test gets a private directory at the front of sys.path so the
        # modules generated by check_encoding() can be imported by name.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Restore sys.path in place and discard the generated modules.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.
        # A unittest assertion is used instead of a bare ``assert`` so the
        # check is not compiled away when running under ``python -O``.
        for c in TEMPLATE:
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        # Each escape is checked twice: with a raw outer string (the escape
        # is interpreted by eval()) and with a normal outer string (the
        # escape is already resolved before eval() sees the source).
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # Truncated \x, \u and \U escapes must be rejected.
        self.assertRaises(SyntaxError, eval, r""" '\x' """)
        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u' """)
        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

    def test_eval_str_invalid_escape(self):
        # Every ASCII character that does not start a recognised str escape
        # must warn and leave the backslash in the resulting string.
        for b in range(1, 128):
            if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))

        # The warning is attributed to the evaluated source.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # With the warning escalated to an error, a SyntaxError is raised
        # and nothing is recorded.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)
        self.assertEqual(exc.offset, 1)

    def test_eval_str_raw(self):
        # In the raw-outer-string cases the raw literal keeps the backslash;
        # in the others the escape is resolved before eval() sees it.
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        # Non-ASCII characters placed directly inside a bytes literal are a
        # SyntaxError; the same values spelled as escapes are accepted.
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        # Truncated \x escapes must be rejected in bytes literals too.
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_invalid_escape(self):
        # Same as the str variant, with the escape table used for bytes
        # (no N, U or u entries).
        for b in range(1, 128):
            if b in b"""\n\r"'01234567\\abfnrtvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("b'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        # The reported line number is only pinned down for the old parser.
        if use_old_parser():
            self.assertEqual(w[0].lineno, 1)

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("b'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        if use_old_parser():
            self.assertEqual(exc.lineno, 1)

    def test_eval_bytes_raw(self):
        # Both prefix orders (br / rb) are exercised for every case.
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Repeated prefix letters are not valid prefixes.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        # The u/U prefix is accepted on its own but cannot be combined with
        # r or b.
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def check_encoding(self, encoding, extra=""):
        """Write TEMPLATE (plus *extra*) as a module in *encoding* and import it.

        The module is named after the encoding, created inside self.tmpdir
        and imported by name, so the assertions embedded in TEMPLATE run at
        import time.  Any exception raised while writing or importing
        propagates to the caller.
        """
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        __import__(modname)
        # Drop the module again so it does not linger in sys.modules.
        del sys.modules[modname]

    def test_file_utf_8(self):
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # A non-ASCII character inside a bytes literal must not compile.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        self.check_encoding("utf-8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")


if __name__ == "__main__":
    unittest.main()