# -*- coding: koi8-r -*-
#
# NOTE: the coding declaration above is part of what is being tested.  This
# file contains raw non-ASCII characters (see test_pep263) and therefore has
# to be stored on disk in KOI8-R, not UTF-8.

import unittest
from test.support import (TESTFN, unlink, unload, rmtree, script_helper,
                          captured_stdout)
import importlib
import os
import sys
import subprocess
import tempfile


# Test methods below are documented with comments rather than docstrings on
# purpose: unittest shows the first docstring line instead of the test id in
# verbose output, which makes failures harder to locate.

class MiscSourceEncodingTest(unittest.TestCase):
    """Assorted regression tests for source-encoding handling."""

    def test_pep263(self):
        # The literals are raw Cyrillic text decoded through this file's own
        # "coding: koi8-r" declaration; they must encode to the UTF-8 bytes
        # spelled out below.
        self.assertEqual(
            "Питон".encode("utf-8"),
            b'\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
        )
        # A backslash followed by a non-ASCII character is not an escape
        # sequence: both the backslash and the character must survive.
        self.assertEqual(
            "\П".encode("utf-8"),
            b'\\\xd0\x9f'
        )

    def test_compilestring(self):
        # see #1882
        # Bytes source whose coding declaration is on the second line.
        c = compile(b"\n# coding: utf-8\nu = '\xc3\xb3'\n", "dummy", "exec")
        d = {}
        exec(c, d)
        self.assertEqual(d['u'], '\xf3')

    def test_issue2301(self):
        # The source is deliberately invalid (statement-style print); the
        # text attached to the SyntaxError must be decoded using the declared
        # cp932 encoding.
        with self.assertRaises(SyntaxError) as cm:
            compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
        self.assertEqual(cm.exception.text.rstrip('\n'), "print '\u5e74'")

    def test_issue4626(self):
        # A str source carrying a coding declaration: the non-ASCII
        # identifier and the literal must both come through unchanged.
        c = compile("# coding=latin-1\n\u00c6 = '\u00c6'", "dummy", "exec")
        d = {}
        exec(c, d)
        self.assertEqual(d['\xc6'], '\xc6')

    def test_issue3297(self):
        # The same non-BMP character written literally ('a') and as a
        # \U escape ('b') must produce identical strings.
        c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
        d = {}
        exec(c, d)
        self.assertEqual(d['a'], d['b'])
        self.assertEqual(len(d['a']), len(d['b']))
        self.assertEqual(ascii(d['a']), ascii(d['b']))

    def test_issue7820(self):
        # Ensure that check_bom() restores all bytes in the right order if
        # check_bom() fails in pydebug mode: a buffer starts with the first
        # byte of a valid BOM, but next bytes are different

        # one byte in common with the UTF-16-LE BOM
        self.assertRaises(SyntaxError, eval, b'\xff\x20')

        # one byte in common with the UTF-8 BOM
        self.assertRaises(SyntaxError, eval, b'\xef\x20')

        # two bytes in common with the UTF-8 BOM
        self.assertRaises(SyntaxError, eval, b'\xef\xbb\x20')

    def test_20731(self):
        # Run the sibling script coding20731.py in a child interpreter and
        # check that it exits cleanly without reporting a SyntaxError.
        sub = subprocess.Popen([sys.executable,
                                os.path.join(os.path.dirname(__file__),
                                             'coding20731.py')],
                               stderr=subprocess.PIPE)
        err = sub.communicate()[1]
        self.assertEqual(sub.returncode, 0)
        self.assertNotIn(b'SyntaxError', err)

    def test_error_message(self):
        # These three sources must compile without error.
        compile(b'# -*- coding: iso-8859-15 -*-\n', 'dummy', 'exec')
        compile(b'\xef\xbb\xbf\n', 'dummy', 'exec')
        compile(b'\xef\xbb\xbf# -*- coding: utf-8 -*-\n', 'dummy', 'exec')
        # An unknown encoding must be named in the error message.
        with self.assertRaisesRegex(SyntaxError, 'fake'):
            compile(b'# -*- coding: fake -*-\n', 'dummy', 'exec')
        # A UTF-8 BOM combined with a different declared encoding: the
        # message must mention both the encoding and the BOM.  Each pattern
        # is checked in its own compile() call.
        with self.assertRaisesRegex(SyntaxError, 'iso-8859-15'):
            compile(b'\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
                    'dummy', 'exec')
        with self.assertRaisesRegex(SyntaxError, 'BOM'):
            compile(b'\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
                    'dummy', 'exec')
        with self.assertRaisesRegex(SyntaxError, 'fake'):
            compile(b'\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
        with self.assertRaisesRegex(SyntaxError, 'BOM'):
            compile(b'\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')

    def test_bad_coding(self):
        module_name = 'bad_coding'
        self.verify_bad_module(module_name)

    def test_bad_coding2(self):
        module_name = 'bad_coding2'
        self.verify_bad_module(module_name)

    def verify_bad_module(self, module_name):
        """Check that test.<module_name> is rejected with SyntaxError.

        The module is tried twice: once through the import system and once
        by compiling the raw bytes of ``<module_name>.py`` located next to
        this file.
        """
        self.assertRaises(SyntaxError, __import__, 'test.' + module_name)

        path = os.path.dirname(__file__)
        filename = os.path.join(path, module_name + '.py')
        with open(filename, "rb") as fp:
            source = fp.read()
        self.assertRaises(SyntaxError, compile, source, filename, 'exec')

    def test_exec_valid_coding(self):
        # exec() on bytes must honour the coding declaration (cp949 here).
        d = {}
        exec(b'# coding: cp949\na = "\xaa\xa7"\n', d)
        self.assertEqual(d['a'], '\u3047')

    def test_file_parse(self):
        # issue1134: all encodings outside latin-1 and utf-8 fail on
        # multiline strings and long lines (>512 columns)
        unload(TESTFN)
        filename = TESTFN + ".py"
        f = open(filename, "w", encoding="cp1252")
        # Make the freshly written module importable from the current
        # directory; the entry is removed again in the finally clause.
        sys.path.insert(0, os.curdir)
        try:
            with f:
                f.write("# -*- coding: cp1252 -*-\n")
                f.write("'''A short string\n")
                f.write("'''\n")
                f.write("'A very long string %s'\n" % ("X" * 1000))

            importlib.invalidate_caches()
            __import__(TESTFN)
        finally:
            del sys.path[0]
            unlink(filename)
            unlink(filename + "c")
            unlink(filename + "o")
            unload(TESTFN)
            rmtree('__pycache__')

    def test_error_from_string(self):
        # See http://bugs.python.org/issue6289
        # The source declares ascii but contains a UTF-8 encoded snowman, so
        # compiling must fail with the codec's own error message.
        source = "# coding: ascii\n\N{SNOWMAN}".encode('utf-8')
        with self.assertRaises(SyntaxError) as c:
            compile(source, "<string>", "exec")
        expected = "'ascii' codec can't decode byte 0xe2 in position 16: " \
                   "ordinal not in range(128)"
        self.assertTrue(c.exception.args[0].startswith(expected),
                        msg=c.exception.args[0])


class AbstractSourceEncodingTest:
    """Shared test cases for coding-declaration detection.

    This class is a mixin: concrete subclasses also derive from
    unittest.TestCase and must provide ``check_script_output(src, expected)``,
    which runs the bytes source *src* and compares what it prints with the
    bytes *expected*.

    Every script prints ``ascii()`` of a literal holding the two bytes
    C3 A4, so the expected output reveals which encoding was used to decode
    the source: ``'\\xe4'`` for UTF-8, ``'\\xc3\\u20ac'`` for ISO-8859-15.
    """

    def test_default_coding(self):
        src = (b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xe4'")

    def test_first_coding_line(self):
        src = (b'#coding:iso8859-15\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_second_coding_line(self):
        src = (b'#\n'
               b'#coding:iso8859-15\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_third_coding_line(self):
        # Only first two lines are tested for a magic comment.
        src = (b'#\n'
               b'#\n'
               b'#coding:iso8859-15\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xe4'")

    def test_double_coding_line(self):
        # If the first line matches the second line is ignored.
        src = (b'#coding:iso8859-15\n'
               b'#coding:latin1\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_double_coding_same_line(self):
        # Two declarations on one line: the expected output shows that the
        # first one (iso8859-15) is the one applied.
        src = (b'#coding:iso8859-15 coding:latin1\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_first_non_utf8_coding_line(self):
        # The coding line itself carries a byte (A4) that is not valid
        # UTF-8; the declaration must still be recognised.
        src = (b'#coding:iso-8859-15 \xa4\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_second_non_utf8_coding_line(self):
        # Same as above, with the declaration on the second line.
        src = (b'\n'
               b'#coding:iso-8859-15 \xa4\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_utf8_bom(self):
        src = (b'\xef\xbb\xbfprint(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xe4'")

    def test_utf8_bom_and_utf8_coding_line(self):
        src = (b'\xef\xbb\xbf#coding:utf-8\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xe4'")


class BytesSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase):
    """Run the shared cases by passing the bytes source directly to exec()."""

    def check_script_output(self, src, expected):
        """Execute *src* in-process and compare captured stdout to *expected*."""
        with captured_stdout() as stdout:
            exec(src)
        # The captured text is str; encode it so it can be compared with the
        # bytes value the test cases supply.
        out = stdout.getvalue().encode('latin1')
        self.assertEqual(out.rstrip(), expected)


class FileSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase):
    """Run the shared cases by writing the source to a file and running it."""

    def check_script_output(self, src, expected):
        """Write *src* to a temporary script, run it, and compare its stdout."""
        with tempfile.TemporaryDirectory() as tmpd:
            fn = os.path.join(tmpd, 'test.py')
            with open(fn, 'wb') as fp:
                fp.write(src)
            res = script_helper.assert_python_ok(fn)
        self.assertEqual(res.out.rstrip(), expected)


if __name__ == "__main__":
    unittest.main()