1import test.test_support, unittest 2import sys, codecs, htmlentitydefs, unicodedata 3 4class PosReturn: 5 # this can be used for configurable callbacks 6 7 def __init__(self): 8 self.pos = 0 9 10 def handle(self, exc): 11 oldpos = self.pos 12 realpos = oldpos 13 if realpos<0: 14 realpos = len(exc.object) + realpos 15 # if we don't advance this time, terminate on the next call 16 # otherwise we'd get an endless loop 17 if realpos <= exc.start: 18 self.pos = len(exc.object) 19 return (u"<?>", oldpos) 20 21# A UnicodeEncodeError object with a bad start attribute 22class BadStartUnicodeEncodeError(UnicodeEncodeError): 23 def __init__(self): 24 UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad") 25 self.start = [] 26 27# A UnicodeEncodeError object with a bad object attribute 28class BadObjectUnicodeEncodeError(UnicodeEncodeError): 29 def __init__(self): 30 UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad") 31 self.object = [] 32 33# A UnicodeDecodeError object without an end attribute 34class NoEndUnicodeDecodeError(UnicodeDecodeError): 35 def __init__(self): 36 UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad") 37 del self.end 38 39# A UnicodeDecodeError object with a bad object attribute 40class BadObjectUnicodeDecodeError(UnicodeDecodeError): 41 def __init__(self): 42 UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad") 43 self.object = [] 44 45# A UnicodeTranslateError object without a start attribute 46class NoStartUnicodeTranslateError(UnicodeTranslateError): 47 def __init__(self): 48 UnicodeTranslateError.__init__(self, u"", 0, 1, "bad") 49 del self.start 50 51# A UnicodeTranslateError object without an end attribute 52class NoEndUnicodeTranslateError(UnicodeTranslateError): 53 def __init__(self): 54 UnicodeTranslateError.__init__(self, u"", 0, 1, "bad") 55 del self.end 56 57# A UnicodeTranslateError object without an object attribute 58class NoObjectUnicodeTranslateError(UnicodeTranslateError): 59 def __init__(self): 60 UnicodeTranslateError.__init__(self, u"", 0, 1, "bad") 61 del self.object 62 63class CodecCallbackTest(unittest.TestCase): 64 65 def test_xmlcharrefreplace(self): 66 # replace unencodable characters which numeric character entities. 67 # For ascii, latin-1 and charmaps this is completely implemented 68 # in C and should be reasonably fast. 69 s = u"\u30b9\u30d1\u30e2 \xe4nd eggs" 70 self.assertEqual( 71 s.encode("ascii", "xmlcharrefreplace"), 72 "スパモ änd eggs" 73 ) 74 self.assertEqual( 75 s.encode("latin-1", "xmlcharrefreplace"), 76 "スパモ \xe4nd eggs" 77 ) 78 79 def test_xmlcharnamereplace(self): 80 # This time use a named character entity for unencodable 81 # characters, if one is available. 82 83 def xmlcharnamereplace(exc): 84 if not isinstance(exc, UnicodeEncodeError): 85 raise TypeError("don't know how to handle %r" % exc) 86 l = [] 87 for c in exc.object[exc.start:exc.end]: 88 try: 89 l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)]) 90 except KeyError: 91 l.append(u"&#%d;" % ord(c)) 92 return (u"".join(l), exc.end) 93 94 codecs.register_error( 95 "test.xmlcharnamereplace", xmlcharnamereplace) 96 97 sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a" 98 sout = "«ℜ» = ⟨ሴ€⟩" 99 self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout) 100 sout = "\xabℜ\xbb = ⟨ሴ€⟩" 101 self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout) 102 sout = "\xabℜ\xbb = ⟨ሴ\xa4⟩" 103 self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout) 104 105 def test_uninamereplace(self): 106 # We're using the names from the unicode database this time, 107 # and we're doing "syntax highlighting" here, i.e. we include 108 # the replaced text in ANSI escape sequences. For this it is 109 # useful that the error handler is not called for every single 110 # unencodable character, but for a complete sequence of 111 # unencodable characters, otherwise we would output many 112 # unnecessary escape sequences. 113 114 def uninamereplace(exc): 115 if not isinstance(exc, UnicodeEncodeError): 116 raise TypeError("don't know how to handle %r" % exc) 117 l = [] 118 for c in exc.object[exc.start:exc.end]: 119 l.append(unicodedata.name(c, u"0x%x" % ord(c))) 120 return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end) 121 122 codecs.register_error( 123 "test.uninamereplace", uninamereplace) 124 125 sin = u"\xac\u1234\u20ac\u8000" 126 sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" 127 self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout) 128 129 sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" 130 self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout) 131 132 sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m" 133 self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout) 134 135 def test_backslashescape(self): 136 # Does the same as the "unicode-escape" encoding, but with different 137 # base encodings. 138 sin = u"a\xac\u1234\u20ac\u8000" 139 if sys.maxunicode > 0xffff: 140 sin += unichr(sys.maxunicode) 141 sout = "a\\xac\\u1234\\u20ac\\u8000" 142 if sys.maxunicode > 0xffff: 143 sout += "\\U%08x" % sys.maxunicode 144 self.assertEqual(sin.encode("ascii", "backslashreplace"), sout) 145 146 sout = "a\xac\\u1234\\u20ac\\u8000" 147 if sys.maxunicode > 0xffff: 148 sout += "\\U%08x" % sys.maxunicode 149 self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout) 150 151 sout = "a\xac\\u1234\xa4\\u8000" 152 if sys.maxunicode > 0xffff: 153 sout += "\\U%08x" % sys.maxunicode 154 self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) 155 156 def test_decoding_callbacks(self): 157 # This is a test for a decoding callback handler 158 # that allows the decoding of the invalid sequence 159 # "\xc0\x80" and returns "\x00" instead of raising an error. 160 # All other illegal sequences will be handled strictly. 161 def relaxedutf8(exc): 162 if not isinstance(exc, UnicodeDecodeError): 163 raise TypeError("don't know how to handle %r" % exc) 164 if exc.object[exc.start:exc.start+2] == "\xc0\x80": 165 return (u"\x00", exc.start+2) # retry after two bytes 166 else: 167 raise exc 168 169 codecs.register_error("test.relaxedutf8", relaxedutf8) 170 171 # all the "\xc0\x80" will be decoded to "\x00" 172 sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80" 173 sout = u"a\x00b\x00c\xfc\x00\x00" 174 self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout) 175 176 # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised 177 sin = "\xc0\x80\xc0\x81" 178 self.assertRaises(UnicodeDecodeError, sin.decode, 179 "utf-8", "test.relaxedutf8") 180 181 def test_charmapencode(self): 182 # For charmap encodings the replacement string will be 183 # mapped through the encoding again. This means, that 184 # to be able to use e.g. the "replace" handler, the 185 # charmap has to have a mapping for "?". 186 charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"]) 187 sin = u"abc" 188 sout = "AABBCC" 189 self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout) 190 191 sin = u"abcA" 192 self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap) 193 194 charmap[ord("?")] = "XYZ" 195 sin = u"abcDEF" 196 sout = "AABBCCXYZXYZXYZ" 197 self.assertEqual(codecs.charmap_encode(sin, "replace", charmap)[0], sout) 198 199 charmap[ord("?")] = u"XYZ" 200 self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap) 201 202 charmap[ord("?")] = u"XYZ" 203 self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap) 204 205 def test_decodeunicodeinternal(self): 206 self.assertRaises( 207 UnicodeDecodeError, 208 "\x00\x00\x00\x00\x00".decode, 209 "unicode-internal", 210 ) 211 if sys.maxunicode > 0xffff: 212 def handler_unicodeinternal(exc): 213 if not isinstance(exc, UnicodeDecodeError): 214 raise TypeError("don't know how to handle %r" % exc) 215 return (u"\x01", 1) 216 217 self.assertEqual( 218 "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"), 219 u"\u0000" 220 ) 221 222 self.assertEqual( 223 "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"), 224 u"\u0000\ufffd" 225 ) 226 227 codecs.register_error("test.hui", handler_unicodeinternal) 228 229 self.assertEqual( 230 "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"), 231 u"\u0000\u0001\u0000" 232 ) 233 234 def test_callbacks(self): 235 def handler1(exc): 236 if not isinstance(exc, UnicodeEncodeError) \ 237 and not isinstance(exc, UnicodeDecodeError): 238 raise TypeError("don't know how to handle %r" % exc) 239 l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)] 240 return (u"[%s]" % u"".join(l), exc.end) 241 242 codecs.register_error("test.handler1", handler1) 243 244 def handler2(exc): 245 if not isinstance(exc, UnicodeDecodeError): 246 raise TypeError("don't know how to handle %r" % exc) 247 l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)] 248 return (u"[%s]" % u"".join(l), exc.end+1) # skip one character 249 250 codecs.register_error("test.handler2", handler2) 251 252 s = "\x00\x81\x7f\x80\xff" 253 254 self.assertEqual( 255 s.decode("ascii", "test.handler1"), 256 u"\x00[<129>]\x7f[<128>][<255>]" 257 ) 258 self.assertEqual( 259 s.decode("ascii", "test.handler2"), 260 u"\x00[<129>][<128>]" 261 ) 262 263 self.assertEqual( 264 "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"), 265 u"\u3042[<92><117><51><120>]xx" 266 ) 267 268 self.assertEqual( 269 "\\u3042\u3xx".decode("unicode-escape", "test.handler1"), 270 u"\u3042[<92><117><51><120><120>]" 271 ) 272 273 self.assertEqual( 274 codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0], 275 u"z[<98>][<99>]" 276 ) 277 278 self.assertEqual( 279 u"g\xfc\xdfrk".encode("ascii", "test.handler1"), 280 u"g[<252><223>]rk" 281 ) 282 283 self.assertEqual( 284 u"g\xfc\xdf".encode("ascii", "test.handler1"), 285 u"g[<252><223>]" 286 ) 287 288 def test_longstrings(self): 289 # test long strings to check for memory overflow problems 290 errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", 291 "backslashreplace"] 292 # register the handlers under different names, 293 # to prevent the codec from recognizing the name 294 for err in errors: 295 codecs.register_error("test." + err, codecs.lookup_error(err)) 296 l = 1000 297 errors += [ "test." + err for err in errors ] 298 for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]: 299 for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", 300 "utf-8", "utf-7", "utf-16", "utf-32"): 301 for err in errors: 302 try: 303 uni.encode(enc, err) 304 except UnicodeError: 305 pass 306 307 def check_exceptionobjectargs(self, exctype, args, msg): 308 # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion 309 # check with one missing argument 310 self.assertRaises(TypeError, exctype, *args[:-1]) 311 # check with one argument too much 312 self.assertRaises(TypeError, exctype, *(args + ["too much"])) 313 # check with one argument of the wrong type 314 wrongargs = [ "spam", u"eggs", 42, 1.0, None ] 315 for i in xrange(len(args)): 316 for wrongarg in wrongargs: 317 if type(wrongarg) is type(args[i]): 318 continue 319 # build argument array 320 callargs = [] 321 for j in xrange(len(args)): 322 if i==j: 323 callargs.append(wrongarg) 324 else: 325 callargs.append(args[i]) 326 self.assertRaises(TypeError, exctype, *callargs) 327 328 # check with the correct number and type of arguments 329 exc = exctype(*args) 330 self.assertEqual(str(exc), msg) 331 332 def test_unicodeencodeerror(self): 333 self.check_exceptionobjectargs( 334 UnicodeEncodeError, 335 ["ascii", u"g\xfcrk", 1, 2, "ouch"], 336 "'ascii' codec can't encode character u'\\xfc' in position 1: ouch" 337 ) 338 self.check_exceptionobjectargs( 339 UnicodeEncodeError, 340 ["ascii", u"g\xfcrk", 1, 4, "ouch"], 341 "'ascii' codec can't encode characters in position 1-3: ouch" 342 ) 343 self.check_exceptionobjectargs( 344 UnicodeEncodeError, 345 ["ascii", u"\xfcx", 0, 1, "ouch"], 346 "'ascii' codec can't encode character u'\\xfc' in position 0: ouch" 347 ) 348 self.check_exceptionobjectargs( 349 UnicodeEncodeError, 350 ["ascii", u"\u0100x", 0, 1, "ouch"], 351 "'ascii' codec can't encode character u'\\u0100' in position 0: ouch" 352 ) 353 self.check_exceptionobjectargs( 354 UnicodeEncodeError, 355 ["ascii", u"\uffffx", 0, 1, "ouch"], 356 "'ascii' codec can't encode character u'\\uffff' in position 0: ouch" 357 ) 358 if sys.maxunicode > 0xffff: 359 self.check_exceptionobjectargs( 360 UnicodeEncodeError, 361 ["ascii", u"\U00010000x", 0, 1, "ouch"], 362 "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch" 363 ) 364 365 def test_unicodedecodeerror(self): 366 self.check_exceptionobjectargs( 367 UnicodeDecodeError, 368 ["ascii", "g\xfcrk", 1, 2, "ouch"], 369 "'ascii' codec can't decode byte 0xfc in position 1: ouch" 370 ) 371 self.check_exceptionobjectargs( 372 UnicodeDecodeError, 373 ["ascii", "g\xfcrk", 1, 3, "ouch"], 374 "'ascii' codec can't decode bytes in position 1-2: ouch" 375 ) 376 377 def test_unicodetranslateerror(self): 378 self.check_exceptionobjectargs( 379 UnicodeTranslateError, 380 [u"g\xfcrk", 1, 2, "ouch"], 381 "can't translate character u'\\xfc' in position 1: ouch" 382 ) 383 self.check_exceptionobjectargs( 384 UnicodeTranslateError, 385 [u"g\u0100rk", 1, 2, "ouch"], 386 "can't translate character u'\\u0100' in position 1: ouch" 387 ) 388 self.check_exceptionobjectargs( 389 UnicodeTranslateError, 390 [u"g\uffffrk", 1, 2, "ouch"], 391 "can't translate character u'\\uffff' in position 1: ouch" 392 ) 393 if sys.maxunicode > 0xffff: 394 self.check_exceptionobjectargs( 395 UnicodeTranslateError, 396 [u"g\U00010000rk", 1, 2, "ouch"], 397 "can't translate character u'\\U00010000' in position 1: ouch" 398 ) 399 self.check_exceptionobjectargs( 400 UnicodeTranslateError, 401 [u"g\xfcrk", 1, 3, "ouch"], 402 "can't translate characters in position 1-2: ouch" 403 ) 404 405 def test_badandgoodstrictexceptions(self): 406 # "strict" complains about a non-exception passed in 407 self.assertRaises( 408 TypeError, 409 codecs.strict_errors, 410 42 411 ) 412 # "strict" complains about the wrong exception type 413 self.assertRaises( 414 Exception, 415 codecs.strict_errors, 416 Exception("ouch") 417 ) 418 419 # If the correct exception is passed in, "strict" raises it 420 self.assertRaises( 421 UnicodeEncodeError, 422 codecs.strict_errors, 423 UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch") 424 ) 425 426 def test_badandgoodignoreexceptions(self): 427 # "ignore" complains about a non-exception passed in 428 self.assertRaises( 429 TypeError, 430 codecs.ignore_errors, 431 42 432 ) 433 # "ignore" complains about the wrong exception type 434 self.assertRaises( 435 TypeError, 436 codecs.ignore_errors, 437 UnicodeError("ouch") 438 ) 439 # If the correct exception is passed in, "ignore" returns an empty replacement 440 self.assertEqual( 441 codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), 442 (u"", 1) 443 ) 444 self.assertEqual( 445 codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")), 446 (u"", 1) 447 ) 448 self.assertEqual( 449 codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")), 450 (u"", 1) 451 ) 452 453 def test_badandgoodreplaceexceptions(self): 454 # "replace" complains about a non-exception passed in 455 self.assertRaises( 456 TypeError, 457 codecs.replace_errors, 458 42 459 ) 460 # "replace" complains about the wrong exception type 461 self.assertRaises( 462 TypeError, 463 codecs.replace_errors, 464 UnicodeError("ouch") 465 ) 466 self.assertRaises( 467 TypeError, 468 codecs.replace_errors, 469 BadObjectUnicodeEncodeError() 470 ) 471 self.assertRaises( 472 TypeError, 473 codecs.replace_errors, 474 BadObjectUnicodeDecodeError() 475 ) 476 # With the correct exception, "replace" returns an "?" or u"\ufffd" replacement 477 self.assertEqual( 478 codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), 479 (u"?", 1) 480 ) 481 self.assertEqual( 482 codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")), 483 (u"\ufffd", 1) 484 ) 485 self.assertEqual( 486 codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")), 487 (u"\ufffd", 1) 488 ) 489 490 def test_badandgoodxmlcharrefreplaceexceptions(self): 491 # "xmlcharrefreplace" complains about a non-exception passed in 492 self.assertRaises( 493 TypeError, 494 codecs.xmlcharrefreplace_errors, 495 42 496 ) 497 # "xmlcharrefreplace" complains about the wrong exception types 498 self.assertRaises( 499 TypeError, 500 codecs.xmlcharrefreplace_errors, 501 UnicodeError("ouch") 502 ) 503 # "xmlcharrefreplace" can only be used for encoding 504 self.assertRaises( 505 TypeError, 506 codecs.xmlcharrefreplace_errors, 507 UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch") 508 ) 509 self.assertRaises( 510 TypeError, 511 codecs.xmlcharrefreplace_errors, 512 UnicodeTranslateError(u"\u3042", 0, 1, "ouch") 513 ) 514 # Use the correct exception 515 cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042) 516 s = "".join(unichr(c) for c in cs) 517 self.assertEqual( 518 codecs.xmlcharrefreplace_errors( 519 UnicodeEncodeError("ascii", s, 0, len(s), "ouch") 520 ), 521 (u"".join(u"&#%d;" % ord(c) for c in s), len(s)) 522 ) 523 524 def test_badandgoodbackslashreplaceexceptions(self): 525 # "backslashreplace" complains about a non-exception passed in 526 self.assertRaises( 527 TypeError, 528 codecs.backslashreplace_errors, 529 42 530 ) 531 # "backslashreplace" complains about the wrong exception types 532 self.assertRaises( 533 TypeError, 534 codecs.backslashreplace_errors, 535 UnicodeError("ouch") 536 ) 537 # "backslashreplace" can only be used for encoding 538 self.assertRaises( 539 TypeError, 540 codecs.backslashreplace_errors, 541 UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch") 542 ) 543 self.assertRaises( 544 TypeError, 545 codecs.backslashreplace_errors, 546 UnicodeTranslateError(u"\u3042", 0, 1, "ouch") 547 ) 548 # Use the correct exception 549 self.assertEqual( 550 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), 551 (u"\\u3042", 1) 552 ) 553 self.assertEqual( 554 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")), 555 (u"\\x00", 1) 556 ) 557 self.assertEqual( 558 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")), 559 (u"\\xff", 1) 560 ) 561 self.assertEqual( 562 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")), 563 (u"\\u0100", 1) 564 ) 565 self.assertEqual( 566 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")), 567 (u"\\uffff", 1) 568 ) 569 if sys.maxunicode>0xffff: 570 self.assertEqual( 571 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")), 572 (u"\\U00010000", 1) 573 ) 574 self.assertEqual( 575 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")), 576 (u"\\U0010ffff", 1) 577 ) 578 579 def test_badhandlerresults(self): 580 results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) ) 581 encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15") 582 583 for res in results: 584 codecs.register_error("test.badhandler", lambda x: res) 585 for enc in encs: 586 self.assertRaises( 587 TypeError, 588 u"\u3042".encode, 589 enc, 590 "test.badhandler" 591 ) 592 for (enc, bytes) in ( 593 ("ascii", "\xff"), 594 ("utf-8", "\xff"), 595 ("utf-7", "+x-"), 596 ("unicode-internal", "\x00"), 597 ): 598 self.assertRaises( 599 TypeError, 600 bytes.decode, 601 enc, 602 "test.badhandler" 603 ) 604 605 def test_lookup(self): 606 self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) 607 self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore")) 608 self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) 609 self.assertEqual( 610 codecs.xmlcharrefreplace_errors, 611 codecs.lookup_error("xmlcharrefreplace") 612 ) 613 self.assertEqual( 614 codecs.backslashreplace_errors, 615 codecs.lookup_error("backslashreplace") 616 ) 617 618 def test_unencodablereplacement(self): 619 def unencrepl(exc): 620 if isinstance(exc, UnicodeEncodeError): 621 return (u"\u4242", exc.end) 622 else: 623 raise TypeError("don't know how to handle %r" % exc) 624 codecs.register_error("test.unencreplhandler", unencrepl) 625 for enc in ("ascii", "iso-8859-1", "iso-8859-15"): 626 self.assertRaises( 627 UnicodeEncodeError, 628 u"\u4242".encode, 629 enc, 630 "test.unencreplhandler" 631 ) 632 633 def test_badregistercall(self): 634 # enhance coverage of: 635 # Modules/_codecsmodule.c::register_error() 636 # Python/codecs.c::PyCodec_RegisterError() 637 self.assertRaises(TypeError, codecs.register_error, 42) 638 self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42) 639 640 def test_badlookupcall(self): 641 # enhance coverage of: 642 # Modules/_codecsmodule.c::lookup_error() 643 self.assertRaises(TypeError, codecs.lookup_error) 644 645 def test_unknownhandler(self): 646 # enhance coverage of: 647 # Modules/_codecsmodule.c::lookup_error() 648 self.assertRaises(LookupError, codecs.lookup_error, "test.unknown") 649 650 def test_xmlcharrefvalues(self): 651 # enhance coverage of: 652 # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors() 653 # and inline implementations 654 v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000) 655 if sys.maxunicode>=100000: 656 v += (100000, 500000, 1000000) 657 s = u"".join([unichr(x) for x in v]) 658 codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors) 659 for enc in ("ascii", "iso-8859-15"): 660 for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"): 661 s.encode(enc, err) 662 663 def test_decodehelper(self): 664 # enhance coverage of: 665 # Objects/unicodeobject.c::unicode_decode_call_errorhandler() 666 # and callers 667 self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown") 668 669 def baddecodereturn1(exc): 670 return 42 671 codecs.register_error("test.baddecodereturn1", baddecodereturn1) 672 self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1") 673 self.assertRaises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1") 674 self.assertRaises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1") 675 self.assertRaises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1") 676 self.assertRaises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1") 677 self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1") 678 679 def baddecodereturn2(exc): 680 return (u"?", None) 681 codecs.register_error("test.baddecodereturn2", baddecodereturn2) 682 self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2") 683 684 handler = PosReturn() 685 codecs.register_error("test.posreturn", handler.handle) 686 687 # Valid negative position 688 handler.pos = -1 689 self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>0") 690 691 # Valid negative position 692 handler.pos = -2 693 self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?><?>") 694 695 # Negative position out of bounds 696 handler.pos = -3 697 self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn") 698 699 # Valid positive position 700 handler.pos = 1 701 self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>0") 702 703 # Largest valid positive position (one beyond end of input) 704 handler.pos = 2 705 self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>") 706 707 # Invalid positive position 708 handler.pos = 3 709 self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn") 710 711 # Restart at the "0" 712 handler.pos = 6 713 self.assertEqual("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0") 714 715 class D(dict): 716 def __getitem__(self, key): 717 raise ValueError 718 self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None}) 719 self.assertRaises(ValueError, codecs.charmap_decode, "\xff", "strict", D()) 720 self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: sys.maxunicode+1}) 721 722 def test_encodehelper(self): 723 # enhance coverage of: 724 # Objects/unicodeobject.c::unicode_encode_call_errorhandler() 725 # and callers 726 self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown") 727 728 def badencodereturn1(exc): 729 return 42 730 codecs.register_error("test.badencodereturn1", badencodereturn1) 731 self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1") 732 733 def badencodereturn2(exc): 734 return (u"?", None) 735 codecs.register_error("test.badencodereturn2", badencodereturn2) 736 self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2") 737 738 handler = PosReturn() 739 codecs.register_error("test.posreturn", handler.handle) 740 741 # Valid negative position 742 handler.pos = -1 743 self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>0") 744 745 # Valid negative position 746 handler.pos = -2 747 self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>") 748 749 # Negative position out of bounds 750 handler.pos = -3 751 self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn") 752 753 # Valid positive position 754 handler.pos = 1 755 self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>0") 756 757 # Largest valid positive position (one beyond end of input 758 handler.pos = 2 759 self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>") 760 761 # Invalid positive position 762 handler.pos = 3 763 self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn") 764 765 handler.pos = 0 766 767 class D(dict): 768 def __getitem__(self, key): 769 raise ValueError 770 for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"): 771 self.assertRaises(UnicodeError, codecs.charmap_encode, u"\xff", err, {0xff: None}) 772 self.assertRaises(ValueError, codecs.charmap_encode, u"\xff", err, D()) 773 self.assertRaises(TypeError, codecs.charmap_encode, u"\xff", err, {0xff: 300}) 774 775 def test_translatehelper(self): 776 # enhance coverage of: 777 # Objects/unicodeobject.c::unicode_encode_call_errorhandler() 778 # and callers 779 # (Unfortunately the errors argument is not directly accessible 780 # from Python, so we can't test that much) 781 class D(dict): 782 def __getitem__(self, key): 783 raise ValueError 784 self.assertRaises(ValueError, u"\xff".translate, D()) 785 self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1}) 786 self.assertRaises(TypeError, u"\xff".translate, {0xff: ()}) 787 788 def test_bug828737(self): 789 charmap = { 790 ord("&"): u"&", 791 ord("<"): u"<", 792 ord(">"): u">", 793 ord('"'): u""", 794 } 795 796 for n in (1, 10, 100, 1000): 797 text = u'abc<def>ghi'*n 798 text.translate(charmap) 799 800def test_main(): 801 test.test_support.run_unittest(CodecCallbackTest) 802 803if __name__ == "__main__": 804 test_main() 805