1#
2# test_codecencodings_jp.py
3#   Codec encoding tests for Japanese encodings.
4#
5
6from test import test_support
7from test import test_multibytecodec_support
8import unittest
9
class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
    # Test data for the 'cp932' codec.  The sample text is loaded under the
    # 'shift_jis' name, not 'cp932'.
    encoding = 'cp932'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')

    # Every entry is (input, error handler, expected output).  Entries whose
    # expectation is None are presumably the ones TestBase expects to fail --
    # confirm against test_multibytecodec_support.
    codectests = (
        # invalid bytes
        ("abc\x81\x00\x81\x00\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x81\x00\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x81\x00\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x81\x00\x82\x84", "ignore", u"abc\uff44"),
    ) + (
        # sjis vs cp932: backslash and tilde stay as-is, and the three
        # double-byte sequences map to U+FF3C, U+2225 and U+FF0D here.
        ("\\\x7e", "replace", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\uff3c\u2225\uff0d"),
    )
24
class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
                        unittest.TestCase):
    # Test data for the 'euc_jisx0213' codec.
    encoding = 'euc_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')

    # Every entry is (input, error handler, expected output).  Entries whose
    # expectation is None are presumably the ones TestBase expects to fail --
    # confirm against test_multibytecodec_support.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict",  None),
        ("abc\xc8", "strict",  None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\u7956"),
        ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
        ("\xc1\x64", "strict", None),
        ("\xa1\xc0", "strict", u"\uff3c"),
    )

    # (unicode input, expected encoded bytes).  U+00AB and U+00BB have
    # euc_jisx0213 encodings (\xa9\xa8, \xa9\xb2); the remaining non-ASCII
    # characters are expected as character references written in plain
    # ASCII: named ones for U+211C, U+2329 and U+232A, and the decimal
    # numeric one (4660 == 0x1234) for U+1234.
    # NOTE: keep this byte string pure ASCII plus \x escapes.  Literal
    # non-ASCII characters here would make the Python 2 source unparseable
    # without a coding declaration and would not match the expected output.
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )
44
# Shared EUC-JP cases, each (input bytes, error handler, expected output).
# A None expectation presumably marks input that must be rejected -- confirm
# against how TestBase consumes codectests.
eucjp_commontests = (
    # "strict" handler on malformed input
    ("abc\x80\x80\xc1\xc4", "strict", None),
    ("abc\xc8", "strict", None),
) + (
    # "replace" puts U+FFFD where the bad bytes were
    ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
    ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
) + (
    # "ignore" drops the bad bytes
    ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
) + (
    ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
    ("\xc1\x64", "strict", None),
)
54
class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
    # Test data for the 'euc_jp' codec; the sample text is loaded under the
    # same name as the codec.
    encoding = 'euc_jp'
    tstring = test_multibytecodec_support.load_teststring(encoding)

    # The shared EUC-JP cases, followed by cases specific to this codec.
    codectests = eucjp_commontests + (
        # byte input: \xa1\xc0 gives U+FF3C, a plain backslash stays as-is
        ("\xa1\xc0\\", "strict", u"\uff3c\\"),
    ) + (
        # unicode input: U+00A5 and U+203E come out as bytes 0x5c and 0x7e
        (u"\xa5", "strict", "\x5c"),
        (u"\u203e", "strict", "\x7e"),
    )
64
# Shared Shift_JIS cases, each (input bytes, error handler, expected output).
# A None expectation presumably marks input that must be rejected -- confirm
# against how TestBase consumes codectests.
shiftjis_commonenctests = (
    # "strict" handler on malformed input
    ("abc\x80\x80\x82\x84", "strict", None),
    ("abc\xf8", "strict", None),
) + (
    # "replace" puts U+FFFD where the bad bytes were
    ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
    ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
) + (
    # "ignore" drops the bad bytes and keeps the text after them
    ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
)
72
class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
    # Test data for the 'shift_jis' codec; the sample text is loaded under
    # the same name as the codec.
    encoding = 'shift_jis'
    tstring = test_multibytecodec_support.load_teststring(encoding)

    # The shared Shift_JIS cases, followed by cases specific to this codec.
    codectests = shiftjis_commonenctests + (
        # Backslash and tilde stay as-is.
        ("\\\x7e", "strict", u"\\\x7e"),
    ) + (
        # These three double-byte sequences map to U+FF3C, U+2016 and U+2212
        # here; Test_CP932 expects different code points for the last two.
        ("\x81\x5f\x81\x61\x81\x7c", "strict", u"\uff3c\u2016\u2212"),
    )
80
class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
    # Test data for the 'shift_jisx0213' codec.
    encoding = 'shift_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')

    # Every entry is (input, error handler, expected output).  Entries whose
    # expectation is None are presumably the ones TestBase expects to fail --
    # confirm against test_multibytecodec_support.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\x82\x84", "strict",  None),
        ("abc\xf8", "strict",  None),
        ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x80\x80\x82\x84def", "ignore",  u"abc\uff44def"),
        # sjis vs cp932: with this codec 0x5c and 0x7e give U+00A5 and
        # U+203E, and \x81\x5f gives a plain backslash.
        ("\\\x7e", "replace", u"\xa5\u203e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
    )

    # (unicode input, expected encoded bytes).  U+00AB and U+00BB have
    # shift_jisx0213 encodings (\x85G, \x85Q); the remaining non-ASCII
    # characters are expected as character references written in plain
    # ASCII: named ones for U+211C, U+2329 and U+232A, and the decimal
    # numeric one (4660 == 0x1234) for U+1234.
    # NOTE: keep this byte string pure ASCII plus \x escapes.  Literal
    # non-ASCII characters here would make the Python 2 source unparseable
    # without a coding declaration and would not match the expected output.
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )
99
def test_main():
    # Hand this module's own name to test_support.run_unittest.
    this_module = __name__
    test_support.run_unittest(this_module)
102
# Run the tests only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_main()
105