1from test.support import open_urlresource
2import unittest
3
4from http.client import HTTPException
5import sys
6from unicodedata import normalize, unidata_version
7
# Name of the conformance data file exercised by this module.
TESTDATAFILE = "NormalizationTest.txt"
# Download location of that file; the path component is the Unicode
# database version reported by the running interpreter's unicodedata module.
TESTDATAURL = "".join(
    ("http://www.pythontest.net/unicode/", unidata_version, "/", TESTDATAFILE)
)
10
def check_version(testfile):
    """Return True if the first line read from *testfile* contains unidata_version.

    Exactly one line is consumed from *testfile* (via ``readline()``); the
    rest of the stream is left untouched.
    """
    return unidata_version in testfile.readline()
14
class RangeError(Exception):
    """Raised by unistr() when a code point is greater than sys.maxunicode."""
17
def NFC(str):
    """Return *str* normalized to Unicode normalization form NFC."""
    form = "NFC"
    return normalize(form, str)
20
def NFKC(str):
    """Return *str* normalized to Unicode normalization form NFKC."""
    form = "NFKC"
    return normalize(form, str)
23
def NFD(str):
    """Return *str* normalized to Unicode normalization form NFD."""
    form = "NFD"
    return normalize(form, str)
26
def NFKD(str):
    """Return *str* normalized to Unicode normalization form NFKD."""
    form = "NFKD"
    return normalize(form, str)
29
def unistr(data):
    """Build a string from space-separated hexadecimal code points.

    *data* is split on single spaces and every token is parsed as a base-16
    integer before any range check is made.

    Raises RangeError if any parsed code point exceeds sys.maxunicode.
    """
    code_points = [int(token, 16) for token in data.split(" ")]
    if any(cp > sys.maxunicode for cp in code_points):
        raise RangeError
    return "".join(map(chr, code_points))
36
class NormalizationTest(unittest.TestCase):
    """Checks normalize() against the downloaded normalization test data."""

    def test_main(self):
        # Obtain the data file before doing anything else so that retrieval
        # problems are reported up front: a permission problem skips the
        # test, any other OS/HTTP error fails it.
        try:
            testdata = open_urlresource(TESTDATAURL, encoding="utf-8",
                                        check=check_version)
        except PermissionError:
            self.skipTest(f"Permission error when downloading {TESTDATAURL} "
                          f"into the test data directory")
        except (OSError, HTTPException):
            self.fail(f"Could not retrieve {TESTDATAURL}")
        else:
            with testdata:
                self.run_normalization_tests(testdata)

    def run_normalization_tests(self, testdata):
        """Check every data line of *testdata*, then all remaining code points.

        *testdata* is iterated line by line.  Text after a ``#`` is ignored,
        blank lines are skipped, and a line starting with ``@Part`` selects
        the current part.  Every other line is split on ``;`` and, with the
        last piece dropped, must yield five hex-encoded fields c1..c5.

        After the file has been processed, every code point whose character
        was not seen as c1 while in ``@Part1`` must be unchanged by all four
        normalization forms.
        """
        current_part = None
        part1_chars = set()

        for raw_line in testdata:
            # Remove any trailing comment, then surrounding whitespace.
            line = raw_line.split('#')[0].strip()
            if not line:
                continue
            if line.startswith("@Part"):
                current_part = line.split()[0]
                continue

            in_part1 = current_part == "@Part1"
            try:
                c1, c2, c3, c4, c5 = [unistr(field)
                                      for field in line.split(';')[:-1]]
            except RangeError:
                # The line cannot be represented; no checks are run for it.
                # While in part 1, still remember c1 if it is representable
                # on its own so the final sweep leaves it out.
                if in_part1:
                    try:
                        first = unistr(line.split(';')[0])
                    except RangeError:
                        pass
                    else:
                        part1_chars.add(first)
                continue

            # Each row: (normalization function, expected result, inputs that
            # must all normalize to that result).
            expectations = (
                (NFC, c2, (c1, c2, c3)),
                (NFC, c4, (c4, c5)),
                (NFD, c3, (c1, c2, c3)),
                (NFD, c5, (c4, c5)),
                (NFKC, c4, (c1, c2, c3, c4, c5)),
                (NFKD, c5, (c1, c2, c3, c4, c5)),
            )
            for form, expected, inputs in expectations:
                self.assertTrue(all(expected == form(s) for s in inputs),
                                line)

            if in_part1:
                part1_chars.add(c1)

        # Sweep the whole code point range: anything not recorded from
        # part 1 must be left untouched by every normalization form.
        for code_point in range(sys.maxunicode + 1):
            X = chr(code_point)
            if X not in part1_chars:
                self.assertTrue(
                    X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X),
                    code_point)

    def test_bug_834676(self):
        # Regression check for bug 834676: this call must complete without
        # raising.
        normalize('NFC', '\ud55c\uae00')
105
106
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
109