1#
2# genmap_support.py: Multibyte Codec Map Generator
3#
4# Original Author:  Hye-Shik Chang <perky@FreeBSD.org>
5# Modified Author:  Dong-hee Na <donghee.na92@gmail.com>
6#
7
8
class BufferedFiller:
    """Pack short string tokens into output lines of bounded width.

    Tokens handed to write() are concatenated, without any separator, onto
    the line in progress.  When the next token would push that line past
    ``column`` characters the line is closed first; tokens are never split.
    ``len(filler)`` is the number of tokens accepted since construction and
    is not reset by flush() or printout().
    """

    def __init__(self, column=78):
        self.column = column  # maximum characters per emitted line
        self.buffered = []    # completed lines waiting for printout()
        self.cline = []       # tokens of the line being assembled
        self.clen = 0         # total characters currently held in cline
        self.count = 0        # tokens accepted so far

    def write(self, *data):
        """Append every token in *data*, wrapping to a new line when needed.

        Raises ValueError when a single token is longer than ``column``.
        """
        for token in data:
            width = len(token)
            if width > self.column:
                raise ValueError("token is too long")
            if self.clen + width > self.column:
                # The token does not fit: close the current line first.
                self.flush()
            self.cline.append(token)
            self.clen += width
            self.count += 1

    def flush(self):
        """Close the line in progress (if any) and queue it for printout()."""
        if self.cline:
            self.buffered.append(''.join(self.cline))
            self.cline.clear()
            self.clen = 0

    def printout(self, fp):
        """Write all queued lines to *fp*, one per line, then drop them."""
        self.flush()
        for line in self.buffered:
            fp.write(f'{line}\n')
        self.buffered.clear()

    def __len__(self):
        """Return the number of tokens written so far."""
        return self.count
42
43
class DecodeMapWriter:
    """Emit C source for a two-level decode lookup table.

    ``decode_map`` is a dict of dicts: first key -> {second key: value}.
    update_decode_map() appends the selected rows to one flat data array and
    stamps each used row dict with bookkeeping entries (``self.prefix``,
    'min', 'max', 'midx').  generate() then writes the flat array followed by
    a 256-entry index that points into it.
    """

    filler_class = BufferedFiller

    def __init__(self, fp, prefix, decode_map):
        self.fp = fp                  # writable text stream for the C output
        self.prefix = prefix          # used in C identifiers and as row marker
        self.decode_map = decode_map
        self.filler = self.filler_class()

    def update_decode_map(self, c1range, c2range, onlymask=(), wide=0):
        """Append rows of the decode map to the flat data array.

        c1range / c2range are inclusive (low, high) bounds for the first and
        second key.  When *onlymask* is non-empty, only first keys contained
        in it are processed.  *wide* is accepted but not used here.
        """
        c2_span = range(c2range[0], c2range[1] + 1)

        for lead in range(c1range[0], c1range[1] + 1):
            if lead not in self.decode_map:
                continue
            if onlymask and lead not in onlymask:
                continue

            row = self.decode_map[lead]
            present = [trail for trail in c2_span if trail in row]
            if not present:
                continue

            low, high = present[0], present[-1]
            row[self.prefix] = True
            row['min'] = low
            row['max'] = high
            # Offset of this row inside the flat array: one token per cell.
            row['midx'] = len(self.filler)

            for trail in range(low, high + 1):
                # Gaps between the lowest and highest used cell become 'U'.
                cell = ('%d,' % row[trail]) if trail in row else 'U,'
                self.filler.write(cell)

    def generate(self, wide=False):
        """Write the flat data array and the 256-entry index to ``self.fp``.

        With *wide* true the array element type is Py_UCS4 and the index
        struct is widedbcs_index; otherwise ucs2_t and dbcs_index are used.
        """
        array_header = (
            f"static const Py_UCS4 __{self.prefix}_decmap[{len(self.filler)}] = {{\n"
            if wide else
            f"static const ucs2_t __{self.prefix}_decmap[{len(self.filler)}] = {{\n"
        )
        self.fp.write(array_header)
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        index_header = (
            f"static const struct widedbcs_index {self.prefix}_decmap[256] = {{\n"
            if wide else
            f"static const struct dbcs_index {self.prefix}_decmap[256] = {{\n"
        )
        self.fp.write(index_header)

        for lead in range(256):
            if lead not in self.decode_map or self.prefix not in self.decode_map[lead]:
                # Row absent, or never stamped by update_decode_map().
                self.filler.write("{", "0,", "0,", "0", "},")
                continue

            row = self.decode_map[lead]
            self.filler.write("{", "__%s_decmap" % self.prefix, "+", "%d" % row['midx'],
                              ",", "%d," % row['min'], "%d" % row['max'], "},")

        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
101
102
class EncodeMapWriter:
    """Emit C source for a two-level encode lookup table.

    ``encode_map`` is a dict of dicts: first key (0..255) -> {second key:
    value}, where a value is either an int or a tuple.  buildmap() lays all
    rows out in one flat array and stamps each row dict with bookkeeping
    entries (``self.prefix``, 'min', 'max', 'midx'); printmap() writes the
    flat array and a 256-entry index that points into it.

    ``elemtype`` / ``indextype`` name the C types used in the output, and
    the write_* methods produce the individual cells.  All of them are looked
    up through the instance, so a subclass can override them.
    """

    filler_class = BufferedFiller
    elemtype = 'DBCHAR'
    indextype = 'struct unim_index'

    def __init__(self, fp, prefix, encode_map):
        self.fp = fp                  # writable text stream for the C output
        self.prefix = prefix          # used in C identifiers and as row marker
        self.encode_map = encode_map
        self.filler = self.filler_class()

    def generate(self):
        """Build the flat array and write the complete table to ``self.fp``."""
        self.buildmap()
        self.printmap()

    def buildmap(self):
        """Lay out every non-empty row of the encode map in the flat array.

        Raises ValueError if a mapped value is neither an int nor a tuple.
        """
        for c1 in range(256):
            if c1 not in self.encode_map:
                continue
            c2map = self.encode_map[c1]
            # Only integer keys are table cells.  Rows that were already
            # stamped (by an earlier buildmap() or by another writer sharing
            # the map) also carry string bookkeeping keys; sorting those
            # together with the integers would raise TypeError.
            rc2values = sorted(k for k in c2map if isinstance(k, int))
            if not rc2values:
                continue

            c2map[self.prefix] = True
            c2map['min'] = rc2values[0]
            c2map['max'] = rc2values[-1]
            # Offset of this row inside the flat array: one token per cell.
            c2map['midx'] = len(self.filler)

            for v in range(rc2values[0], rc2values[-1] + 1):
                if v not in c2map:
                    self.write_nochar()
                    continue
                target = c2map[v]
                if isinstance(target, int):
                    self.write_char(target)
                elif isinstance(target, tuple):
                    self.write_multic(target)
                else:
                    raise ValueError(
                        f"unsupported mapping value at ({c1:#x}, {v:#x}): {target!r}")

    def write_nochar(self):
        """Emit the cell for an unmapped position."""
        # 'N' is presumably a macro defined by the consuming C source.
        self.filler.write('N,')

    def write_multic(self, point):
        """Emit the cell for a tuple-valued mapping; *point* is unused here."""
        # 'M' is presumably a macro defined by the consuming C source.
        self.filler.write('M,')

    def write_char(self, point):
        """Emit the cell for an integer mapping value."""
        self.filler.write(str(point) + ',')

    def printmap(self):
        """Write the flat array and the 256-entry index to ``self.fp``."""
        self.fp.write(f"static const {self.elemtype} __{self.prefix}_encmap[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
        self.fp.write(f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n")

        for i in range(256):
            if i in self.encode_map and self.prefix in self.encode_map[i]:
                row = self.encode_map[i]
                self.filler.write("{", "__%s_encmap" % self.prefix, "+",
                                  "%d" % row['midx'], ",",
                                  "%d," % row['min'],
                                  "%d" % row['max'], "},")
            else:
                # Row absent, or not stamped with this writer's prefix.
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
169
170
def open_mapping_file(path, source):
    """Open the mapping file at *path* for reading in text mode.

    Returns the open file object; closing it is left to the caller.  If the
    file cannot be opened, the program exits through SystemExit with the
    message "<source> is needed".
    """
    try:
        return open(path)
    except OSError:
        raise SystemExit(f'{source} is needed')
177
178
def print_autogen(fo, source):
    """Write a one-line C++-style "auto-generated from *source*" banner to *fo*."""
    banner = f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n'
    fo.write(banner)
181
182
def loadmap(fo, natcol=0, unicol=1, sbcs=0):
    """Parse a whitespace-separated mapping table into a two-level dict.

    *fo* is a seekable text stream; it is rewound and read from the start.
    On each line everything from the first '#' onward is discarded, and lines
    left with fewer than two fields are skipped.  Every remaining field must
    be an integer literal (e.g. ``0x8140``, ``65``).

    natcol / unicol select the columns holding the key code and the mapped
    value.  Key codes below 0x100 are ignored unless *sbcs* is true.

    Returns ``{code >> 8: {code & 0xff: value}}``.
    Raises ValueError if a field is not an integer literal and IndexError if
    a line lacks the requested column.
    """
    print("Loading from", fo)
    fo.seek(0, 0)
    decmap = {}
    for line in fo:
        fields = line.split('#', 1)[0].split()
        if len(fields) < 2:
            continue

        # NOTE(security): fields used to be passed to eval(), which executes
        # arbitrary expressions found in the data file.  int(..., 0) accepts
        # the same integer literals (0x.., 0o.., 0b.., decimal) and rejects
        # everything else with ValueError.
        row = [int(field, 0) for field in fields]
        loc, uni = row[natcol], row[unicol]
        if loc >= 0x100 or sbcs:
            decmap.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap
199