#!/usr/bin/python
#
# Generate the Arabic joining-type table and the shaping / lam-alef ligature
# tables as a C header (guarded by HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH),
# written to standard output.
#
# Usage:
#   ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt
#
# The script runs unchanged under Python 2.7 and Python 3.

from __future__ import print_function

import io
import os.path
import sys

# Lines of the first input file are skipped up to and including the first one
# that contains this run of hash marks; data parsing starts after it.
PREAMBLE_END_MARKER = '##################'

# Maps a code point to the name of the block it belongs to.  Filled in by
# read_blocks() and consulted by print_joining_table().
blocks = {}


def skip_to_marker(f, marker=PREAMBLE_END_MARKER):
    """Consume lines from *f* up to and including the first containing *marker*.

    Raises:
        ValueError: if the end of *f* is reached before the marker is seen.
            readline() returns '' forever at end of file, so an unchecked
            loop would never terminate on such input.
    """
    while True:
        line = f.readline()
        if not line:
            raise ValueError("marker line %r not found before end of file" % marker)
        if marker in line:
            return


def read_blocks(f):
    """Parse 'START..END; Name' lines from *f* into the global ``blocks`` map.

    Everything after a '#' is ignored, as are lines without a ';'.  A single
    code point ('XXXX; Name') is accepted as well as a 'XXXX..YYYY' range.
    """
    for line in f:
        comment = line.find('#')
        if comment >= 0:
            line = line[:comment]

        fields = [x.strip() for x in line.split(';')]
        if len(fields) == 1:
            continue

        bounds = fields[0].split('..')
        start = int(bounds[0], 16)
        end = start if len(bounds) == 1 else int(bounds[1], 16)

        name = fields[1]
        for u in range(start, end + 1):
            blocks[u] = name


def print_joining_table(f):
    """Print the ``joining_table`` array and the ``joining_type()`` lookup.

    *f* yields ';'-separated lines: code point, name, joining type, joining
    group.  Lines starting with '#' and lines without a ';' are skipped.
    Requires ``blocks`` to contain every code point listed in *f*.

    Raises:
        ValueError: if two distinct values abbreviate to the same macro name.
    """
    values = {}
    for line in f:
        if line.startswith('#'):
            continue

        fields = [x.strip() for x in line.split(';')]
        if len(fields) == 1:
            continue

        u = int(fields[0], 16)

        # These two joining groups get their own value; everything else is
        # represented by its joining type.
        if fields[3] in ["ALAPH", "DALATH RISH"]:
            value = "JOINING_GROUP_" + fields[3].replace(' ', '_')
        else:
            value = "JOINING_TYPE_" + fields[2]
        values[u] = value

    # Abbreviate each value to the initials of the words after the second
    # underscore, e.g. JOINING_GROUP_DALATH_RISH -> DR.  Sorted so that the
    # generated header does not depend on set/dict iteration order.
    all_values = set(values.values())
    all_values.add('JOINING_TYPE_X')
    short_value = {}
    for value in sorted(all_values):
        short = ''.join(x[0] for x in value.split('_')[2:])
        if short in short_value.values():
            raise ValueError("ambiguous short name %r for %s" % (short, value))
        short_value[value] = short

    print()
    for value, short in sorted(short_value.items()):
        print("#define %s %s" % (short, value))

    uu = sorted(values.keys())
    num = len(values)
    all_blocks = set(blocks[u] for u in uu)

    # Merge code points into contiguous ranges, bridging gaps of up to
    # 1+16*5 code points so that the table is not split too finely.
    last = -100000
    ranges = []
    for u in uu:
        if u - last <= 1+16*5:
            ranges[-1][-1] = u
        else:
            ranges.append([u, u])
        last = u

    print()
    print("static const uint8_t joining_table[] =")
    print("{")
    last_block = None
    offset = 0
    for start, end in ranges:

        print()
        print("#define joining_offset_0x%04xu %d" % (start, offset))

        for u in range(start, end+1):

            block = blocks.get(u, last_block)
            value = values.get(u, "JOINING_TYPE_X")

            if block != last_block or u == start:
                if u != start:
                    print()
                if block in all_blocks:
                    print("\n /* %s */" % block)
                else:
                    print("\n /* FILLER */")
                last_block = block
                if u % 32 != 0:
                    # Start mid-row: label the row and pad up to this column.
                    print()
                    print(" /* %04X */" % (u//32*32), " " * (u % 32), end="")

            if u % 32 == 0:
                print()
                print(" /* %04X */ " % u, end="")
            sys.stdout.write("%s," % short_value[value])
        print()

        offset += end - start + 1
    print()
    occupancy = num * 100. / offset if offset else 0
    print("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))
    print()

    page_bits = 12
    print()
    print("static unsigned int")
    print("joining_type (hb_codepoint_t u)")
    print("{")
    print(" switch (u >> %d)" % page_bits)
    print(" {")
    # Every page a range overlaps needs a case, not only the pages holding
    # the range's end points.
    pages = set()
    for start, end in ranges:
        pages.update(range(start >> page_bits, (end >> page_bits) + 1))
    for p in sorted(pages):
        print(" case 0x%0Xu:" % p)
        for start, end in ranges:
            if p < (start >> page_bits) or p > (end >> page_bits):
                continue
            offset_name = "joining_offset_0x%04xu" % start
            print(" if (hb_in_range (u, 0x%04Xu, 0x%04Xu)) return joining_table[u - 0x%04Xu + %s];" % (start, end, start, offset_name))
        print(" break;")
        print("")
    print(" default:")
    print(" break;")
    print(" }")
    print(" return X;")
    print("}")
    print()
    for value, short in sorted(short_value.items()):
        print("#undef %s" % (short))
    print()


def print_shaping_table(f):
    """Print ``shaping_table`` and ``ligature_table`` built from *f*.

    *f* yields ';'-separated lines whose sixth field may hold a tagged
    decomposition such as '<final> 0627'.  Only the tags initial, medial,
    isolated and final are used.  Single-item decompositions become shaping
    table entries; two-item ones are kept only when they are U+0644 followed
    by U+0622, U+0623, U+0625 or U+0627 (the lam-alef ligatures).
    """
    shapes = {}
    ligatures = {}
    names = {}
    for line in f:

        fields = [x.strip() for x in line.split(';')]
        # Blank or truncated lines have no decomposition field.
        if len(fields) < 6 or fields[5][0:1] != '<':
            continue

        items = fields[5].split(' ')
        shape, items = items[0][1:-1], tuple(int(x, 16) for x in items[1:])

        if shape not in ['initial', 'medial', 'isolated', 'final']:
            continue

        c = int(fields[0], 16)
        if len(items) != 1:
            # We only care about lam-alef ligatures
            if len(items) != 2 or items[0] != 0x0644 or items[1] not in [0x0622, 0x0623, 0x0625, 0x0627]:
                continue

            # Save ligature
            names[c] = fields[1]
            ligatures.setdefault(items, {})[shape] = c
        else:
            # Save shape; the base letter is named by the common prefix of
            # the names of all of its presentation forms.
            base = items[0]
            if base not in names:
                names[base] = fields[1]
            else:
                names[base] = os.path.commonprefix([names[base], fields[1]]).strip()
            shapes.setdefault(base, {})[shape] = c

    print()
    print("static const uint16_t shaping_table[][4] =")
    print("{")

    min_u, max_u = min(shapes), max(shapes)
    for u in range(min_u, max_u + 1):
        forms = shapes.get(u, {})
        s = [forms.get(shape, 0)
             for shape in ['initial', 'medial', 'final', 'isolated']]
        value = ', '.join("0x%04Xu" % c for c in s)
        print(" {%s}, /* U+%04X %s */" % (value, u, names.get(u, "")))

    print("};")
    print()
    print("#define SHAPING_TABLE_FIRST 0x%04Xu" % min_u)
    print("#define SHAPING_TABLE_LAST 0x%04Xu" % max_u)
    print()

    # Key each ligature by the shaped forms of its two components.  Iterate
    # in sorted order so the output is identical across interpreters.
    ligas = {}
    for pair in sorted(ligatures):
        for shape in sorted(ligatures[pair]):
            c = ligatures[pair][shape]
            if shape == 'isolated':
                liga = (shapes[pair[0]]['initial'], shapes[pair[1]]['final'])
            elif shape == 'final':
                liga = (shapes[pair[0]]['medial'], shapes[pair[1]]['final'])
            else:
                raise Exception("Unexpected shape", shape)
            ligas.setdefault(liga[0], []).append((liga[1], c))
    max_i = max(len(ligas[l]) for l in ligas)
    print()
    print("static const struct ligature_set_t {")
    print(" uint16_t first;")
    print(" struct ligature_pairs_t {")
    print(" uint16_t second;")
    print(" uint16_t ligature;")
    print(" } ligatures[%d];" % max_i)
    print("} ligature_table[] =")
    print("{")
    for first in sorted(ligas):

        print(" { 0x%04Xu, {" % (first))
        for liga in ligas[first]:
            print(" { 0x%04Xu, 0x%04Xu }, /* %s */" % (liga[0], liga[1], names[liga[1]]))
        print(" }},")

    print("};")
    print()


def main(argv):
    """Generate the header from the three data files named in *argv*.

    *argv* is the full argument vector: program name, then the paths of
    ArabicShaping.txt, UnicodeData.txt and Blocks.txt, in that order.
    Returns the process exit status (1 on a usage error, 0 otherwise).
    """
    if len(argv) != 4:
        print("usage: ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt", file=sys.stderr)
        return 1

    files = []
    try:
        for path in argv[1:]:
            files.append(io.open(path, encoding='utf-8'))
        arabic_shaping, unicode_data, blocks_txt = files

        # The first two lines of these files are echoed into the output.
        headers = [[arabic_shaping.readline(), arabic_shaping.readline()],
                   [blocks_txt.readline(), blocks_txt.readline()]]
        headers.append(["UnicodeData.txt does not have a header."])
        skip_to_marker(arabic_shaping)

        print("/* == Start of generated table == */")
        print("/*")
        print(" * The following table is generated by running:")
        print(" *")
        print(" * ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt")
        print(" *")
        print(" * on files with these headers:")
        print(" *")
        for h in headers:
            for l in h:
                print(" * %s" % (l.strip()))
        print(" */")
        print()
        print("#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH")
        print("#define HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH")
        print()

        read_blocks(blocks_txt)
        print_joining_table(arabic_shaping)
        print_shaping_table(unicode_data)

        print()
        print("#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH */")
        print()
        print("/* == End of generated table == */")
    finally:
        for f in files:
            f.close()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))