"""Recursive-descent parser producing a ``fontTools.voltLib.ast`` tree."""
from __future__ import (
    print_function, division, absolute_import, unicode_literals)
from collections import OrderedDict
import fontTools.voltLib.ast as ast
from fontTools.voltLib.lexer import Lexer
from fontTools.voltLib.error import VoltLibError
from io import open

# Maps each top-level keyword to the name of the ``Parser`` method that
# handles it; ``Parser.parse`` dispatches through this table.
PARSE_FUNCS = {
    "DEF_GLYPH": "parse_def_glyph_",
    "DEF_GROUP": "parse_def_group_",
    "DEF_SCRIPT": "parse_def_script_",
    "DEF_LOOKUP": "parse_def_lookup_",
    "DEF_ANCHOR": "parse_def_anchor_",
    "GRID_PPEM": "parse_ppem_",
    "PRESENTATION_PPEM": "parse_ppem_",
    "PPOSITIONING_PPEM": "parse_ppem_",
    "COMPILER_USEEXTENSIONLOOKUPS": "parse_compiler_flag_",
    "COMPILER_USEPAIRPOSFORMAT2": "parse_compiler_flag_",
    "CMAP_FORMAT": "parse_cmap_format",
}


class Parser(object):
    """Parse the file at ``path`` into an ``ast.VoltFile``.

    The parser keeps one token of look-ahead: ``cur_token_*`` describe the
    token being processed and ``next_token_*`` the one that follows it.
    """

    def __init__(self, path):
        """Read ``path`` completely and prime the one-token look-ahead."""
        self.doc_ = ast.VoltFile()
        self.glyphs_ = OrderedSymbolTable()
        self.groups_ = SymbolTable()
        self.anchors_ = {}  # dictionary of SymbolTable() keyed by glyph
        self.scripts_ = SymbolTable()
        self.langs_ = SymbolTable()
        self.lookups_ = SymbolTable()
        self.next_token_type_, self.next_token_ = (None, None)
        self.next_token_location_ = None
        with open(path, "r") as f:
            self.lexer_ = Lexer(f.read(), path)
        self.advance_lexer_()

    def parse(self):
        """Parse all top-level statements and return the ``ast.VoltFile``.

        Parsing stops at the ``END`` keyword or when the tokens run out.

        Raises:
            VoltLibError: if a token that is not a known top-level keyword
                is found where a statement should start, or if any
                statement is malformed.
        """
        statements = self.doc_.statements
        while self.next_token_type_ is not None:
            self.advance_lexer_()
            if self.cur_token_ in PARSE_FUNCS:
                func = getattr(self, PARSE_FUNCS[self.cur_token_])
                statements.append(func())
            elif self.is_cur_keyword_("END"):
                break
            else:
                raise VoltLibError(
                    "Expected " + ", ".join(sorted(PARSE_FUNCS.keys())),
                    self.cur_token_location_)
        return self.doc_

    def parse_def_glyph_(self):
        """Parse ``DEF_GLYPH ... END_GLYPH`` into an ``ast.GlyphDefinition``.

        Raises:
            VoltLibError: on a negative glyph ID or UNICODE value, an
                unknown glyph TYPE, or a glyph name that is already defined
                (glyph names are compared case-sensitively).
        """
        assert self.is_cur_keyword_("DEF_GLYPH")
        location = self.cur_token_location_
        name = self.expect_string_()
        self.expect_keyword_("ID")
        gid = self.expect_number_()
        if gid < 0:
            raise VoltLibError("Invalid glyph ID", self.cur_token_location_)
        gunicode = None
        if self.next_token_ == "UNICODE":
            self.expect_keyword_("UNICODE")
            gunicode = [self.expect_number_()]
            if gunicode[0] < 0:
                raise VoltLibError("Invalid glyph UNICODE",
                                   self.cur_token_location_)
        elif self.next_token_ == "UNICODEVALUES":
            self.expect_keyword_("UNICODEVALUES")
            gunicode = self.parse_unicode_values_()
        gtype = None
        if self.next_token_ == "TYPE":
            self.expect_keyword_("TYPE")
            gtype = self.expect_name_()
            # Raise a proper parse error instead of relying on ``assert``,
            # which disappears when Python runs with -O.
            if gtype not in ("BASE", "LIGATURE", "MARK", "COMPONENT"):
                raise VoltLibError(
                    "Expected BASE, LIGATURE, MARK or COMPONENT",
                    self.cur_token_location_)
        components = None
        if self.next_token_ == "COMPONENTS":
            self.expect_keyword_("COMPONENTS")
            components = self.expect_number_()
        self.expect_keyword_("END_GLYPH")
        if self.glyphs_.resolve(name) is not None:
            raise VoltLibError(
                'Glyph "%s" (gid %i) already defined' % (name, gid),
                location
            )
        def_glyph = ast.GlyphDefinition(name, gid,
                                        gunicode, gtype, components,
                                        location=location)
        self.glyphs_.define(name, def_glyph)
        return def_glyph

    def parse_def_group_(self):
        """Parse ``DEF_GROUP ... END_GROUP`` into an ``ast.GroupDefinition``.

        Raises:
            VoltLibError: if a group with the same name (ignoring case) has
                already been defined.
        """
        assert self.is_cur_keyword_("DEF_GROUP")
        location = self.cur_token_location_
        name = self.expect_string_()
        enum = None
        if self.next_token_ == "ENUM":
            enum = self.parse_enum_()
        self.expect_keyword_("END_GROUP")
        if self.groups_.resolve(name) is not None:
            raise VoltLibError(
                'Glyph group "%s" already defined, '
                'group names are case insensitive' % name,
                location
            )
        def_group = ast.GroupDefinition(name, enum,
                                        location=location)
        self.groups_.define(name, def_group)
        return def_group

    def parse_def_script_(self):
        """Parse ``DEF_SCRIPT ... END_SCRIPT`` into an ``ast.ScriptDefinition``.

        Language systems are tracked in a scope of their own so that the
        same language tag may appear under different scripts.

        Raises:
            VoltLibError: if the script tag, or a language tag within this
                script, is already defined (ignoring case).
        """
        assert self.is_cur_keyword_("DEF_SCRIPT")
        location = self.cur_token_location_
        name = None
        if self.next_token_ == "NAME":
            self.expect_keyword_("NAME")
            name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        if self.scripts_.resolve(tag) is not None:
            raise VoltLibError(
                'Script "%s" already defined, '
                'script tags are case insensitive' % tag,
                location
            )
        self.langs_.enter_scope()
        langs = []
        while self.next_token_ != "END_SCRIPT":
            self.advance_lexer_()
            lang = self.parse_langsys_()
            self.expect_keyword_("END_LANGSYS")
            if self.langs_.resolve(lang.tag) is not None:
                raise VoltLibError(
                    'Language "%s" already defined in script "%s", '
                    'language tags are case insensitive' % (lang.tag, tag),
                    location
                )
            self.langs_.define(lang.tag, lang)
            langs.append(lang)
        self.expect_keyword_("END_SCRIPT")
        self.langs_.exit_scope()
        def_script = ast.ScriptDefinition(name, tag, langs, location=location)
        self.scripts_.define(tag, def_script)
        return def_script

    def parse_langsys_(self):
        """Parse a ``DEF_LANGSYS`` body into an ``ast.LangSysDefinition``.

        The closing ``END_LANGSYS`` is left for the caller to consume.
        """
        assert self.is_cur_keyword_("DEF_LANGSYS")
        location = self.cur_token_location_
        name = None
        if self.next_token_ == "NAME":
            self.expect_keyword_("NAME")
            name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        features = []
        while self.next_token_ != "END_LANGSYS":
            self.advance_lexer_()
            feature = self.parse_feature_()
            self.expect_keyword_("END_FEATURE")
            features.append(feature)
        def_langsys = ast.LangSysDefinition(name, tag, features,
                                            location=location)
        return def_langsys

    def parse_feature_(self):
        """Parse a ``DEF_FEATURE`` body into an ``ast.FeatureDefinition``.

        Lookups are recorded by name only.  The closing ``END_FEATURE`` is
        left for the caller to consume.
        """
        assert self.is_cur_keyword_("DEF_FEATURE")
        location = self.cur_token_location_
        self.expect_keyword_("NAME")
        name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        lookups = []
        while self.next_token_ != "END_FEATURE":
            self.expect_keyword_("LOOKUP")
            lookup = self.expect_string_()
            lookups.append(lookup)
        feature = ast.FeatureDefinition(name, tag, lookups,
                                        location=location)
        return feature

    def parse_def_lookup_(self):
        """Parse ``DEF_LOOKUP ...`` into an ``ast.LookupDefinition``.

        Raises:
            VoltLibError: if the lookup name is empty or does not start
                with a letter, if it is already defined (ignoring case), or
                if any of the lookup's clauses is malformed.
        """
        assert self.is_cur_keyword_("DEF_LOOKUP")
        location = self.cur_token_location_
        name = self.expect_string_()
        # ``not name`` guards against an empty name, which would otherwise
        # escape as an IndexError from ``name[0]``.
        if not name or not name[0].isalpha():
            raise VoltLibError(
                'Lookup name "%s" must start with a letter' % name,
                location
            )
        if self.lookups_.resolve(name) is not None:
            raise VoltLibError(
                'Lookup "%s" already defined, '
                'lookup names are case insensitive' % name,
                location
            )
        process_base = True
        if self.next_token_ == "PROCESS_BASE":
            self.advance_lexer_()
        elif self.next_token_ == "SKIP_BASE":
            self.advance_lexer_()
            process_base = False
        # ``process_marks`` ends up True (all marks), False (skip marks) or
        # a string naming what to process; a mark glyph set is reported
        # separately through ``mark_glyph_set``.
        process_marks = True
        mark_glyph_set = None
        if self.next_token_ == "PROCESS_MARKS":
            self.advance_lexer_()
            if self.next_token_ == "MARK_GLYPH_SET":
                self.advance_lexer_()
                mark_glyph_set = self.expect_string_()
            elif self.next_token_type_ == Lexer.STRING:
                process_marks = self.expect_string_()
            elif self.next_token_ == "ALL":
                self.advance_lexer_()
            else:
                raise VoltLibError(
                    "Expected ALL, MARK_GLYPH_SET or an ID. "
                    "Got %s" % (self.next_token_type_),
                    location)
        elif self.next_token_ == "SKIP_MARKS":
            self.advance_lexer_()
            process_marks = False
        direction = None
        if self.next_token_ == "DIRECTION":
            self.expect_keyword_("DIRECTION")
            direction = self.expect_name_()
            # A parse error rather than an ``assert`` so that the check
            # survives running Python with -O.
            if direction not in ("LTR", "RTL"):
                raise VoltLibError("Expected LTR or RTL",
                                   self.cur_token_location_)
        reversal = None
        if self.next_token_ == "REVERSAL":
            self.expect_keyword_("REVERSAL")
            reversal = True
        comments = None
        if self.next_token_ == "COMMENTS":
            self.expect_keyword_("COMMENTS")
            comments = self.expect_string_()
        context = []
        # parse_context_() itself consumes every consecutive context block.
        if self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
            context = self.parse_context_()
        as_pos_or_sub = self.expect_name_()
        sub = None
        pos = None
        if as_pos_or_sub == "AS_SUBSTITUTION":
            sub = self.parse_substitution_(reversal)
        elif as_pos_or_sub == "AS_POSITION":
            pos = self.parse_position_()
        else:
            raise VoltLibError(
                "Expected AS_SUBSTITUTION or AS_POSITION. "
                "Got %s" % (as_pos_or_sub),
                location)
        def_lookup = ast.LookupDefinition(
            name, process_base, process_marks, mark_glyph_set, direction,
            reversal, comments, context, sub, pos, location=location)
        self.lookups_.define(name, def_lookup)
        return def_lookup

    def parse_context_(self):
        """Parse consecutive ``IN_CONTEXT``/``EXCEPT_CONTEXT`` blocks.

        Returns:
            A list of ``ast.ContextDefinition``.  A block that is closed
            immediately by ``END_CONTEXT`` contributes nothing to the list.
        """
        location = self.cur_token_location_
        contexts = []
        while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
            ex_or_in = self.expect_name_()
            if self.next_token_ != "END_CONTEXT":
                left = []
                right = []
                while self.next_token_ in ("LEFT", "RIGHT"):
                    side = self.expect_name_()
                    coverage = self.parse_coverage_()
                    if side == "LEFT":
                        left.append(coverage)
                    else:
                        right.append(coverage)
                self.expect_keyword_("END_CONTEXT")
                context = ast.ContextDefinition(ex_or_in, left,
                                                right, location=location)
                contexts.append(context)
            else:
                self.expect_keyword_("END_CONTEXT")
        return contexts

    def parse_substitution_(self, reversal):
        """Parse ``AS_SUBSTITUTION ... END_SUBSTITUTION``.

        The kind of node returned depends on the longest source and target
        coverage found: one-to-one gives a single substitution (reverse
        chaining when ``reversal`` is set), one-to-many a multiple
        substitution and many-to-one a ligature substitution.

        Raises:
            VoltLibError: if there is no ``SUB`` rule, or if the rules are
                many-to-many, are not all one-to-one while ``reversal`` is
                set, or have an empty source or target coverage.
        """
        assert self.is_cur_keyword_("AS_SUBSTITUTION")
        location = self.cur_token_location_
        src = []
        dest = []
        if self.next_token_ != "SUB":
            raise VoltLibError("Expected SUB", location)
        while self.next_token_ == "SUB":
            self.expect_keyword_("SUB")
            src.append(self.parse_coverage_())
            self.expect_keyword_("WITH")
            dest.append(self.parse_coverage_())
            self.expect_keyword_("END_SUB")
        self.expect_keyword_("END_SUBSTITUTION")
        max_src = max(len(cov) for cov in src)
        max_dest = max(len(cov) for cov in dest)
        # many to many or mixed is invalid
        if ((max_src > 1 and max_dest > 1) or
                (reversal and (max_src > 1 or max_dest > 1))):
            raise VoltLibError(
                "Invalid substitution type",
                location)
        mapping = OrderedDict(zip(src, dest))
        if max_src == 1 and max_dest == 1:
            if reversal:
                sub = ast.SubstitutionReverseChainingSingleDefinition(
                    mapping, location=location)
            else:
                sub = ast.SubstitutionSingleDefinition(mapping,
                                                       location=location)
        elif max_src == 1 and max_dest > 1:
            sub = ast.SubstitutionMultipleDefinition(mapping,
                                                     location=location)
        elif max_src > 1 and max_dest == 1:
            sub = ast.SubstitutionLigatureDefinition(mapping,
                                                     location=location)
        else:
            # Only reachable when every source or every target coverage is
            # empty; report it instead of failing on an unbound ``sub``.
            raise VoltLibError(
                "Invalid substitution type",
                location)
        return sub

    def parse_position_(self):
        """Parse ``AS_POSITION ... END_POSITION`` and return its AST node.

        Raises:
            VoltLibError: if the positioning type is not one of ATTACH,
                ATTACH_CURSIVE, ADJUST_PAIR or ADJUST_SINGLE.
        """
        assert self.is_cur_keyword_("AS_POSITION")
        location = self.cur_token_location_
        handlers = {
            "ATTACH": self.parse_attach_,
            "ATTACH_CURSIVE": self.parse_attach_cursive_,
            "ADJUST_PAIR": self.parse_adjust_pair_,
            "ADJUST_SINGLE": self.parse_adjust_single_,
        }
        pos_type = self.expect_name_()
        if pos_type not in handlers:
            raise VoltLibError(
                "Expected ATTACH, ATTACH_CURSIVE, ADJUST_PAIR, ADJUST_SINGLE",
                location)
        position = handlers[pos_type]()
        self.expect_keyword_("END_POSITION")
        return position

    def parse_attach_(self):
        """Parse ``ATTACH ... END_ATTACH`` into a ``PositionAttachDefinition``.

        Each target is stored as a ``(coverage, anchor_name)`` pair.
        """
        assert self.is_cur_keyword_("ATTACH")
        location = self.cur_token_location_
        coverage = self.parse_coverage_()
        coverage_to = []
        self.expect_keyword_("TO")
        while self.next_token_ != "END_ATTACH":
            cov = self.parse_coverage_()
            self.expect_keyword_("AT")
            self.expect_keyword_("ANCHOR")
            anchor_name = self.expect_string_()
            coverage_to.append((cov, anchor_name))
        self.expect_keyword_("END_ATTACH")
        position = ast.PositionAttachDefinition(
            coverage, coverage_to, location=location)
        return position

    def parse_attach_cursive_(self):
        """Parse ``ATTACH_CURSIVE ... END_ATTACH``.

        All ``EXIT`` coverages must come before the ``ENTER`` coverages.
        """
        assert self.is_cur_keyword_("ATTACH_CURSIVE")
        location = self.cur_token_location_
        coverages_exit = []
        coverages_enter = []
        while self.next_token_ != "ENTER":
            self.expect_keyword_("EXIT")
            coverages_exit.append(self.parse_coverage_())
        while self.next_token_ != "END_ATTACH":
            self.expect_keyword_("ENTER")
            coverages_enter.append(self.parse_coverage_())
        self.expect_keyword_("END_ATTACH")
        position = ast.PositionAttachCursiveDefinition(
            coverages_exit, coverages_enter, location=location)
        return position

    def parse_adjust_pair_(self):
        """Parse ``ADJUST_PAIR ... END_ADJUST``.

        The adjustments are collected in a dict keyed by the
        ``(id_1, id_2)`` number pair read from the file, each mapping to
        the ``(pos_1, pos_2)`` pair returned by ``parse_pos_``.
        """
        assert self.is_cur_keyword_("ADJUST_PAIR")
        location = self.cur_token_location_
        coverages_1 = []
        coverages_2 = []
        adjust_pair = {}
        while self.next_token_ == "FIRST":
            self.advance_lexer_()
            coverage_1 = self.parse_coverage_()
            coverages_1.append(coverage_1)
        while self.next_token_ == "SECOND":
            self.advance_lexer_()
            coverage_2 = self.parse_coverage_()
            coverages_2.append(coverage_2)
        while self.next_token_ != "END_ADJUST":
            id_1 = self.expect_number_()
            id_2 = self.expect_number_()
            self.expect_keyword_("BY")
            pos_1 = self.parse_pos_()
            pos_2 = self.parse_pos_()
            adjust_pair[(id_1, id_2)] = (pos_1, pos_2)
        self.expect_keyword_("END_ADJUST")
        position = ast.PositionAdjustPairDefinition(
            coverages_1, coverages_2, adjust_pair, location=location)
        return position

    def parse_adjust_single_(self):
        """Parse ``ADJUST_SINGLE ... END_ADJUST``.

        Each entry is stored as a ``(coverage, pos)`` pair.
        """
        assert self.is_cur_keyword_("ADJUST_SINGLE")
        location = self.cur_token_location_
        adjust_single = []
        while self.next_token_ != "END_ADJUST":
            coverages = self.parse_coverage_()
            self.expect_keyword_("BY")
            pos = self.parse_pos_()
            adjust_single.append((coverages, pos))
        self.expect_keyword_("END_ADJUST")
        position = ast.PositionAdjustSingleDefinition(
            adjust_single, location=location)
        return position

    def parse_def_anchor_(self):
        """Parse ``DEF_ANCHOR ... END_ANCHOR`` into an ``ast.AnchorDefinition``.

        Anchors are tracked per glyph name in ``self.anchors_``.

        Raises:
            VoltLibError: if the glyph already has an anchor with the same
                name (ignoring case) on the same component.
        """
        assert self.is_cur_keyword_("DEF_ANCHOR")
        location = self.cur_token_location_
        name = self.expect_string_()
        self.expect_keyword_("ON")
        gid = self.expect_number_()
        self.expect_keyword_("GLYPH")
        glyph_name = self.expect_name_()
        self.expect_keyword_("COMPONENT")
        component = self.expect_number_()
        # check for duplicate anchor names on this glyph
        if glyph_name in self.anchors_:
            existing = self.anchors_[glyph_name].resolve(name)
            if existing is not None and existing.component == component:
                raise VoltLibError(
                    'Anchor "%s" already defined, '
                    'anchor names are case insensitive' % name,
                    location
                )
        if self.next_token_ == "LOCKED":
            locked = True
            self.advance_lexer_()
        else:
            locked = False
        self.expect_keyword_("AT")
        pos = self.parse_pos_()
        self.expect_keyword_("END_ANCHOR")
        anchor = ast.AnchorDefinition(name, gid, glyph_name,
                                      component, locked, pos,
                                      location=location)
        if glyph_name not in self.anchors_:
            self.anchors_[glyph_name] = SymbolTable()
        self.anchors_[glyph_name].define(name, anchor)
        return anchor

    def parse_adjust_by_(self):
        """Parse ``ADJUST_BY <n> AT <size>``; return ``(adjustment, size)``.

        Callers only invoke this when the next token is ``ADJUST_BY``.
        """
        self.advance_lexer_()
        assert self.is_cur_keyword_("ADJUST_BY")
        adjustment = self.expect_number_()
        self.expect_keyword_("AT")
        size = self.expect_number_()
        return adjustment, size

    def parse_pos_(self):
        """Parse ``POS ... END_POS``.

        Returns:
            A tuple ``(adv, dx, dy, adv_adjust_by, dx_adjust_by,
            dy_adjust_by)``.  The first three are numbers or ``None`` when
            absent; the last three are dicts mapping the ``AT`` size of
            each ``ADJUST_BY`` clause to its adjustment.

        Raises:
            VoltLibError: if the next token is not ``POS`` or the block is
                not closed by ``END_POS``.
        """
        # VOLT syntax doesn't seem to take device Y advance
        self.expect_keyword_("POS")
        adv = None
        dx = None
        dy = None
        adv_adjust_by = {}
        dx_adjust_by = {}
        dy_adjust_by = {}
        if self.next_token_ == "ADV":
            self.advance_lexer_()
            adv = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                adv_adjust_by[size] = adjustment
        if self.next_token_ == "DX":
            self.advance_lexer_()
            dx = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                dx_adjust_by[size] = adjustment
        if self.next_token_ == "DY":
            self.advance_lexer_()
            dy = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                dy_adjust_by[size] = adjustment
        self.expect_keyword_("END_POS")
        return (adv, dx, dy, adv_adjust_by, dx_adjust_by, dy_adjust_by)

    def parse_unicode_values_(self):
        """Parse the comma-separated string that follows ``UNICODEVALUES``.

        The first two characters of every entry are skipped and the rest
        is read as a hexadecimal number.

        Returns:
            A list of ints, or ``None`` if the string held no values.

        Raises:
            VoltLibError: if an entry is not valid hexadecimal.
        """
        location = self.cur_token_location_
        try:
            unicode_values = [
                int(uni[2:], 16)
                for uni in self.expect_string_().split(",") if uni != ""]
        except ValueError as err:
            raise VoltLibError(str(err), location)
        return unicode_values or None

    def parse_enum_(self):
        """Parse ``ENUM <coverage> END_ENUM`` into an ``ast.Enum``."""
        self.expect_keyword_("ENUM")
        location = self.cur_token_location_
        enum = ast.Enum(self.parse_coverage_(), location=location)
        self.expect_keyword_("END_ENUM")
        return enum

    def parse_coverage_(self):
        """Parse a run of GLYPH, GROUP, RANGE and ENUM items.

        Returns:
            A tuple of AST nodes; it is empty when the next token starts
            none of those items.
        """
        coverage = []
        location = self.cur_token_location_
        while self.next_token_ in ("GLYPH", "GROUP", "RANGE", "ENUM"):
            if self.next_token_ == "ENUM":
                enum = self.parse_enum_()
                coverage.append(enum)
            elif self.next_token_ == "GLYPH":
                self.expect_keyword_("GLYPH")
                name = self.expect_string_()
                coverage.append(ast.GlyphName(name, location=location))
            elif self.next_token_ == "GROUP":
                self.expect_keyword_("GROUP")
                name = self.expect_string_()
                # The parser is handed to the node, which can call back
                # into resolve_group() later.
                coverage.append(ast.GroupName(name, self, location=location))
            elif self.next_token_ == "RANGE":
                self.expect_keyword_("RANGE")
                start = self.expect_string_()
                self.expect_keyword_("TO")
                end = self.expect_string_()
                coverage.append(ast.Range(start, end, self, location=location))
        return tuple(coverage)

    def resolve_group(self, group_name):
        """Return the group defined as ``group_name`` (ignoring case), or None."""
        return self.groups_.resolve(group_name)

    def glyph_range(self, start, end):
        """Return the glyph names from ``start`` to ``end`` in definition order.

        Returns ``None`` when either name has not been defined.
        """
        return self.glyphs_.range(start, end)

    def parse_ppem_(self):
        """Parse a ``*_PPEM <number>`` setting into an ``ast.SettingDefinition``."""
        location = self.cur_token_location_
        ppem_name = self.cur_token_
        value = self.expect_number_()
        setting = ast.SettingDefinition(ppem_name, value, location=location)
        return setting

    def parse_compiler_flag_(self):
        """Parse a value-less compiler flag; its setting value is ``True``."""
        location = self.cur_token_location_
        flag_name = self.cur_token_
        value = True
        setting = ast.SettingDefinition(flag_name, value, location=location)
        return setting

    def parse_cmap_format(self):
        """Parse ``CMAP_FORMAT`` followed by three numbers into a setting."""
        location = self.cur_token_location_
        name = self.cur_token_
        value = (self.expect_number_(), self.expect_number_(),
                 self.expect_number_())
        setting = ast.SettingDefinition(name, value, location=location)
        return setting

    def is_cur_keyword_(self, k):
        """Return True if the current token is the NAME token ``k``."""
        return (self.cur_token_type_ is Lexer.NAME) and (self.cur_token_ == k)

    def expect_string_(self):
        """Advance and return the new current token, which must be a STRING."""
        self.advance_lexer_()
        if self.cur_token_type_ is not Lexer.STRING:
            raise VoltLibError("Expected a string", self.cur_token_location_)
        return self.cur_token_

    def expect_keyword_(self, keyword):
        """Advance and require the new current token to be ``keyword``."""
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME and self.cur_token_ == keyword:
            return self.cur_token_
        raise VoltLibError("Expected \"%s\"" % keyword,
                           self.cur_token_location_)

    def expect_name_(self):
        """Advance and return the new current token, which must be a NAME."""
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME:
            return self.cur_token_
        raise VoltLibError("Expected a name", self.cur_token_location_)

    def expect_number_(self):
        """Advance and return the new current token, which must be a NUMBER."""
        self.advance_lexer_()
        if self.cur_token_type_ is not Lexer.NUMBER:
            raise VoltLibError("Expected a number", self.cur_token_location_)
        return self.cur_token_

    def advance_lexer_(self):
        """Make the look-ahead token current and fetch a new look-ahead.

        Once the current token is the ``END`` keyword, or the lexer has no
        more tokens, the look-ahead token and its type become ``None``.
        The look-ahead location is deliberately left untouched in that
        case, so it keeps pointing at the last token that was read.
        """
        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
            self.next_token_type_, self.next_token_, self.next_token_location_)
        try:
            if self.is_cur_keyword_("END"):
                raise StopIteration
            (self.next_token_type_, self.next_token_,
             self.next_token_location_) = self.lexer_.next()
        except StopIteration:
            self.next_token_type_, self.next_token_ = (None, None)


class SymbolTable(object):
    """A stack of name -> item scopes; the innermost scope is searched first."""

    def __init__(self):
        self.scopes_ = [{}]

    def enter_scope(self):
        """Push a new, empty innermost scope."""
        self.scopes_.append({})

    def exit_scope(self):
        """Discard the innermost scope."""
        self.scopes_.pop()

    def define(self, name, item):
        """Bind ``name`` to ``item`` in the innermost scope."""
        self.scopes_[-1][name] = item

    def resolve(self, name, case_insensitive=True):
        """Look ``name`` up from the innermost scope outwards.

        In each scope an exact match is tried first; when
        ``case_insensitive`` is true, a key that differs only by case is
        accepted as well.

        Returns:
            The item found, or ``None`` if no scope defines ``name``.
        """
        for scope in reversed(self.scopes_):
            item = scope.get(name)
            # Compare against None so that a falsy item still counts as
            # defined.
            if item is not None:
                return item
            if case_insensitive:
                lowered = name.lower()
                for key in scope:
                    if key.lower() == lowered:
                        return scope[key]
        return None


class OrderedSymbolTable(SymbolTable):
    """A ``SymbolTable`` that remembers definition order; case-sensitive by default."""

    def __init__(self):
        self.scopes_ = [OrderedDict()]

    def enter_scope(self):
        """Push a new, empty innermost scope that preserves insertion order."""
        self.scopes_.append(OrderedDict())

    def resolve(self, name, case_insensitive=False):
        """Same as ``SymbolTable.resolve`` but case-sensitive by default."""
        # The result must be returned: callers compare it against None to
        # detect names that are already defined.
        return SymbolTable.resolve(
            self, name, case_insensitive=case_insensitive)

    def range(self, start, end):
        """Return the names defined from ``start`` to ``end``, both included.

        The innermost scope that holds both names is used.  Returns
        ``None`` if no scope defines both of them.
        """
        for scope in reversed(self.scopes_):
            if start in scope and end in scope:
                keys = list(scope.keys())
                return keys[keys.index(start):keys.index(end) + 1]
        return None