1from __future__ import (
2    print_function, division, absolute_import, unicode_literals)
3from collections import OrderedDict
4import fontTools.voltLib.ast as ast
5from fontTools.voltLib.lexer import Lexer
6from fontTools.voltLib.error import VoltLibError
7from io import open
8
# Dispatch table used by Parser.parse(): maps each top-level VOLT
# keyword to the name of the Parser method that parses that statement.
# Several settings share one handler; the handler re-reads the keyword
# from ``self.cur_token_`` to know which setting it is parsing.
PARSE_FUNCS = {
    "DEF_GLYPH": "parse_def_glyph_",
    "DEF_GROUP": "parse_def_group_",
    "DEF_SCRIPT": "parse_def_script_",
    "DEF_LOOKUP": "parse_def_lookup_",
    "DEF_ANCHOR": "parse_def_anchor_",
    "GRID_PPEM": "parse_ppem_",
    "PRESENTATION_PPEM": "parse_ppem_",
    "PPOSITIONING_PPEM": "parse_ppem_",
    "COMPILER_USEEXTENSIONLOOKUPS": "parse_compiler_flag_",
    "COMPILER_USEPAIRPOSFORMAT2": "parse_compiler_flag_",
    "CMAP_FORMAT": "parse_cmap_format",
}
22
23
24class Parser(object):
    def __init__(self, path):
        """Create a parser for the VOLT project file at *path*.

        The whole file is read eagerly and handed to the lexer; one
        token of lookahead is pre-fetched so ``next_token_`` is always
        populated before the first ``advance_lexer_()`` shift.
        """
        self.doc_ = ast.VoltFile()
        # Glyphs keep insertion order so glyph_range() can slice them
        # positionally; the other tables only need name lookup.
        self.glyphs_ = OrderedSymbolTable()
        self.groups_ = SymbolTable()
        self.anchors_ = {}  # dictionary of SymbolTable() keyed by glyph
        self.scripts_ = SymbolTable()
        self.langs_ = SymbolTable()
        self.lookups_ = SymbolTable()
        # One-token lookahead window; advance_lexer_() shifts
        # next_token_* into cur_token_*.
        self.next_token_type_, self.next_token_ = (None, None)
        self.next_token_location_ = None
        # NOTE(review): no explicit encoding is passed to io.open, so
        # the platform default is used — confirm .vtp files are ASCII.
        with open(path, "r") as f:
            self.lexer_ = Lexer(f.read(), path)
        self.advance_lexer_()
38
39    def parse(self):
40        statements = self.doc_.statements
41        while self.next_token_type_ is not None:
42            self.advance_lexer_()
43            if self.cur_token_ in PARSE_FUNCS.keys():
44                func = getattr(self, PARSE_FUNCS[self.cur_token_])
45                statements.append(func())
46            elif self.is_cur_keyword_("END"):
47                break
48            else:
49                raise VoltLibError(
50                    "Expected " + ", ".join(sorted(PARSE_FUNCS.keys())),
51                    self.cur_token_location_)
52        return self.doc_
53
    def parse_def_glyph_(self):
        """Parse a ``DEF_GLYPH ... END_GLYPH`` statement.

        Registers the glyph in ``self.glyphs_`` and returns an
        ast.GlyphDefinition.

        Raises:
            VoltLibError: on a negative glyph ID, a negative UNICODE
                value, or a duplicate glyph name.
        """
        assert self.is_cur_keyword_("DEF_GLYPH")
        location = self.cur_token_location_
        name = self.expect_string_()
        self.expect_keyword_("ID")
        gid = self.expect_number_()
        if gid < 0:
            raise VoltLibError("Invalid glyph ID", self.cur_token_location_)
        gunicode = None
        # UNICODE carries one numeric code point; UNICODEVALUES a
        # comma-separated "U+XXXX" list.  Either way gunicode ends up a
        # list of ints (or None when the glyph is unencoded).
        if self.next_token_ == "UNICODE":
            self.expect_keyword_("UNICODE")
            gunicode = [self.expect_number_()]
            if gunicode[0] < 0:
                raise VoltLibError("Invalid glyph UNICODE",
                                   self.cur_token_location_)
        elif self.next_token_ == "UNICODEVALUES":
            self.expect_keyword_("UNICODEVALUES")
            gunicode = self.parse_unicode_values_()
        gtype = None
        if self.next_token_ == "TYPE":
            self.expect_keyword_("TYPE")
            gtype = self.expect_name_()
            assert gtype in ("BASE", "LIGATURE", "MARK", "COMPONENT")
        components = None
        if self.next_token_ == "COMPONENTS":
            self.expect_keyword_("COMPONENTS")
            components = self.expect_number_()
        self.expect_keyword_("END_GLYPH")
        if self.glyphs_.resolve(name) is not None:
            raise VoltLibError(
                'Glyph "%s" (gid %i) already defined' % (name, gid),
                location
            )
        def_glyph = ast.GlyphDefinition(name, gid,
                                        gunicode, gtype, components,
                                        location=location)
        self.glyphs_.define(name, def_glyph)
        return def_glyph
92
    def parse_def_group_(self):
        """Parse a ``DEF_GROUP ... END_GROUP`` statement.

        Registers the group in ``self.groups_`` and returns an
        ast.GroupDefinition.

        Raises:
            VoltLibError: if a group with this (case-insensitive) name
                already exists.
        """
        assert self.is_cur_keyword_("DEF_GROUP")
        location = self.cur_token_location_
        name = self.expect_string_()
        enum = None
        if self.next_token_ == "ENUM":
            enum = self.parse_enum_()
        self.expect_keyword_("END_GROUP")
        if self.groups_.resolve(name) is not None:
            raise VoltLibError(
                'Glyph group "%s" already defined, '
                'group names are case insensitive' % name,
                location
            )
        def_group = ast.GroupDefinition(name, enum,
                                        location=location)
        self.groups_.define(name, def_group)
        return def_group
111
    def parse_def_script_(self):
        """Parse a ``DEF_SCRIPT ... END_SCRIPT`` statement.

        Parses the nested DEF_LANGSYS blocks, registers the script in
        ``self.scripts_`` and returns an ast.ScriptDefinition.

        Raises:
            VoltLibError: on a duplicate script tag, or a duplicate
                language tag within this script.
        """
        assert self.is_cur_keyword_("DEF_SCRIPT")
        location = self.cur_token_location_
        name = None
        if self.next_token_ == "NAME":
            self.expect_keyword_("NAME")
            name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        if self.scripts_.resolve(tag) is not None:
            raise VoltLibError(
                'Script "%s" already defined, '
                'script tags are case insensitive' % tag,
                location
            )
        # Language tags only have to be unique within one script, so
        # they live in a nested scope discarded at END_SCRIPT.
        self.langs_.enter_scope()
        langs = []
        while self.next_token_ != "END_SCRIPT":
            self.advance_lexer_()
            lang = self.parse_langsys_()
            self.expect_keyword_("END_LANGSYS")
            if self.langs_.resolve(lang.tag) is not None:
                raise VoltLibError(
                    'Language "%s" already defined in script "%s", '
                    'language tags are case insensitive' % (lang.tag, tag),
                    location
                )
            self.langs_.define(lang.tag, lang)
            langs.append(lang)
        self.expect_keyword_("END_SCRIPT")
        self.langs_.exit_scope()
        def_script = ast.ScriptDefinition(name, tag, langs, location=location)
        self.scripts_.define(tag, def_script)
        return def_script
146
    def parse_langsys_(self):
        """Parse a ``DEF_LANGSYS`` block and return an ast.LangSysDefinition.

        The closing END_LANGSYS keyword is consumed by the caller
        (parse_def_script_), not here.
        """
        assert self.is_cur_keyword_("DEF_LANGSYS")
        location = self.cur_token_location_
        name = None
        if self.next_token_ == "NAME":
            self.expect_keyword_("NAME")
            name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        features = []
        while self.next_token_ != "END_LANGSYS":
            self.advance_lexer_()
            feature = self.parse_feature_()
            self.expect_keyword_("END_FEATURE")
            features.append(feature)
        def_langsys = ast.LangSysDefinition(name, tag, features,
                                            location=location)
        return def_langsys
165
166    def parse_feature_(self):
167        assert self.is_cur_keyword_("DEF_FEATURE")
168        location = self.cur_token_location_
169        self.expect_keyword_("NAME")
170        name = self.expect_string_()
171        self.expect_keyword_("TAG")
172        tag = self.expect_string_()
173        lookups = []
174        while self.next_token_ != "END_FEATURE":
175            # self.advance_lexer_()
176            self.expect_keyword_("LOOKUP")
177            lookup = self.expect_string_()
178            lookups.append(lookup)
179        feature = ast.FeatureDefinition(name, tag, lookups,
180                                        location=location)
181        return feature
182
    def parse_def_lookup_(self):
        """Parse a ``DEF_LOOKUP ... `` statement.

        Reads the optional PROCESS_BASE/SKIP_BASE, PROCESS_MARKS/
        SKIP_MARKS, DIRECTION, REVERSAL, COMMENTS and context clauses,
        then the AS_SUBSTITUTION or AS_POSITION body.  Registers the
        lookup in ``self.lookups_`` and returns an ast.LookupDefinition.

        Raises:
            VoltLibError: on an invalid lookup name, a duplicate lookup,
                or malformed PROCESS_MARKS / AS_* clauses.
        """
        assert self.is_cur_keyword_("DEF_LOOKUP")
        location = self.cur_token_location_
        name = self.expect_string_()
        if not name[0].isalpha():
            raise VoltLibError(
                'Lookup name "%s" must start with a letter' % name,
                location
            )
        if self.lookups_.resolve(name) is not None:
            raise VoltLibError(
                'Lookup "%s" already defined, '
                'lookup names are case insensitive' % name,
                location
            )
        # PROCESS_BASE is the default, so both branches just consume
        # the keyword; only SKIP_BASE flips the flag.
        process_base = True
        if self.next_token_ == "PROCESS_BASE":
            self.advance_lexer_()
        elif self.next_token_ == "SKIP_BASE":
            self.advance_lexer_()
            process_base = False
        # process_marks is tri-state: True (all marks), False (skip),
        # or a string naming a glyph group/set of marks to process.
        process_marks = True
        mark_glyph_set = None
        if self.next_token_ == "PROCESS_MARKS":
            self.advance_lexer_()
            if self.next_token_ == "MARK_GLYPH_SET":
                self.advance_lexer_()
                mark_glyph_set = self.expect_string_()
            elif self.next_token_type_ == Lexer.STRING:
                # NOTE(review): a bare string here presumably names a
                # mark group — confirm against VOLT semantics.
                process_marks = self.expect_string_()
            elif self.next_token_ == "ALL":
                self.advance_lexer_()
            else:
                raise VoltLibError(
                    "Expected ALL, MARK_GLYPH_SET or an ID. "
                    "Got %s" % (self.next_token_type_),
                    location)
        elif self.next_token_ == "SKIP_MARKS":
            self.advance_lexer_()
            process_marks = False
        direction = None
        if self.next_token_ == "DIRECTION":
            self.expect_keyword_("DIRECTION")
            direction = self.expect_name_()
            assert direction in ("LTR", "RTL")
        reversal = None
        if self.next_token_ == "REVERSAL":
            self.expect_keyword_("REVERSAL")
            reversal = True
        comments = None
        if self.next_token_ == "COMMENTS":
            self.expect_keyword_("COMMENTS")
            comments = self.expect_string_()
        # parse_context_() itself loops over consecutive context
        # blocks, so this while executes its body at most once.
        context = []
        while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
            context = self.parse_context_()
        as_pos_or_sub = self.expect_name_()
        sub = None
        pos = None
        if as_pos_or_sub == "AS_SUBSTITUTION":
            sub = self.parse_substitution_(reversal)
        elif as_pos_or_sub == "AS_POSITION":
            pos = self.parse_position_()
        else:
            raise VoltLibError(
                "Expected AS_SUBSTITUTION or AS_POSITION. "
                "Got %s" % (as_pos_or_sub),
                location)
        def_lookup = ast.LookupDefinition(
            name, process_base, process_marks, mark_glyph_set, direction,
            reversal, comments, context, sub, pos, location=location)
        self.lookups_.define(name, def_lookup)
        return def_lookup
256
257    def parse_context_(self):
258        location = self.cur_token_location_
259        contexts = []
260        while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
261            side = None
262            coverage = None
263            ex_or_in = self.expect_name_()
264            # side_contexts = [] # XXX
265            if self.next_token_ != "END_CONTEXT":
266                left = []
267                right = []
268                while self.next_token_ in ("LEFT", "RIGHT"):
269                    side = self.expect_name_()
270                    coverage = self.parse_coverage_()
271                    if side == "LEFT":
272                        left.append(coverage)
273                    else:
274                        right.append(coverage)
275                self.expect_keyword_("END_CONTEXT")
276                context = ast.ContextDefinition(ex_or_in, left,
277                                                right, location=location)
278                contexts.append(context)
279            else:
280                self.expect_keyword_("END_CONTEXT")
281        return contexts
282
283    def parse_substitution_(self, reversal):
284        assert self.is_cur_keyword_("AS_SUBSTITUTION")
285        location = self.cur_token_location_
286        src = []
287        dest = []
288        if self.next_token_ != "SUB":
289            raise VoltLibError("Expected SUB", location)
290        while self.next_token_ == "SUB":
291            self.expect_keyword_("SUB")
292            src.append(self.parse_coverage_())
293            self.expect_keyword_("WITH")
294            dest.append(self.parse_coverage_())
295            self.expect_keyword_("END_SUB")
296        self.expect_keyword_("END_SUBSTITUTION")
297        max_src = max([len(cov) for cov in src])
298        max_dest = max([len(cov) for cov in dest])
299        # many to many or mixed is invalid
300        if ((max_src > 1 and max_dest > 1) or
301                (reversal and (max_src > 1 or max_dest > 1))):
302            raise VoltLibError(
303                "Invalid substitution type",
304                location)
305        mapping = OrderedDict(zip(tuple(src), tuple(dest)))
306        if max_src == 1 and max_dest == 1:
307            if reversal:
308                sub = ast.SubstitutionReverseChainingSingleDefinition(
309                    mapping, location=location)
310            else:
311                sub = ast.SubstitutionSingleDefinition(mapping,
312                                                       location=location)
313        elif max_src == 1 and max_dest > 1:
314            sub = ast.SubstitutionMultipleDefinition(mapping,
315                                                     location=location)
316        elif max_src > 1 and max_dest == 1:
317            sub = ast.SubstitutionLigatureDefinition(mapping,
318                                                     location=location)
319        return sub
320
321    def parse_position_(self):
322        assert self.is_cur_keyword_("AS_POSITION")
323        location = self.cur_token_location_
324        pos_type = self.expect_name_()
325        if pos_type not in (
326                "ATTACH", "ATTACH_CURSIVE", "ADJUST_PAIR", "ADJUST_SINGLE"):
327            raise VoltLibError(
328                "Expected ATTACH, ATTACH_CURSIVE, ADJUST_PAIR, ADJUST_SINGLE",
329                location)
330        if pos_type == "ATTACH":
331            position = self.parse_attach_()
332        elif pos_type == "ATTACH_CURSIVE":
333            position = self.parse_attach_cursive_()
334        elif pos_type == "ADJUST_PAIR":
335            position = self.parse_adjust_pair_()
336        elif pos_type == "ADJUST_SINGLE":
337            position = self.parse_adjust_single_()
338        self.expect_keyword_("END_POSITION")
339        return position
340
    def parse_attach_(self):
        """Parse an ``ATTACH ... END_ATTACH`` positioning body.

        Returns an ast.PositionAttachDefinition pairing the attached
        coverage with a list of (coverage, anchor name) targets.
        """
        assert self.is_cur_keyword_("ATTACH")
        location = self.cur_token_location_
        coverage = self.parse_coverage_()
        coverage_to = []
        self.expect_keyword_("TO")
        while self.next_token_ != "END_ATTACH":
            cov = self.parse_coverage_()
            self.expect_keyword_("AT")
            self.expect_keyword_("ANCHOR")
            anchor_name = self.expect_string_()
            coverage_to.append((cov, anchor_name))
        self.expect_keyword_("END_ATTACH")
        position = ast.PositionAttachDefinition(
            coverage, coverage_to, location=location)
        return position
357
    def parse_attach_cursive_(self):
        """Parse an ``ATTACH_CURSIVE ... END_ATTACH`` positioning body.

        All EXIT coverages must precede the ENTER coverages (the first
        loop stops at the first ENTER keyword).  Returns an
        ast.PositionAttachCursiveDefinition.
        """
        assert self.is_cur_keyword_("ATTACH_CURSIVE")
        location = self.cur_token_location_
        coverages_exit = []
        coverages_enter = []
        while self.next_token_ != "ENTER":
            self.expect_keyword_("EXIT")
            coverages_exit.append(self.parse_coverage_())
        while self.next_token_ != "END_ATTACH":
            self.expect_keyword_("ENTER")
            coverages_enter.append(self.parse_coverage_())
        self.expect_keyword_("END_ATTACH")
        position = ast.PositionAttachCursiveDefinition(
            coverages_exit, coverages_enter, location=location)
        return position
373
    def parse_adjust_pair_(self):
        """Parse an ``ADJUST_PAIR ... END_ADJUST`` (pair kerning) body.

        Returns an ast.PositionAdjustPairDefinition holding the FIRST
        and SECOND coverage lists plus ``(id_1, id_2) -> (pos_1, pos_2)``
        adjustments.  NOTE(review): id_1/id_2 presumably are 1-based
        indices into the FIRST/SECOND lists — confirm against VOLT docs.
        """
        assert self.is_cur_keyword_("ADJUST_PAIR")
        location = self.cur_token_location_
        coverages_1 = []
        coverages_2 = []
        adjust_pair = {}
        while self.next_token_ == "FIRST":
            self.advance_lexer_()
            coverage_1 = self.parse_coverage_()
            coverages_1.append(coverage_1)
        while self.next_token_ == "SECOND":
            self.advance_lexer_()
            coverage_2 = self.parse_coverage_()
            coverages_2.append(coverage_2)
        # Each adjustment row reads "id_1 id_2 BY pos pos"; later rows
        # with the same (id_1, id_2) silently overwrite earlier ones.
        while self.next_token_ != "END_ADJUST":
            id_1 = self.expect_number_()
            id_2 = self.expect_number_()
            self.expect_keyword_("BY")
            pos_1 = self.parse_pos_()
            pos_2 = self.parse_pos_()
            adjust_pair[(id_1, id_2)] = (pos_1, pos_2)
        self.expect_keyword_("END_ADJUST")
        position = ast.PositionAdjustPairDefinition(
            coverages_1, coverages_2, adjust_pair, location=location)
        return position
399
    def parse_adjust_single_(self):
        """Parse an ``ADJUST_SINGLE ... END_ADJUST`` positioning body.

        Returns an ast.PositionAdjustSingleDefinition with a list of
        (coverage, pos-tuple) pairs.
        """
        assert self.is_cur_keyword_("ADJUST_SINGLE")
        location = self.cur_token_location_
        adjust_single = []
        while self.next_token_ != "END_ADJUST":
            coverages = self.parse_coverage_()
            self.expect_keyword_("BY")
            pos = self.parse_pos_()
            adjust_single.append((coverages, pos))
        self.expect_keyword_("END_ADJUST")
        position = ast.PositionAdjustSingleDefinition(
            adjust_single, location=location)
        return position
413
    def parse_def_anchor_(self):
        """Parse a ``DEF_ANCHOR ... END_ANCHOR`` statement.

        Registers the anchor in the per-glyph table ``self.anchors_``
        and returns an ast.AnchorDefinition.

        Raises:
            VoltLibError: if the same anchor name was already defined
                on this glyph for the same component (the same name on
                a different component is allowed).
        """
        assert self.is_cur_keyword_("DEF_ANCHOR")
        location = self.cur_token_location_
        name = self.expect_string_()
        self.expect_keyword_("ON")
        gid = self.expect_number_()
        self.expect_keyword_("GLYPH")
        glyph_name = self.expect_name_()
        self.expect_keyword_("COMPONENT")
        component = self.expect_number_()
        # check for duplicate anchor names on this glyph
        if glyph_name in self.anchors_:
            anchor = self.anchors_[glyph_name].resolve(name)
            if anchor is not None and anchor.component == component:
                raise VoltLibError(
                    'Anchor "%s" already defined, '
                    'anchor names are case insensitive' % name,
                    location
                )
        if self.next_token_ == "LOCKED":
            locked = True
            self.advance_lexer_()
        else:
            locked = False
        self.expect_keyword_("AT")
        pos = self.parse_pos_()
        self.expect_keyword_("END_ANCHOR")
        anchor = ast.AnchorDefinition(name, gid, glyph_name,
                                      component, locked, pos,
                                      location=location)
        # Lazily create the per-glyph symbol table on first anchor.
        if glyph_name not in self.anchors_:
            self.anchors_[glyph_name] = SymbolTable()
        self.anchors_[glyph_name].define(name, anchor)
        return anchor
448
449    def parse_adjust_by_(self):
450        self.advance_lexer_()
451        assert self.is_cur_keyword_("ADJUST_BY")
452        adjustment = self.expect_number_()
453        self.expect_keyword_("AT")
454        size = self.expect_number_()
455        return adjustment, size
456
457    def parse_pos_(self):
458        # VOLT syntax doesn't seem to take device Y advance
459        self.advance_lexer_()
460        location = self.cur_token_location_
461        assert self.is_cur_keyword_("POS"), location
462        adv = None
463        dx = None
464        dy = None
465        adv_adjust_by = {}
466        dx_adjust_by = {}
467        dy_adjust_by = {}
468        if self.next_token_ == "ADV":
469            self.advance_lexer_()
470            adv = self.expect_number_()
471            while self.next_token_ == "ADJUST_BY":
472                adjustment, size = self.parse_adjust_by_()
473                adv_adjust_by[size] = adjustment
474        if self.next_token_ == "DX":
475            self.advance_lexer_()
476            dx = self.expect_number_()
477            while self.next_token_ == "ADJUST_BY":
478                adjustment, size = self.parse_adjust_by_()
479                dx_adjust_by[size] = adjustment
480        if self.next_token_ == "DY":
481            self.advance_lexer_()
482            dy = self.expect_number_()
483            while self.next_token_ == "ADJUST_BY":
484                adjustment, size = self.parse_adjust_by_()
485                dy_adjust_by[size] = adjustment
486        self.expect_keyword_("END_POS")
487        return (adv, dx, dy, adv_adjust_by, dx_adjust_by, dy_adjust_by)
488
489    def parse_unicode_values_(self):
490        location = self.cur_token_location_
491        try:
492            unicode_values = self.expect_string_().split(",")
493            unicode_values = [
494                int(uni[2:], 16)
495                for uni in unicode_values if uni != ""]
496        except ValueError as err:
497            raise VoltLibError(str(err), location)
498        return unicode_values if unicode_values != [] else None
499
500    def parse_enum_(self):
501        self.expect_keyword_("ENUM")
502        location = self.cur_token_location_
503        enum = ast.Enum(self.parse_coverage_(), location=location)
504        self.expect_keyword_("END_ENUM")
505        return enum
506
    def parse_coverage_(self):
        """Parse a (possibly empty) sequence of coverage items.

        Accepts any run of GLYPH, GROUP, RANGE and nested ENUM items
        and returns them as a tuple of ast nodes; an empty tuple when
        the next token starts none of them.
        """
        coverage = []
        # All items share the location of the token preceding the
        # coverage, captured once here.
        location = self.cur_token_location_
        while self.next_token_ in ("GLYPH", "GROUP", "RANGE", "ENUM"):
            if self.next_token_ == "ENUM":
                enum = self.parse_enum_()
                coverage.append(enum)
            elif self.next_token_ == "GLYPH":
                self.expect_keyword_("GLYPH")
                name = self.expect_string_()
                coverage.append(ast.GlyphName(name, location=location))
            elif self.next_token_ == "GROUP":
                self.expect_keyword_("GROUP")
                name = self.expect_string_()
                # GroupName/Range get a back-reference to the parser so
                # they can resolve names lazily after parsing.
                coverage.append(ast.GroupName(name, self, location=location))
            elif self.next_token_ == "RANGE":
                self.expect_keyword_("RANGE")
                start = self.expect_string_()
                self.expect_keyword_("TO")
                end = self.expect_string_()
                coverage.append(ast.Range(start, end, self, location=location))
        return tuple(coverage)
529
530    def resolve_group(self, group_name):
531        return self.groups_.resolve(group_name)
532
533    def glyph_range(self, start, end):
534        return self.glyphs_.range(start, end)
535
536    def parse_ppem_(self):
537        location = self.cur_token_location_
538        ppem_name = self.cur_token_
539        value = self.expect_number_()
540        setting = ast.SettingDefinition(ppem_name, value, location=location)
541        return setting
542
543    def parse_compiler_flag_(self):
544        location = self.cur_token_location_
545        flag_name = self.cur_token_
546        value = True
547        setting = ast.SettingDefinition(flag_name, value, location=location)
548        return setting
549
550    def parse_cmap_format(self):
551        location = self.cur_token_location_
552        name = self.cur_token_
553        value = (self.expect_number_(), self.expect_number_(),
554                 self.expect_number_())
555        setting = ast.SettingDefinition(name, value, location=location)
556        return setting
557
558    def is_cur_keyword_(self, k):
559        return (self.cur_token_type_ is Lexer.NAME) and (self.cur_token_ == k)
560
561    def expect_string_(self):
562        self.advance_lexer_()
563        if self.cur_token_type_ is not Lexer.STRING:
564            raise VoltLibError("Expected a string", self.cur_token_location_)
565        return self.cur_token_
566
567    def expect_keyword_(self, keyword):
568        self.advance_lexer_()
569        if self.cur_token_type_ is Lexer.NAME and self.cur_token_ == keyword:
570            return self.cur_token_
571        raise VoltLibError("Expected \"%s\"" % keyword,
572                           self.cur_token_location_)
573
574    def expect_name_(self):
575        self.advance_lexer_()
576        if self.cur_token_type_ is Lexer.NAME:
577            return self.cur_token_
578        raise VoltLibError("Expected a name", self.cur_token_location_)
579
580    def expect_number_(self):
581        self.advance_lexer_()
582        if self.cur_token_type_ is not Lexer.NUMBER:
583            raise VoltLibError("Expected a number", self.cur_token_location_)
584        return self.cur_token_
585
    def advance_lexer_(self):
        """Shift the one-token lookahead window by one token.

        ``next_token_*`` becomes ``cur_token_*`` and a fresh token is
        fetched from the lexer.  Once the ``END`` keyword becomes the
        current token, no further input is fetched and
        ``next_token_type_``/``next_token_`` stay None.
        """
        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
            self.next_token_type_, self.next_token_, self.next_token_location_)
        try:
            # Treat END like end-of-stream by reusing the lexer's own
            # exhaustion signal.
            if self.is_cur_keyword_("END"):
                raise StopIteration
            (self.next_token_type_, self.next_token_,
             self.next_token_location_) = self.lexer_.next()
        except StopIteration:
            # NOTE(review): next_token_location_ is left stale here;
            # harmless because callers check next_token_type_ first.
            self.next_token_type_, self.next_token_ = (None, None)
596
597
class SymbolTable(object):
    """Scoped name table for VOLT entities (groups, scripts, lookups...).

    VOLT names are case insensitive (see the parser's error messages),
    so ``resolve`` first tries exact matches and then falls back to a
    case-folded comparison.
    """

    def __init__(self):
        self.scopes_ = [{}]

    def enter_scope(self):
        """Push a fresh innermost scope."""
        self.scopes_.append({})

    def exit_scope(self):
        """Discard the innermost scope and everything defined in it."""
        self.scopes_.pop()

    def define(self, name, item):
        """Bind *name* to *item* in the innermost scope."""
        self.scopes_[-1][name] = item

    def resolve(self, name, case_insensitive=True):
        """Return the item bound to *name*, or None if undefined.

        Scopes are searched innermost-first.  If no exact match exists
        and *case_insensitive* is true, a case-folded search is made
        over every scope as well.  (The original scanned only one
        scope, via a variable leaked from the exhausted loop.)
        """
        for scope in reversed(self.scopes_):
            item = scope.get(name)
            if item is not None:
                return item
        if case_insensitive:
            folded = name.lower()
            for scope in reversed(self.scopes_):
                for key, item in scope.items():
                    if key.lower() == folded:
                        return item
        return None
621
622
class OrderedSymbolTable(SymbolTable):
    """SymbolTable that preserves definition order, enabling ranges.

    Used for glyph definitions, where ``RANGE a TO b`` statements need
    the glyphs between two names in file order.
    """

    def __init__(self):
        self.scopes_ = [OrderedDict()]

    def enter_scope(self):
        self.scopes_.append(OrderedDict())

    def resolve(self, name, case_insensitive=False):
        # Bug fix: the base-class result was computed but never
        # returned, so resolve() always yielded None — which silently
        # disabled the duplicate-glyph check in parse_def_glyph_.
        return SymbolTable.resolve(self, name,
                                   case_insensitive=case_insensitive)

    def range(self, start, end):
        """Return the names from *start* through *end* inclusive, in
        definition order, searching innermost scope first; None when
        either endpoint is missing from every scope."""
        for scope in reversed(self.scopes_):
            if start in scope and end in scope:
                keys = list(scope.keys())
                return keys[keys.index(start):keys.index(end) + 1]
        return None
640