1from __future__ import print_function, division, absolute_import
2from __future__ import unicode_literals
3from fontTools.feaLib.error import FeatureLibError
4from fontTools.feaLib.lexer import Lexer, IncludingLexer, NonIncludingLexer
5from fontTools.misc.encodingTools import getEncoding
6from fontTools.misc.py23 import *
7import fontTools.feaLib.ast as ast
8import logging
9import os
10import re
11
12
13log = logging.getLogger(__name__)
14
15
16class Parser(object):
17    extensions = {}
18    ast = ast
19    SS_FEATURE_TAGS = {"ss%02d" % i for i in range(1, 20+1)}
20    CV_FEATURE_TAGS = {"cv%02d" % i for i in range(1, 99+1)}
21
22    def __init__(self, featurefile, glyphNames=(), followIncludes=True,
23                 **kwargs):
24        if "glyphMap" in kwargs:
25            from fontTools.misc.loggingTools import deprecateArgument
26            deprecateArgument("glyphMap", "use 'glyphNames' (iterable) instead")
27            if glyphNames:
28                raise TypeError("'glyphNames' and (deprecated) 'glyphMap' are "
29                                "mutually exclusive")
30            glyphNames = kwargs.pop("glyphMap")
31        if kwargs:
32            raise TypeError("unsupported keyword argument%s: %s"
33                            % ("" if len(kwargs) == 1 else "s",
34                               ", ".join(repr(k) for k in kwargs)))
35
36        self.glyphNames_ = set(glyphNames)
37        self.doc_ = self.ast.FeatureFile()
38        self.anchors_ = SymbolTable()
39        self.glyphclasses_ = SymbolTable()
40        self.lookups_ = SymbolTable()
41        self.valuerecords_ = SymbolTable()
42        self.symbol_tables_ = {
43            self.anchors_, self.valuerecords_
44        }
45        self.next_token_type_, self.next_token_ = (None, None)
46        self.cur_comments_ = []
47        self.next_token_location_ = None
48        lexerClass = IncludingLexer if followIncludes else NonIncludingLexer
49        self.lexer_ = lexerClass(featurefile)
50        self.advance_lexer_(comments=True)
51
52    def parse(self):
53        statements = self.doc_.statements
54        while self.next_token_type_ is not None or self.cur_comments_:
55            self.advance_lexer_(comments=True)
56            if self.cur_token_type_ is Lexer.COMMENT:
57                statements.append(
58                    self.ast.Comment(self.cur_token_,
59                                     location=self.cur_token_location_))
60            elif self.is_cur_keyword_("include"):
61                statements.append(self.parse_include_())
62            elif self.cur_token_type_ is Lexer.GLYPHCLASS:
63                statements.append(self.parse_glyphclass_definition_())
64            elif self.is_cur_keyword_(("anon", "anonymous")):
65                statements.append(self.parse_anonymous_())
66            elif self.is_cur_keyword_("anchorDef"):
67                statements.append(self.parse_anchordef_())
68            elif self.is_cur_keyword_("languagesystem"):
69                statements.append(self.parse_languagesystem_())
70            elif self.is_cur_keyword_("lookup"):
71                statements.append(self.parse_lookup_(vertical=False))
72            elif self.is_cur_keyword_("markClass"):
73                statements.append(self.parse_markClass_())
74            elif self.is_cur_keyword_("feature"):
75                statements.append(self.parse_feature_block_())
76            elif self.is_cur_keyword_("table"):
77                statements.append(self.parse_table_())
78            elif self.is_cur_keyword_("valueRecordDef"):
79                statements.append(
80                    self.parse_valuerecord_definition_(vertical=False))
81            elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in self.extensions:
82                statements.append(self.extensions[self.cur_token_](self))
83            elif self.cur_token_type_ is Lexer.SYMBOL and self.cur_token_ == ";":
84                continue
85            else:
86                raise FeatureLibError(
87                    "Expected feature, languagesystem, lookup, markClass, "
88                    "table, or glyph class definition, got {} \"{}\"".format(self.cur_token_type_, self.cur_token_),
89                    self.cur_token_location_)
90        return self.doc_
91
92    def parse_anchor_(self):
93        self.expect_symbol_("<")
94        self.expect_keyword_("anchor")
95        location = self.cur_token_location_
96
97        if self.next_token_ == "NULL":
98            self.expect_keyword_("NULL")
99            self.expect_symbol_(">")
100            return None
101
102        if self.next_token_type_ == Lexer.NAME:
103            name = self.expect_name_()
104            anchordef = self.anchors_.resolve(name)
105            if anchordef is None:
106                raise FeatureLibError(
107                    'Unknown anchor "%s"' % name,
108                    self.cur_token_location_)
109            self.expect_symbol_(">")
110            return self.ast.Anchor(anchordef.x, anchordef.y,
111                                   name=name,
112                                   contourpoint=anchordef.contourpoint,
113                                   xDeviceTable=None, yDeviceTable=None,
114                                   location=location)
115
116        x, y = self.expect_number_(), self.expect_number_()
117
118        contourpoint = None
119        if self.next_token_ == "contourpoint":
120            self.expect_keyword_("contourpoint")
121            contourpoint = self.expect_number_()
122
123        if self.next_token_ == "<":
124            xDeviceTable = self.parse_device_()
125            yDeviceTable = self.parse_device_()
126        else:
127            xDeviceTable, yDeviceTable = None, None
128
129        self.expect_symbol_(">")
130        return self.ast.Anchor(x, y, name=None,
131                               contourpoint=contourpoint,
132                               xDeviceTable=xDeviceTable,
133                               yDeviceTable=yDeviceTable,
134                               location=location)
135
136    def parse_anchor_marks_(self):
137        """Parses a sequence of [<anchor> mark @MARKCLASS]*."""
138        anchorMarks = []  # [(self.ast.Anchor, markClassName)*]
139        while self.next_token_ == "<":
140            anchor = self.parse_anchor_()
141            if anchor is None and self.next_token_ != "mark":
142                continue  # <anchor NULL> without mark, eg. in GPOS type 5
143            self.expect_keyword_("mark")
144            markClass = self.expect_markClass_reference_()
145            anchorMarks.append((anchor, markClass))
146        return anchorMarks
147
148    def parse_anchordef_(self):
149        assert self.is_cur_keyword_("anchorDef")
150        location = self.cur_token_location_
151        x, y = self.expect_number_(), self.expect_number_()
152        contourpoint = None
153        if self.next_token_ == "contourpoint":
154            self.expect_keyword_("contourpoint")
155            contourpoint = self.expect_number_()
156        name = self.expect_name_()
157        self.expect_symbol_(";")
158        anchordef = self.ast.AnchorDefinition(name, x, y,
159                                              contourpoint=contourpoint,
160                                              location=location)
161        self.anchors_.define(name, anchordef)
162        return anchordef
163
164    def parse_anonymous_(self):
165        assert self.is_cur_keyword_(("anon", "anonymous"))
166        tag = self.expect_tag_()
167        _, content, location = self.lexer_.scan_anonymous_block(tag)
168        self.advance_lexer_()
169        self.expect_symbol_('}')
170        end_tag = self.expect_tag_()
171        assert tag == end_tag, "bad splitting in Lexer.scan_anonymous_block()"
172        self.expect_symbol_(';')
173        return self.ast.AnonymousBlock(tag, content, location=location)
174
175    def parse_attach_(self):
176        assert self.is_cur_keyword_("Attach")
177        location = self.cur_token_location_
178        glyphs = self.parse_glyphclass_(accept_glyphname=True)
179        contourPoints = {self.expect_number_()}
180        while self.next_token_ != ";":
181            contourPoints.add(self.expect_number_())
182        self.expect_symbol_(";")
183        return self.ast.AttachStatement(glyphs, contourPoints,
184                                        location=location)
185
186    def parse_enumerate_(self, vertical):
187        assert self.cur_token_ in {"enumerate", "enum"}
188        self.advance_lexer_()
189        return self.parse_position_(enumerated=True, vertical=vertical)
190
191    def parse_GlyphClassDef_(self):
192        """Parses 'GlyphClassDef @BASE, @LIGATURES, @MARKS, @COMPONENTS;'"""
193        assert self.is_cur_keyword_("GlyphClassDef")
194        location = self.cur_token_location_
195        if self.next_token_ != ",":
196            baseGlyphs = self.parse_glyphclass_(accept_glyphname=False)
197        else:
198            baseGlyphs = None
199        self.expect_symbol_(",")
200        if self.next_token_ != ",":
201            ligatureGlyphs = self.parse_glyphclass_(accept_glyphname=False)
202        else:
203            ligatureGlyphs = None
204        self.expect_symbol_(",")
205        if self.next_token_ != ",":
206            markGlyphs = self.parse_glyphclass_(accept_glyphname=False)
207        else:
208            markGlyphs = None
209        self.expect_symbol_(",")
210        if self.next_token_ != ";":
211            componentGlyphs = self.parse_glyphclass_(accept_glyphname=False)
212        else:
213            componentGlyphs = None
214        self.expect_symbol_(";")
215        return self.ast.GlyphClassDefStatement(baseGlyphs, markGlyphs,
216                                               ligatureGlyphs, componentGlyphs,
217                                               location=location)
218
219    def parse_glyphclass_definition_(self):
220        """Parses glyph class definitions such as '@UPPERCASE = [A-Z];'"""
221        location, name = self.cur_token_location_, self.cur_token_
222        self.expect_symbol_("=")
223        glyphs = self.parse_glyphclass_(accept_glyphname=False)
224        self.expect_symbol_(";")
225        glyphclass = self.ast.GlyphClassDefinition(name, glyphs,
226                                                   location=location)
227        self.glyphclasses_.define(name, glyphclass)
228        return glyphclass
229
230    def split_glyph_range_(self, name, location):
231        # Since v1.20, the OpenType Feature File specification allows
232        # for dashes in glyph names. A sequence like "a-b-c-d" could
233        # therefore mean a single glyph whose name happens to be
234        # "a-b-c-d", or it could mean a range from glyph "a" to glyph
235        # "b-c-d", or a range from glyph "a-b" to glyph "c-d", or a
236        # range from glyph "a-b-c" to glyph "d".Technically, this
237        # example could be resolved because the (pretty complex)
238        # definition of glyph ranges renders most of these splits
239        # invalid. But the specification does not say that a compiler
240        # should try to apply such fancy heuristics. To encourage
241        # unambiguous feature files, we therefore try all possible
242        # splits and reject the feature file if there are multiple
243        # splits possible. It is intentional that we don't just emit a
244        # warning; warnings tend to get ignored. To fix the problem,
245        # font designers can trivially add spaces around the intended
246        # split point, and we emit a compiler error that suggests
247        # how exactly the source should be rewritten to make things
248        # unambiguous.
249        parts = name.split("-")
250        solutions = []
251        for i in range(len(parts)):
252            start, limit = "-".join(parts[0:i]), "-".join(parts[i:])
253            if start in self.glyphNames_ and limit in self.glyphNames_:
254                solutions.append((start, limit))
255        if len(solutions) == 1:
256            start, limit = solutions[0]
257            return start, limit
258        elif len(solutions) == 0:
259            raise FeatureLibError(
260                "\"%s\" is not a glyph in the font, and it can not be split "
261                "into a range of known glyphs" % name, location)
262        else:
263            ranges = " or ".join(["\"%s - %s\"" % (s, l) for s, l in solutions])
264            raise FeatureLibError(
265                "Ambiguous glyph range \"%s\"; "
266                "please use %s to clarify what you mean" % (name, ranges),
267                location)
268
269    def parse_glyphclass_(self, accept_glyphname):
270        if (accept_glyphname and
271                self.next_token_type_ in (Lexer.NAME, Lexer.CID)):
272            glyph = self.expect_glyph_()
273            return self.ast.GlyphName(glyph, location=self.cur_token_location_)
274        if self.next_token_type_ is Lexer.GLYPHCLASS:
275            self.advance_lexer_()
276            gc = self.glyphclasses_.resolve(self.cur_token_)
277            if gc is None:
278                raise FeatureLibError(
279                    "Unknown glyph class @%s" % self.cur_token_,
280                    self.cur_token_location_)
281            if isinstance(gc, self.ast.MarkClass):
282                return self.ast.MarkClassName(
283                    gc, location=self.cur_token_location_)
284            else:
285                return self.ast.GlyphClassName(
286                    gc, location=self.cur_token_location_)
287
288        self.expect_symbol_("[")
289        location = self.cur_token_location_
290        glyphs = self.ast.GlyphClass(location=location)
291        while self.next_token_ != "]":
292            if self.next_token_type_ is Lexer.NAME:
293                glyph = self.expect_glyph_()
294                location = self.cur_token_location_
295                if '-' in glyph and glyph not in self.glyphNames_:
296                    start, limit = self.split_glyph_range_(glyph, location)
297                    glyphs.add_range(
298                        start, limit,
299                        self.make_glyph_range_(location, start, limit))
300                elif self.next_token_ == "-":
301                    start = glyph
302                    self.expect_symbol_("-")
303                    limit = self.expect_glyph_()
304                    glyphs.add_range(
305                        start, limit,
306                        self.make_glyph_range_(location, start, limit))
307                else:
308                    glyphs.append(glyph)
309            elif self.next_token_type_ is Lexer.CID:
310                glyph = self.expect_glyph_()
311                if self.next_token_ == "-":
312                    range_location = self.cur_token_location_
313                    range_start = self.cur_token_
314                    self.expect_symbol_("-")
315                    range_end = self.expect_cid_()
316                    glyphs.add_cid_range(range_start, range_end,
317                                         self.make_cid_range_(range_location,
318                                                              range_start, range_end))
319                else:
320                    glyphs.append("cid%05d" % self.cur_token_)
321            elif self.next_token_type_ is Lexer.GLYPHCLASS:
322                self.advance_lexer_()
323                gc = self.glyphclasses_.resolve(self.cur_token_)
324                if gc is None:
325                    raise FeatureLibError(
326                        "Unknown glyph class @%s" % self.cur_token_,
327                        self.cur_token_location_)
328                if isinstance(gc, self.ast.MarkClass):
329                    gc = self.ast.MarkClassName(
330                        gc, location=self.cur_token_location_)
331                else:
332                    gc = self.ast.GlyphClassName(
333                        gc, location=self.cur_token_location_)
334                glyphs.add_class(gc)
335            else:
336                raise FeatureLibError(
337                    "Expected glyph name, glyph range, "
338                    "or glyph class reference",
339                    self.next_token_location_)
340        self.expect_symbol_("]")
341        return glyphs
342
343    def parse_class_name_(self):
344        name = self.expect_class_name_()
345        gc = self.glyphclasses_.resolve(name)
346        if gc is None:
347            raise FeatureLibError(
348                "Unknown glyph class @%s" % name,
349                self.cur_token_location_)
350        if isinstance(gc, self.ast.MarkClass):
351            return self.ast.MarkClassName(
352                gc, location=self.cur_token_location_)
353        else:
354            return self.ast.GlyphClassName(
355                gc, location=self.cur_token_location_)
356
357    def parse_glyph_pattern_(self, vertical):
358        prefix, glyphs, lookups, values, suffix = ([], [], [], [], [])
359        hasMarks = False
360        while self.next_token_ not in {"by", "from", ";", ","}:
361            gc = self.parse_glyphclass_(accept_glyphname=True)
362            marked = False
363            if self.next_token_ == "'":
364                self.expect_symbol_("'")
365                hasMarks = marked = True
366            if marked:
367                if suffix:
368                    # makeotf also reports this as an error, while FontForge
369                    # silently inserts ' in all the intervening glyphs.
370                    # https://github.com/fonttools/fonttools/pull/1096
371                    raise FeatureLibError(
372                        "Unsupported contextual target sequence: at most "
373                        "one run of marked (') glyph/class names allowed",
374                        self.cur_token_location_)
375                glyphs.append(gc)
376            elif glyphs:
377                suffix.append(gc)
378            else:
379                prefix.append(gc)
380
381            if self.is_next_value_():
382                values.append(self.parse_valuerecord_(vertical))
383            else:
384                values.append(None)
385
386            lookup = None
387            if self.next_token_ == "lookup":
388                self.expect_keyword_("lookup")
389                if not marked:
390                    raise FeatureLibError(
391                        "Lookups can only follow marked glyphs",
392                        self.cur_token_location_)
393                lookup_name = self.expect_name_()
394                lookup = self.lookups_.resolve(lookup_name)
395                if lookup is None:
396                    raise FeatureLibError(
397                        'Unknown lookup "%s"' % lookup_name,
398                        self.cur_token_location_)
399            if marked:
400                lookups.append(lookup)
401
402        if not glyphs and not suffix:  # eg., "sub f f i by"
403            assert lookups == []
404            return ([], prefix, [None] * len(prefix), values, [], hasMarks)
405        else:
406            assert not any(values[:len(prefix)]), values
407            format1 = values[len(prefix):][:len(glyphs)]
408            format2 = values[(len(prefix) + len(glyphs)):][:len(suffix)]
409            values = format2 if format2 and isinstance(format2[0], self.ast.ValueRecord) else format1
410            return (prefix, glyphs, lookups, values, suffix, hasMarks)
411
412    def parse_chain_context_(self):
413        location = self.cur_token_location_
414        prefix, glyphs, lookups, values, suffix, hasMarks = \
415            self.parse_glyph_pattern_(vertical=False)
416        chainContext = [(prefix, glyphs, suffix)]
417        hasLookups = any(lookups)
418        while self.next_token_ == ",":
419            self.expect_symbol_(",")
420            prefix, glyphs, lookups, values, suffix, hasMarks = \
421                self.parse_glyph_pattern_(vertical=False)
422            chainContext.append((prefix, glyphs, suffix))
423            hasLookups = hasLookups or any(lookups)
424        self.expect_symbol_(";")
425        return chainContext, hasLookups
426
427    def parse_ignore_(self):
428        assert self.is_cur_keyword_("ignore")
429        location = self.cur_token_location_
430        self.advance_lexer_()
431        if self.cur_token_ in ["substitute", "sub"]:
432            chainContext, hasLookups = self.parse_chain_context_()
433            if hasLookups:
434                raise FeatureLibError(
435                    "No lookups can be specified for \"ignore sub\"",
436                    location)
437            return self.ast.IgnoreSubstStatement(chainContext,
438                                                 location=location)
439        if self.cur_token_ in ["position", "pos"]:
440            chainContext, hasLookups = self.parse_chain_context_()
441            if hasLookups:
442                raise FeatureLibError(
443                    "No lookups can be specified for \"ignore pos\"",
444                    location)
445            return self.ast.IgnorePosStatement(chainContext,
446                                               location=location)
447        raise FeatureLibError(
448            "Expected \"substitute\" or \"position\"",
449            self.cur_token_location_)
450
451    def parse_include_(self):
452        assert self.cur_token_ == "include"
453        location = self.cur_token_location_
454        filename = self.expect_filename_()
455        # self.expect_symbol_(";")
456        return ast.IncludeStatement(filename, location=location)
457
458    def parse_language_(self):
459        assert self.is_cur_keyword_("language")
460        location = self.cur_token_location_
461        language = self.expect_language_tag_()
462        include_default, required = (True, False)
463        if self.next_token_ in {"exclude_dflt", "include_dflt"}:
464            include_default = (self.expect_name_() == "include_dflt")
465        if self.next_token_ == "required":
466            self.expect_keyword_("required")
467            required = True
468        self.expect_symbol_(";")
469        return self.ast.LanguageStatement(language,
470                                          include_default, required,
471                                          location=location)
472
473    def parse_ligatureCaretByIndex_(self):
474        assert self.is_cur_keyword_("LigatureCaretByIndex")
475        location = self.cur_token_location_
476        glyphs = self.parse_glyphclass_(accept_glyphname=True)
477        carets = [self.expect_number_()]
478        while self.next_token_ != ";":
479            carets.append(self.expect_number_())
480        self.expect_symbol_(";")
481        return self.ast.LigatureCaretByIndexStatement(glyphs, carets,
482                                                      location=location)
483
484    def parse_ligatureCaretByPos_(self):
485        assert self.is_cur_keyword_("LigatureCaretByPos")
486        location = self.cur_token_location_
487        glyphs = self.parse_glyphclass_(accept_glyphname=True)
488        carets = [self.expect_number_()]
489        while self.next_token_ != ";":
490            carets.append(self.expect_number_())
491        self.expect_symbol_(";")
492        return self.ast.LigatureCaretByPosStatement(glyphs, carets,
493                                                    location=location)
494
495    def parse_lookup_(self, vertical):
496        assert self.is_cur_keyword_("lookup")
497        location, name = self.cur_token_location_, self.expect_name_()
498
499        if self.next_token_ == ";":
500            lookup = self.lookups_.resolve(name)
501            if lookup is None:
502                raise FeatureLibError("Unknown lookup \"%s\"" % name,
503                                      self.cur_token_location_)
504            self.expect_symbol_(";")
505            return self.ast.LookupReferenceStatement(lookup,
506                                                     location=location)
507
508        use_extension = False
509        if self.next_token_ == "useExtension":
510            self.expect_keyword_("useExtension")
511            use_extension = True
512
513        block = self.ast.LookupBlock(name, use_extension, location=location)
514        self.parse_block_(block, vertical)
515        self.lookups_.define(name, block)
516        return block
517
518    def parse_lookupflag_(self):
519        assert self.is_cur_keyword_("lookupflag")
520        location = self.cur_token_location_
521
522        # format B: "lookupflag 6;"
523        if self.next_token_type_ == Lexer.NUMBER:
524            value = self.expect_number_()
525            self.expect_symbol_(";")
526            return self.ast.LookupFlagStatement(value, location=location)
527
528        # format A: "lookupflag RightToLeft MarkAttachmentType @M;"
529        value_seen = False
530        value, markAttachment, markFilteringSet = 0, None, None
531        flags = {
532            "RightToLeft": 1, "IgnoreBaseGlyphs": 2,
533            "IgnoreLigatures": 4, "IgnoreMarks": 8
534        }
535        seen = set()
536        while self.next_token_ != ";":
537            if self.next_token_ in seen:
538                raise FeatureLibError(
539                    "%s can be specified only once" % self.next_token_,
540                    self.next_token_location_)
541            seen.add(self.next_token_)
542            if self.next_token_ == "MarkAttachmentType":
543                self.expect_keyword_("MarkAttachmentType")
544                markAttachment = self.parse_class_name_()
545            elif self.next_token_ == "UseMarkFilteringSet":
546                self.expect_keyword_("UseMarkFilteringSet")
547                markFilteringSet = self.parse_class_name_()
548            elif self.next_token_ in flags:
549                value_seen = True
550                value = value | flags[self.expect_name_()]
551            else:
552                raise FeatureLibError(
553                    '"%s" is not a recognized lookupflag' % self.next_token_,
554                    self.next_token_location_)
555        self.expect_symbol_(";")
556
557        if not any([value_seen, markAttachment, markFilteringSet]):
558            raise FeatureLibError(
559                'lookupflag must have a value', self.next_token_location_)
560
561        return self.ast.LookupFlagStatement(value,
562                                            markAttachment=markAttachment,
563                                            markFilteringSet=markFilteringSet,
564                                            location=location)
565
566    def parse_markClass_(self):
567        assert self.is_cur_keyword_("markClass")
568        location = self.cur_token_location_
569        glyphs = self.parse_glyphclass_(accept_glyphname=True)
570        anchor = self.parse_anchor_()
571        name = self.expect_class_name_()
572        self.expect_symbol_(";")
573        markClass = self.doc_.markClasses.get(name)
574        if markClass is None:
575            markClass = self.ast.MarkClass(name)
576            self.doc_.markClasses[name] = markClass
577            self.glyphclasses_.define(name, markClass)
578        mcdef = self.ast.MarkClassDefinition(markClass, anchor, glyphs,
579                                             location=location)
580        markClass.addDefinition(mcdef)
581        return mcdef
582
583    def parse_position_(self, enumerated, vertical):
584        assert self.cur_token_ in {"position", "pos"}
585        if self.next_token_ == "cursive":  # GPOS type 3
586            return self.parse_position_cursive_(enumerated, vertical)
587        elif self.next_token_ == "base":   # GPOS type 4
588            return self.parse_position_base_(enumerated, vertical)
589        elif self.next_token_ == "ligature":   # GPOS type 5
590            return self.parse_position_ligature_(enumerated, vertical)
591        elif self.next_token_ == "mark":   # GPOS type 6
592            return self.parse_position_mark_(enumerated, vertical)
593
594        location = self.cur_token_location_
595        prefix, glyphs, lookups, values, suffix, hasMarks = \
596            self.parse_glyph_pattern_(vertical)
597        self.expect_symbol_(";")
598
599        if any(lookups):
600            # GPOS type 8: Chaining contextual positioning; explicit lookups
601            if any(values):
602                raise FeatureLibError(
603                    "If \"lookup\" is present, no values must be specified",
604                    location)
605            return self.ast.ChainContextPosStatement(
606                prefix, glyphs, suffix, lookups, location=location)
607
608        # Pair positioning, format A: "pos V 10 A -10;"
609        # Pair positioning, format B: "pos V A -20;"
610        if not prefix and not suffix and len(glyphs) == 2 and not hasMarks:
611            if values[0] is None:  # Format B: "pos V A -20;"
612                values.reverse()
613            return self.ast.PairPosStatement(
614                glyphs[0], values[0], glyphs[1], values[1],
615                enumerated=enumerated,
616                location=location)
617
618        if enumerated:
619            raise FeatureLibError(
620                '"enumerate" is only allowed with pair positionings', location)
621        return self.ast.SinglePosStatement(list(zip(glyphs, values)),
622                                           prefix, suffix, forceChain=hasMarks,
623                                           location=location)
624
625    def parse_position_cursive_(self, enumerated, vertical):
626        location = self.cur_token_location_
627        self.expect_keyword_("cursive")
628        if enumerated:
629            raise FeatureLibError(
630                '"enumerate" is not allowed with '
631                'cursive attachment positioning',
632                location)
633        glyphclass = self.parse_glyphclass_(accept_glyphname=True)
634        entryAnchor = self.parse_anchor_()
635        exitAnchor = self.parse_anchor_()
636        self.expect_symbol_(";")
637        return self.ast.CursivePosStatement(
638            glyphclass, entryAnchor, exitAnchor, location=location)
639
640    def parse_position_base_(self, enumerated, vertical):
641        location = self.cur_token_location_
642        self.expect_keyword_("base")
643        if enumerated:
644            raise FeatureLibError(
645                '"enumerate" is not allowed with '
646                'mark-to-base attachment positioning',
647                location)
648        base = self.parse_glyphclass_(accept_glyphname=True)
649        marks = self.parse_anchor_marks_()
650        self.expect_symbol_(";")
651        return self.ast.MarkBasePosStatement(base, marks, location=location)
652
653    def parse_position_ligature_(self, enumerated, vertical):
654        location = self.cur_token_location_
655        self.expect_keyword_("ligature")
656        if enumerated:
657            raise FeatureLibError(
658                '"enumerate" is not allowed with '
659                'mark-to-ligature attachment positioning',
660                location)
661        ligatures = self.parse_glyphclass_(accept_glyphname=True)
662        marks = [self.parse_anchor_marks_()]
663        while self.next_token_ == "ligComponent":
664            self.expect_keyword_("ligComponent")
665            marks.append(self.parse_anchor_marks_())
666        self.expect_symbol_(";")
667        return self.ast.MarkLigPosStatement(ligatures, marks, location=location)
668
669    def parse_position_mark_(self, enumerated, vertical):
670        location = self.cur_token_location_
671        self.expect_keyword_("mark")
672        if enumerated:
673            raise FeatureLibError(
674                '"enumerate" is not allowed with '
675                'mark-to-mark attachment positioning',
676                location)
677        baseMarks = self.parse_glyphclass_(accept_glyphname=True)
678        marks = self.parse_anchor_marks_()
679        self.expect_symbol_(";")
680        return self.ast.MarkMarkPosStatement(baseMarks, marks,
681                                             location=location)
682
683    def parse_script_(self):
684        assert self.is_cur_keyword_("script")
685        location, script = self.cur_token_location_, self.expect_script_tag_()
686        self.expect_symbol_(";")
687        return self.ast.ScriptStatement(script, location=location)
688
689    def parse_substitute_(self):
690        assert self.cur_token_ in {"substitute", "sub", "reversesub", "rsub"}
691        location = self.cur_token_location_
692        reverse = self.cur_token_ in {"reversesub", "rsub"}
693        old_prefix, old, lookups, values, old_suffix, hasMarks = \
694            self.parse_glyph_pattern_(vertical=False)
695        if any(values):
696            raise FeatureLibError(
697                "Substitution statements cannot contain values", location)
698        new = []
699        if self.next_token_ == "by":
700            keyword = self.expect_keyword_("by")
701            while self.next_token_ != ";":
702                gc = self.parse_glyphclass_(accept_glyphname=True)
703                new.append(gc)
704        elif self.next_token_ == "from":
705            keyword = self.expect_keyword_("from")
706            new = [self.parse_glyphclass_(accept_glyphname=False)]
707        else:
708            keyword = None
709        self.expect_symbol_(";")
710        if len(new) is 0 and not any(lookups):
711            raise FeatureLibError(
712                'Expected "by", "from" or explicit lookup references',
713                self.cur_token_location_)
714
715        # GSUB lookup type 3: Alternate substitution.
716        # Format: "substitute a from [a.1 a.2 a.3];"
717        if keyword == "from":
718            if reverse:
719                raise FeatureLibError(
720                    'Reverse chaining substitutions do not support "from"',
721                    location)
722            if len(old) != 1 or len(old[0].glyphSet()) != 1:
723                raise FeatureLibError(
724                    'Expected a single glyph before "from"',
725                    location)
726            if len(new) != 1:
727                raise FeatureLibError(
728                    'Expected a single glyphclass after "from"',
729                    location)
730            return self.ast.AlternateSubstStatement(
731                old_prefix, old[0], old_suffix, new[0], location=location)
732
733        num_lookups = len([l for l in lookups if l is not None])
734
735        # GSUB lookup type 1: Single substitution.
736        # Format A: "substitute a by a.sc;"
737        # Format B: "substitute [one.fitted one.oldstyle] by one;"
738        # Format C: "substitute [a-d] by [A.sc-D.sc];"
739        if (not reverse and len(old) == 1 and len(new) == 1 and
740                num_lookups == 0):
741            glyphs = list(old[0].glyphSet())
742            replacements = list(new[0].glyphSet())
743            if len(replacements) == 1:
744                replacements = replacements * len(glyphs)
745            if len(glyphs) != len(replacements):
746                raise FeatureLibError(
747                    'Expected a glyph class with %d elements after "by", '
748                    'but found a glyph class with %d elements' %
749                    (len(glyphs), len(replacements)), location)
750            return self.ast.SingleSubstStatement(
751                old, new,
752                old_prefix, old_suffix,
753                forceChain=hasMarks,
754                location=location
755            )
756
757        # GSUB lookup type 2: Multiple substitution.
758        # Format: "substitute f_f_i by f f i;"
759        if (not reverse and
760                len(old) == 1 and len(old[0].glyphSet()) == 1 and
761                len(new) > 1 and max([len(n.glyphSet()) for n in new]) == 1 and
762                num_lookups == 0):
763            return self.ast.MultipleSubstStatement(
764                old_prefix, tuple(old[0].glyphSet())[0], old_suffix,
765                tuple([list(n.glyphSet())[0] for n in new]),
766                forceChain=hasMarks, location=location)
767
768        # GSUB lookup type 4: Ligature substitution.
769        # Format: "substitute f f i by f_f_i;"
770        if (not reverse and
771                len(old) > 1 and len(new) == 1 and
772                len(new[0].glyphSet()) == 1 and
773                num_lookups == 0):
774            return self.ast.LigatureSubstStatement(
775                old_prefix, old, old_suffix,
776                list(new[0].glyphSet())[0], forceChain=hasMarks,
777                location=location)
778
779        # GSUB lookup type 8: Reverse chaining substitution.
780        if reverse:
781            if len(old) != 1:
782                raise FeatureLibError(
783                    "In reverse chaining single substitutions, "
784                    "only a single glyph or glyph class can be replaced",
785                    location)
786            if len(new) != 1:
787                raise FeatureLibError(
788                    'In reverse chaining single substitutions, '
789                    'the replacement (after "by") must be a single glyph '
790                    'or glyph class', location)
791            if num_lookups != 0:
792                raise FeatureLibError(
793                    "Reverse chaining substitutions cannot call named lookups",
794                    location)
795            glyphs = sorted(list(old[0].glyphSet()))
796            replacements = sorted(list(new[0].glyphSet()))
797            if len(replacements) == 1:
798                replacements = replacements * len(glyphs)
799            if len(glyphs) != len(replacements):
800                raise FeatureLibError(
801                    'Expected a glyph class with %d elements after "by", '
802                    'but found a glyph class with %d elements' %
803                    (len(glyphs), len(replacements)), location)
804            return self.ast.ReverseChainSingleSubstStatement(
805                old_prefix, old_suffix, old, new, location=location)
806
807        if len(old) > 1 and len(new) > 1:
808            raise FeatureLibError(
809                'Direct substitution of multiple glyphs by multiple glyphs '
810                'is not supported',
811                location)
812
813        # GSUB lookup type 6: Chaining contextual substitution.
814        assert len(new) == 0, new
815        rule = self.ast.ChainContextSubstStatement(
816            old_prefix, old, old_suffix, lookups, location=location)
817        return rule
818
819    def parse_subtable_(self):
820        assert self.is_cur_keyword_("subtable")
821        location = self.cur_token_location_
822        self.expect_symbol_(";")
823        return self.ast.SubtableStatement(location=location)
824
825    def parse_size_parameters_(self):
826        assert self.is_cur_keyword_("parameters")
827        location = self.cur_token_location_
828        DesignSize = self.expect_decipoint_()
829        SubfamilyID = self.expect_number_()
830        RangeStart = 0
831        RangeEnd = 0
832        if self.next_token_type_ in (Lexer.NUMBER, Lexer.FLOAT) or \
833                SubfamilyID != 0:
834            RangeStart = self.expect_decipoint_()
835            RangeEnd = self.expect_decipoint_()
836
837        self.expect_symbol_(";")
838        return self.ast.SizeParameters(DesignSize, SubfamilyID,
839                                       RangeStart, RangeEnd,
840                                       location=location)
841
842    def parse_size_menuname_(self):
843        assert self.is_cur_keyword_("sizemenuname")
844        location = self.cur_token_location_
845        platformID, platEncID, langID, string = self.parse_name_()
846        return self.ast.FeatureNameStatement("size", platformID,
847                                             platEncID, langID, string,
848                                             location=location)
849
850    def parse_table_(self):
851        assert self.is_cur_keyword_("table")
852        location, name = self.cur_token_location_, self.expect_tag_()
853        table = self.ast.TableBlock(name, location=location)
854        self.expect_symbol_("{")
855        handler = {
856            "GDEF": self.parse_table_GDEF_,
857            "head": self.parse_table_head_,
858            "hhea": self.parse_table_hhea_,
859            "vhea": self.parse_table_vhea_,
860            "name": self.parse_table_name_,
861            "BASE": self.parse_table_BASE_,
862            "OS/2": self.parse_table_OS_2_,
863        }.get(name)
864        if handler:
865            handler(table)
866        else:
867            raise FeatureLibError('"table %s" is not supported' % name.strip(),
868                                  location)
869        self.expect_symbol_("}")
870        end_tag = self.expect_tag_()
871        if end_tag != name:
872            raise FeatureLibError('Expected "%s"' % name.strip(),
873                                  self.cur_token_location_)
874        self.expect_symbol_(";")
875        return table
876
877    def parse_table_GDEF_(self, table):
878        statements = table.statements
879        while self.next_token_ != "}" or self.cur_comments_:
880            self.advance_lexer_(comments=True)
881            if self.cur_token_type_ is Lexer.COMMENT:
882                statements.append(self.ast.Comment(
883                    self.cur_token_, location=self.cur_token_location_))
884            elif self.is_cur_keyword_("Attach"):
885                statements.append(self.parse_attach_())
886            elif self.is_cur_keyword_("GlyphClassDef"):
887                statements.append(self.parse_GlyphClassDef_())
888            elif self.is_cur_keyword_("LigatureCaretByIndex"):
889                statements.append(self.parse_ligatureCaretByIndex_())
890            elif self.is_cur_keyword_("LigatureCaretByPos"):
891                statements.append(self.parse_ligatureCaretByPos_())
892            elif self.cur_token_ == ";":
893                continue
894            else:
895                raise FeatureLibError(
896                    "Expected Attach, LigatureCaretByIndex, "
897                    "or LigatureCaretByPos",
898                    self.cur_token_location_)
899
900    def parse_table_head_(self, table):
901        statements = table.statements
902        while self.next_token_ != "}" or self.cur_comments_:
903            self.advance_lexer_(comments=True)
904            if self.cur_token_type_ is Lexer.COMMENT:
905                statements.append(self.ast.Comment(
906                    self.cur_token_, location=self.cur_token_location_))
907            elif self.is_cur_keyword_("FontRevision"):
908                statements.append(self.parse_FontRevision_())
909            elif self.cur_token_ == ";":
910                continue
911            else:
912                raise FeatureLibError("Expected FontRevision",
913                                      self.cur_token_location_)
914
915    def parse_table_hhea_(self, table):
916        statements = table.statements
917        fields = ("CaretOffset", "Ascender", "Descender", "LineGap")
918        while self.next_token_ != "}" or self.cur_comments_:
919            self.advance_lexer_(comments=True)
920            if self.cur_token_type_ is Lexer.COMMENT:
921                statements.append(self.ast.Comment(
922                    self.cur_token_, location=self.cur_token_location_))
923            elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
924                key = self.cur_token_.lower()
925                value = self.expect_number_()
926                statements.append(
927                    self.ast.HheaField(key, value,
928                                       location=self.cur_token_location_))
929                if self.next_token_ != ";":
930                    raise FeatureLibError("Incomplete statement", self.next_token_location_)
931            elif self.cur_token_ == ";":
932                continue
933            else:
934                raise FeatureLibError("Expected CaretOffset, Ascender, "
935                                      "Descender or LineGap",
936                                      self.cur_token_location_)
937
938    def parse_table_vhea_(self, table):
939        statements = table.statements
940        fields = ("VertTypoAscender", "VertTypoDescender", "VertTypoLineGap")
941        while self.next_token_ != "}" or self.cur_comments_:
942            self.advance_lexer_(comments=True)
943            if self.cur_token_type_ is Lexer.COMMENT:
944                statements.append(self.ast.Comment(
945                    self.cur_token_, location=self.cur_token_location_))
946            elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
947                key = self.cur_token_.lower()
948                value = self.expect_number_()
949                statements.append(
950                    self.ast.VheaField(key, value,
951                                       location=self.cur_token_location_))
952                if self.next_token_ != ";":
953                    raise FeatureLibError("Incomplete statement", self.next_token_location_)
954            elif self.cur_token_ == ";":
955                continue
956            else:
957                raise FeatureLibError("Expected VertTypoAscender, "
958                                      "VertTypoDescender or VertTypoLineGap",
959                                      self.cur_token_location_)
960
961    def parse_table_name_(self, table):
962        statements = table.statements
963        while self.next_token_ != "}" or self.cur_comments_:
964            self.advance_lexer_(comments=True)
965            if self.cur_token_type_ is Lexer.COMMENT:
966                statements.append(self.ast.Comment(
967                    self.cur_token_, location=self.cur_token_location_))
968            elif self.is_cur_keyword_("nameid"):
969                statement = self.parse_nameid_()
970                if statement:
971                    statements.append(statement)
972            elif self.cur_token_ == ";":
973                continue
974            else:
975                raise FeatureLibError("Expected nameid",
976                                      self.cur_token_location_)
977
978    def parse_name_(self):
979        platEncID = None
980        langID = None
981        if self.next_token_type_ == Lexer.NUMBER:
982            platformID = self.expect_number_()
983            location = self.cur_token_location_
984            if platformID not in (1, 3):
985                raise FeatureLibError("Expected platform id 1 or 3", location)
986            if self.next_token_type_ == Lexer.NUMBER:
987                platEncID = self.expect_number_()
988                langID = self.expect_number_()
989        else:
990            platformID = 3
991            location = self.cur_token_location_
992
993        if platformID == 1:                # Macintosh
994            platEncID = platEncID or 0     # Roman
995            langID = langID or 0           # English
996        else:                              # 3, Windows
997            platEncID = platEncID or 1     # Unicode
998            langID = langID or 0x0409      # English
999
1000        string = self.expect_string_()
1001        self.expect_symbol_(";")
1002
1003        encoding = getEncoding(platformID, platEncID, langID)
1004        if encoding is None:
1005            raise FeatureLibError("Unsupported encoding", location)
1006        unescaped = self.unescape_string_(string, encoding)
1007        return platformID, platEncID, langID, unescaped
1008
1009    def parse_nameid_(self):
1010        assert self.cur_token_ == "nameid", self.cur_token_
1011        location, nameID = self.cur_token_location_, self.expect_number_()
1012        if nameID > 32767:
1013            raise FeatureLibError("Name id value cannot be greater than 32767",
1014                                  self.cur_token_location_)
1015        if 1 <= nameID <= 6:
1016            log.warning("Name id %d cannot be set from the feature file. "
1017                        "Ignoring record" % nameID)
1018            self.parse_name_()  # skip to the next record
1019            return None
1020
1021        platformID, platEncID, langID, string = self.parse_name_()
1022        return self.ast.NameRecord(nameID, platformID, platEncID,
1023                                   langID, string, location=location)
1024
1025    def unescape_string_(self, string, encoding):
1026        if encoding == "utf_16_be":
1027            s = re.sub(r"\\[0-9a-fA-F]{4}", self.unescape_unichr_, string)
1028        else:
1029            unescape = lambda m: self.unescape_byte_(m, encoding)
1030            s = re.sub(r"\\[0-9a-fA-F]{2}", unescape, string)
1031        # We now have a Unicode string, but it might contain surrogate pairs.
1032        # We convert surrogates to actual Unicode by round-tripping through
1033        # Python's UTF-16 codec in a special mode.
1034        utf16 = tobytes(s, "utf_16_be", "surrogatepass")
1035        return tounicode(utf16, "utf_16_be")
1036
1037    @staticmethod
1038    def unescape_unichr_(match):
1039        n = match.group(0)[1:]
1040        return unichr(int(n, 16))
1041
1042    @staticmethod
1043    def unescape_byte_(match, encoding):
1044        n = match.group(0)[1:]
1045        return bytechr(int(n, 16)).decode(encoding)
1046
1047    def parse_table_BASE_(self, table):
1048        statements = table.statements
1049        while self.next_token_ != "}" or self.cur_comments_:
1050            self.advance_lexer_(comments=True)
1051            if self.cur_token_type_ is Lexer.COMMENT:
1052                statements.append(self.ast.Comment(
1053                    self.cur_token_, location=self.cur_token_location_))
1054            elif self.is_cur_keyword_("HorizAxis.BaseTagList"):
1055                horiz_bases = self.parse_base_tag_list_()
1056            elif self.is_cur_keyword_("HorizAxis.BaseScriptList"):
1057                horiz_scripts = self.parse_base_script_list_(len(horiz_bases))
1058                statements.append(
1059                        self.ast.BaseAxis(horiz_bases,
1060                                          horiz_scripts, False,
1061                                          location=self.cur_token_location_))
1062            elif self.is_cur_keyword_("VertAxis.BaseTagList"):
1063                vert_bases = self.parse_base_tag_list_()
1064            elif self.is_cur_keyword_("VertAxis.BaseScriptList"):
1065                vert_scripts = self.parse_base_script_list_(len(vert_bases))
1066                statements.append(
1067                        self.ast.BaseAxis(vert_bases,
1068                                          vert_scripts, True,
1069                                          location=self.cur_token_location_))
1070            elif self.cur_token_ == ";":
1071                continue
1072
1073    def parse_table_OS_2_(self, table):
1074        statements = table.statements
1075        numbers = ("FSType", "TypoAscender", "TypoDescender", "TypoLineGap",
1076                   "winAscent", "winDescent", "XHeight", "CapHeight",
1077                   "WeightClass", "WidthClass", "LowerOpSize", "UpperOpSize")
1078        ranges = ("UnicodeRange", "CodePageRange")
1079        while self.next_token_ != "}" or self.cur_comments_:
1080            self.advance_lexer_(comments=True)
1081            if self.cur_token_type_ is Lexer.COMMENT:
1082                statements.append(self.ast.Comment(
1083                    self.cur_token_, location=self.cur_token_location_))
1084            elif self.cur_token_type_ is Lexer.NAME:
1085                key = self.cur_token_.lower()
1086                value = None
1087                if self.cur_token_ in numbers:
1088                    value = self.expect_number_()
1089                elif self.is_cur_keyword_("Panose"):
1090                    value = []
1091                    for i in range(10):
1092                        value.append(self.expect_number_())
1093                elif self.cur_token_ in ranges:
1094                    value = []
1095                    while self.next_token_ != ";":
1096                         value.append(self.expect_number_())
1097                elif self.is_cur_keyword_("Vendor"):
1098                    value = self.expect_string_()
1099                statements.append(
1100                    self.ast.OS2Field(key, value,
1101                                      location=self.cur_token_location_))
1102            elif self.cur_token_ == ";":
1103                continue
1104
1105    def parse_base_tag_list_(self):
1106        assert self.cur_token_ in ("HorizAxis.BaseTagList",
1107                                   "VertAxis.BaseTagList"), self.cur_token_
1108        bases = []
1109        while self.next_token_ != ";":
1110            bases.append(self.expect_script_tag_())
1111        self.expect_symbol_(";")
1112        return bases
1113
1114    def parse_base_script_list_(self, count):
1115        assert self.cur_token_ in ("HorizAxis.BaseScriptList",
1116                                   "VertAxis.BaseScriptList"), self.cur_token_
1117        scripts = [(self.parse_base_script_record_(count))]
1118        while self.next_token_ == ",":
1119            self.expect_symbol_(",")
1120            scripts.append(self.parse_base_script_record_(count))
1121        self.expect_symbol_(";")
1122        return scripts
1123
1124    def parse_base_script_record_(self, count):
1125        script_tag = self.expect_script_tag_()
1126        base_tag = self.expect_script_tag_()
1127        coords = [self.expect_number_() for i in range(count)]
1128        return script_tag, base_tag, coords
1129
1130    def parse_device_(self):
1131        result = None
1132        self.expect_symbol_("<")
1133        self.expect_keyword_("device")
1134        if self.next_token_ == "NULL":
1135            self.expect_keyword_("NULL")
1136        else:
1137            result = [(self.expect_number_(), self.expect_number_())]
1138            while self.next_token_ == ",":
1139                self.expect_symbol_(",")
1140                result.append((self.expect_number_(), self.expect_number_()))
1141            result = tuple(result)  # make it hashable
1142        self.expect_symbol_(">")
1143        return result
1144
1145    def is_next_value_(self):
1146        return self.next_token_type_ is Lexer.NUMBER or self.next_token_ == "<"
1147
1148    def parse_valuerecord_(self, vertical):
1149        if self.next_token_type_ is Lexer.NUMBER:
1150            number, location = self.expect_number_(), self.cur_token_location_
1151            if vertical:
1152                val = self.ast.ValueRecord(yAdvance=number,
1153                                           vertical=vertical,
1154                                           location=location)
1155            else:
1156                val = self.ast.ValueRecord(xAdvance=number,
1157                                           vertical=vertical,
1158                                           location=location)
1159            return val
1160        self.expect_symbol_("<")
1161        location = self.cur_token_location_
1162        if self.next_token_type_ is Lexer.NAME:
1163            name = self.expect_name_()
1164            if name == "NULL":
1165                self.expect_symbol_(">")
1166                return self.ast.ValueRecord()
1167            vrd = self.valuerecords_.resolve(name)
1168            if vrd is None:
1169                raise FeatureLibError("Unknown valueRecordDef \"%s\"" % name,
1170                                      self.cur_token_location_)
1171            value = vrd.value
1172            xPlacement, yPlacement = (value.xPlacement, value.yPlacement)
1173            xAdvance, yAdvance = (value.xAdvance, value.yAdvance)
1174        else:
1175            xPlacement, yPlacement, xAdvance, yAdvance = (
1176                self.expect_number_(), self.expect_number_(),
1177                self.expect_number_(), self.expect_number_())
1178
1179        if self.next_token_ == "<":
1180            xPlaDevice, yPlaDevice, xAdvDevice, yAdvDevice = (
1181                self.parse_device_(), self.parse_device_(),
1182                self.parse_device_(), self.parse_device_())
1183            allDeltas = sorted([
1184                delta
1185                for size, delta
1186                in (xPlaDevice if xPlaDevice else ()) +
1187                (yPlaDevice if yPlaDevice else ()) +
1188                (xAdvDevice if xAdvDevice else ()) +
1189                (yAdvDevice if yAdvDevice else ())])
1190            if allDeltas[0] < -128 or allDeltas[-1] > 127:
1191                raise FeatureLibError(
1192                    "Device value out of valid range (-128..127)",
1193                    self.cur_token_location_)
1194        else:
1195            xPlaDevice, yPlaDevice, xAdvDevice, yAdvDevice = (
1196                None, None, None, None)
1197
1198        self.expect_symbol_(">")
1199        return self.ast.ValueRecord(
1200            xPlacement, yPlacement, xAdvance, yAdvance,
1201            xPlaDevice, yPlaDevice, xAdvDevice, yAdvDevice,
1202            vertical=vertical, location=location)
1203
1204    def parse_valuerecord_definition_(self, vertical):
1205        assert self.is_cur_keyword_("valueRecordDef")
1206        location = self.cur_token_location_
1207        value = self.parse_valuerecord_(vertical)
1208        name = self.expect_name_()
1209        self.expect_symbol_(";")
1210        vrd = self.ast.ValueRecordDefinition(name, value, location=location)
1211        self.valuerecords_.define(name, vrd)
1212        return vrd
1213
1214    def parse_languagesystem_(self):
1215        assert self.cur_token_ == "languagesystem"
1216        location = self.cur_token_location_
1217        script = self.expect_script_tag_()
1218        language = self.expect_language_tag_()
1219        self.expect_symbol_(";")
1220        return self.ast.LanguageSystemStatement(script, language,
1221                                                location=location)
1222
1223    def parse_feature_block_(self):
1224        assert self.cur_token_ == "feature"
1225        location = self.cur_token_location_
1226        tag = self.expect_tag_()
1227        vertical = (tag in {"vkrn", "vpal", "vhal", "valt"})
1228
1229        stylisticset = None
1230        cv_feature = None
1231        size_feature = False
1232        if tag in self.SS_FEATURE_TAGS:
1233            stylisticset = tag
1234        elif tag in self.CV_FEATURE_TAGS:
1235            cv_feature = tag
1236        elif tag == "size":
1237            size_feature = True
1238
1239        use_extension = False
1240        if self.next_token_ == "useExtension":
1241            self.expect_keyword_("useExtension")
1242            use_extension = True
1243
1244        block = self.ast.FeatureBlock(tag, use_extension=use_extension,
1245                                      location=location)
1246        self.parse_block_(block, vertical, stylisticset, size_feature,
1247                          cv_feature)
1248        return block
1249
1250    def parse_feature_reference_(self):
1251        assert self.cur_token_ == "feature", self.cur_token_
1252        location = self.cur_token_location_
1253        featureName = self.expect_tag_()
1254        self.expect_symbol_(";")
1255        return self.ast.FeatureReferenceStatement(featureName,
1256                                                  location=location)
1257
1258    def parse_featureNames_(self, tag):
1259        assert self.cur_token_ == "featureNames", self.cur_token_
1260        block = self.ast.NestedBlock(tag, self.cur_token_,
1261                                     location=self.cur_token_location_)
1262        self.expect_symbol_("{")
1263        for symtab in self.symbol_tables_:
1264            symtab.enter_scope()
1265        while self.next_token_ != "}" or self.cur_comments_:
1266            self.advance_lexer_(comments=True)
1267            if self.cur_token_type_ is Lexer.COMMENT:
1268                block.statements.append(self.ast.Comment(
1269                    self.cur_token_, location=self.cur_token_location_))
1270            elif self.is_cur_keyword_("name"):
1271                location = self.cur_token_location_
1272                platformID, platEncID, langID, string = self.parse_name_()
1273                block.statements.append(
1274                    self.ast.FeatureNameStatement(tag, platformID,
1275                                                  platEncID, langID, string,
1276                                                  location=location))
1277            elif self.cur_token_ == ";":
1278                continue
1279            else:
1280                raise FeatureLibError('Expected "name"',
1281                                      self.cur_token_location_)
1282        self.expect_symbol_("}")
1283        for symtab in self.symbol_tables_:
1284            symtab.exit_scope()
1285        self.expect_symbol_(";")
1286        return block
1287
1288    def parse_cvParameters_(self, tag):
1289        assert self.cur_token_ == "cvParameters", self.cur_token_
1290        block = self.ast.NestedBlock(tag, self.cur_token_,
1291                                     location=self.cur_token_location_)
1292        self.expect_symbol_("{")
1293        for symtab in self.symbol_tables_:
1294            symtab.enter_scope()
1295
1296        statements = block.statements
1297        while self.next_token_ != "}" or self.cur_comments_:
1298            self.advance_lexer_(comments=True)
1299            if self.cur_token_type_ is Lexer.COMMENT:
1300                statements.append(self.ast.Comment(
1301                    self.cur_token_, location=self.cur_token_location_))
1302            elif self.is_cur_keyword_({"FeatUILabelNameID",
1303                                       "FeatUITooltipTextNameID",
1304                                       "SampleTextNameID",
1305                                       "ParamUILabelNameID"}):
1306                statements.append(self.parse_cvNameIDs_(tag, self.cur_token_))
1307            elif self.is_cur_keyword_("Character"):
1308                statements.append(self.parse_cvCharacter_(tag))
1309            elif self.cur_token_ == ";":
1310                continue
1311            else:
1312                raise FeatureLibError(
1313                    "Expected statement: got {} {}".format(
1314                        self.cur_token_type_, self.cur_token_),
1315                    self.cur_token_location_)
1316
1317        self.expect_symbol_("}")
1318        for symtab in self.symbol_tables_:
1319            symtab.exit_scope()
1320        self.expect_symbol_(";")
1321        return block
1322
1323    def parse_cvNameIDs_(self, tag, block_name):
1324        assert self.cur_token_ == block_name, self.cur_token_
1325        block = self.ast.NestedBlock(tag, block_name,
1326                                     location=self.cur_token_location_)
1327        self.expect_symbol_("{")
1328        for symtab in self.symbol_tables_:
1329            symtab.enter_scope()
1330        while self.next_token_ != "}" or self.cur_comments_:
1331            self.advance_lexer_(comments=True)
1332            if self.cur_token_type_ is Lexer.COMMENT:
1333                block.statements.append(self.ast.Comment(
1334                    self.cur_token_, location=self.cur_token_location_))
1335            elif self.is_cur_keyword_("name"):
1336                location = self.cur_token_location_
1337                platformID, platEncID, langID, string = self.parse_name_()
1338                block.statements.append(
1339                    self.ast.CVParametersNameStatement(
1340                        tag, platformID, platEncID, langID, string,
1341                        block_name, location=location))
1342            elif self.cur_token_ == ";":
1343                continue
1344            else:
1345                raise FeatureLibError('Expected "name"',
1346                                      self.cur_token_location_)
1347        self.expect_symbol_("}")
1348        for symtab in self.symbol_tables_:
1349            symtab.exit_scope()
1350        self.expect_symbol_(";")
1351        return block
1352
1353    def parse_cvCharacter_(self, tag):
1354        assert self.cur_token_ == "Character", self.cur_token_
1355        location, character = self.cur_token_location_, self.expect_decimal_or_hexadecimal_()
1356        self.expect_symbol_(";")
1357        if not (0xFFFFFF >= character >= 0):
1358            raise FeatureLibError("Character value must be between "
1359                                  "{:#x} and {:#x}".format(0, 0xFFFFFF),
1360                                  location)
1361        return self.ast.CharacterStatement(character, tag, location=location)
1362
1363    def parse_FontRevision_(self):
1364        assert self.cur_token_ == "FontRevision", self.cur_token_
1365        location, version = self.cur_token_location_, self.expect_float_()
1366        self.expect_symbol_(";")
1367        if version <= 0:
1368            raise FeatureLibError("Font revision numbers must be positive",
1369                                  location)
1370        return self.ast.FontRevisionStatement(version, location=location)
1371
1372    def parse_block_(self, block, vertical, stylisticset=None,
1373                     size_feature=False, cv_feature=None):
1374        self.expect_symbol_("{")
1375        for symtab in self.symbol_tables_:
1376            symtab.enter_scope()
1377
1378        statements = block.statements
1379        while self.next_token_ != "}" or self.cur_comments_:
1380            self.advance_lexer_(comments=True)
1381            if self.cur_token_type_ is Lexer.COMMENT:
1382                statements.append(self.ast.Comment(
1383                    self.cur_token_, location=self.cur_token_location_))
1384            elif self.cur_token_type_ is Lexer.GLYPHCLASS:
1385                statements.append(self.parse_glyphclass_definition_())
1386            elif self.is_cur_keyword_("anchorDef"):
1387                statements.append(self.parse_anchordef_())
1388            elif self.is_cur_keyword_({"enum", "enumerate"}):
1389                statements.append(self.parse_enumerate_(vertical=vertical))
1390            elif self.is_cur_keyword_("feature"):
1391                statements.append(self.parse_feature_reference_())
1392            elif self.is_cur_keyword_("ignore"):
1393                statements.append(self.parse_ignore_())
1394            elif self.is_cur_keyword_("language"):
1395                statements.append(self.parse_language_())
1396            elif self.is_cur_keyword_("lookup"):
1397                statements.append(self.parse_lookup_(vertical))
1398            elif self.is_cur_keyword_("lookupflag"):
1399                statements.append(self.parse_lookupflag_())
1400            elif self.is_cur_keyword_("markClass"):
1401                statements.append(self.parse_markClass_())
1402            elif self.is_cur_keyword_({"pos", "position"}):
1403                statements.append(
1404                    self.parse_position_(enumerated=False, vertical=vertical))
1405            elif self.is_cur_keyword_("script"):
1406                statements.append(self.parse_script_())
1407            elif (self.is_cur_keyword_({"sub", "substitute",
1408                                        "rsub", "reversesub"})):
1409                statements.append(self.parse_substitute_())
1410            elif self.is_cur_keyword_("subtable"):
1411                statements.append(self.parse_subtable_())
1412            elif self.is_cur_keyword_("valueRecordDef"):
1413                statements.append(self.parse_valuerecord_definition_(vertical))
1414            elif stylisticset and self.is_cur_keyword_("featureNames"):
1415                statements.append(self.parse_featureNames_(stylisticset))
1416            elif cv_feature and self.is_cur_keyword_("cvParameters"):
1417                statements.append(self.parse_cvParameters_(cv_feature))
1418            elif size_feature and self.is_cur_keyword_("parameters"):
1419                statements.append(self.parse_size_parameters_())
1420            elif size_feature and self.is_cur_keyword_("sizemenuname"):
1421                statements.append(self.parse_size_menuname_())
1422            elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in self.extensions:
1423                statements.append(self.extensions[self.cur_token_](self))
1424            elif self.cur_token_ == ";":
1425                continue
1426            else:
1427                raise FeatureLibError(
1428                    "Expected glyph class definition or statement: got {} {}".format(self.cur_token_type_, self.cur_token_),
1429                    self.cur_token_location_)
1430
1431        self.expect_symbol_("}")
1432        for symtab in self.symbol_tables_:
1433            symtab.exit_scope()
1434
1435        name = self.expect_name_()
1436        if name != block.name.strip():
1437            raise FeatureLibError("Expected \"%s\"" % block.name.strip(),
1438                                  self.cur_token_location_)
1439        self.expect_symbol_(";")
1440
1441        # A multiple substitution may have a single destination, in which case
1442        # it will look just like a single substitution. So if there are both
1443        # multiple and single substitutions, upgrade all the single ones to
1444        # multiple substitutions.
1445
1446        # Check if we have a mix of non-contextual singles and multiples.
1447        has_single = False
1448        has_multiple = False
1449        for s in statements:
1450            if isinstance(s, self.ast.SingleSubstStatement):
1451                has_single = not any([s.prefix, s.suffix, s.forceChain])
1452            elif isinstance(s, self.ast.MultipleSubstStatement):
1453                has_multiple = not any([s.prefix, s.suffix, s.forceChain])
1454
1455        # Upgrade all single substitutions to multiple substitutions.
1456        if has_single and has_multiple:
1457            for i, s in enumerate(statements):
1458                if isinstance(s, self.ast.SingleSubstStatement):
1459                    statements[i] = self.ast.MultipleSubstStatement(
1460                        s.prefix, s.glyphs[0].glyphSet()[0], s.suffix,
1461                        [r.glyphSet()[0] for r in s.replacements],
1462                        s.forceChain, location=s.location)
1463
1464    def is_cur_keyword_(self, k):
1465        if self.cur_token_type_ is Lexer.NAME:
1466            if isinstance(k, type("")):  # basestring is gone in Python3
1467                return self.cur_token_ == k
1468            else:
1469                return self.cur_token_ in k
1470        return False
1471
1472    def expect_class_name_(self):
1473        self.advance_lexer_()
1474        if self.cur_token_type_ is not Lexer.GLYPHCLASS:
1475            raise FeatureLibError("Expected @NAME", self.cur_token_location_)
1476        return self.cur_token_
1477
1478    def expect_cid_(self):
1479        self.advance_lexer_()
1480        if self.cur_token_type_ is Lexer.CID:
1481            return self.cur_token_
1482        raise FeatureLibError("Expected a CID", self.cur_token_location_)
1483
1484    def expect_filename_(self):
1485        self.advance_lexer_()
1486        if self.cur_token_type_ is not Lexer.FILENAME:
1487            raise FeatureLibError("Expected file name",
1488                                  self.cur_token_location_)
1489        return self.cur_token_
1490
1491    def expect_glyph_(self):
1492        self.advance_lexer_()
1493        if self.cur_token_type_ is Lexer.NAME:
1494            self.cur_token_ = self.cur_token_.lstrip("\\")
1495            if len(self.cur_token_) > 63:
1496                raise FeatureLibError(
1497                    "Glyph names must not be longer than 63 characters",
1498                    self.cur_token_location_)
1499            return self.cur_token_
1500        elif self.cur_token_type_ is Lexer.CID:
1501            return "cid%05d" % self.cur_token_
1502        raise FeatureLibError("Expected a glyph name or CID",
1503                              self.cur_token_location_)
1504
1505    def expect_markClass_reference_(self):
1506        name = self.expect_class_name_()
1507        mc = self.glyphclasses_.resolve(name)
1508        if mc is None:
1509            raise FeatureLibError("Unknown markClass @%s" % name,
1510                                  self.cur_token_location_)
1511        if not isinstance(mc, self.ast.MarkClass):
1512            raise FeatureLibError("@%s is not a markClass" % name,
1513                                  self.cur_token_location_)
1514        return mc
1515
1516    def expect_tag_(self):
1517        self.advance_lexer_()
1518        if self.cur_token_type_ is not Lexer.NAME:
1519            raise FeatureLibError("Expected a tag", self.cur_token_location_)
1520        if len(self.cur_token_) > 4:
1521            raise FeatureLibError("Tags can not be longer than 4 characters",
1522                                  self.cur_token_location_)
1523        return (self.cur_token_ + "    ")[:4]
1524
1525    def expect_script_tag_(self):
1526        tag = self.expect_tag_()
1527        if tag == "dflt":
1528            raise FeatureLibError(
1529                '"dflt" is not a valid script tag; use "DFLT" instead',
1530                self.cur_token_location_)
1531        return tag
1532
1533    def expect_language_tag_(self):
1534        tag = self.expect_tag_()
1535        if tag == "DFLT":
1536            raise FeatureLibError(
1537                '"DFLT" is not a valid language tag; use "dflt" instead',
1538                self.cur_token_location_)
1539        return tag
1540
1541    def expect_symbol_(self, symbol):
1542        self.advance_lexer_()
1543        if self.cur_token_type_ is Lexer.SYMBOL and self.cur_token_ == symbol:
1544            return symbol
1545        raise FeatureLibError("Expected '%s'" % symbol,
1546                              self.cur_token_location_)
1547
1548    def expect_keyword_(self, keyword):
1549        self.advance_lexer_()
1550        if self.cur_token_type_ is Lexer.NAME and self.cur_token_ == keyword:
1551            return self.cur_token_
1552        raise FeatureLibError("Expected \"%s\"" % keyword,
1553                              self.cur_token_location_)
1554
1555    def expect_name_(self):
1556        self.advance_lexer_()
1557        if self.cur_token_type_ is Lexer.NAME:
1558            return self.cur_token_
1559        raise FeatureLibError("Expected a name", self.cur_token_location_)
1560
1561    # TODO: Don't allow this method to accept hexadecimal values
1562    def expect_number_(self):
1563        self.advance_lexer_()
1564        if self.cur_token_type_ is Lexer.NUMBER:
1565            return self.cur_token_
1566        raise FeatureLibError("Expected a number", self.cur_token_location_)
1567
1568    def expect_float_(self):
1569        self.advance_lexer_()
1570        if self.cur_token_type_ is Lexer.FLOAT:
1571            return self.cur_token_
1572        raise FeatureLibError("Expected a floating-point number",
1573                              self.cur_token_location_)
1574
1575    # TODO: Don't allow this method to accept hexadecimal values
1576    def expect_decipoint_(self):
1577        if self.next_token_type_ == Lexer.FLOAT:
1578            return self.expect_float_()
1579        elif self.next_token_type_ is Lexer.NUMBER:
1580            return self.expect_number_() / 10
1581        else:
1582            raise FeatureLibError("Expected an integer or floating-point number",
1583                                  self.cur_token_location_)
1584
1585    def expect_decimal_or_hexadecimal_(self):
1586        # the lexer returns the same token type 'NUMBER' for either decimal or
1587        # hexadecimal integers, and casts them both to a `int` type, so it's
1588        # impossible to distinguish the two here. This method is implemented
1589        # the same as `expect_number_`, only it gives a more informative
1590        # error message
1591        self.advance_lexer_()
1592        if self.cur_token_type_ is Lexer.NUMBER:
1593            return self.cur_token_
1594        raise FeatureLibError("Expected a decimal or hexadecimal number",
1595                              self.cur_token_location_)
1596
1597    def expect_string_(self):
1598        self.advance_lexer_()
1599        if self.cur_token_type_ is Lexer.STRING:
1600            return self.cur_token_
1601        raise FeatureLibError("Expected a string", self.cur_token_location_)
1602
1603    def advance_lexer_(self, comments=False):
1604        if comments and self.cur_comments_:
1605            self.cur_token_type_ = Lexer.COMMENT
1606            self.cur_token_, self.cur_token_location_ = self.cur_comments_.pop(0)
1607            return
1608        else:
1609            self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
1610                self.next_token_type_, self.next_token_, self.next_token_location_)
1611        while True:
1612            try:
1613                (self.next_token_type_, self.next_token_,
1614                 self.next_token_location_) = next(self.lexer_)
1615            except StopIteration:
1616                self.next_token_type_, self.next_token_ = (None, None)
1617            if self.next_token_type_ != Lexer.COMMENT:
1618                break
1619            self.cur_comments_.append((self.next_token_, self.next_token_location_))
1620
1621    @staticmethod
1622    def reverse_string_(s):
1623        """'abc' --> 'cba'"""
1624        return ''.join(reversed(list(s)))
1625
1626    def make_cid_range_(self, location, start, limit):
1627        """(location, 999, 1001) --> ["cid00999", "cid01000", "cid01001"]"""
1628        result = list()
1629        if start > limit:
1630            raise FeatureLibError(
1631                "Bad range: start should be less than limit", location)
1632        for cid in range(start, limit + 1):
1633            result.append("cid%05d" % cid)
1634        return result
1635
1636    def make_glyph_range_(self, location, start, limit):
1637        """(location, "a.sc", "d.sc") --> ["a.sc", "b.sc", "c.sc", "d.sc"]"""
1638        result = list()
1639        if len(start) != len(limit):
1640            raise FeatureLibError(
1641                "Bad range: \"%s\" and \"%s\" should have the same length" %
1642                (start, limit), location)
1643
1644        rev = self.reverse_string_
1645        prefix = os.path.commonprefix([start, limit])
1646        suffix = rev(os.path.commonprefix([rev(start), rev(limit)]))
1647        if len(suffix) > 0:
1648            start_range = start[len(prefix):-len(suffix)]
1649            limit_range = limit[len(prefix):-len(suffix)]
1650        else:
1651            start_range = start[len(prefix):]
1652            limit_range = limit[len(prefix):]
1653
1654        if start_range >= limit_range:
1655            raise FeatureLibError(
1656                "Start of range must be smaller than its end",
1657                location)
1658
1659        uppercase = re.compile(r'^[A-Z]$')
1660        if uppercase.match(start_range) and uppercase.match(limit_range):
1661            for c in range(ord(start_range), ord(limit_range) + 1):
1662                result.append("%s%c%s" % (prefix, c, suffix))
1663            return result
1664
1665        lowercase = re.compile(r'^[a-z]$')
1666        if lowercase.match(start_range) and lowercase.match(limit_range):
1667            for c in range(ord(start_range), ord(limit_range) + 1):
1668                result.append("%s%c%s" % (prefix, c, suffix))
1669            return result
1670
1671        digits = re.compile(r'^[0-9]{1,3}$')
1672        if digits.match(start_range) and digits.match(limit_range):
1673            for i in range(int(start_range, 10), int(limit_range, 10) + 1):
1674                number = ("000" + str(i))[-len(start_range):]
1675                result.append("%s%s%s" % (prefix, number, suffix))
1676            return result
1677
1678        raise FeatureLibError("Bad range: \"%s-%s\"" % (start, limit),
1679                              location)
1680
1681
1682class SymbolTable(object):
1683    def __init__(self):
1684        self.scopes_ = [{}]
1685
1686    def enter_scope(self):
1687        self.scopes_.append({})
1688
1689    def exit_scope(self):
1690        self.scopes_.pop()
1691
1692    def define(self, name, item):
1693        self.scopes_[-1][name] = item
1694
1695    def resolve(self, name):
1696        for scope in reversed(self.scopes_):
1697            item = scope.get(name)
1698            if item:
1699                return item
1700        return None
1701