1from __future__ import print_function, division, absolute_import
2from __future__ import unicode_literals
3from fontTools.misc.py23 import *
4from fontTools.feaLib.error import FeatureLibError, IncludedFeaNotFound
5import re
6import os
7
8
class Lexer(object):
    """Tokenizer for OpenType feature files (.fea).

    Iterating over a Lexer yields ``(token_type, token, location)`` tuples,
    where ``location`` is ``(filename or "<features>", line, column)``.
    NEWLINE tokens are consumed internally by ``__next__`` and never
    surfaced to callers; COMMENT tokens are.
    """

    # Token type tags.
    NUMBER = "NUMBER"
    FLOAT = "FLOAT"
    STRING = "STRING"
    NAME = "NAME"
    FILENAME = "FILENAME"
    GLYPHCLASS = "GLYPHCLASS"
    CID = "CID"
    SYMBOL = "SYMBOL"
    COMMENT = "COMMENT"
    NEWLINE = "NEWLINE"
    ANONYMOUS_BLOCK = "ANONYMOUS_BLOCK"

    # Character classes driving the scanner.
    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_SYMBOL_ = ",;:-+'{}[]<>()="
    CHAR_DIGIT_ = "0123456789"
    CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef"
    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\"
    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-"

    RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.\-]+$")

    # After the "include" keyword the lexer switches to filename mode, in
    # which the next token must be a parenthesized file name.
    MODE_NORMAL_ = "NORMAL"
    MODE_FILENAME_ = "FILENAME"

    def __init__(self, text, filename):
        """Create a lexer over ``text``.

        Args:
            text: the full feature-file source as a string.
            filename: path used in locations/error messages, or None
                (reported as "<features>").
        """
        self.filename_ = filename
        self.line_ = 1
        self.pos_ = 0
        self.line_start_ = 0
        self.text_ = text
        self.text_length_ = len(text)
        self.mode_ = Lexer.MODE_NORMAL_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        # Skip NEWLINE tokens; everything else is returned to the caller.
        while True:
            token_type, token, location = self.next_()
            if token_type != Lexer.NEWLINE:
                return (token_type, token, location)

    def location_(self):
        """Return the (filename, line, column) of the current position."""
        column = self.pos_ - self.line_start_ + 1
        return (self.filename_ or "<features>", self.line_, column)

    def next_(self):
        """Scan and return the next token, including NEWLINE tokens.

        Raises:
            StopIteration: at end of input.
            FeatureLibError: on malformed input.
        """
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        location = self.location_()
        start = self.pos_
        text = self.text_
        limit = len(text)
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        # next_char is None at end of input.  Every membership test on it
        # below must be None-safe: ``None in "<string>"`` raises TypeError,
        # which previously crashed the lexer on input ending in "0", "-"
        # or "\\".
        next_char = text[start + 1] if start + 1 < limit else None
        next_is_digit = (next_char is not None and
                         next_char in Lexer.CHAR_DIGIT_)

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            # Treat "\r\n" as a single newline.
            self.pos_ += (2 if next_char == "\n" else 1)
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "#":
            # Comments run to the end of the line.
            self.scan_until_(Lexer.CHAR_NEWLINE_)
            return (Lexer.COMMENT, text[start:self.pos_], location)

        if self.mode_ is Lexer.MODE_FILENAME_:
            # Only entered right after an "include" NAME token.
            if cur_char != "(":
                raise FeatureLibError("Expected '(' before file name",
                                      location)
            self.scan_until_(")")
            cur_char = text[self.pos_] if self.pos_ < limit else None
            if cur_char != ")":
                raise FeatureLibError("Expected ')' after file name",
                                      location)
            self.pos_ += 1
            self.mode_ = Lexer.MODE_NORMAL_
            return (Lexer.FILENAME, text[start + 1:self.pos_ - 1], location)

        if cur_char == "\\" and next_is_digit:
            # "\123" is a CID (glyph referenced by number).
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.CID, int(text[start + 1:self.pos_], 10), location)
        if cur_char == "@":
            # "@name" is a glyph class reference.
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            glyphclass = text[start + 1:self.pos_]
            if len(glyphclass) < 1:
                raise FeatureLibError("Expected glyph class name", location)
            if len(glyphclass) > 63:
                raise FeatureLibError(
                    "Glyph class names must not be longer than 63 characters",
                    location)
            if not Lexer.RE_GLYPHCLASS.match(glyphclass):
                raise FeatureLibError(
                    "Glyph class names must consist of letters, digits, "
                    "underscore, period or hyphen", location)
            return (Lexer.GLYPHCLASS, glyphclass, location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start:self.pos_]
            if token == "include":
                # Switch modes so the next token is parsed as a file name.
                self.mode_ = Lexer.MODE_FILENAME_
            return (Lexer.NAME, token, location)
        if cur_char == "0" and next_char in ("x", "X"):
            # Hexadecimal integer literal; tuple membership is None-safe.
            self.pos_ += 2
            self.scan_over_(Lexer.CHAR_HEXDIGIT_)
            return (Lexer.NUMBER, int(text[start:self.pos_], 16), location)
        if cur_char in Lexer.CHAR_DIGIT_:
            return self.scan_number_(start, location)
        if cur_char == "-" and next_is_digit:
            self.pos_ += 1  # consume the sign, then scan like a number
            return self.scan_number_(start, location)
        if cur_char in Lexer.CHAR_SYMBOL_:
            self.pos_ += 1
            return (Lexer.SYMBOL, cur_char, location)
        if cur_char == '"':
            self.pos_ += 1
            self.scan_until_('"')
            if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
                self.pos_ += 1
                # strip newlines embedded within a string
                string = re.sub("[\r\n]", "", text[start + 1:self.pos_ - 1])
                return (Lexer.STRING, string, location)
            else:
                raise FeatureLibError("Expected '\"' to terminate string",
                                      location)
        raise FeatureLibError("Unexpected character: %r" % cur_char,
                              location)

    def scan_number_(self, start, location):
        """Scan the decimal tail shared by NUMBER and FLOAT tokens.

        ``self.pos_`` must already point past any leading "-" sign;
        ``start`` is the position of the first character of the token.
        """
        self.scan_over_(Lexer.CHAR_DIGIT_)
        text = self.text_
        if self.pos_ >= self.text_length_ or text[self.pos_] != ".":
            return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
        self.scan_over_(".")
        self.scan_over_(Lexer.CHAR_DIGIT_)
        return (Lexer.FLOAT, float(text[start:self.pos_]), location)

    def scan_over_(self, valid):
        """Advance ``pos_`` while the current character is in ``valid``."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        """Advance ``pos_`` until a character in ``stop_at`` (or EOF)."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p

    def scan_anonymous_block(self, tag):
        """Return everything up to "} <tag>;" as one ANONYMOUS_BLOCK token.

        Called by the parser after it has read "anonymous <tag> {"; the
        remainder of the current line is discarded first.
        """
        location = self.location_()
        tag = tag.strip()
        self.scan_until_(Lexer.CHAR_NEWLINE_)
        self.scan_over_(Lexer.CHAR_NEWLINE_)
        # re.escape so a tag containing regex metacharacters (e.g. ".")
        # only matches itself in the terminator pattern.
        regexp = r'}\s*' + re.escape(tag) + r'\s*;'
        split = re.split(regexp, self.text_[self.pos_:], maxsplit=1)
        if len(split) != 2:
            raise FeatureLibError(
                "Expected '} %s;' to terminate anonymous block" % tag,
                location)
        self.pos_ += len(split[0])
        return (Lexer.ANONYMOUS_BLOCK, split[0], location)
186
187
class IncludingLexer(object):
    """Lexer that transparently follows ``include(...)`` statements.

    Keeps a stack of :class:`Lexer` objects, one per file currently being
    read; tokens are drawn from the innermost (most recently included)
    file until it is exhausted, then from the including file again.
    """

    def __init__(self, featurefile):
        """``featurefile`` is a filesystem path or an object with .read()."""
        self.lexers_ = [self.make_lexer_(featurefile)]
        # None when built from an in-memory stream without a 'name'.
        self.featurefilepath = self.lexers_[0].filename_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        while self.lexers_:
            lexer = self.lexers_[-1]
            try:
                token_type, token, location = next(lexer)
            except StopIteration:
                # Current file exhausted; resume the including file.
                self.lexers_.pop()
                continue
            if token_type is Lexer.NAME and token == "include":
                fname_type, fname_token, fname_location = next(lexer)
                if fname_type is not Lexer.FILENAME:
                    raise FeatureLibError("Expected file name", fname_location)
                # NOTE(review): a terminating ';' after the file name is
                # deliberately not enforced here.
                if os.path.isabs(fname_token):
                    path = fname_token
                else:
                    if self.featurefilepath is not None:
                        curpath = os.path.dirname(self.featurefilepath)
                    else:
                        # if the IncludingLexer was initialized from an
                        # in-memory file-like stream, it doesn't have a 'name'
                        # pointing to its filesystem path, therefore we fall
                        # back to using the current working directory to
                        # resolve relative includes
                        curpath = os.getcwd()
                    path = os.path.join(curpath, fname_token)
                if len(self.lexers_) >= 5:
                    # Guards against include cycles and runaway nesting.
                    raise FeatureLibError("Too many recursive includes",
                                          fname_location)
                try:
                    self.lexers_.append(self.make_lexer_(path))
                except IOError as err:
                    # FileNotFoundError does not exist on Python < 3.3
                    import errno
                    if err.errno == errno.ENOENT:
                        raise IncludedFeaNotFound(fname_token, fname_location)
                    raise  # pragma: no cover
            else:
                return (token_type, token, location)
        raise StopIteration()

    @staticmethod
    def make_lexer_(file_or_path):
        """Build a Lexer from a filesystem path or a readable file object.

        A file object passed by the caller is left open; a file opened
        here is always closed, even if reading it fails.
        """
        if hasattr(file_or_path, "read"):
            fileobj, closing = file_or_path, False
        else:
            filename, closing = file_or_path, True
            fileobj = open(filename, "r", encoding="utf-8")
        try:
            data = fileobj.read()
            filename = getattr(fileobj, "name", None)
        finally:
            # Close in a finally block so the handle cannot leak when
            # read() raises (e.g. on a decoding error).
            if closing:
                fileobj.close()
        return Lexer(data, filename)

    def scan_anonymous_block(self, tag):
        # Anonymous blocks never span include boundaries, so delegate to
        # the innermost lexer.
        return self.lexers_[-1].scan_anonymous_block(tag)
256
257
class NonIncludingLexer(IncludingLexer):
    """Lexer that does not follow `include` statements, emits them as-is."""

    def __next__(self):  # Python 3
        # Always read from the root file's lexer; "include" statements come
        # through as ordinary NAME/FILENAME tokens instead of being expanded.
        return self.lexers_[0].__next__()
262