1from __future__ import print_function, division, absolute_import
2from __future__ import unicode_literals
3from fontTools.voltLib.error import VoltLibError
4
class Lexer(object):
    """Iterator that splits a text into NUMBER, STRING and NAME tokens.

    Iterating over a ``Lexer`` yields ``(token_type, token, location)``
    tuples, where ``location`` is ``(filename, line, column)`` with 1-based
    line and column numbers.  NEWLINE tokens are produced internally by
    :meth:`next_` (to keep line tracking correct) but are filtered out by
    the iterator protocol methods.

    Malformed input (an unterminated string or an unexpected character)
    raises ``VoltLibError`` carrying the offending location.
    """

    # Token types.
    NUMBER = "NUMBER"
    STRING = "STRING"
    NAME = "NAME"
    NEWLINE = "NEWLINE"

    # Character classes used by the scanner.
    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_DIGIT_ = "0123456789"
    CHAR_UC_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    CHAR_LC_LETTER_ = "abcdefghijklmnopqrstuvwxyz"
    CHAR_UNDERSCORE_ = "_"
    CHAR_PERIOD_ = "."
    CHAR_NAME_START_ = CHAR_UC_LETTER_ + CHAR_LC_LETTER_ + CHAR_PERIOD_ + \
        CHAR_UNDERSCORE_
    CHAR_NAME_CONTINUATION_ = CHAR_NAME_START_ + CHAR_DIGIT_

    def __init__(self, text, filename):
        """Create a lexer over ``text``; ``filename`` is only used to build
        token locations."""
        self.filename_ = filename
        self.line_ = 1           # 1-based number of the current line
        self.pos_ = 0            # index into text_ of the next unread char
        self.line_start_ = 0     # index into text_ where current line began
        self.text_ = text
        self.text_length_ = len(text)

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token that is not a NEWLINE.

        Raises ``StopIteration`` (propagated from :meth:`next_`) once the
        end of the text has been reached.
        """
        while True:
            token_type, token, location = self.next_()
            if token_type != Lexer.NEWLINE:
                return (token_type, token, location)

    def next_(self):
        """Scan and return the next raw token, NEWLINE tokens included.

        Returns:
            A ``(token_type, token, location)`` tuple.  ``token`` is
            ``None`` for NEWLINE, an ``int`` for NUMBER, and a string for
            STRING (without the surrounding quotes) and NAME.

        Raises:
            StopIteration: when no input is left.
            VoltLibError: on an unterminated string or on a character that
                cannot start any token.
        """
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        column = self.pos_ - self.line_start_ + 1
        location = (self.filename_, self.line_, column)
        start = self.pos_
        text = self.text_
        limit = self.text_length_
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        # One character of lookahead; None when cur_char is the last one.
        next_char = text[start + 1] if start + 1 < limit else None

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            # Treat "\r\n" as a single line break.
            self.pos_ += (2 if next_char == "\n" else 1)
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == '"':
            self.pos_ += 1
            # Strings may not span lines: stop at the closing quote or at
            # the first line break, whichever comes first.
            self.scan_until_('"\r\n')
            if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
                self.pos_ += 1
                return (Lexer.STRING, text[start + 1:self.pos_ - 1], location)
            else:
                raise VoltLibError("Expected '\"' to terminate string",
                                   location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start:self.pos_]
            return (Lexer.NAME, token, location)
        if cur_char in Lexer.CHAR_DIGIT_:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
        # next_char is None when "-" is the very last character; testing
        # ``None in <str>`` would raise TypeError, so check for it first and
        # let the input fall through to the VoltLibError below.
        if (cur_char == "-" and next_char is not None and
                next_char in Lexer.CHAR_DIGIT_):
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
        raise VoltLibError("Unexpected character: '%s'" % cur_char,
                           location)

    def scan_over_(self, valid):
        """Advance ``pos_`` past every consecutive character in ``valid``."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        """Advance ``pos_`` to the first character found in ``stop_at``, or
        to the end of the text if there is none."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p
99