1from __future__ import print_function, division, absolute_import 2from __future__ import unicode_literals 3from fontTools.misc.py23 import * 4from fontTools.feaLib.error import FeatureLibError, IncludedFeaNotFound 5import re 6import os 7 8 9class Lexer(object): 10 NUMBER = "NUMBER" 11 FLOAT = "FLOAT" 12 STRING = "STRING" 13 NAME = "NAME" 14 FILENAME = "FILENAME" 15 GLYPHCLASS = "GLYPHCLASS" 16 CID = "CID" 17 SYMBOL = "SYMBOL" 18 COMMENT = "COMMENT" 19 NEWLINE = "NEWLINE" 20 ANONYMOUS_BLOCK = "ANONYMOUS_BLOCK" 21 22 CHAR_WHITESPACE_ = " \t" 23 CHAR_NEWLINE_ = "\r\n" 24 CHAR_SYMBOL_ = ",;:-+'{}[]<>()=" 25 CHAR_DIGIT_ = "0123456789" 26 CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef" 27 CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" 28 CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\" 29 CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-" 30 31 RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.\-]+$") 32 33 MODE_NORMAL_ = "NORMAL" 34 MODE_FILENAME_ = "FILENAME" 35 36 def __init__(self, text, filename): 37 self.filename_ = filename 38 self.line_ = 1 39 self.pos_ = 0 40 self.line_start_ = 0 41 self.text_ = text 42 self.text_length_ = len(text) 43 self.mode_ = Lexer.MODE_NORMAL_ 44 45 def __iter__(self): 46 return self 47 48 def next(self): # Python 2 49 return self.__next__() 50 51 def __next__(self): # Python 3 52 while True: 53 token_type, token, location = self.next_() 54 if token_type != Lexer.NEWLINE: 55 return (token_type, token, location) 56 57 def location_(self): 58 column = self.pos_ - self.line_start_ + 1 59 return (self.filename_ or "<features>", self.line_, column) 60 61 def next_(self): 62 self.scan_over_(Lexer.CHAR_WHITESPACE_) 63 location = self.location_() 64 start = self.pos_ 65 text = self.text_ 66 limit = len(text) 67 if start >= limit: 68 raise StopIteration() 69 cur_char = text[start] 70 next_char = text[start + 1] if start + 1 < limit else None 71 72 if cur_char == "\n": 73 self.pos_ += 1 74 self.line_ += 1 75 self.line_start_ = self.pos_ 76 return (Lexer.NEWLINE, None, location) 77 if cur_char == "\r": 78 self.pos_ += (2 if next_char == "\n" else 1) 79 self.line_ += 1 80 self.line_start_ = self.pos_ 81 return (Lexer.NEWLINE, None, location) 82 if cur_char == "#": 83 self.scan_until_(Lexer.CHAR_NEWLINE_) 84 return (Lexer.COMMENT, text[start:self.pos_], location) 85 86 if self.mode_ is Lexer.MODE_FILENAME_: 87 if cur_char != "(": 88 raise FeatureLibError("Expected '(' before file name", 89 location) 90 self.scan_until_(")") 91 cur_char = text[self.pos_] if self.pos_ < limit else None 92 if cur_char != ")": 93 raise FeatureLibError("Expected ')' after file name", 94 location) 95 self.pos_ += 1 96 self.mode_ = Lexer.MODE_NORMAL_ 97 return (Lexer.FILENAME, text[start + 1:self.pos_ - 1], location) 98 99 if cur_char == "\\" and next_char in Lexer.CHAR_DIGIT_: 100 self.pos_ += 1 101 self.scan_over_(Lexer.CHAR_DIGIT_) 102 return (Lexer.CID, int(text[start + 1:self.pos_], 10), location) 103 if cur_char == "@": 104 self.pos_ += 1 105 self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_) 106 glyphclass = text[start + 1:self.pos_] 107 if len(glyphclass) < 1: 108 raise FeatureLibError("Expected glyph class name", location) 109 if len(glyphclass) > 63: 110 raise FeatureLibError( 111 "Glyph class names must not be longer than 63 characters", 112 location) 113 if not Lexer.RE_GLYPHCLASS.match(glyphclass): 114 raise FeatureLibError( 115 "Glyph class names must consist of letters, digits, " 116 "underscore, period or hyphen", location) 117 return (Lexer.GLYPHCLASS, glyphclass, location) 118 if cur_char in Lexer.CHAR_NAME_START_: 119 self.pos_ += 1 120 self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_) 121 token = text[start:self.pos_] 122 if token == "include": 123 self.mode_ = Lexer.MODE_FILENAME_ 124 return (Lexer.NAME, token, location) 125 if cur_char == "0" and next_char in "xX": 126 self.pos_ += 2 127 self.scan_over_(Lexer.CHAR_HEXDIGIT_) 128 return (Lexer.NUMBER, int(text[start:self.pos_], 16), location) 129 if cur_char in Lexer.CHAR_DIGIT_: 130 self.scan_over_(Lexer.CHAR_DIGIT_) 131 if self.pos_ >= limit or text[self.pos_] != ".": 132 return (Lexer.NUMBER, int(text[start:self.pos_], 10), location) 133 self.scan_over_(".") 134 self.scan_over_(Lexer.CHAR_DIGIT_) 135 return (Lexer.FLOAT, float(text[start:self.pos_]), location) 136 if cur_char == "-" and next_char in Lexer.CHAR_DIGIT_: 137 self.pos_ += 1 138 self.scan_over_(Lexer.CHAR_DIGIT_) 139 if self.pos_ >= limit or text[self.pos_] != ".": 140 return (Lexer.NUMBER, int(text[start:self.pos_], 10), location) 141 self.scan_over_(".") 142 self.scan_over_(Lexer.CHAR_DIGIT_) 143 return (Lexer.FLOAT, float(text[start:self.pos_]), location) 144 if cur_char in Lexer.CHAR_SYMBOL_: 145 self.pos_ += 1 146 return (Lexer.SYMBOL, cur_char, location) 147 if cur_char == '"': 148 self.pos_ += 1 149 self.scan_until_('"') 150 if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"': 151 self.pos_ += 1 152 # strip newlines embedded within a string 153 string = re.sub("[\r\n]", "", text[start + 1:self.pos_ - 1]) 154 return (Lexer.STRING, string, location) 155 else: 156 raise FeatureLibError("Expected '\"' to terminate string", 157 location) 158 raise FeatureLibError("Unexpected character: %r" % cur_char, 159 location) 160 161 def scan_over_(self, valid): 162 p = self.pos_ 163 while p < self.text_length_ and self.text_[p] in valid: 164 p += 1 165 self.pos_ = p 166 167 def scan_until_(self, stop_at): 168 p = self.pos_ 169 while p < self.text_length_ and self.text_[p] not in stop_at: 170 p += 1 171 self.pos_ = p 172 173 def scan_anonymous_block(self, tag): 174 location = self.location_() 175 tag = tag.strip() 176 self.scan_until_(Lexer.CHAR_NEWLINE_) 177 self.scan_over_(Lexer.CHAR_NEWLINE_) 178 regexp = r'}\s*' + tag + r'\s*;' 179 split = re.split(regexp, self.text_[self.pos_:], maxsplit=1) 180 if len(split) != 2: 181 raise FeatureLibError( 182 "Expected '} %s;' to terminate anonymous block" % tag, 183 location) 184 self.pos_ += len(split[0]) 185 return (Lexer.ANONYMOUS_BLOCK, split[0], location) 186 187 188class IncludingLexer(object): 189 def __init__(self, featurefile): 190 self.lexers_ = [self.make_lexer_(featurefile)] 191 self.featurefilepath = self.lexers_[0].filename_ 192 193 def __iter__(self): 194 return self 195 196 def next(self): # Python 2 197 return self.__next__() 198 199 def __next__(self): # Python 3 200 while self.lexers_: 201 lexer = self.lexers_[-1] 202 try: 203 token_type, token, location = next(lexer) 204 except StopIteration: 205 self.lexers_.pop() 206 continue 207 if token_type is Lexer.NAME and token == "include": 208 fname_type, fname_token, fname_location = lexer.next() 209 if fname_type is not Lexer.FILENAME: 210 raise FeatureLibError("Expected file name", fname_location) 211 #semi_type, semi_token, semi_location = lexer.next() 212 #if semi_type is not Lexer.SYMBOL or semi_token != ";": 213 # raise FeatureLibError("Expected ';'", semi_location) 214 if os.path.isabs(fname_token): 215 path = fname_token 216 else: 217 if self.featurefilepath is not None: 218 curpath = os.path.dirname(self.featurefilepath) 219 else: 220 # if the IncludingLexer was initialized from an in-memory 221 # file-like stream, it doesn't have a 'name' pointing to 222 # its filesystem path, therefore we fall back to using the 223 # current working directory to resolve relative includes 224 curpath = os.getcwd() 225 path = os.path.join(curpath, fname_token) 226 if len(self.lexers_) >= 5: 227 raise FeatureLibError("Too many recursive includes", 228 fname_location) 229 try: 230 self.lexers_.append(self.make_lexer_(path)) 231 except IOError as err: 232 # FileNotFoundError does not exist on Python < 3.3 233 import errno 234 if err.errno == errno.ENOENT: 235 raise IncludedFeaNotFound(fname_token, fname_location) 236 raise # pragma: no cover 237 else: 238 return (token_type, token, location) 239 raise StopIteration() 240 241 @staticmethod 242 def make_lexer_(file_or_path): 243 if hasattr(file_or_path, "read"): 244 fileobj, closing = file_or_path, False 245 else: 246 filename, closing = file_or_path, True 247 fileobj = open(filename, "r", encoding="utf-8") 248 data = fileobj.read() 249 filename = getattr(fileobj, "name", None) 250 if closing: 251 fileobj.close() 252 return Lexer(data, filename) 253 254 def scan_anonymous_block(self, tag): 255 return self.lexers_[-1].scan_anonymous_block(tag) 256 257 258class NonIncludingLexer(IncludingLexer): 259 """Lexer that does not follow `include` statements, emits them as-is.""" 260 def __next__(self): # Python 3 261 return next(self.lexers_[0]) 262