"""psLib -- a minimal PostScript interpreter, just enough to parse Type 1 fonts.

The public entry point is suckfont(), which interprets a PostScript font
program and returns the font dictionary converted to plain Python objects.
"""

from fontTools.misc.py23 import bytechr, byteord, bytesjoin, tobytes, tostr
from fontTools.misc import eexec
from .psOperators import (
    PSOperators,
    ps_StandardEncoding,
    ps_array,
    ps_boolean,
    ps_dict,
    ps_integer,
    ps_literal,
    ps_mark,
    ps_name,
    ps_operator,
    ps_procedure,
    ps_procmark,
    ps_real,
    ps_string,
)
import re
from collections.abc import Callable
from string import whitespace
import logging


log = logging.getLogger(__name__)

ps_special = b'()<>[]{}%'  # / is one too, but we take care of that one differently

skipwhiteRE = re.compile(bytesjoin([b"[", whitespace, b"]*"]))
endofthingPat = bytesjoin([b"[^][(){}<>/%", whitespace, b"]*"])
endofthingRE = re.compile(endofthingPat)
commentRE = re.compile(b"%[^\n\r]*")

# XXX This not entirely correct as it doesn't allow *nested* embedded parens:
stringPat = br"""
    \(
        (
            (
                [^()]*  \   [()]
            )
            |
            (
                [^()]*  \(  [^()]*  \)
            )
        )*
        [^()]*
    \)
"""
# The pattern above is written with layout whitespace for readability;
# collapse it before compiling (bytes patterns have no re.VERBOSE-style use here).
stringPat = b"".join(stringPat.split())
stringRE = re.compile(stringPat)

hexstringRE = re.compile(bytesjoin([b"<[", whitespace, b"0-9A-Fa-f]*>"]))


class PSTokenError(Exception):
    """Raised when the tokenizer hits malformed PostScript input."""
    pass


class PSError(Exception):
    """Raised for PostScript runtime errors (e.g. name lookup failure,
    stack underflow, typecheck)."""
    pass


class PSTokenizer(object):
    """Split a PostScript byte buffer into (tokentype, token) pairs.

    Operates on an in-memory bytes buffer with a current read position,
    and knows how to switch in and out of eexec-encrypted sections
    (see starteexec()/stopeexec()).
    """

    def __init__(self, buf=b'', encoding="ascii"):
        # Force self.buf to be a byte string
        buf = tobytes(buf)
        self.buf = buf
        self.len = len(buf)
        self.pos = 0
        self.closed = False
        self.encoding = encoding

    def read(self, n=-1):
        """Read at most 'n' bytes from the buffer, or less if the read
        hits EOF before obtaining 'n' bytes.
        If 'n' is negative or omitted, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos + n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def close(self):
        """Drop the buffer; any further read() raises ValueError."""
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def getnexttoken(self,
            # localize some stuff, for performance
            len=len,
            ps_special=ps_special,
            stringmatch=stringRE.match,
            hexstringmatch=hexstringRE.match,
            commentmatch=commentRE.match,
            endmatch=endofthingRE.match):
        """Return the next (tokentype, token) pair, or (None, None) at EOF.

        'tokentype' is the name of the PSInterpreter handler method for the
        token ('do_special', 'do_comment', 'do_string', 'do_hexstring',
        'do_literal'), or the empty string for ordinary tokens (numbers,
        executable names).  The token itself is returned as a str, decoded
        with self.encoding.

        Raises PSTokenError on malformed input.
        """
        self.skipwhite()
        if self.pos >= self.len:
            return None, None
        pos = self.pos
        buf = self.buf
        # buf[pos] is an int on py3; normalize to a length-1 bytes object.
        char = bytechr(byteord(buf[pos]))
        if char in ps_special:
            if char in b'{}[]':
                tokentype = 'do_special'
                token = char
            elif char == b'%':
                tokentype = 'do_comment'
                _, nextpos = commentmatch(buf, pos).span()
                token = buf[pos:nextpos]
            elif char == b'(':
                tokentype = 'do_string'
                m = stringmatch(buf, pos)
                if m is None:
                    raise PSTokenError('bad string at character %d' % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            elif char == b'<':
                tokentype = 'do_hexstring'
                m = hexstringmatch(buf, pos)
                if m is None:
                    raise PSTokenError('bad hexstring at character %d' % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            else:
                raise PSTokenError('bad token at character %d' % pos)
        else:
            if char == b'/':
                # literal name: the '/' itself is kept as part of the token
                tokentype = 'do_literal'
                m = endmatch(buf, pos + 1)
            else:
                tokentype = ''
                m = endmatch(buf, pos)
            if m is None:
                raise PSTokenError('bad token at character %d' % pos)
            _, nextpos = m.span()
            token = buf[pos:nextpos]
        self.pos = pos + len(token)
        token = tostr(token, encoding=self.encoding)
        return tokentype, token

    def skipwhite(self, whitematch=skipwhiteRE.match):
        """Advance the read position past any run of whitespace."""
        _, nextpos = whitematch(self.buf, self.pos).span()
        self.pos = nextpos

    def starteexec(self):
        """Switch to the eexec-decrypted view of the remainder of the buffer.

        The original (encrypted) tail is stashed in self.dirtybuf so that
        stopeexec() can restore it.  The first 4 decrypted bytes are random
        padding prescribed by the Type 1 format, hence pos restarts at 4.
        """
        self.pos = self.pos + 1
        self.dirtybuf = self.buf[self.pos:]
        # 55665 is the standard eexec encryption key; the second return
        # value (the final decryption state) is not needed here.
        self.buf, _ = eexec.decrypt(self.dirtybuf, 55665)
        self.len = len(self.buf)
        self.pos = 4

    def stopeexec(self):
        """Restore the un-decrypted buffer saved by starteexec()."""
        if not hasattr(self, 'dirtybuf'):
            return
        self.buf = self.dirtybuf
        del self.dirtybuf


class PSInterpreter(PSOperators):
    """A bare-bones PostScript interpreter.

    Implements only what is needed to execute Type 1 font programs; the
    actual 'ps_*' operator implementations live in PSOperators.
    """

    def __init__(self, encoding="ascii"):
        systemdict = {}
        userdict = {}
        self.encoding = encoding
        self.dictstack = [systemdict, userdict]
        self.stack = []
        self.proclevel = 0
        self.procmark = ps_procmark()
        self.fillsystemdict()

    def fillsystemdict(self):
        """Populate systemdict with built-in operators and constants."""
        systemdict = self.dictstack[0]
        systemdict['['] = systemdict['mark'] = self.mark = ps_mark()
        systemdict[']'] = ps_operator(']', self.do_makearray)
        systemdict['true'] = ps_boolean(1)
        systemdict['false'] = ps_boolean(0)
        systemdict['StandardEncoding'] = ps_array(ps_StandardEncoding)
        systemdict['FontDirectory'] = ps_dict({})
        self.suckoperators(systemdict, self.__class__)

    def suckoperators(self, systemdict, klass):
        """Register every callable 'ps_*' method of klass (and, recursively,
        its base classes) as an operator, keyed by the name minus 'ps_'."""
        for name in dir(klass):
            attr = getattr(self, name)
            if isinstance(attr, Callable) and name[:3] == 'ps_':
                name = name[3:]
                systemdict[name] = ps_operator(name, attr)
        for baseclass in klass.__bases__:
            self.suckoperators(systemdict, baseclass)

    def interpret(self, data, getattr=getattr):
        """Tokenize and execute 'data' (PostScript source, bytes or str).

        On any error, a 100-byte window of the buffer around the current
        position is logged at DEBUG level before the exception propagates.
        """
        tokenizer = self.tokenizer = PSTokenizer(data, self.encoding)
        getnexttoken = tokenizer.getnexttoken
        do_token = self.do_token
        handle_object = self.handle_object
        try:
            while 1:
                tokentype, token = getnexttoken()
                if not token:
                    break
                if tokentype:
                    handler = getattr(self, tokentype)
                    object = handler(token)
                else:
                    object = do_token(token)
                if object is not None:
                    handle_object(object)
            tokenizer.close()
            self.tokenizer = None
        except:
            # Deliberately broad: log context for *any* failure, then re-raise.
            if self.tokenizer is not None:
                log.debug(
                    'ps error:\n'
                    '- - - - - - -\n'
                    '%s\n'
                    '>>>\n'
                    '%s\n'
                    '- - - - - - -',
                    self.tokenizer.buf[self.tokenizer.pos-50:self.tokenizer.pos],
                    self.tokenizer.buf[self.tokenizer.pos:self.tokenizer.pos+50])
            raise

    def handle_object(self, object):
        """Execute or push one parsed object, per PostScript semantics.

        Inside a procedure definition (proclevel > 0), and for literals and
        procedure bodies, objects are simply pushed; executable names are
        resolved and their values executed.
        """
        if not (self.proclevel or object.literal or object.type == 'proceduretype'):
            if object.type != 'operatortype':
                object = self.resolve_name(object.value)
            if object.literal:
                self.push(object)
            else:
                if object.type == 'proceduretype':
                    self.call_procedure(object)
                else:
                    object.function()
        else:
            self.push(object)

    def call_procedure(self, proc):
        """Execute each element of a procedure in order."""
        handle_object = self.handle_object
        for item in proc.value:
            handle_object(item)

    def resolve_name(self, name):
        """Look up 'name' in the dictionary stack, innermost dict first.

        Raises PSError if the name is not defined anywhere.
        """
        dictstack = self.dictstack
        for i in range(len(dictstack)-1, -1, -1):
            if name in dictstack[i]:
                return dictstack[i][name]
        raise PSError('name error: ' + str(name))

    def do_token(self, token,
            int=int,
            float=float,
            ps_name=ps_name,
            ps_integer=ps_integer,
            ps_real=ps_real):
        """Convert an ordinary token to a ps_integer, ps_real or ps_name.

        Also handles PostScript radix numbers of the form 'base#digits'
        (e.g. '8#40'); anything unparsable as a number becomes a ps_name.
        """
        try:
            num = int(token)
        except (ValueError, OverflowError):
            try:
                num = float(token)
            except (ValueError, OverflowError):
                if '#' in token:
                    hashpos = token.find('#')
                    try:
                        base = int(token[:hashpos])
                        num = int(token[hashpos+1:], base)
                    except (ValueError, OverflowError):
                        return ps_name(token)
                    else:
                        return ps_integer(num)
                else:
                    return ps_name(token)
            else:
                return ps_real(num)
        else:
            return ps_integer(num)

    def do_comment(self, token):
        """Comments produce no object."""
        pass

    def do_literal(self, token):
        """Strip the leading '/' and wrap the name as a literal."""
        return ps_literal(token[1:])

    def do_string(self, token):
        """Strip the surrounding parens and wrap as a ps_string."""
        return ps_string(token[1:-1])

    def do_hexstring(self, token):
        """Decode a <...> hex string token into a ps_string.

        Embedded whitespace is ignored; an odd number of hex digits is
        padded with a trailing '0', as the PostScript spec prescribes.
        """
        hexStr = "".join(token[1:-1].split())
        if len(hexStr) % 2:
            hexStr = hexStr + '0'
        cleanstr = []
        for i in range(0, len(hexStr), 2):
            cleanstr.append(chr(int(hexStr[i:i+2], 16)))
        cleanstr = "".join(cleanstr)
        return ps_string(cleanstr)

    def do_special(self, token):
        """Handle the structural tokens '{', '}', '[' and ']'."""
        if token == '{':
            self.proclevel = self.proclevel + 1
            return self.procmark
        elif token == '}':
            # Pop everything back to the matching procmark, in reverse order.
            proc = []
            while 1:
                topobject = self.pop()
                if topobject == self.procmark:
                    break
                proc.append(topobject)
            self.proclevel = self.proclevel - 1
            proc.reverse()
            return ps_procedure(proc)
        elif token == '[':
            return self.mark
        elif token == ']':
            # ']' is an executable name bound to do_makearray in systemdict.
            return ps_name(']')
        else:
            raise PSTokenError('huh?')

    def push(self, object):
        """Push an object onto the operand stack."""
        self.stack.append(object)

    def pop(self, *types):
        """Pop and return the top of the operand stack.

        If 'types' is given, the object's type must be among them;
        raises PSError on underflow or typecheck failure.
        """
        stack = self.stack
        if not stack:
            raise PSError('stack underflow')
        object = stack[-1]
        if types:
            if object.type not in types:
                raise PSError('typecheck, expected %s, found %s' % (repr(types), object.type))
        del stack[-1]
        return object

    def do_makearray(self):
        """Implementation of ']': collect objects down to the mark into an array."""
        array = []
        while 1:
            topobject = self.pop()
            if topobject == self.mark:
                break
            array.append(topobject)
        array.reverse()
        self.push(ps_array(array))

    def close(self):
        """Remove circular references."""
        del self.stack
        del self.dictstack


def unpack_item(item):
    """Recursively convert a ps_object wrapper into plain Python values.

    Dicts and lists are unpacked element-wise; procedure bodies become
    tuples so they are distinguishable (and hashable); everything else
    yields its raw .value.
    """
    tp = type(item.value)
    if tp == dict:
        newitem = {}
        for key, value in item.value.items():
            newitem[key] = unpack_item(value)
    elif tp == list:
        newitem = [None] * len(item.value)
        for i in range(len(item.value)):
            newitem[i] = unpack_item(item.value[i])
        if item.type == 'proceduretype':
            newitem = tuple(newitem)
    else:
        newitem = item.value
    return newitem


def suckfont(data, encoding="ascii"):
    """Interpret Type 1 font 'data' (bytes) and return the font dict
    as plain Python objects.

    A dummy /Helvetica font is predefined so fonts that reference it
    (e.g. via findfont) can be processed.
    """
    m = re.search(br"/FontName\s+/([^ \t\n\r]+)\s+def", data)
    if m:
        # The regex matched against bytes, but FontDirectory keys are str
        # tokens produced by the tokenizer -- decode before the lookup.
        fontName = m.group(1)
        fontName = fontName.decode()
    else:
        fontName = None
    interpreter = PSInterpreter(encoding=encoding)
    interpreter.interpret(b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop")
    interpreter.interpret(data)
    fontdir = interpreter.dictstack[0]['FontDirectory'].value
    if fontName in fontdir:
        rawfont = fontdir[fontName]
    else:
        # fall back, in case fontName wasn't found
        fontNames = list(fontdir.keys())
        if len(fontNames) > 1:
            fontNames.remove("Helvetica")
        fontNames.sort()
        rawfont = fontdir[fontNames[0]]
    interpreter.close()
    return unpack_item(rawfont)