from fontTools.misc.py23 import bytechr, byteord, bytesjoin, tobytes, tostr
from fontTools.misc import eexec
from .psOperators import (
	PSOperators,
	ps_StandardEncoding,
	ps_array,
	ps_boolean,
	ps_dict,
	ps_integer,
	ps_literal,
	ps_mark,
	ps_name,
	ps_operator,
	ps_procedure,
	ps_procmark,
	ps_real,
	ps_string,
)
import re
from collections.abc import Callable
from string import whitespace
import logging


log = logging.getLogger(__name__)

ps_special = b'()<>[]{}%'	# / is one too, but we take care of that one differently

skipwhiteRE = re.compile(bytesjoin([b"[", whitespace, b"]*"]))
endofthingPat = bytesjoin([b"[^][(){}<>/%", whitespace, b"]*"])
endofthingRE = re.compile(endofthingPat)
commentRE = re.compile(b"%[^\n\r]*")

# XXX This is not entirely correct as it doesn't allow *nested* embedded parens:
stringPat = br"""
	\(
		(
			(
				[^()]*   \\   [()]
			)
			|
			(
				[^()]*  \(   [^()]*  \)
			)
		)*
		[^()]*
	\)
"""
stringPat = b"".join(stringPat.split())
stringRE = re.compile(stringPat)
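# A rough illustration of what stringRE accepts: it matches b"(abc)",
# b"(a\\(b)" (a backslash-escaped paren) and b"(a(b)c)" (one level of
# embedded parens), but not the nested b"(a(b(c))d)".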

hexstringRE = re.compile(bytesjoin([b"<[", whitespace, b"0-9A-Fa-f]*>"]))

class PSTokenError(Exception): pass
class PSError(Exception): pass


class PSTokenizer(object):

	def __init__(self, buf=b'', encoding="ascii"):
		# Force self.buf to be a byte string
		buf = tobytes(buf)
		self.buf = buf
		self.len = len(buf)
		self.pos = 0
		self.closed = False
		self.encoding = encoding

	def read(self, n=-1):
		"""Read at most 'n' bytes from the buffer, or less if the read
		hits EOF before obtaining 'n' bytes.
		If 'n' is negative or omitted, read all data until EOF is reached.
		"""
		if self.closed:
			raise ValueError("I/O operation on closed file")
		if n is None or n < 0:
			newpos = self.len
		else:
			newpos = min(self.pos+n, self.len)
		r = self.buf[self.pos:newpos]
		self.pos = newpos
		return r

	def close(self):
		if not self.closed:
			self.closed = True
			del self.buf, self.pos

	def getnexttoken(self,
			# localize some stuff, for performance
			len=len,
			ps_special=ps_special,
			stringmatch=stringRE.match,
			hexstringmatch=hexstringRE.match,
			commentmatch=commentRE.match,
			endmatch=endofthingRE.match):

		self.skipwhite()
		if self.pos >= self.len:
			return None, None
		pos = self.pos
		buf = self.buf
		# indexing bytes yields an int on py3; convert back to a 1-byte bytes
		char = bytechr(byteord(buf[pos]))
		if char in ps_special:
			if char in b'{}[]':
				tokentype = 'do_special'
				token = char
			elif char == b'%':
				tokentype = 'do_comment'
				_, nextpos = commentmatch(buf, pos).span()
				token = buf[pos:nextpos]
			elif char == b'(':
				tokentype = 'do_string'
				m = stringmatch(buf, pos)
				if m is None:
					raise PSTokenError('bad string at character %d' % pos)
				_, nextpos = m.span()
				token = buf[pos:nextpos]
			elif char == b'<':
				tokentype = 'do_hexstring'
				m = hexstringmatch(buf, pos)
				if m is None:
					raise PSTokenError('bad hexstring at character %d' % pos)
				_, nextpos = m.span()
				token = buf[pos:nextpos]
			else:
				raise PSTokenError('bad token at character %d' % pos)
		else:
			if char == b'/':
				tokentype = 'do_literal'
				m = endmatch(buf, pos+1)
			else:
				tokentype = ''
				m = endmatch(buf, pos)
			if m is None:
				raise PSTokenError('bad token at character %d' % pos)
			_, nextpos = m.span()
			token = buf[pos:nextpos]
		self.pos = pos + len(token)
		token = tostr(token, encoding=self.encoding)
		return tokentype, token

	def skipwhite(self, whitematch=skipwhiteRE.match):
		_, nextpos = whitematch(self.buf, self.pos).span()
		self.pos = nextpos

	def starteexec(self):
		self.pos = self.pos + 1
		self.dirtybuf = self.buf[self.pos:]
		# Decrypt the eexec-encrypted portion with the standard Type 1
		# eexec key (55665), then skip the four random leading bytes.
		self.buf, R = eexec.decrypt(self.dirtybuf, 55665)
		self.len = len(self.buf)
		self.pos = 4

	def stopeexec(self):
		if not hasattr(self, 'dirtybuf'):
			return
		self.buf = self.dirtybuf
		del self.dirtybuf

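# A minimal sketch of driving the tokenizer by hand (illustrative only;
# expected return values shown as comments):
#
#   tok = PSTokenizer(b"/Foo 123 (bar) def")
#   tok.getnexttoken()  # ('do_literal', '/Foo')
#   tok.getnexttoken()  # ('', '123')
#   tok.getnexttoken()  # ('do_string', '(bar)')
#   tok.getnexttoken()  # ('', 'def')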

class PSInterpreter(PSOperators):

	def __init__(self, encoding="ascii"):
		systemdict = {}
		userdict = {}
		self.encoding = encoding
		self.dictstack = [systemdict, userdict]
		self.stack = []
		self.proclevel = 0
		self.procmark = ps_procmark()
		self.fillsystemdict()

	def fillsystemdict(self):
		systemdict = self.dictstack[0]
		systemdict['['] = systemdict['mark'] = self.mark = ps_mark()
		systemdict[']'] = ps_operator(']', self.do_makearray)
		systemdict['true'] = ps_boolean(1)
		systemdict['false'] = ps_boolean(0)
		systemdict['StandardEncoding'] = ps_array(ps_StandardEncoding)
		systemdict['FontDirectory'] = ps_dict({})
		self.suckoperators(systemdict, self.__class__)

	def suckoperators(self, systemdict, klass):
		# Register every callable 'ps_*' method of this class and its base
		# classes as a PostScript operator, minus the 'ps_' prefix.
		for name in dir(klass):
			attr = getattr(self, name)
			if isinstance(attr, Callable) and name[:3] == 'ps_':
				name = name[3:]
				systemdict[name] = ps_operator(name, attr)
		for baseclass in klass.__bases__:
			self.suckoperators(systemdict, baseclass)

	def interpret(self, data, getattr=getattr):
		tokenizer = self.tokenizer = PSTokenizer(data, self.encoding)
		getnexttoken = tokenizer.getnexttoken
		do_token = self.do_token
		handle_object = self.handle_object
		try:
			while 1:
				tokentype, token = getnexttoken()
				if not token:
					break
				if tokentype:
					handler = getattr(self, tokentype)
					object = handler(token)
				else:
					object = do_token(token)
				if object is not None:
					handle_object(object)
			tokenizer.close()
			self.tokenizer = None
		except:
			if self.tokenizer is not None:
				log.debug(
					'ps error:\n'
					'- - - - - - -\n'
					'%s\n'
					'>>>\n'
					'%s\n'
					'- - - - - - -',
					self.tokenizer.buf[self.tokenizer.pos-50:self.tokenizer.pos],
					self.tokenizer.buf[self.tokenizer.pos:self.tokenizer.pos+50])
			raise

	def handle_object(self, object):
		# While a procedure is being built (proclevel > 0), or for literals
		# and procedure bodies, just push the object; otherwise resolve
		# names and execute.
		if not (self.proclevel or object.literal or object.type == 'proceduretype'):
			if object.type != 'operatortype':
				object = self.resolve_name(object.value)
			if object.literal:
				self.push(object)
			else:
				if object.type == 'proceduretype':
					self.call_procedure(object)
				else:
					object.function()
		else:
			self.push(object)

	def call_procedure(self, proc):
		handle_object = self.handle_object
		for item in proc.value:
			handle_object(item)

	def resolve_name(self, name):
		dictstack = self.dictstack
		for i in range(len(dictstack)-1, -1, -1):
			if name in dictstack[i]:
				return dictstack[i][name]
		raise PSError('name error: ' + str(name))

	def do_token(self, token,
				int=int,
				float=float,
				ps_name=ps_name,
				ps_integer=ps_integer,
				ps_real=ps_real):
		try:
			num = int(token)
		except (ValueError, OverflowError):
			try:
				num = float(token)
			except (ValueError, OverflowError):
				if '#' in token:
					# PostScript radix notation, e.g. '16#FF' -> 255
					hashpos = token.find('#')
					try:
						base = int(token[:hashpos])
						num = int(token[hashpos+1:], base)
					except (ValueError, OverflowError):
						return ps_name(token)
					else:
						return ps_integer(num)
				else:
					return ps_name(token)
			else:
				return ps_real(num)
		else:
			return ps_integer(num)

	def do_comment(self, token):
		pass

	def do_literal(self, token):
		return ps_literal(token[1:])

	def do_string(self, token):
		return ps_string(token[1:-1])

	def do_hexstring(self, token):
		# e.g. '<48 65 6c 6c 6f>' -> 'Hello'; a final odd hex digit is
		# padded with '0', as the PostScript spec prescribes
		hexStr = "".join(token[1:-1].split())
		if len(hexStr) % 2:
			hexStr = hexStr + '0'
		cleanstr = []
		for i in range(0, len(hexStr), 2):
			cleanstr.append(chr(int(hexStr[i:i+2], 16)))
		cleanstr = "".join(cleanstr)
		return ps_string(cleanstr)

	def do_special(self, token):
		if token == '{':
			self.proclevel = self.proclevel + 1
			return self.procmark
		elif token == '}':
			proc = []
			while 1:
				topobject = self.pop()
				if topobject == self.procmark:
					break
				proc.append(topobject)
			self.proclevel = self.proclevel - 1
			proc.reverse()
			return ps_procedure(proc)
		elif token == '[':
			return self.mark
		elif token == ']':
			return ps_name(']')
		else:
			raise PSTokenError('huh?')

	def push(self, object):
		self.stack.append(object)

	def pop(self, *types):
		stack = self.stack
		if not stack:
			raise PSError('stack underflow')
		object = stack[-1]
		if types:
			if object.type not in types:
				raise PSError('typecheck, expected %s, found %s' % (repr(types), object.type))
		del stack[-1]
		return object

	def do_makearray(self):
		array = []
		while 1:
			topobject = self.pop()
			if topobject == self.mark:
				break
			array.append(topobject)
		array.reverse()
		self.push(ps_array(array))

	def close(self):
		"""Remove circular references."""
		del self.stack
		del self.dictstack

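# A rough sketch of the interpreter in action (illustrative only; 'def'
# resolves to the ps_def operator contributed by PSOperators):
#
#   interp = PSInterpreter()
#   interp.interpret(b"/x 3 def")
#   interp.dictstack[-1]['x'].value  # -> 3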

def unpack_item(item):
	tp = type(item.value)
	if tp == dict:
		newitem = {}
		for key, value in item.value.items():
			newitem[key] = unpack_item(value)
	elif tp == list:
		newitem = [None] * len(item.value)
		for i in range(len(item.value)):
			newitem[i] = unpack_item(item.value[i])
		if item.type == 'proceduretype':
			newitem = tuple(newitem)
	else:
		newitem = item.value
	return newitem

def suckfont(data, encoding="ascii"):
	m = re.search(br"/FontName\s+/([^ \t\n\r]+)\s+def", data)
	if m:
		# decode: the regex match is bytes, but FontDirectory keys are str
		fontName = m.group(1).decode()
	else:
		fontName = None
	interpreter = PSInterpreter(encoding=encoding)
	interpreter.interpret(b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop")
	interpreter.interpret(data)
	fontdir = interpreter.dictstack[0]['FontDirectory'].value
	if fontName in fontdir:
		rawfont = fontdir[fontName]
	else:
		# fall back, in case fontName wasn't found
		fontNames = list(fontdir.keys())
		if len(fontNames) > 1:
			fontNames.remove("Helvetica")
		fontNames.sort()
		rawfont = fontdir[fontNames[0]]
	interpreter.close()
	return unpack_item(rawfont)

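# A rough usage sketch ("MyFont.pfa" is a hypothetical plain-text Type 1
# font; the keys of the returned dict depend on the font itself):
#
#   with open("MyFont.pfa", "rb") as f:
#       data = f.read()
#   font = suckfont(data)
#   print(font.get("FontName"))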