1# -*- coding: utf-8 -*-
2
3"""T2CharString operator specializer and generalizer."""
4
5from __future__ import print_function, division, absolute_import
6from fontTools.misc.py23 import *
7
8
9def stringToProgram(string):
10	if isinstance(string, basestring):
11		string = string.split()
12	program = []
13	for token in string:
14		try:
15			token = int(token)
16		except ValueError:
17			try:
18				token = float(token)
19			except ValueError:
20				pass
21		program.append(token)
22	return program
23
24
25def programToString(program):
26	return ' '.join(str(x) for x in program)
27
28
29def programToCommands(program):
30	"""Takes a T2CharString program list and returns list of commands.
31	Each command is a two-tuple of commandname,arg-list.  The commandname might
32	be empty string if no commandname shall be emitted (used for glyph width,
33	hintmask/cntrmask argument, as well as stray arguments at the end of the
34	program (¯\_(ツ)_/¯)."""
35
36	width = None
37	commands = []
38	stack = []
39	it = iter(program)
40	for token in it:
41		if not isinstance(token, basestring):
42			stack.append(token)
43			continue
44
45		if width is None and token in {'hstem', 'hstemhm', 'vstem', 'vstemhm',
46					       'cntrmask', 'hintmask',
47					       'hmoveto', 'vmoveto', 'rmoveto',
48					       'endchar'}:
49			parity = token in {'hmoveto', 'vmoveto'}
50			if stack and (len(stack) % 2) ^ parity:
51				width = stack.pop(0)
52				commands.append(('', [width]))
53
54		if token in {'hintmask', 'cntrmask'}:
55			if stack:
56				commands.append(('', stack))
57			commands.append((token, []))
58			commands.append(('', [next(it)]))
59		else:
60			commands.append((token,stack))
61		stack = []
62	if stack:
63		commands.append(('', stack))
64	return commands
65
66
67def commandsToProgram(commands):
68	"""Takes a commands list as returned by programToCommands() and converts
69	it back to a T2CharString program list."""
70	program = []
71	for op,args in commands:
72		program.extend(args)
73		if op:
74			program.append(op)
75	return program
76
77
78def _everyN(el, n):
79	"""Group the list el into groups of size n"""
80	if len(el) % n != 0: raise ValueError(el)
81	for i in range(0, len(el), n):
82		yield el[i:i+n]
83
84
85class _GeneralizerDecombinerCommandsMap(object):
86
87	@staticmethod
88	def rmoveto(args):
89		if len(args) != 2: raise ValueError(args)
90		yield ('rmoveto', args)
91	@staticmethod
92	def hmoveto(args):
93		if len(args) != 1: raise ValueError(args)
94		yield ('rmoveto', [args[0], 0])
95	@staticmethod
96	def vmoveto(args):
97		if len(args) != 1: raise ValueError(args)
98		yield ('rmoveto', [0, args[0]])
99
100	@staticmethod
101	def rlineto(args):
102		if not args: raise ValueError(args)
103		for args in _everyN(args, 2):
104			yield ('rlineto', args)
105	@staticmethod
106	def hlineto(args):
107		if not args: raise ValueError(args)
108		it = iter(args)
109		try:
110			while True:
111				yield ('rlineto', [next(it), 0])
112				yield ('rlineto', [0, next(it)])
113		except StopIteration:
114			pass
115	@staticmethod
116	def vlineto(args):
117		if not args: raise ValueError(args)
118		it = iter(args)
119		try:
120			while True:
121				yield ('rlineto', [0, next(it)])
122				yield ('rlineto', [next(it), 0])
123		except StopIteration:
124			pass
125	@staticmethod
126	def rrcurveto(args):
127		if not args: raise ValueError(args)
128		for args in _everyN(args, 6):
129			yield ('rrcurveto', args)
130	@staticmethod
131	def hhcurveto(args):
132		if len(args) < 4 or len(args) % 4 > 1: raise ValueError(args)
133		if len(args) % 2 == 1:
134			yield ('rrcurveto', [args[1], args[0], args[2], args[3], args[4], 0])
135			args = args[5:]
136		for args in _everyN(args, 4):
137			yield ('rrcurveto', [args[0], 0, args[1], args[2], args[3], 0])
138	@staticmethod
139	def vvcurveto(args):
140		if len(args) < 4 or len(args) % 4 > 1: raise ValueError(args)
141		if len(args) % 2 == 1:
142			yield ('rrcurveto', [args[0], args[1], args[2], args[3], 0, args[4]])
143			args = args[5:]
144		for args in _everyN(args, 4):
145			yield ('rrcurveto', [0, args[0], args[1], args[2], 0, args[3]])
146	@staticmethod
147	def hvcurveto(args):
148		if len(args) < 4 or len(args) % 8 not in {0,1,4,5}: raise ValueError(args)
149		last_args = None
150		if len(args) % 2 == 1:
151			lastStraight = len(args) % 8 == 5
152			args, last_args = args[:-5], args[-5:]
153		it = _everyN(args, 4)
154		try:
155			while True:
156				args = next(it)
157				yield ('rrcurveto', [args[0], 0, args[1], args[2], 0, args[3]])
158				args = next(it)
159				yield ('rrcurveto', [0, args[0], args[1], args[2], args[3], 0])
160		except StopIteration:
161			pass
162		if last_args:
163			args = last_args
164			if lastStraight:
165				yield ('rrcurveto', [args[0], 0, args[1], args[2], args[4], args[3]])
166			else:
167				yield ('rrcurveto', [0, args[0], args[1], args[2], args[3], args[4]])
168	@staticmethod
169	def vhcurveto(args):
170		if len(args) < 4 or len(args) % 8 not in {0,1,4,5}: raise ValueError(args)
171		last_args = None
172		if len(args) % 2 == 1:
173			lastStraight = len(args) % 8 == 5
174			args, last_args = args[:-5], args[-5:]
175		it = _everyN(args, 4)
176		try:
177			while True:
178				args = next(it)
179				yield ('rrcurveto', [0, args[0], args[1], args[2], args[3], 0])
180				args = next(it)
181				yield ('rrcurveto', [args[0], 0, args[1], args[2], 0, args[3]])
182		except StopIteration:
183			pass
184		if last_args:
185			args = last_args
186			if lastStraight:
187				yield ('rrcurveto', [0, args[0], args[1], args[2], args[3], args[4]])
188			else:
189				yield ('rrcurveto', [args[0], 0, args[1], args[2], args[4], args[3]])
190
191	@staticmethod
192	def rcurveline(args):
193		if len(args) < 8 or len(args) % 6 != 2: raise ValueError(args)
194		args, last_args = args[:-2], args[-2:]
195		for args in _everyN(args, 6):
196			yield ('rrcurveto', args)
197		yield ('rlineto', last_args)
198	@staticmethod
199	def rlinecurve(args):
200		if len(args) < 8 or len(args) % 2 != 0: raise ValueError(args)
201		args, last_args = args[:-6], args[-6:]
202		for args in _everyN(args, 2):
203			yield ('rlineto', args)
204		yield ('rrcurveto', last_args)
205
206
207def generalizeCommands(commands, ignoreErrors=False):
208	result = []
209	mapping = _GeneralizerDecombinerCommandsMap
210	for op,args in commands:
211		func = getattr(mapping, op, None)
212		if not func:
213			result.append((op,args))
214			continue
215		try:
216			for command in func(args):
217				result.append(command)
218		except ValueError:
219			if ignoreErrors:
220				# Store op as data, such that consumers of commands do not have to
221				# deal with incorrect number of arguments.
222				result.append(('', args))
223				result.append(('', [op]))
224			else:
225				raise
226	return result
227
228def generalizeProgram(program, **kwargs):
229	return commandsToProgram(generalizeCommands(programToCommands(program), **kwargs))
230
231
232def _categorizeVector(v):
233	"""
234	Takes X,Y vector v and returns one of r, h, v, or 0 depending on which
235	of X and/or Y are zero, plus tuple of nonzero ones.  If both are zero,
236	it returns a single zero still.
237
238	>>> _categorizeVector((0,0))
239	('0', (0,))
240	>>> _categorizeVector((1,0))
241	('h', (1,))
242	>>> _categorizeVector((0,2))
243	('v', (2,))
244	>>> _categorizeVector((1,2))
245	('r', (1, 2))
246	"""
247	if not v[0]:
248		if not v[1]:
249			return '0', v[:1]
250		else:
251			return 'v', v[1:]
252	else:
253		if not v[1]:
254			return 'h', v[:1]
255		else:
256			return 'r', v
257
258def _mergeCategories(a, b):
259	if a == '0': return b
260	if b == '0': return a
261	if a == b: return a
262	return None
263
264def _negateCategory(a):
265	if a == 'h': return 'v'
266	if a == 'v': return 'h'
267	assert a in '0r'
268	return a
269
270def specializeCommands(commands,
271		       ignoreErrors=False,
272		       generalizeFirst=True,
273		       preserveTopology=False,
274		       maxstack=48):
275
276	# We perform several rounds of optimizations.  They are carefully ordered and are:
277	#
278	# 0. Generalize commands.
279	#    This ensures that they are in our expected simple form, with each line/curve only
280	#    having arguments for one segment, and using the generic form (rlineto/rrcurveto).
281	#    If caller is sure the input is in this form, they can turn off generalization to
282	#    save time.
283	#
284	# 1. Combine successive rmoveto operations.
285	#
286	# 2. Specialize rmoveto/rlineto/rrcurveto operators into horizontal/vertical variants.
287	#    We specialize into some, made-up, variants as well, which simplifies following
288	#    passes.
289	#
290	# 3. Merge or delete redundant operations, to the extent requested.
291	#    OpenType spec declares point numbers in CFF undefined.  As such, we happily
292	#    change topology.  If client relies on point numbers (in GPOS anchors, or for
293	#    hinting purposes(what?)) they can turn this off.
294	#
295	# 4. Peephole optimization to revert back some of the h/v variants back into their
296	#    original "relative" operator (rline/rrcurveto) if that saves a byte.
297	#
298	# 5. Combine adjacent operators when possible, minding not to go over max stack size.
299	#
300	# 6. Resolve any remaining made-up operators into real operators.
301	#
302	# I have convinced myself that this produces optimal bytecode (except for, possibly
303	# one byte each time maxstack size prohibits combining.)  YMMV, but you'd be wrong. :-)
304	# A dynamic-programming approach can do the same but would be significantly slower.
305
306
307	# 0. Generalize commands.
308	if generalizeFirst:
309		commands = generalizeCommands(commands, ignoreErrors=ignoreErrors)
310	else:
311		commands = list(commands) # Make copy since we modify in-place later.
312
313	# 1. Combine successive rmoveto operations.
314	for i in range(len(commands)-1, 0, -1):
315		if 'rmoveto' == commands[i][0] == commands[i-1][0]:
316			v1, v2 = commands[i-1][1], commands[i][1]
317			commands[i-1] = ('rmoveto', [v1[0]+v2[0], v1[1]+v2[1]])
318			del commands[i]
319
320	# 2. Specialize rmoveto/rlineto/rrcurveto operators into horizontal/vertical variants.
321	#
322	# We, in fact, specialize into more, made-up, variants that special-case when both
323	# X and Y components are zero.  This simplifies the following optimization passes.
324	# This case is rare, but OCD does not let me skip it.
325	#
326	# After this round, we will have four variants that use the following mnemonics:
327	#
328	#  - 'r' for relative,   ie. non-zero X and non-zero Y,
329	#  - 'h' for horizontal, ie. zero X and non-zero Y,
330	#  - 'v' for vertical,   ie. non-zero X and zero Y,
331	#  - '0' for zeros,      ie. zero X and zero Y.
332	#
333	# The '0' pseudo-operators are not part of the spec, but help simplify the following
334	# optimization rounds.  We resolve them at the end.  So, after this, we will have four
335	# moveto and four lineto variants:
336	#
337	#  - 0moveto, 0lineto
338	#  - hmoveto, hlineto
339	#  - vmoveto, vlineto
340	#  - rmoveto, rlineto
341	#
342	# and sixteen curveto variants.  For example, a '0hcurveto' operator means a curve
343	# dx0,dy0,dx1,dy1,dx2,dy2,dx3,dy3 where dx0, dx1, and dy3 are zero but not dx3.
344	# An 'rvcurveto' means dx3 is zero but not dx0,dy0,dy3.
345	#
346	# There are nine different variants of curves without the '0'.  Those nine map exactly
347	# to the existing curve variants in the spec: rrcurveto, and the four variants hhcurveto,
348	# vvcurveto, hvcurveto, and vhcurveto each cover two cases, one with an odd number of
349	# arguments and one without.  Eg. an hhcurveto with an extra argument (odd number of
350	# arguments) is in fact an rhcurveto.  The operators in the spec are designed such that
351	# all four of rhcurveto, rvcurveto, hrcurveto, and vrcurveto are encodable for one curve.
352	#
353	# Of the curve types with '0', the 00curveto is equivalent to a lineto variant.  The rest
354	# of the curve types with a 0 need to be encoded as a h or v variant.  Ie. a '0' can be
355	# thought of a "don't care" and can be used as either an 'h' or a 'v'.  As such, we always
356	# encode a number 0 as argument when we use a '0' variant.  Later on, we can just substitute
357	# the '0' with either 'h' or 'v' and it works.
358	#
359	# When we get to curve splines however, things become more complicated...  XXX finish this.
360	# There's one more complexity with splines.  If one side of the spline is not horizontal or
361	# vertical (or zero), ie. if it's 'r', then it limits which spline types we can encode.
362	# Only hhcurveto and vvcurveto operators can encode a spline starting with 'r', and
363	# only hvcurveto and vhcurveto operators can encode a spline ending with 'r'.
364	# This limits our merge opportunities later.
365	#
366	for i in range(len(commands)):
367		op,args = commands[i]
368
369		if op in {'rmoveto', 'rlineto'}:
370			c, args = _categorizeVector(args)
371			commands[i] = c+op[1:], args
372			continue
373
374		if op == 'rrcurveto':
375			c1, args1 = _categorizeVector(args[:2])
376			c2, args2 = _categorizeVector(args[-2:])
377			commands[i] = c1+c2+'curveto', args1+args[2:4]+args2
378			continue
379
380	# 3. Merge or delete redundant operations, to the extent requested.
381	#
382	# TODO
383	# A 0moveto that comes before all other path operations can be removed.
384	# though I find conflicting evidence for this.
385	#
386	# TODO
387	# "If hstem and vstem hints are both declared at the beginning of a
388	# CharString, and this sequence is followed directly by the hintmask or
389	# cntrmask operators, then the vstem hint operator (or, if applicable,
390	# the vstemhm operator) need not be included."
391	#
392	# "The sequence and form of a CFF2 CharString program may be represented as:
393	# {hs* vs* cm* hm* mt subpath}? {mt subpath}*"
394	#
395	# https://www.microsoft.com/typography/otspec/cff2charstr.htm#section3.1
396	#
397	# For Type2 CharStrings the sequence is:
398	# w? {hs* vs* cm* hm* mt subpath}? {mt subpath}* endchar"
399
400
401	# Some other redundancies change topology (point numbers).
402	if not preserveTopology:
403		for i in range(len(commands)-1, -1, -1):
404			op, args = commands[i]
405
406			# A 00curveto is demoted to a (specialized) lineto.
407			if op == '00curveto':
408				assert len(args) == 4
409				c, args = _categorizeVector(args[1:3])
410				op = c+'lineto'
411				commands[i] = op, args
412				# and then...
413
414			# A 0lineto can be deleted.
415			if op == '0lineto':
416				del commands[i]
417				continue
418
419			# Merge adjacent hlineto's and vlineto's.
420			if (i and op in {'hlineto', 'vlineto'} and
421					(op == commands[i-1][0]) and
422					(not isinstance(args[0], list))):
423				_, other_args = commands[i-1]
424				assert len(args) == 1 and len(other_args) == 1
425				commands[i-1] = (op, [other_args[0]+args[0]])
426				del commands[i]
427				continue
428
429	# 4. Peephole optimization to revert back some of the h/v variants back into their
430	#    original "relative" operator (rline/rrcurveto) if that saves a byte.
431	for i in range(1, len(commands)-1):
432		op,args = commands[i]
433		prv,nxt = commands[i-1][0], commands[i+1][0]
434
435		if op in {'0lineto', 'hlineto', 'vlineto'} and prv == nxt == 'rlineto':
436			assert len(args) == 1
437			args = [0, args[0]] if op[0] == 'v' else [args[0], 0]
438			commands[i] = ('rlineto', args)
439			continue
440
441		if op[2:] == 'curveto' and len(args) == 5 and prv == nxt == 'rrcurveto':
442			assert (op[0] == 'r') ^ (op[1] == 'r')
443			if op[0] == 'v':
444				pos = 0
445			elif op[0] != 'r':
446				pos = 1
447			elif op[1] == 'v':
448				pos = 4
449			else:
450				pos = 5
451			# Insert, while maintaining the type of args (can be tuple or list).
452			args = args[:pos] + type(args)((0,)) + args[pos:]
453			commands[i] = ('rrcurveto', args)
454			continue
455
456	# 5. Combine adjacent operators when possible, minding not to go over max stack size.
457	for i in range(len(commands)-1, 0, -1):
458		op1,args1 = commands[i-1]
459		op2,args2 = commands[i]
460		new_op = None
461
462		# Merge logic...
463		if {op1, op2} <= {'rlineto', 'rrcurveto'}:
464			if op1 == op2:
465				new_op = op1
466			else:
467				if op2 == 'rrcurveto' and len(args2) == 6:
468					new_op = 'rlinecurve'
469				elif len(args2) == 2:
470					new_op = 'rcurveline'
471
472		elif (op1, op2) in {('rlineto', 'rlinecurve'), ('rrcurveto', 'rcurveline')}:
473			new_op = op2
474
475		elif {op1, op2} == {'vlineto', 'hlineto'}:
476			new_op = op1
477
478		elif 'curveto' == op1[2:] == op2[2:]:
479			d0, d1 = op1[:2]
480			d2, d3 = op2[:2]
481
482			if d1 == 'r' or d2 == 'r' or d0 == d3 == 'r':
483				continue
484
485			d = _mergeCategories(d1, d2)
486			if d is None: continue
487			if d0 == 'r':
488				d = _mergeCategories(d, d3)
489				if d is None: continue
490				new_op = 'r'+d+'curveto'
491			elif d3 == 'r':
492				d0 = _mergeCategories(d0, _negateCategory(d))
493				if d0 is None: continue
494				new_op = d0+'r'+'curveto'
495			else:
496				d0 = _mergeCategories(d0, d3)
497				if d0 is None: continue
498				new_op = d0+d+'curveto'
499
500		# Make sure the stack depth does not exceed (maxstack - 1), so
501		# that subroutinizer can insert subroutine calls at any point.
502		if new_op and len(args1) + len(args2) < maxstack:
503			commands[i-1] = (new_op, args1+args2)
504			del commands[i]
505
506	# 6. Resolve any remaining made-up operators into real operators.
507	for i in range(len(commands)):
508		op,args = commands[i]
509
510		if op in {'0moveto', '0lineto'}:
511			commands[i] = 'h'+op[1:], args
512			continue
513
514		if op[2:] == 'curveto' and op[:2] not in {'rr', 'hh', 'vv', 'vh', 'hv'}:
515			op0, op1 = op[:2]
516			if (op0 == 'r') ^ (op1 == 'r'):
517				assert len(args) % 2 == 1
518			if op0 == '0': op0 = 'h'
519			if op1 == '0': op1 = 'h'
520			if op0 == 'r': op0 = op1
521			if op1 == 'r': op1 = _negateCategory(op0)
522			assert {op0,op1} <= {'h','v'}, (op0, op1)
523
524			if len(args) % 2:
525				if op0 != op1: # vhcurveto / hvcurveto
526					if (op0 == 'h') ^ (len(args) % 8 == 1):
527						# Swap last two args order
528						args = args[:-2]+args[-1:]+args[-2:-1]
529				else: # hhcurveto / vvcurveto
530					if op0 == 'h': # hhcurveto
531						# Swap first two args order
532						args = args[1:2]+args[:1]+args[2:]
533
534			commands[i] = op0+op1+'curveto', args
535			continue
536
537	return commands
538
539def specializeProgram(program, **kwargs):
540	return commandsToProgram(specializeCommands(programToCommands(program), **kwargs))
541
542
543if __name__ == '__main__':
544	import sys
545	if len(sys.argv) == 1:
546		import doctest
547		sys.exit(doctest.testmod().failed)
548	program = stringToProgram(sys.argv[1:])
549	print("Program:"); print(programToString(program))
550	commands = programToCommands(program)
551	print("Commands:"); print(commands)
552	program2 = commandsToProgram(commands)
553	print("Program from commands:"); print(programToString(program2))
554	assert program == program2
555	print("Generalized program:"); print(programToString(generalizeProgram(program)))
556	print("Specialized program:"); print(programToString(specializeProgram(program)))
557
558