1""" fontTools.misc.classifyTools.py -- tools for classifying things.
2"""
3
4from __future__ import print_function, absolute_import
5from fontTools.misc.py23 import *
6
7class Classifier(object):
8
9	"""
10	Main Classifier object, used to classify things into similar sets.
11	"""
12
13	def __init__(self, sort=True):
14
15		self._things = set() # set of all things known so far
16		self._sets = [] # list of class sets produced so far
17		self._mapping = {} # map from things to their class set
18		self._dirty = False
19		self._sort = sort
20
21	def add(self, set_of_things):
22		"""
23		Add a set to the classifier.  Any iterable is accepted.
24		"""
25		if not set_of_things:
26			return
27
28		self._dirty = True
29
30		things, sets, mapping = self._things, self._sets, self._mapping
31
32		s = set(set_of_things)
33		intersection = s.intersection(things) # existing things
34		s.difference_update(intersection) # new things
35		difference = s
36		del s
37
38		# Add new class for new things
39		if difference:
40			things.update(difference)
41			sets.append(difference)
42			for thing in difference:
43				mapping[thing] = difference
44		del difference
45
46		while intersection:
47			# Take one item and process the old class it belongs to
48			old_class = mapping[next(iter(intersection))]
49			old_class_intersection = old_class.intersection(intersection)
50
51			# Update old class to remove items from new set
52			old_class.difference_update(old_class_intersection)
53
54			# Remove processed items from todo list
55			intersection.difference_update(old_class_intersection)
56
57			# Add new class for the intersection with old class
58			sets.append(old_class_intersection)
59			for thing in old_class_intersection:
60				mapping[thing] = old_class_intersection
61			del old_class_intersection
62
63	def update(self, list_of_sets):
64		"""
65		Add a a list of sets to the classifier.  Any iterable of iterables is accepted.
66		"""
67		for s in list_of_sets:
68			self.add(s)
69
70	def _process(self):
71		if not self._dirty:
72			return
73
74		# Do any deferred processing
75		sets = self._sets
76		self._sets = [s for s in sets if s]
77
78		if self._sort:
79			self._sets = sorted(self._sets, key=lambda s: (-len(s), sorted(s)))
80
81		self._dirty = False
82
83	# Output methods
84
85	def getThings(self):
86		"""Returns the set of all things known so far.
87
88		The return value belongs to the Classifier object and should NOT
89		be modified while the classifier is still in use.
90		"""
91		self._process()
92		return self._things
93
94	def getMapping(self):
95		"""Returns the mapping from things to their class set.
96
97		The return value belongs to the Classifier object and should NOT
98		be modified while the classifier is still in use.
99		"""
100		self._process()
101		return self._mapping
102
103	def getClasses(self):
104		"""Returns the list of class sets.
105
106		The return value belongs to the Classifier object and should NOT
107		be modified while the classifier is still in use.
108		"""
109		self._process()
110		return self._sets
111
112
113def classify(list_of_sets, sort=True):
114	"""
115	Takes a iterable of iterables (list of sets from here on; but any
116	iterable works.), and returns the smallest list of sets such that
117	each set, is either a subset, or is disjoint from, each of the input
118	sets.
119
120	In other words, this function classifies all the things present in
121	any of the input sets, into similar classes, based on which sets
122	things are a member of.
123
124	If sort=True, return class sets are sorted by decreasing size and
125	their natural sort order within each class size.  Otherwise, class
126	sets are returned in the order that they were identified, which is
127	generally not significant.
128
129	>>> classify([]) == ([], {})
130	True
131	>>> classify([[]]) == ([], {})
132	True
133	>>> classify([[], []]) == ([], {})
134	True
135	>>> classify([[1]]) == ([{1}], {1: {1}})
136	True
137	>>> classify([[1,2]]) == ([{1, 2}], {1: {1, 2}, 2: {1, 2}})
138	True
139	>>> classify([[1],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}})
140	True
141	>>> classify([[1,2],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}})
142	True
143	>>> classify([[1,2],[2,4]]) == ([{1}, {2}, {4}], {1: {1}, 2: {2}, 4: {4}})
144	True
145	>>> classify([[1,2],[2,4,5]]) == (
146	...     [{4, 5}, {1}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}})
147	True
148	>>> classify([[1,2],[2,4,5]], sort=False) == (
149	...     [{1}, {4, 5}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}})
150	True
151	>>> classify([[1,2,9],[2,4,5]], sort=False) == (
152	...     [{1, 9}, {4, 5}, {2}], {1: {1, 9}, 2: {2}, 4: {4, 5}, 5: {4, 5},
153	...     9: {1, 9}})
154	True
155	>>> classify([[1,2,9,15],[2,4,5]], sort=False) == (
156	...     [{1, 9, 15}, {4, 5}, {2}], {1: {1, 9, 15}, 2: {2}, 4: {4, 5},
157	...     5: {4, 5}, 9: {1, 9, 15}, 15: {1, 9, 15}})
158	True
159	>>> classes, mapping = classify([[1,2,9,15],[2,4,5],[15,5]], sort=False)
160	>>> set([frozenset(c) for c in classes]) == set(
161	...     [frozenset(s) for s in ({1, 9}, {4}, {2}, {5}, {15})])
162	True
163	>>> mapping == {1: {1, 9}, 2: {2}, 4: {4}, 5: {5}, 9: {1, 9}, 15: {15}}
164	True
165	"""
166	classifier = Classifier(sort=sort)
167	classifier.update(list_of_sets)
168	return classifier.getClasses(), classifier.getMapping()
169
170
171if __name__ == "__main__":
172	import sys, doctest
173	sys.exit(doctest.testmod(optionflags=doctest.ELLIPSIS).failed)
174