1""" fontTools.misc.classifyTools.py -- tools for classifying things. 2""" 3 4from __future__ import print_function, absolute_import 5from fontTools.misc.py23 import * 6 7class Classifier(object): 8 9 """ 10 Main Classifier object, used to classify things into similar sets. 11 """ 12 13 def __init__(self, sort=True): 14 15 self._things = set() # set of all things known so far 16 self._sets = [] # list of class sets produced so far 17 self._mapping = {} # map from things to their class set 18 self._dirty = False 19 self._sort = sort 20 21 def add(self, set_of_things): 22 """ 23 Add a set to the classifier. Any iterable is accepted. 24 """ 25 if not set_of_things: 26 return 27 28 self._dirty = True 29 30 things, sets, mapping = self._things, self._sets, self._mapping 31 32 s = set(set_of_things) 33 intersection = s.intersection(things) # existing things 34 s.difference_update(intersection) # new things 35 difference = s 36 del s 37 38 # Add new class for new things 39 if difference: 40 things.update(difference) 41 sets.append(difference) 42 for thing in difference: 43 mapping[thing] = difference 44 del difference 45 46 while intersection: 47 # Take one item and process the old class it belongs to 48 old_class = mapping[next(iter(intersection))] 49 old_class_intersection = old_class.intersection(intersection) 50 51 # Update old class to remove items from new set 52 old_class.difference_update(old_class_intersection) 53 54 # Remove processed items from todo list 55 intersection.difference_update(old_class_intersection) 56 57 # Add new class for the intersection with old class 58 sets.append(old_class_intersection) 59 for thing in old_class_intersection: 60 mapping[thing] = old_class_intersection 61 del old_class_intersection 62 63 def update(self, list_of_sets): 64 """ 65 Add a a list of sets to the classifier. Any iterable of iterables is accepted. 66 """ 67 for s in list_of_sets: 68 self.add(s) 69 70 def _process(self): 71 if not self._dirty: 72 return 73 74 # Do any deferred processing 75 sets = self._sets 76 self._sets = [s for s in sets if s] 77 78 if self._sort: 79 self._sets = sorted(self._sets, key=lambda s: (-len(s), sorted(s))) 80 81 self._dirty = False 82 83 # Output methods 84 85 def getThings(self): 86 """Returns the set of all things known so far. 87 88 The return value belongs to the Classifier object and should NOT 89 be modified while the classifier is still in use. 90 """ 91 self._process() 92 return self._things 93 94 def getMapping(self): 95 """Returns the mapping from things to their class set. 96 97 The return value belongs to the Classifier object and should NOT 98 be modified while the classifier is still in use. 99 """ 100 self._process() 101 return self._mapping 102 103 def getClasses(self): 104 """Returns the list of class sets. 105 106 The return value belongs to the Classifier object and should NOT 107 be modified while the classifier is still in use. 108 """ 109 self._process() 110 return self._sets 111 112 113def classify(list_of_sets, sort=True): 114 """ 115 Takes a iterable of iterables (list of sets from here on; but any 116 iterable works.), and returns the smallest list of sets such that 117 each set, is either a subset, or is disjoint from, each of the input 118 sets. 119 120 In other words, this function classifies all the things present in 121 any of the input sets, into similar classes, based on which sets 122 things are a member of. 123 124 If sort=True, return class sets are sorted by decreasing size and 125 their natural sort order within each class size. Otherwise, class 126 sets are returned in the order that they were identified, which is 127 generally not significant. 128 129 >>> classify([]) == ([], {}) 130 True 131 >>> classify([[]]) == ([], {}) 132 True 133 >>> classify([[], []]) == ([], {}) 134 True 135 >>> classify([[1]]) == ([{1}], {1: {1}}) 136 True 137 >>> classify([[1,2]]) == ([{1, 2}], {1: {1, 2}, 2: {1, 2}}) 138 True 139 >>> classify([[1],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}}) 140 True 141 >>> classify([[1,2],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}}) 142 True 143 >>> classify([[1,2],[2,4]]) == ([{1}, {2}, {4}], {1: {1}, 2: {2}, 4: {4}}) 144 True 145 >>> classify([[1,2],[2,4,5]]) == ( 146 ... [{4, 5}, {1}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}}) 147 True 148 >>> classify([[1,2],[2,4,5]], sort=False) == ( 149 ... [{1}, {4, 5}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}}) 150 True 151 >>> classify([[1,2,9],[2,4,5]], sort=False) == ( 152 ... [{1, 9}, {4, 5}, {2}], {1: {1, 9}, 2: {2}, 4: {4, 5}, 5: {4, 5}, 153 ... 9: {1, 9}}) 154 True 155 >>> classify([[1,2,9,15],[2,4,5]], sort=False) == ( 156 ... [{1, 9, 15}, {4, 5}, {2}], {1: {1, 9, 15}, 2: {2}, 4: {4, 5}, 157 ... 5: {4, 5}, 9: {1, 9, 15}, 15: {1, 9, 15}}) 158 True 159 >>> classes, mapping = classify([[1,2,9,15],[2,4,5],[15,5]], sort=False) 160 >>> set([frozenset(c) for c in classes]) == set( 161 ... [frozenset(s) for s in ({1, 9}, {4}, {2}, {5}, {15})]) 162 True 163 >>> mapping == {1: {1, 9}, 2: {2}, 4: {4}, 5: {5}, 9: {1, 9}, 15: {15}} 164 True 165 """ 166 classifier = Classifier(sort=sort) 167 classifier.update(list_of_sets) 168 return classifier.getClasses(), classifier.getMapping() 169 170 171if __name__ == "__main__": 172 import sys, doctest 173 sys.exit(doctest.testmod(optionflags=doctest.ELLIPSIS).failed) 174