1import re,string
2
3
class reason_counter:
    """Occurrence counter for a reason, together with the wording to show.

    A new counter starts at one occurrence and holds the wording it was
    created with.  Each call to ``update`` adds one occurrence and replaces
    the stored wording.
    """

    def __init__(self, wording):
        self.num = 1
        self.wording = wording

    def update(self, new_wording):
        """Replace the stored wording and count one more occurrence."""
        self.wording = new_wording
        self.num = self.num + 1

    def html(self):
        """Return the wording, followed by " (N+)" unless the count is 1."""
        if self.num != 1:
            return "%s (%d+)" % (self.wording, self.num)
        return self.wording
18
19
def numbers_are_irrelevant(txt):
    """Tell whether numbers in the reason text *txt* may be replaced with NN.

    For now the answer is always True and *txt* is not inspected.  If some
    categories of reasons later need to keep their numbers, this is the
    place to return False for those categories.
    """
    return True
26
27
def aggregate_reason_fields(reasons_list):
    """Group similar reasons together and list them by frequency.

    Each entry of *reasons_list* is a string that may hold several reasons
    separated by ``|``.  Reasons are compared after trimming, collapsing
    whitespace runs to a single space and, when ``numbers_are_irrelevant``
    says so, replacing every run of digits with ``NN``.

    Returns a list of display strings (one ``reason_counter.html()`` result
    per distinct generalized reason), ordered from the most to the least
    frequent.  Empty reasons are ignored, so an empty input yields ``[]``.
    """
    # Each entry may itself be a combination of '|'-separated reasons;
    # flatten everything into one list of individual reasons.
    reasons = '|'.join(reasons_list).split('|')

    reason_htable = {}
    for reason in reasons:
        # Trim and collapse whitespace so spacing differences do not
        # produce distinct reasons.
        reason_reduced = re.sub(r"\s+", " ", reason.strip())

        if reason_reduced == '':
            continue  # ignore empty reasons

        if numbers_are_irrelevant(reason_reduced):
            # Generalize the reason descriptor by replacing the numbers
            # it contains with a generic NN.
            reason_reduced = re.sub(r"\d+", "NN", reason_reduced)

        if reason_reduced not in reason_htable:
            # A reason that occurred once keeps its original
            # (non-reduced) wording.
            reason_htable[reason_reduced] = reason_counter(reason)
        else:
            # A reason that occurred more than once is shown in its
            # reduced/generalized form.
            reason_htable[reason_reduced].update(reason_reduced)

    # sorted() works on any iterable, whereas dict.keys() has no .sort()
    # on Python 3; the sort is stable, so ties keep the dict's order.
    generic_reasons = sorted(reason_htable,
                             key=lambda k: reason_htable[k].num,
                             reverse=True)
    # Build a real list: map() would return a lazy iterator on Python 3.
    return [reason_htable[generic_reason].html()
            for generic_reason in generic_reasons]
62