# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php

"""
Creates a human-readable identifier, using letters and digits,
avoiding ambiguous numbers and letters.  hash_identifier can be used
to create compact representations that are derived from a certain
string (or concatenation of strings)
"""

try:
    from hashlib import md5
except ImportError:
    from md5 import md5

import six

# The identifier alphabet, in digit-value order ('2' is the zero digit).
# 0, 1, i, l, o and s are not part of it.
good_characters = "23456789abcdefghjkmnpqrtuvwxyz"

base = len(good_characters)


def make_identifier(number):
    """
    Encodes a number as an identifier.

    ``number`` must be a non-negative integer, otherwise ``ValueError``
    is raised.  The number is written in base ``len(good_characters)``
    with the least significant digit first; ``0`` encodes as the empty
    string.
    """
    if not isinstance(number, six.integer_types):
        # The one-element tuple keeps ``%`` from unpacking a tuple
        # argument, which would raise TypeError instead of ValueError.
        raise ValueError(
            "You can only make identifiers out of integers (not %r)"
            % (number,))
    if number < 0:
        raise ValueError(
            "You cannot make identifiers out of negative numbers: %r"
            % number)
    result = []
    while number:
        number, digit = divmod(number, base)
        result.append(good_characters[digit])
    return ''.join(result)


def hash_identifier(s, length, pad=True, hasher=md5, prefix='',
                    group=None, upper=False):
    """
    Hashes the string (with the given hashing module), then turns that
    hash into an identifier of the given length (using modulo to
    reduce the length of the identifier).  If ``pad`` is False, then
    the minimum-length identifier will be used; otherwise the
    identifier will be padded on the left with the zero digit
    (``good_characters[0]``, i.e. ``'2'``) as necessary.

    ``s`` may be text (encoded as UTF-8), bytes (used as-is) or any
    other object (converted with ``str()`` first).

    ``prefix`` will be added last, and does not count towards the
    target length.  ``group`` will group the characters with ``-`` in
    the given lengths, and also does not count towards the target
    length.  E.g., ``group=4`` will cause an identifier like
    ``a5f3-hgk3-asdf``.  Groups are counted from the right, so only
    the first group can be short.  Grouping occurs before the prefix.
    ``upper`` upper-cases the identifier, but not the prefix.

    Raises ``ValueError`` if ``length`` is more than 26 with the
    default md5 hasher, or if ``group`` is negative.
    """
    if not callable(hasher):
        # Accept sha/md5 modules as well as callables
        hasher = hasher.new
    if length > 26 and hasher is md5:
        raise ValueError(
            "md5 cannot create hashes longer than 26 characters in "
            "length (you gave %s)" % length)
    if group is not None and group < 0:
        # A negative size would never consume the identifier below.
        raise ValueError(
            "group cannot be negative (you gave %r)" % (group,))
    if isinstance(s, six.text_type):
        s = s.encode('utf-8')
    elif not isinstance(s, six.binary_type):
        s = str(s)
        if six.PY3:
            s = s.encode('utf-8')
    modulo = base ** length
    number = 0
    # bytearray yields integers on both Python 2 and Python 3.
    for byte in bytearray(hasher(s).digest()):
        number = (number * 256 + byte) % modulo
    ident = make_identifier(number)
    if pad:
        ident = good_characters[0] * (length - len(ident)) + ident
    if group:
        # Slice from the right-hand end so a short group lands first.
        parts = [ident[max(end - group, 0):end]
                 for end in range(len(ident), 0, -group)]
        parts.reverse()
        ident = '-'.join(parts)
    if upper:
        ident = ident.upper()
    return prefix + ident


# doctest tests:
__test__ = {
    'make_identifier': """
    >>> make_identifier(0)
    ''
    >>> make_identifier(1000)
    'c53'
    >>> make_identifier(-100)
    Traceback (most recent call last):
        ...
    ValueError: You cannot make identifiers out of negative numbers: -100
    >>> make_identifier('test')
    Traceback (most recent call last):
        ...
    ValueError: You can only make identifiers out of integers (not 'test')
    >>> make_identifier(1000000000000)
    'c53x9rqh3'
    """,
    'hash_identifier': """
    >>> hash_identifier(0, 5)
    'cy2dr'
    >>> hash_identifier(0, 10)
    'cy2dr6rg46'
    >>> hash_identifier('this is a test of a long string', 5)
    'awatu'
    >>> hash_identifier(0, 26)
    'cy2dr6rg46cx8t4w2f3nfexzk4'
    >>> hash_identifier(0, 30)
    Traceback (most recent call last):
        ...
    ValueError: md5 cannot create hashes longer than 26 characters in length (you gave 30)
    >>> hash_identifier(0, 10, group=4)
    'cy-2dr6-rg46'
    >>> hash_identifier(0, 10, group=4, upper=True, prefix='M-')
    'M-CY-2DR6-RG46'
    """}

if __name__ == '__main__':
    import doctest
    doctest.testmod()