1r"""UUID objects (universally unique identifiers) according to RFC 4122.
2
3This module provides immutable UUID objects (class UUID) and the functions
4uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5
5UUIDs as specified in RFC 4122.
6
7If all you want is a unique ID, you should probably call uuid1() or uuid4().
8Note that uuid1() may compromise privacy since it creates a UUID containing
9the computer's network address.  uuid4() creates a random UUID.
10
11Typical usage:
12
13    >>> import uuid
14
15    # make a UUID based on the host ID and current time
16    >>> uuid.uuid1()
17    UUID('a8098c1a-f86e-11da-bd1a-00112444be1e')
18
19    # make a UUID using an MD5 hash of a namespace UUID and a name
20    >>> uuid.uuid3(uuid.NAMESPACE_DNS, 'python.org')
21    UUID('6fa459ea-ee8a-3ca4-894e-db77e160355e')
22
23    # make a random UUID
24    >>> uuid.uuid4()
25    UUID('16fd2706-8baf-433b-82eb-8c7fada847da')
26
27    # make a UUID using a SHA-1 hash of a namespace UUID and a name
28    >>> uuid.uuid5(uuid.NAMESPACE_DNS, 'python.org')
29    UUID('886313e1-3b8a-5372-9b90-0c9aee199e5d')
30
31    # make a UUID from a string of hex digits (braces and hyphens ignored)
32    >>> x = uuid.UUID('{00010203-0405-0607-0809-0a0b0c0d0e0f}')
33
34    # convert a UUID to a string of hex digits in standard form
35    >>> str(x)
36    '00010203-0405-0607-0809-0a0b0c0d0e0f'
37
38    # get the raw 16 bytes of the UUID
39    >>> x.bytes
40    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
41
42    # make a UUID from a 16-byte string
43    >>> uuid.UUID(bytes=x.bytes)
44    UUID('00010203-0405-0607-0809-0a0b0c0d0e0f')
45"""
46
47import os
48
__author__ = 'Ka-Ping Yee <ping@zesty.ca>'

# Human-readable labels for the four UUID variants.  UUID.variant returns
# one of these strings.
RESERVED_NCS = 'reserved for NCS compatibility'
RFC_4122 = 'specified in RFC 4122'
RESERVED_MICROSOFT = 'reserved for Microsoft compatibility'
RESERVED_FUTURE = 'reserved for future definition'
54
class UUID(object):
    """Instances of the UUID class represent UUIDs as specified in RFC 4122.
    UUID objects are immutable, hashable, and usable as dictionary keys.
    Converting a UUID to a string with str() yields something in the form
    '12345678-1234-1234-1234-123456789abc'.  The UUID constructor accepts
    five possible forms: a similar string of hexadecimal digits, or a tuple
    of six integer fields (with 32-bit, 16-bit, 16-bit, 8-bit, 8-bit, and
    48-bit values respectively) as an argument named 'fields', or a string
    of 16 bytes (with all the integer fields in big-endian order) as an
    argument named 'bytes', or a string of 16 bytes (with the first three
    fields in little-endian order) as an argument named 'bytes_le', or a
    single 128-bit integer as an argument named 'int'.  Besides byte
    strings, 'bytes' and 'bytes_le' also accept a 16-item bytearray.

    UUIDs have these read-only attributes:

        bytes       the UUID as a 16-byte string (containing the six
                    integer fields in big-endian byte order)

        bytes_le    the UUID as a 16-byte string (with time_low, time_mid,
                    and time_hi_version in little-endian byte order)

        fields      a tuple of the six integer fields of the UUID,
                    which are also available as six individual attributes
                    and two derived attributes:

            time_low                the first 32 bits of the UUID
            time_mid                the next 16 bits of the UUID
            time_hi_version         the next 16 bits of the UUID
            clock_seq_hi_variant    the next 8 bits of the UUID
            clock_seq_low           the next 8 bits of the UUID
            node                    the last 48 bits of the UUID

            time                    the 60-bit timestamp
            clock_seq               the 14-bit sequence number

        hex         the UUID as a 32-character hexadecimal string

        int         the UUID as a 128-bit integer

        urn         the UUID as a URN as specified in RFC 4122

        variant     the UUID variant (one of the constants RESERVED_NCS,
                    RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE)

        version     the UUID version number (1 through 5, meaningful only
                    when the variant is RFC_4122)
    """

    @staticmethod
    def _parse_hex(digits):
        """Return the integer value of the hexadecimal string 'digits'."""
        # __init__ has a parameter named 'int' that shadows the builtin, so
        # the conversion lives here where the builtin is still reachable.
        return int(digits, 16)

    @staticmethod
    def _octets(data):
        """Return the items of the byte sequence 'data' as a list of ints."""
        try:
            return list(bytearray(data))
        except TypeError:
            # Text strings cannot become a bytearray without an encoding;
            # fall back to the code point of each character.
            return [ord(char) for char in data]

    def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
                       int=None, version=None):
        r"""Create a UUID from either a string of 32 hexadecimal digits,
        a string of 16 bytes as the 'bytes' argument, a string of 16 bytes
        in little-endian order as the 'bytes_le' argument, a tuple of six
        integers (32-bit time_low, 16-bit time_mid, 16-bit time_hi_version,
        8-bit clock_seq_hi_variant, 8-bit clock_seq_low, 48-bit node) as
        the 'fields' argument, or a single 128-bit integer as the 'int'
        argument.  When a string of hex digits is given, curly braces,
        hyphens, and a URN prefix are all optional.  For example, these
        expressions all yield the same UUID:

        UUID('{12345678-1234-5678-1234-567812345678}')
        UUID('12345678123456781234567812345678')
        UUID('urn:uuid:12345678-1234-5678-1234-567812345678')
        UUID(bytes='\x12\x34\x56\x78'*4)
        UUID(bytes_le='\x78\x56\x34\x12\x34\x12\x78\x56' +
                      '\x12\x34\x56\x78\x12\x34\x56\x78')
        UUID(fields=(0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678))
        UUID(int=0x12345678123456781234567812345678)

        Exactly one of 'hex', 'bytes', 'bytes_le', 'fields', or 'int' must
        be given.  The 'version' argument is optional; if given, the resulting
        UUID will have its variant and version set according to RFC 4122,
        overriding the given 'hex', 'bytes', 'bytes_le', 'fields', or 'int'.

        Raises TypeError unless exactly one of the five value arguments is
        given, and ValueError when the given value has the wrong length or
        is out of range, or when 'version' is not between 1 and 5.
        """

        if [hex, bytes, bytes_le, fields, int].count(None) != 4:
            raise TypeError('need one of hex, bytes, bytes_le, fields, or int')
        if hex is not None:
            hex = hex.replace('urn:', '').replace('uuid:', '')
            hex = hex.strip('{}').replace('-', '')
            if len(hex) != 32:
                raise ValueError('badly formed hexadecimal UUID string')
            int = self._parse_hex(hex)
        if bytes_le is not None:
            if len(bytes_le) != 16:
                raise ValueError('bytes_le is not a 16-char string')
            # Reverse the first three fields (4, 2 and 2 bytes) to obtain
            # big-endian order.  Slicing, unlike item-by-item concatenation,
            # works for byte strings and bytearrays alike.
            bytes = (bytes_le[3::-1] + bytes_le[5:3:-1] + bytes_le[7:5:-1] +
                     bytes_le[8:])
        if bytes is not None:
            if len(bytes) != 16:
                raise ValueError('bytes is not a 16-char string')
            int = self._parse_hex(('%02x'*16) % tuple(self._octets(bytes)))
        if fields is not None:
            if len(fields) != 6:
                raise ValueError('fields is not a 6-tuple')
            (time_low, time_mid, time_hi_version,
             clock_seq_hi_variant, clock_seq_low, node) = fields
            if not 0 <= time_low < 1<<32:
                raise ValueError('field 1 out of range (need a 32-bit value)')
            if not 0 <= time_mid < 1<<16:
                raise ValueError('field 2 out of range (need a 16-bit value)')
            if not 0 <= time_hi_version < 1<<16:
                raise ValueError('field 3 out of range (need a 16-bit value)')
            if not 0 <= clock_seq_hi_variant < 1<<8:
                raise ValueError('field 4 out of range (need an 8-bit value)')
            if not 0 <= clock_seq_low < 1<<8:
                raise ValueError('field 5 out of range (need an 8-bit value)')
            if not 0 <= node < 1<<48:
                raise ValueError('field 6 out of range (need a 48-bit value)')
            clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low
            int = ((time_low << 96) | (time_mid << 80) |
                   (time_hi_version << 64) | (clock_seq << 48) | node)
        if int is not None:
            if not 0 <= int < 1<<128:
                raise ValueError('int is out of range (need a 128-bit value)')
        if version is not None:
            if not 1 <= version <= 5:
                raise ValueError('illegal version number')
            # Set the variant to RFC 4122.
            int &= ~(0xc000 << 48)
            int |= 0x8000 << 48
            # Set the version number.
            int &= ~(0xf000 << 64)
            int |= version << 76
        # Store through __dict__ because __setattr__ rejects every assignment.
        self.__dict__['int'] = int

    def __cmp__(self, other):
        # Python 2 comparison hook: UUIDs order by their integer value.
        if isinstance(other, UUID):
            return cmp(self.int, other.int)
        return NotImplemented

    def __hash__(self):
        return hash(self.int)

    def __int__(self):
        return self.int

    def __repr__(self):
        return 'UUID(%r)' % str(self)

    def __setattr__(self, name, value):
        raise TypeError('UUID objects are immutable')

    def __str__(self):
        hex = '%032x' % self.int
        return '%s-%s-%s-%s-%s' % (
            hex[:8], hex[8:12], hex[12:16], hex[16:20], hex[20:])

    def get_bytes(self):
        """Return the UUID as a 16-byte string in big-endian order."""
        # Collect the octets most-significant first in one pass instead of
        # prepending to a string sixteen times.
        octets = bytearray((self.int >> shift) & 0xff
                           for shift in range(120, -8, -8))
        return bytes(octets)

    bytes = property(get_bytes)

    def get_bytes_le(self):
        """Return the 16 bytes with the first three fields little-endian."""
        raw = self.bytes
        return raw[3::-1] + raw[5:3:-1] + raw[7:5:-1] + raw[8:]

    bytes_le = property(get_bytes_le)

    def get_fields(self):
        """Return the six integer fields of the UUID as a tuple."""
        return (self.time_low, self.time_mid, self.time_hi_version,
                self.clock_seq_hi_variant, self.clock_seq_low, self.node)

    fields = property(get_fields)

    def get_time_low(self):
        return self.int >> 96

    time_low = property(get_time_low)

    def get_time_mid(self):
        return (self.int >> 80) & 0xffff

    time_mid = property(get_time_mid)

    def get_time_hi_version(self):
        return (self.int >> 64) & 0xffff

    time_hi_version = property(get_time_hi_version)

    def get_clock_seq_hi_variant(self):
        return (self.int >> 56) & 0xff

    clock_seq_hi_variant = property(get_clock_seq_hi_variant)

    def get_clock_seq_low(self):
        return (self.int >> 48) & 0xff

    clock_seq_low = property(get_clock_seq_low)

    def get_time(self):
        """Return the 60-bit timestamp assembled from the three time fields."""
        return (((self.time_hi_version & 0x0fff) << 48) |
                (self.time_mid << 32) | self.time_low)

    time = property(get_time)

    def get_clock_seq(self):
        """Return the 14-bit sequence number."""
        return (((self.clock_seq_hi_variant & 0x3f) << 8) |
                self.clock_seq_low)

    clock_seq = property(get_clock_seq)

    def get_node(self):
        return self.int & 0xffffffffffff

    node = property(get_node)

    def get_hex(self):
        return '%032x' % self.int

    hex = property(get_hex)

    def get_urn(self):
        return 'urn:uuid:' + str(self)

    urn = property(get_urn)

    def get_variant(self):
        """Return the variant constant selected by the top clock_seq bits."""
        if not self.int & (0x8000 << 48):
            return RESERVED_NCS
        elif not self.int & (0x4000 << 48):
            return RFC_4122
        elif not self.int & (0x2000 << 48):
            return RESERVED_MICROSOFT
        else:
            return RESERVED_FUTURE

    variant = property(get_variant)

    def get_version(self):
        """Return the version number, or None for non-RFC 4122 variants."""
        # The version bits are only meaningful for RFC 4122 UUIDs.
        if self.variant == RFC_4122:
            return int((self.int >> 76) & 0xf)
        return None

    version = property(get_version)
295
296def _popen(command, args):
297    import os
298    path = os.environ.get("PATH", os.defpath).split(os.pathsep)
299    path.extend(('/sbin', '/usr/sbin'))
300    for dir in path:
301        executable = os.path.join(dir, command)
302        if (os.path.exists(executable) and
303            os.access(executable, os.F_OK | os.X_OK) and
304            not os.path.isdir(executable)):
305            break
306    else:
307        return None
308    # LC_ALL to ensure English output, 2>/dev/null to prevent output on
309    # stderr (Note: we don't have an example where the words we search for
310    # are actually localized, but in theory some system could do so.)
311    cmd = 'LC_ALL=C %s %s 2>/dev/null' % (executable, args)
312    return os.popen(cmd)
313
def _find_mac(command, args, hw_identifiers, get_index):
    """Scan the output of 'command args' for a hardware address.

    Each output line is lower-cased and split into words.  Whenever a word
    is one of 'hw_identifiers', get_index(i) -- with i the position of that
    word -- gives the position of the word expected to hold the address,
    which is parsed as hexadecimal after removing colons.

    Returns the first non-zero address found as an integer, or None when
    the command is unavailable, cannot be run or read, or yields no
    address.
    """
    try:
        pipe = _popen(command, args)
        if not pipe:
            return None
        with pipe:
            for line in pipe:
                words = line.lower().rstrip().split()
                for i, word in enumerate(words):
                    if word not in hw_identifiers:
                        continue
                    try:
                        mac = int(words[get_index(i)].replace(':', ''), 16)
                    except (ValueError, IndexError):
                        # Virtual interfaces, such as those provided by
                        # VPNs, do not have a colon-delimited MAC address
                        # as expected, but a 16-byte HWAddr separated by
                        # dashes. These should be ignored in favor of a
                        # real MAC address
                        continue
                    if mac:
                        return mac
    except EnvironmentError:
        # Covers both IOError (reading the pipe) and OSError (starting the
        # command); either way this probe simply yields nothing.
        pass
    return None
def _ifconfig_getnode():
    """Get the hardware address on Unix by running ifconfig."""
    # This works on Linux ('' or '-a'), Tru64 ('-av'), but not all Unixes.
    labels = ('hwaddr', 'ether', 'address:', 'lladdr')

    def word_after(position):
        # The address is the word right after the matched label.
        return position + 1

    for option in ('', '-a', '-av'):
        address = _find_mac('ifconfig', option, labels, word_after)
        if address:
            return address
    return None
347
def _arp_getnode():
    """Get the hardware address on Unix by running arp."""
    import socket
    try:
        ip_addr = socket.gethostbyname(socket.gethostname())
    except EnvironmentError:
        return None

    # Each probe pairs the word to look for in the 'arp -an' output with a
    # function giving the position of the MAC address relative to it.  The
    # probes are tried in this order and the first hit wins.
    probes = (
        # Try getting the MAC addr from arp based on our IP address
        # (Solaris): the address is the last word of the line.
        ([ip_addr], lambda i: -1),
        # This works on OpenBSD
        ([ip_addr], lambda i: i+1),
        # This works on Linux, FreeBSD and NetBSD
        (['(%s)' % ip_addr], lambda i: i+2),
    )
    for identifiers, locate in probes:
        mac = _find_mac('arp', '-an', identifiers, locate)
        if mac:
            return mac
    return None
371
def _lanscan_getnode():
    """Get the hardware address on Unix by running lanscan."""
    # This might work on HP-UX.
    def first_word(_position):
        # The address is taken from the first word of the 'lan0' line.
        return 0

    return _find_mac('lanscan', '-ai', ['lan0'], first_word)
376
def _netstat_getnode():
    """Get the hardware address on Unix by running netstat.

    The first output line is treated as a header; the column named
    'Address' is then read on every following line.  Returns the first
    non-zero value that looks like a colon-separated 6-byte address, as an
    integer, or None when netstat is unavailable, cannot be run or read,
    or shows no such value.
    """
    # This might work on AIX, Tru64 UNIX and presumably on IRIX.
    try:
        pipe = _popen('netstat', '-ia')
        if not pipe:
            return None
        with pipe:
            header = pipe.readline().rstrip().split()
            try:
                column = header.index('Address')
            except ValueError:
                return None
            for line in pipe:
                try:
                    word = line.rstrip().split()[column]
                    if len(word) == 17 and word.count(':') == 5:
                        mac = int(word.replace(':', ''), 16)
                        if mac:
                            return mac
                except (ValueError, IndexError):
                    # Short line or non-hexadecimal value: not an address.
                    pass
    except EnvironmentError:
        # Covers both OSError (starting the command) and IOError (reading
        # the pipe); either way this probe simply yields nothing.
        pass
    return None
402
def _ipconfig_getnode():
    """Get the hardware address on Windows by running ipconfig.exe.

    'ipconfig /all' is tried from the system directory reported by
    GetSystemDirectoryA (when ctypes can provide it), then via the default
    command lookup, then from two hard-coded system32 locations.  Returns
    the first value after a colon that looks like a dash-separated 6-byte
    address, as an integer, or None when nothing matches.
    """
    import os, re
    # Compiled once instead of being looked up for every output line.
    mac_pattern = re.compile(r'(?:[0-9a-f][0-9a-f]-){5}[0-9a-f][0-9a-f]$')
    dirs = ['', r'c:\windows\system32', r'c:\winnt\system32']
    try:
        import ctypes
        buffer = ctypes.create_string_buffer(300)
        ctypes.windll.kernel32.GetSystemDirectoryA(buffer, 300)
        dirs.insert(0, buffer.value.decode('mbcs'))
    except Exception:
        # Best effort only: without ctypes or the Windows API the default
        # directory list is used.  KeyboardInterrupt and SystemExit are
        # deliberately not swallowed.
        pass
    for dir in dirs:
        try:
            pipe = os.popen(os.path.join(dir, 'ipconfig') + ' /all')
        except EnvironmentError:
            continue
        with pipe:
            for line in pipe:
                value = line.split(':')[-1].strip().lower()
                if mac_pattern.match(value):
                    return int(value.replace('-', ''), 16)
    return None
424
def _netbios_getnode():
    """Get the hardware address on Windows using NetBIOS calls.
    See http://support.microsoft.com/kb/118623 for details."""
    import win32wnet, netbios
    request = netbios.NCB()
    request.Command = netbios.NCBENUM
    adapters = netbios.LANA_ENUM()
    request.Buffer = adapters
    adapters._pack()
    if win32wnet.Netbios(request) != 0:
        return None
    adapters._unpack()
    for index in range(adapters.length):
        # Reset the adapter; skip it if the reset is refused.
        request.Reset()
        request.Command = netbios.NCBRESET
        request.Lana_num = ord(adapters.lana[index])
        if win32wnet.Netbios(request) != 0:
            continue
        # Ask the adapter for its status; skip it if the query fails.
        request.Reset()
        request.Command = netbios.NCBASTAT
        request.Lana_num = ord(adapters.lana[index])
        request.Callname = '*'.ljust(16)
        status = netbios.ADAPTER_STATUS()
        request.Buffer = status
        if win32wnet.Netbios(request) != 0:
            continue
        status._unpack()
        octets = [ord(char) for char in status.adapter_address]
        # Combine the first six octets, most significant first.
        return sum(octets[k] << (40 - 8 * k) for k in range(6))
    return None
453
454# Thanks to Thomas Heller for ctypes and for his help with its use here.
455
456# If ctypes is available, use it to find system routines for UUID generation.
457_uuid_generate_time = _UuidCreate = None
458try:
459    import ctypes, ctypes.util
460    import sys
461
462    # The uuid_generate_* routines are provided by libuuid on at least
463    # Linux and FreeBSD, and provided by libc on Mac OS X.
464    _libnames = ['uuid']
465    if not sys.platform.startswith('win'):
466        _libnames.append('c')
467    for libname in _libnames:
468        try:
469            lib = ctypes.CDLL(ctypes.util.find_library(libname))
470        except:
471            continue
472        if hasattr(lib, 'uuid_generate_time'):
473            _uuid_generate_time = lib.uuid_generate_time
474            break
475    del _libnames
476
477    # The uuid_generate_* functions are broken on MacOS X 10.5, as noted
478    # in issue #8621 the function generates the same sequence of values
479    # in the parent process and all children created using fork (unless
480    # those children use exec as well).
481    #
482    # Assume that the uuid_generate functions are broken from 10.5 onward,
483    # the test can be adjusted when a later version is fixed.
484    if sys.platform == 'darwin':
485        import os
486        if int(os.uname()[2].split('.')[0]) >= 9:
487            _uuid_generate_time = None
488
489    # On Windows prior to 2000, UuidCreate gives a UUID containing the
490    # hardware address.  On Windows 2000 and later, UuidCreate makes a
491    # random UUID and UuidCreateSequential gives a UUID containing the
492    # hardware address.  These routines are provided by the RPC runtime.
493    # NOTE:  at least on Tim's WinXP Pro SP2 desktop box, while the last
494    # 6 bytes returned by UuidCreateSequential are fixed, they don't appear
495    # to bear any relationship to the MAC address of any network device
496    # on the box.
497    try:
498        lib = ctypes.windll.rpcrt4
499    except:
500        lib = None
501    _UuidCreate = getattr(lib, 'UuidCreateSequential',
502                          getattr(lib, 'UuidCreate', None))
503except:
504    pass
505
def _unixdll_getnode():
    """Get the hardware address on Unix using ctypes."""
    # Let the system routine fill a 16-byte buffer with a UUID and return
    # that UUID's node field.
    raw = ctypes.create_string_buffer(16)
    _uuid_generate_time(raw)
    generated = UUID(bytes=raw.raw)
    return generated.node
511
def _windll_getnode():
    """Get the hardware address on Windows using ctypes."""
    raw = ctypes.create_string_buffer(16)
    status = _UuidCreate(raw)
    if status != 0:
        # Any non-zero status is treated as "no address available".
        return None
    return UUID(bytes=raw.raw).node
517
518def _random_getnode():
519    """Get a random node ID, with eighth bit set as suggested by RFC 4122."""
520    import random
521    return random.randrange(0, 1<<48L) | 0x010000000000L
522
# Hardware address cached by getnode() after the first successful lookup.
_node = None

# Lookup strategies per platform, tried in order by getnode().
_NODE_GETTERS_WIN32 = [_windll_getnode, _netbios_getnode, _ipconfig_getnode]

_NODE_GETTERS_UNIX = [_unixdll_getnode, _ifconfig_getnode, _arp_getnode,
                      _lanscan_getnode, _netstat_getnode]

def getnode():
    """Get the hardware address as a 48-bit positive integer.

    The first time this runs, it may launch a separate program, which could
    be quite slow.  If all attempts to obtain the hardware address fail, we
    choose a random 48-bit number with its eighth bit set to 1 as recommended
    in RFC 4122.  The result is cached, so later calls return the same value.
    """

    global _node
    if _node is not None:
        return _node

    import sys
    if sys.platform == 'win32':
        getters = _NODE_GETTERS_WIN32
    else:
        getters = _NODE_GETTERS_UNIX

    for getter in getters + [_random_getnode]:
        try:
            candidate = getter()
        except Exception:
            # A failing strategy just means "try the next one";
            # KeyboardInterrupt and SystemExit are not swallowed.
            continue
        # Publish to the cache only once the value is known to be valid, so
        # the global never holds a rejected result.
        if (candidate is not None) and (0 <= candidate < (1 << 48)):
            _node = candidate
            return _node
    # Raised explicitly (not via 'assert') so it still fires under -O.
    raise AssertionError(
        '_random_getnode() returned invalid value: {}'.format(candidate))
557
558
# Timestamp used by the most recent uuid1() call that built its own UUID.
_last_timestamp = None

def uuid1(node=None, clock_seq=None):
    """Generate a UUID from a host ID, sequence number, and the current time.
    If 'node' is not given, getnode() is used to obtain the hardware
    address.  If 'clock_seq' is given, it is used as the sequence number;
    otherwise a random 14-bit sequence number is chosen."""
    global _last_timestamp

    # When the system provides a version-1 UUID generator, use it (but don't
    # use UuidCreate here because its UUIDs don't conform to RFC 4122).
    if _uuid_generate_time and node is None and clock_seq is None:
        raw = ctypes.create_string_buffer(16)
        _uuid_generate_time(raw)
        return UUID(bytes=raw.raw)

    import time
    now_ns = int(time.time() * 1e9)
    # 0x01b21dd213814000 is the number of 100-ns intervals between the
    # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
    ticks = int(now_ns // 100) + 0x01b21dd213814000
    # Never hand out a timestamp that is not later than the previous one.
    if _last_timestamp is not None and ticks <= _last_timestamp:
        ticks = _last_timestamp + 1
    _last_timestamp = ticks

    if clock_seq is None:
        import random
        clock_seq = random.randrange(1 << 14)  # instead of stable storage

    low = ticks & 0xffffffff
    mid = (ticks >> 32) & 0xffff
    hi_version = (ticks >> 48) & 0x0fff
    seq_low = clock_seq & 0xff
    seq_hi_variant = (clock_seq >> 8) & 0x3f
    if node is None:
        node = getnode()
    return UUID(fields=(low, mid, hi_version, seq_hi_variant, seq_low, node),
                version=1)
595
def uuid3(namespace, name):
    """Generate a UUID from the MD5 hash of a namespace UUID and a name.

    'namespace' is a UUID whose 16 bytes prefix the hashed data.  'name'
    is a byte string; a text (unicode) string is accepted too and is
    encoded as UTF-8 before hashing.
    """
    from hashlib import md5
    if isinstance(name, type(u'')):
        # Concatenating text with the raw namespace bytes would force an
        # implicit ASCII conversion that fails for most namespaces.
        name = name.encode('utf-8')
    digest = md5(namespace.bytes + name).digest()
    return UUID(bytes=digest[:16], version=3)
601
def uuid4():
    """Generate a random UUID."""
    # 16 bytes from the operating system's random source; the constructor
    # then overwrites the variant and version bits.
    random_bytes = os.urandom(16)
    return UUID(bytes=random_bytes, version=4)
605
def uuid5(namespace, name):
    """Generate a UUID from the SHA-1 hash of a namespace UUID and a name.

    'namespace' is a UUID whose 16 bytes prefix the hashed data.  'name'
    is a byte string; a text (unicode) string is accepted too and is
    encoded as UTF-8 before hashing.  Only the first 16 bytes of the
    digest are used.
    """
    from hashlib import sha1
    if isinstance(name, type(u'')):
        # Concatenating text with the raw namespace bytes would force an
        # implicit ASCII conversion that fails for most namespaces.
        name = name.encode('utf-8')
    digest = sha1(namespace.bytes + name).digest()
    return UUID(bytes=digest[:16], version=5)
611
# Standard namespace UUIDs, intended as the 'namespace' argument of uuid3()
# or uuid5().

NAMESPACE_DNS = UUID(hex='6ba7b810-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_URL = UUID(hex='6ba7b811-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_OID = UUID(hex='6ba7b812-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_X500 = UUID(hex='6ba7b814-9dad-11d1-80b4-00c04fd430c8')
618