1r"""UUID objects (universally unique identifiers) according to RFC 4122.
2
3This module provides immutable UUID objects (class UUID) and the functions
4uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5
5UUIDs as specified in RFC 4122.
6
7If all you want is a unique ID, you should probably call uuid1() or uuid4().
8Note that uuid1() may compromise privacy since it creates a UUID containing
9the computer's network address.  uuid4() creates a random UUID.
10
11Typical usage:
12
13    >>> import uuid
14
15    # make a UUID based on the host ID and current time
16    >>> uuid.uuid1()    # doctest: +SKIP
17    UUID('a8098c1a-f86e-11da-bd1a-00112444be1e')
18
19    # make a UUID using an MD5 hash of a namespace UUID and a name
20    >>> uuid.uuid3(uuid.NAMESPACE_DNS, 'python.org')
21    UUID('6fa459ea-ee8a-3ca4-894e-db77e160355e')
22
23    # make a random UUID
24    >>> uuid.uuid4()    # doctest: +SKIP
25    UUID('16fd2706-8baf-433b-82eb-8c7fada847da')
26
27    # make a UUID using a SHA-1 hash of a namespace UUID and a name
28    >>> uuid.uuid5(uuid.NAMESPACE_DNS, 'python.org')
29    UUID('886313e1-3b8a-5372-9b90-0c9aee199e5d')
30
31    # make a UUID from a string of hex digits (braces and hyphens ignored)
32    >>> x = uuid.UUID('{00010203-0405-0607-0809-0a0b0c0d0e0f}')
33
34    # convert a UUID to a string of hex digits in standard form
35    >>> str(x)
36    '00010203-0405-0607-0809-0a0b0c0d0e0f'
37
38    # get the raw 16 bytes of the UUID
39    >>> x.bytes
40    b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
41
42    # make a UUID from a 16-byte string
43    >>> uuid.UUID(bytes=x.bytes)
44    UUID('00010203-0405-0607-0809-0a0b0c0d0e0f')
45"""
46
47import os
48
__author__ = 'Ka-Ping Yee <ping@zesty.ca>'

# Descriptive labels for the four UUID variants; the UUID.variant property
# returns one of these strings.
RESERVED_NCS = 'reserved for NCS compatibility'
RFC_4122 = 'specified in RFC 4122'
RESERVED_MICROSOFT = 'reserved for Microsoft compatibility'
RESERVED_FUTURE = 'reserved for future definition'

# Aliases for the built-in types.  UUID.__init__ has parameters named 'int'
# and 'bytes' that hide the built-ins, so it reaches them via these names.
int_, bytes_ = int, bytes
57
class UUID(object):
    """Instances of the UUID class represent UUIDs as specified in RFC 4122.
    UUID objects are immutable, hashable, and usable as dictionary keys.
    Converting a UUID to a string with str() yields something in the form
    '12345678-1234-1234-1234-123456789abc'.  The UUID constructor accepts
    five possible forms: a similar string of hexadecimal digits, or a tuple
    of six integer fields (with 32-bit, 16-bit, 16-bit, 8-bit, 8-bit, and
    48-bit values respectively) as an argument named 'fields', or a bytes
    object of length 16 (with all the integer fields in big-endian order)
    as an argument named 'bytes', or a bytes object of length 16 (with the
    first three fields in little-endian order) as an argument named
    'bytes_le', or a single 128-bit integer as an argument named 'int'.

    UUIDs have these read-only attributes:

        bytes       the UUID as a 16-byte string (containing the six
                    integer fields in big-endian byte order)

        bytes_le    the UUID as a 16-byte string (with time_low, time_mid,
                    and time_hi_version in little-endian byte order)

        fields      a tuple of the six integer fields of the UUID,
                    which are also available as six individual attributes
                    and two derived attributes:

            time_low                the first 32 bits of the UUID
            time_mid                the next 16 bits of the UUID
            time_hi_version         the next 16 bits of the UUID
            clock_seq_hi_variant    the next 8 bits of the UUID
            clock_seq_low           the next 8 bits of the UUID
            node                    the last 48 bits of the UUID

            time                    the 60-bit timestamp
            clock_seq               the 14-bit sequence number

        hex         the UUID as a 32-character hexadecimal string

        int         the UUID as a 128-bit integer

        urn         the UUID as a URN as specified in RFC 4122

        variant     the UUID variant (one of the constants RESERVED_NCS,
                    RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE)

        version     the UUID version number (1 through 5, meaningful only
                    when the variant is RFC_4122)
    """

    def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
                       int=None, version=None):
        r"""Create a UUID from either a string of 32 hexadecimal digits,
        a bytes object of length 16 as the 'bytes' argument, a bytes object
        of length 16 in little-endian order as the 'bytes_le' argument, a
        tuple of six integers (32-bit time_low, 16-bit time_mid, 16-bit
        time_hi_version, 8-bit clock_seq_hi_variant, 8-bit clock_seq_low,
        48-bit node) as the 'fields' argument, or a single 128-bit integer
        as the 'int' argument.  When a string of hex digits is given, curly
        braces, hyphens, and a URN prefix are all optional.  For example,
        these expressions all yield the same UUID:

        UUID('{12345678-1234-5678-1234-567812345678}')
        UUID('12345678123456781234567812345678')
        UUID('urn:uuid:12345678-1234-5678-1234-567812345678')
        UUID(bytes=b'\x12\x34\x56\x78'*4)
        UUID(bytes_le=b'\x78\x56\x34\x12\x34\x12\x78\x56' +
                      b'\x12\x34\x56\x78\x12\x34\x56\x78')
        UUID(fields=(0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678))
        UUID(int=0x12345678123456781234567812345678)

        Exactly one of 'hex', 'bytes', 'bytes_le', 'fields', or 'int' must
        be given.  The 'version' argument is optional; if given, the resulting
        UUID will have its variant and version set according to RFC 4122,
        overriding the given 'hex', 'bytes', 'bytes_le', 'fields', or 'int'.

        Raises TypeError when the number of value arguments given is not
        exactly one, or when 'bytes'/'bytes_le' has the right length but is
        not a bytes object.  Raises ValueError when a value has the wrong
        length, a field or the integer is out of range, or 'version' is not
        between 1 and 5.
        """

        if [hex, bytes, bytes_le, fields, int].count(None) != 4:
            raise TypeError('one of the hex, bytes, bytes_le, fields, '
                            'or int arguments must be given')
        if hex is not None:
            # Drop the optional URN prefix, braces and hyphens.
            hex = hex.replace('urn:', '').replace('uuid:', '')
            hex = hex.strip('{}').replace('-', '')
            if len(hex) != 32:
                raise ValueError('badly formed hexadecimal UUID string')
            int = int_(hex, 16)
        if bytes_le is not None:
            if len(bytes_le) != 16:
                raise ValueError('bytes_le is not a 16-char string')
            # Reverse each of the first three fields (4, 2 and 2 bytes) to
            # get big-endian order, then continue through the 'bytes' path.
            bytes = (bytes_le[4-1::-1] + bytes_le[6-1:4-1:-1] +
                     bytes_le[8-1:6-1:-1] + bytes_le[8:])
        if bytes is not None:
            if len(bytes) != 16:
                raise ValueError('bytes is not a 16-char string')
            # Checked with an explicit raise rather than an assert so that
            # the result does not depend on whether Python runs with -O.
            if not isinstance(bytes, bytes_):
                raise TypeError('bytes and bytes_le must be bytes objects, '
                                'got %r' % (bytes,))
            int = int_.from_bytes(bytes, byteorder='big')
        if fields is not None:
            if len(fields) != 6:
                raise ValueError('fields is not a 6-tuple')
            (time_low, time_mid, time_hi_version,
             clock_seq_hi_variant, clock_seq_low, node) = fields
            if not 0 <= time_low < 1<<32:
                raise ValueError('field 1 out of range (need a 32-bit value)')
            if not 0 <= time_mid < 1<<16:
                raise ValueError('field 2 out of range (need a 16-bit value)')
            if not 0 <= time_hi_version < 1<<16:
                raise ValueError('field 3 out of range (need a 16-bit value)')
            if not 0 <= clock_seq_hi_variant < 1<<8:
                raise ValueError('field 4 out of range (need an 8-bit value)')
            if not 0 <= clock_seq_low < 1<<8:
                raise ValueError('field 5 out of range (need an 8-bit value)')
            if not 0 <= node < 1<<48:
                raise ValueError('field 6 out of range (need a 48-bit value)')
            clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low
            int = ((time_low << 96) | (time_mid << 80) |
                   (time_hi_version << 64) | (clock_seq << 48) | node)
        if int is not None:
            if not 0 <= int < 1<<128:
                raise ValueError('int is out of range (need a 128-bit value)')
        if version is not None:
            if not 1 <= version <= 5:
                raise ValueError('illegal version number')
            # Set the variant to RFC 4122.
            int &= ~(0xc000 << 48)
            int |= 0x8000 << 48
            # Set the version number.
            int &= ~(0xf000 << 64)
            int |= version << 76
        # Written through __dict__ because __setattr__ always raises.
        self.__dict__['int'] = int

    def __eq__(self, other):
        if isinstance(other, UUID):
            return self.int == other.int
        return NotImplemented

    # Q. What's the value of being able to sort UUIDs?
    # A. Use them as keys in a B-Tree or similar mapping.

    def __lt__(self, other):
        if isinstance(other, UUID):
            return self.int < other.int
        return NotImplemented

    def __gt__(self, other):
        if isinstance(other, UUID):
            return self.int > other.int
        return NotImplemented

    def __le__(self, other):
        if isinstance(other, UUID):
            return self.int <= other.int
        return NotImplemented

    def __ge__(self, other):
        if isinstance(other, UUID):
            return self.int >= other.int
        return NotImplemented

    def __hash__(self):
        return hash(self.int)

    def __int__(self):
        return self.int

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, str(self))

    def __setattr__(self, name, value):
        """Reject every attribute assignment; UUIDs are immutable."""
        raise TypeError('UUID objects are immutable')

    def __str__(self):
        """Return the 8-4-4-4-12 hyphenated lowercase hexadecimal form."""
        digits = '%032x' % self.int
        return '%s-%s-%s-%s-%s' % (
            digits[:8], digits[8:12], digits[12:16], digits[16:20],
            digits[20:])

    @property
    def bytes(self):
        """The UUID as 16 bytes in big-endian order."""
        return self.int.to_bytes(16, 'big')

    @property
    def bytes_le(self):
        """The UUID as 16 bytes with the first three fields byte-reversed."""
        bytes = self.bytes
        return (bytes[4-1::-1] + bytes[6-1:4-1:-1] + bytes[8-1:6-1:-1] +
                bytes[8:])

    @property
    def fields(self):
        """Tuple of the six integer fields, in order of appearance."""
        return (self.time_low, self.time_mid, self.time_hi_version,
                self.clock_seq_hi_variant, self.clock_seq_low, self.node)

    @property
    def time_low(self):
        """The first 32 bits of the UUID."""
        return self.int >> 96

    @property
    def time_mid(self):
        """The 16 bits following time_low."""
        return (self.int >> 80) & 0xffff

    @property
    def time_hi_version(self):
        """The 16 bits following time_mid."""
        return (self.int >> 64) & 0xffff

    @property
    def clock_seq_hi_variant(self):
        """The 8 bits following time_hi_version."""
        return (self.int >> 56) & 0xff

    @property
    def clock_seq_low(self):
        """The 8 bits following clock_seq_hi_variant."""
        return (self.int >> 48) & 0xff

    @property
    def time(self):
        """The 60-bit timestamp assembled from the three time fields."""
        return (((self.time_hi_version & 0x0fff) << 48) |
                (self.time_mid << 32) | self.time_low)

    @property
    def clock_seq(self):
        """The 14-bit sequence number assembled from the clock_seq fields."""
        return (((self.clock_seq_hi_variant & 0x3f) << 8) |
                self.clock_seq_low)

    @property
    def node(self):
        """The last 48 bits of the UUID."""
        return self.int & 0xffffffffffff

    @property
    def hex(self):
        """The UUID as a 32-character lowercase hexadecimal string."""
        return '%032x' % self.int

    @property
    def urn(self):
        """The UUID as a 'urn:uuid:' string."""
        return 'urn:uuid:' + str(self)

    @property
    def variant(self):
        """One of RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT or
        RESERVED_FUTURE, chosen by the top bits of clock_seq_hi_variant."""
        if not self.int & (0x8000 << 48):
            return RESERVED_NCS
        elif not self.int & (0x4000 << 48):
            return RFC_4122
        elif not self.int & (0x2000 << 48):
            return RESERVED_MICROSOFT
        else:
            return RESERVED_FUTURE

    @property
    def version(self):
        """The 4-bit version number, or None if the variant is not
        RFC_4122."""
        # The version bits are only meaningful for RFC 4122 UUIDs.
        if self.variant == RFC_4122:
            return int((self.int >> 76) & 0xf)
        return None
304
305def _popen(command, *args):
306    import os, shutil, subprocess
307    executable = shutil.which(command)
308    if executable is None:
309        path = os.pathsep.join(('/sbin', '/usr/sbin'))
310        executable = shutil.which(command, path=path)
311        if executable is None:
312            return None
313    # LC_ALL=C to ensure English output, stderr=DEVNULL to prevent output
314    # on stderr (Note: we don't have an example where the words we search
315    # for are actually localized, but in theory some system could do so.)
316    env = dict(os.environ)
317    env['LC_ALL'] = 'C'
318    proc = subprocess.Popen((executable,) + args,
319                            stdout=subprocess.PIPE,
320                            stderr=subprocess.DEVNULL,
321                            env=env)
322    return proc
323
def _find_mac(command, args, hw_identifiers, get_index):
    """Run 'command' and scan its output for a hardware address.

    'args' is a whitespace-separated argument string.  Each output line is
    lower-cased and split into words; whenever a word is one of
    'hw_identifiers', the word at position get_index(i) (i being the index
    of the identifier) is parsed as colon-separated hexadecimal.  The first
    non-zero value is returned.  Returns None if the command is missing,
    fails with OSError, or yields no usable address.
    """
    try:
        proc = _popen(command, *args.split())
        if not proc:
            return None
        with proc:
            for raw_line in proc.stdout:
                tokens = raw_line.lower().rstrip().split()
                for position, token in enumerate(tokens):
                    if token not in hw_identifiers:
                        continue
                    try:
                        candidate = tokens[get_index(position)]
                        value = int(candidate.replace(b':', b''), 16)
                    except (ValueError, IndexError):
                        # Virtual interfaces, such as those provided by
                        # VPNs, do not have a colon-delimited MAC address
                        # as expected, but a 16-byte HWAddr separated by
                        # dashes. These should be ignored in favor of a
                        # real MAC address
                        continue
                    if value:
                        return value
    except OSError:
        pass
    return None
348
def _ifconfig_getnode():
    """Get the hardware address on Unix by running ifconfig.

    Returns the first address found, or None if no invocation yields one.
    """
    # This works on Linux ('' or '-a'), Tru64 ('-av'), but not all Unixes.
    # The address is the word right after the 'hwaddr' / 'ether' keyword.
    keywords = [b'hwaddr', b'ether']
    for flags in ('', '-a', '-av'):
        found = _find_mac('ifconfig', flags, keywords, lambda i: i+1)
        if found:
            return found
    return None
356
def _ip_getnode():
    """Get the hardware address on Unix by running ip.

    Returns the address as an integer, or None if none was found.
    """
    # This works on Linux with iproute2.  The address is the word right
    # after 'link/ether' in the output of 'ip link list'.
    found = _find_mac('ip', 'link list', [b'link/ether'], lambda i: i+1)
    return found if found else None
363
def _arp_getnode():
    """Get the hardware address on Unix by running arp.

    Returns None when this host's own IP address cannot be resolved or no
    matching entry is found.
    """
    import os
    import socket

    try:
        hostname = socket.gethostname()
        own_ip = socket.gethostbyname(hostname)
    except OSError:
        return None

    # Try getting the MAC addr from arp based on our IP address (Solaris).
    # The address is taken from the last word of the matching line.
    needle = os.fsencode(own_ip)
    return _find_mac('arp', '-an', [needle], lambda i: -1)
374
def _lanscan_getnode():
    """Get the hardware address on Unix by running lanscan."""
    # This might work on HP-UX.
    def first_word(_position):
        # The address is the first word of the line that mentions 'lan0'.
        return 0

    interface = b'lan0'
    return _find_mac('lanscan', '-ai', [interface], first_word)
379
def _netstat_getnode():
    """Get the hardware address on Unix by running netstat.

    The first output line is treated as a header; the column titled
    'Address' is located there and the same column of each later line is
    examined.  Returns the first non-zero value that looks like a
    colon-separated MAC address (17 characters, five colons), or None.
    """
    # This might work on AIX, Tru64 UNIX and presumably on IRIX.
    try:
        proc = _popen('netstat', '-ia')
        if not proc:
            return None
        with proc:
            header = proc.stdout.readline().rstrip().split()
            if b'Address' not in header:
                return None
            column = header.index(b'Address')
            for row in proc.stdout:
                cells = row.rstrip().split()
                if column >= len(cells):
                    # Line is too short to have an address column.
                    continue
                cell = cells[column]
                if len(cell) != 17 or cell.count(b':') != 5:
                    continue
                try:
                    mac = int(cell.replace(b':', b''), 16)
                except ValueError:
                    continue
                if mac:
                    return mac
    except OSError:
        pass
    return None
405
def _ipconfig_getnode():
    """Get the hardware address on Windows by running ipconfig.exe.

    'ipconfig /all' is tried from the system directory reported by
    GetSystemDirectoryA (when ctypes can provide it), then from the search
    path, then from two conventional system32 locations.  The text after
    the last colon of each output line is examined, and the first value
    that is exactly six dash-separated hexadecimal byte pairs is returned
    as an integer.  Returns None if no such line is found.
    """
    import os, re
    candidate_dirs = ['', r'c:\windows\system32', r'c:\winnt\system32']
    try:
        import ctypes
        buffer = ctypes.create_string_buffer(300)
        ctypes.windll.kernel32.GetSystemDirectoryA(buffer, 300)
        candidate_dirs.insert(0, buffer.value.decode('mbcs'))
    except Exception:
        # Best effort only: without ctypes/windll, fall back to the
        # hard-coded directories above.
        pass
    # Anchored at the end as well as the start: re.match alone would also
    # accept longer dash-separated values (such as 8-byte adapter
    # addresses), which are not 48-bit hardware addresses.
    mac_pattern = re.compile(r'(?:[0-9a-f][0-9a-f]-){5}[0-9a-f][0-9a-f]\Z')
    for directory in candidate_dirs:
        try:
            pipe = os.popen(os.path.join(directory, 'ipconfig') + ' /all')
        except OSError:
            continue
        with pipe:
            for line in pipe:
                value = line.split(':')[-1].strip().lower()
                if mac_pattern.match(value):
                    return int(value.replace('-', ''), 16)
    return None
427
def _netbios_getnode():
    """Get the hardware address on Windows using NetBIOS calls.
    See http://support.microsoft.com/kb/118623 for details.

    Returns the first six bytes of the first adapter address obtained, as a
    big-endian integer.  Returns None when the enumeration call reports
    failure or no adapter yields a six-byte address.  Needs the 'win32wnet'
    and 'netbios' modules, imported here (presumably from pywin32 --
    confirm).
    """
    import win32wnet, netbios
    # Step 1: issue an NCBENUM command with a LANA_ENUM structure as the
    # buffer; after a successful call its 'length' and 'lana' are read.
    ncb = netbios.NCB()
    ncb.Command = netbios.NCBENUM
    ncb.Buffer = adapters = netbios.LANA_ENUM()
    adapters._pack()
    if win32wnet.Netbios(ncb) != 0:
        return
    adapters._unpack()
    # Step 2: for each enumerated adapter, issue NCBRESET and then NCBASTAT;
    # an adapter is skipped as soon as either call returns non-zero.
    for i in range(adapters.length):
        ncb.Reset()
        ncb.Command = netbios.NCBRESET
        ncb.Lana_num = ord(adapters.lana[i])
        if win32wnet.Netbios(ncb) != 0:
            continue
        ncb.Reset()
        ncb.Command = netbios.NCBASTAT
        ncb.Lana_num = ord(adapters.lana[i])
        # Call name is '*' padded with spaces to 16 characters.
        ncb.Callname = '*'.ljust(16)
        ncb.Buffer = status = netbios.ADAPTER_STATUS()
        if win32wnet.Netbios(ncb) != 0:
            continue
        status._unpack()
        # Only the first six bytes of the reported address are used.
        bytes = status.adapter_address[:6]
        if len(bytes) != 6:
            continue
        return int.from_bytes(bytes, 'big')
457
458# Thanks to Thomas Heller for ctypes and for his help with its use here.
459
# If ctypes is available, use it to find system routines for UUID generation.
# XXX This makes the module non-thread-safe!
# Both names start out as None and are only rebound below when a matching
# native routine is found.
_uuid_generate_time = _UuidCreate = None
try:
    import ctypes, ctypes.util
    import sys

    # The uuid_generate_* routines are provided by libuuid on at least
    # Linux and FreeBSD, and provided by libc on Mac OS X.
    _libnames = ['uuid']
    if not sys.platform.startswith('win'):
        _libnames.append('c')
    for libname in _libnames:
        try:
            lib = ctypes.CDLL(ctypes.util.find_library(libname))
        except Exception:
            # This library could not be loaded; try the next name.
            continue
        if hasattr(lib, 'uuid_generate_time'):
            # The first library that exports the routine wins.
            _uuid_generate_time = lib.uuid_generate_time
            break
    del _libnames

    # The uuid_generate_* functions are broken on MacOS X 10.5, as noted
    # in issue #8621 the function generates the same sequence of values
    # in the parent process and all children created using fork (unless
    # those children use exec as well).
    #
    # Assume that the uuid_generate functions are broken from 10.5 onward,
    # the test can be adjusted when a later version is fixed.
    if sys.platform == 'darwin':
        # The leading component of the kernel release string is compared
        # with 9, the threshold used here for "10.5 onward".
        if int(os.uname().release.split('.')[0]) >= 9:
            _uuid_generate_time = None

    # On Windows prior to 2000, UuidCreate gives a UUID containing the
    # hardware address.  On Windows 2000 and later, UuidCreate makes a
    # random UUID and UuidCreateSequential gives a UUID containing the
    # hardware address.  These routines are provided by the RPC runtime.
    # NOTE:  at least on Tim's WinXP Pro SP2 desktop box, while the last
    # 6 bytes returned by UuidCreateSequential are fixed, they don't appear
    # to bear any relationship to the MAC address of any network device
    # on the box.
    try:
        lib = ctypes.windll.rpcrt4
    except:
        # rpcrt4 is not reachable; with lib set to None both getattr
        # lookups below fall through to None.
        lib = None
    # Prefer UuidCreateSequential, then UuidCreate, else None.
    _UuidCreate = getattr(lib, 'UuidCreateSequential',
                          getattr(lib, 'UuidCreate', None))
except:
    # Best effort: on any failure above, keep whatever was bound before the
    # error occurred.
    pass
509
def _unixdll_getnode():
    """Get the hardware address on Unix using ctypes."""
    # Let the native routine write a UUID into a 16-byte buffer, then read
    # the node field out of the result.
    raw = ctypes.create_string_buffer(16)
    _uuid_generate_time(raw)
    generated = UUID(bytes=bytes_(raw.raw))
    return generated.node
515
def _windll_getnode():
    """Get the hardware address on Windows using ctypes.

    Returns None when the native call reports a non-zero status.
    """
    raw = ctypes.create_string_buffer(16)
    status = _UuidCreate(raw)
    if status != 0:
        return None
    generated = UUID(bytes=bytes_(raw.raw))
    return generated.node
521
522def _random_getnode():
523    """Get a random node ID, with eighth bit set as suggested by RFC 4122."""
524    import random
525    return random.getrandbits(48) | 0x010000000000
526
_node = None

def getnode():
    """Get the hardware address as a 48-bit positive integer.

    The first time this runs, it may launch a separate program, which could
    be quite slow.  If all attempts to obtain the hardware address fail, we
    choose a random 48-bit number with its eighth bit set to 1 as recommended
    in RFC 4122.

    The result is cached in the module-level '_node', so later calls return
    the same value without probing again.
    """

    global _node
    if _node is not None:
        return _node

    import sys
    if sys.platform == 'win32':
        getters = [_windll_getnode, _netbios_getnode, _ipconfig_getnode]
    else:
        getters = [_unixdll_getnode, _ifconfig_getnode, _ip_getnode,
                   _arp_getnode, _lanscan_getnode, _netstat_getnode]

    for getter in getters + [_random_getnode]:
        try:
            candidate = getter()
        except Exception:
            # A getter that does not apply to this system simply fails;
            # move on to the next one.
            continue
        # Some interfaces report hardware addresses wider than 48 bits.
        # Such a value cannot be used as a UUID node (the UUID constructor
        # rejects it), so it is neither returned nor cached.
        if candidate is not None and 0 <= candidate < (1 << 48):
            _node = candidate
            return _node
556
_last_timestamp = None

def uuid1(node=None, clock_seq=None):
    """Generate a UUID from a host ID, sequence number, and the current time.
    If 'node' is not given, getnode() is used to obtain the hardware
    address.  If 'clock_seq' is given, it is used as the sequence number;
    otherwise a random 14-bit sequence number is chosen.

    Raises ValueError (from the UUID constructor) if 'node' does not fit in
    48 bits."""

    global _last_timestamp

    # When the system provides a version-1 UUID generator, use it (but don't
    # use UuidCreate here because its UUIDs don't conform to RFC 4122).
    if _uuid_generate_time and node is clock_seq is None:
        _buffer = ctypes.create_string_buffer(16)
        _uuid_generate_time(_buffer)
        return UUID(bytes=bytes_(_buffer.raw))

    import time
    if hasattr(time, 'time_ns'):
        # Integer nanoseconds, with no float rounding at all.
        nanoseconds = time.time_ns()
    else:
        nanoseconds = int(time.time() * 1e9)
    # 0x01b21dd213814000 is the number of 100-ns intervals between the
    # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
    # Integer division keeps every bit; dividing with '/' would go through
    # a float, which cannot represent values of this size exactly.
    timestamp = nanoseconds // 100 + 0x01b21dd213814000
    # Never hand out a timestamp that is not later than the previous one.
    if _last_timestamp is not None and timestamp <= _last_timestamp:
        timestamp = _last_timestamp + 1
    _last_timestamp = timestamp
    if clock_seq is None:
        import random
        clock_seq = random.getrandbits(14) # instead of stable storage
    time_low = timestamp & 0xffffffff
    time_mid = (timestamp >> 32) & 0xffff
    time_hi_version = (timestamp >> 48) & 0x0fff
    clock_seq_low = clock_seq & 0xff
    clock_seq_hi_variant = (clock_seq >> 8) & 0x3f
    if node is None:
        node = getnode()
    return UUID(fields=(time_low, time_mid, time_hi_version,
                        clock_seq_hi_variant, clock_seq_low, node), version=1)
593
def uuid3(namespace, name):
    """Generate a UUID from the MD5 hash of a namespace UUID and a name.

    'namespace' is a UUID.  'name' is either a str, which is encoded as
    UTF-8 before hashing, or a bytes object, which is hashed as given.
    """
    from hashlib import md5
    if isinstance(name, str):
        name = bytes(name, "utf-8")
    digest = md5(namespace.bytes + name).digest()
    return UUID(bytes=digest[:16], version=3)
599
def uuid4():
    """Generate a random UUID."""
    # 16 bytes from the operating system; the constructor then overwrites
    # the variant and version bits.
    random_bytes = os.urandom(16)
    return UUID(version=4, bytes=random_bytes)
603
def uuid5(namespace, name):
    """Generate a UUID from the SHA-1 hash of a namespace UUID and a name.

    'namespace' is a UUID.  'name' is either a str, which is encoded as
    UTF-8 before hashing, or a bytes object, which is hashed as given.
    Only the first 16 bytes of the 20-byte digest are used.
    """
    from hashlib import sha1
    if isinstance(name, str):
        name = bytes(name, "utf-8")
    digest = sha1(namespace.bytes + name).digest()
    return UUID(bytes=digest[:16], version=5)
609
# Standard namespace UUIDs, for use as the 'namespace' argument of uuid3()
# or uuid5().

NAMESPACE_DNS = UUID(hex='6ba7b810-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_URL = UUID(hex='6ba7b811-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_OID = UUID(hex='6ba7b812-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_X500 = UUID(hex='6ba7b814-9dad-11d1-80b4-00c04fd430c8')
616