1"""distutils.util
2
3Miscellaneous utility functions -- anything that doesn't fit into
4one of the other *util.py modules.
5"""
6
7import os
8import re
9import importlib.util
10import string
11import sys
12from distutils.errors import DistutilsPlatformError
13from distutils.dep_util import newer
14from distutils.spawn import spawn
15from distutils import log
16from distutils.errors import DistutilsByteCompileError
17
def get_platform ():
    """Return a string identifying the current platform.

    The result is mainly used to tell apart platform-specific build
    directories and platform-specific built distributions.  On POSIX
    systems it is derived from 'os.uname()' and usually combines the OS
    name, its release and the machine architecture, although the exact
    layout depends on the OS (on Linux, for instance, the kernel release
    is left out).

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u

    On Windows the result is one of:
       win-amd64 (when 'amd64' appears in 'sys.version')
       win32 (everything else -- this is simply 'sys.platform')

    When the '_PYTHON_HOST_PLATFORM' environment variable is set (cross
    builds) its value is returned unchanged on non-Windows systems.  Any
    other platform that is not POSIX, or that lacks 'os.uname()', just
    gets 'sys.platform'.
    """
    if os.name == 'nt':
        return 'win-amd64' if 'amd64' in sys.version.lower() else sys.platform

    # An explicit setting wins: this is how cross builds name their target.
    host_platform = os.environ.get("_PYTHON_HOST_PLATFORM")
    if host_platform is not None:
        return host_platform

    if not (os.name == "posix" and hasattr(os, 'uname')):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # From here on we are telling the various flavours of Unix apart.
    (osname, host, release, version, machine) = os.uname()

    # Normalise: lowercase OS name without '/', and a machine name that
    # contains neither spaces (think "Power Macintosh") nor slashes.
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_').replace('/', '-')

    # Two systems use a layout of their own and return straight away.
    if osname.startswith("linux"):
        # At least on Linux/Intel, 'machine' is the processor -- i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)
    if osname.startswith("aix"):
        return "%s-%s.%s" % (osname, version, release)

    # The remaining systems only adjust the individual pieces and then
    # share the generic osname-release-machine layout below.
    if osname.startswith("sunos"):
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
            # "platform.architecture()[0]" cannot be used here because of
            # a bootstrap problem.  The dict lookup deliberately raises
            # if sys.maxsize has an unexpected value.
            bitness = {2147483647:"32bit", 9223372036854775807:"64bit"}
            machine += ".%s" % bitness[sys.maxsize]
    elif osname.startswith("cygwin"):
        osname = "cygwin"
        # Keep only the leading run of digits and dots of the release.
        leading_version = re.match(r'[\d.]+', release, re.ASCII)
        if leading_version:
            release = leading_version.group()
    elif osname.startswith("darwin"):
        import _osx_support, distutils.sysconfig
        osname, release, machine = _osx_support.get_platform_osx(
                                        distutils.sysconfig.get_config_vars(),
                                        osname, release, machine)

    return "%s-%s-%s" % (osname, release, machine)
92
93# get_platform ()
94
95
def convert_path (pathname):
    """Return 'pathname' in the form used by the native filesystem.

    Setup scripts always spell filenames Unix style, so the name is split
    on '/' and glued back together with the local directory separator
    before it is used on the filesystem.  '.' components are dropped; if
    nothing else is left, 'os.curdir' is returned.  Where the separator
    already is '/', and for an empty name, 'pathname' is returned
    untouched.

    Raises ValueError on non-Unix-ish systems if 'pathname' either starts
    or ends with a slash.
    """
    # Nothing to translate on Unix-ish systems or for an empty name.
    if os.sep == '/' or not pathname:
        return pathname

    first_char, last_char = pathname[0], pathname[-1]
    if first_char == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if last_char == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    # Current-directory markers carry no information; leave them out.
    components = [part for part in pathname.split('/') if part != '.']
    return os.path.join(*components) if components else os.curdir
120
121# convert_path ()
122
123
def change_root (new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.

    If 'pathname' is relative, this is equivalent to
    "os.path.join(new_root,pathname)".  Otherwise 'pathname' is first made
    relative (every leading separator is removed, and on Windows the
    drive is dropped as well) and then joined onto 'new_root'.

    Raises DistutilsPlatformError when 'os.name' is neither 'posix' nor
    'nt'.
    """
    if os.name == 'posix':
        # Strip *all* leading slashes: with only one removed, a name such
        # as '//usr/lib' would still be absolute and os.path.join() would
        # silently throw 'new_root' away.
        while os.path.isabs(pathname):
            pathname = pathname[1:]
        return os.path.join(new_root, pathname)

    elif os.name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # Slicing instead of indexing keeps an empty path (e.g. a bare
        # drive) from raising IndexError.  Both separators are stripped
        # because a leading '/' would also make os.path.join() discard
        # the directory part of 'new_root'.
        while path[:1] in ('\\', '/'):
            path = path[1:]
        return os.path.join(new_root, path)

    else:
        raise DistutilsPlatformError("nothing known about platform '%s'" % os.name)
144
145
# Set to a true value once 'check_environ()' has completed, so that the
# work is done at most once per process.
_environ_checked = 0
def check_environ ():
    """Make sure 'os.environ' holds every variable distutils guarantees.

    Users may rely on these variables in config files, command-line
    options, etc.  Currently they are:
      HOME - user's home directory (Unix only)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    Variables that are already set are left alone.  Only the first call
    does any work; later calls return immediately.
    """
    global _environ_checked
    if _environ_checked:
        return

    environ = os.environ

    if 'HOME' not in environ and os.name == 'posix':
        try:
            import pwd
            environ['HOME'] = pwd.getpwuid(os.getuid()).pw_dir
        except (ImportError, KeyError):
            # bpo-10496: the current user id may be missing from the
            # password database; in that case HOME simply stays unset.
            pass

    if 'PLAT' not in environ:
        environ['PLAT'] = get_platform()

    _environ_checked = 1
172
173
def subst_vars (s, local_vars):
    """Perform shell/Perl-style variable substitution on 's'.

    Every occurrence of '$' followed by a name (a letter or underscore,
    then letters, digits or underscores) is considered a variable, and
    each variable is replaced by the value found in the 'local_vars'
    dictionary (converted with 'str()'), or in 'os.environ' if it's not
    in 'local_vars'.  'os.environ' is first checked/augmented to
    guarantee that it contains certain values: see 'check_environ()'.

    Returns the substituted string.  Raises ValueError for any variable
    found in neither 'local_vars' nor 'os.environ'.
    """
    check_environ()

    def _subst (match, local_vars=local_vars):
        var_name = match.group(1)
        if var_name in local_vars:
            return str(local_vars[var_name])
        else:
            return os.environ[var_name]

    try:
        return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
    except KeyError as exc:
        # str(KeyError) is the repr() of the missing key, which would put
        # a second pair of quotes inside the message ('$'name'').  Format
        # the key itself instead.
        missing = exc.args[0] if exc.args else exc
        raise ValueError("invalid variable '$%s'" % missing)
195
196# subst_vars ()
197
198
def grok_environment_error (exc, prefix="error: "):
    """Return 'prefix' followed by the string form of 'exc'.

    Kept only for backward compatibility: this function used to try
    clever things with EnvironmentErrors, but nowadays str(exception)
    already produces good messages.
    """
    message = str(exc)
    return prefix + message
204
205
# Patterns used by 'split_quoted()'; compiled on first use by
# '_init_regex()'.
_wordchars_re = _squote_re = _dquote_re = None
def _init_regex():
    """Compile the module-level patterns that 'split_quoted()' relies on."""
    global _wordchars_re, _squote_re, _dquote_re
    # A (possibly empty) run of characters that are neither a backslash,
    # a quote character nor whitespace.
    _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    # A complete singly-/doubly-quoted string; backslash escapes are
    # allowed inside it.
    _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')

def split_quoted (s):
    """Split a string into words following Unix shell-like rules for
    quotes and backslashes.

    Words are delimited by whitespace, unless that whitespace is escaped
    by a backslash or sits inside a quoted string.  Single and double
    quotes are equivalent, and the quote characters can be
    backslash-escaped.  The backslash is stripped from any two-character
    escape sequence outside of quotes, leaving only the escaped
    character.  The quote characters are stripped from any quoted string.

    Returns the list of words.  Raises ValueError when a quote character
    has no matching closing quote.
    """

    # The text is consumed from the front without looking at it character
    # by character: 'remaining' always starts with the word being built,
    # and 'cursor' marks how much of it has been scanned (and unescaped /
    # unquoted) already.
    if _wordchars_re is None:
        _init_regex()

    remaining = s.strip()
    result = []
    cursor = 0

    while remaining:
        # Skip over the plain word characters starting at the cursor.
        stop = _wordchars_re.match(remaining, cursor).end()
        if stop == len(remaining):
            # Ran off the end: everything left is the final word.
            result.append(remaining[:stop])
            break

        special = remaining[stop]
        if special in string.whitespace:
            # Unescaped, unquoted whitespace: a definite word delimiter.
            result.append(remaining[:stop])
            remaining = remaining[stop:].lstrip()
            cursor = 0

        elif special == '\\':
            # Drop the backslash; the character it escaped stays in the
            # current word and is stepped over.
            remaining = remaining[:stop] + remaining[stop+1:]
            cursor = stop + 1

        else:
            # Must be the start of a quoted string: slurp it whole.
            if special == "'":
                quoted = _squote_re.match(remaining, stop)
            elif special == '"':
                quoted = _dquote_re.match(remaining, stop)
            else:
                raise RuntimeError("this can't happen (bad char '%c')" % special)

            if quoted is None:
                raise ValueError("bad string (mismatched %s quotes?)" % special)

            # Splice the quoted text back in without its two quote
            # characters and continue scanning right after it.
            opening, closing = quoted.span()
            remaining = (remaining[:opening]
                         + remaining[opening+1:closing-1]
                         + remaining[closing:])
            cursor = closing - 2

        if cursor >= len(remaining):
            # The scan reached the end of the text: that's the last word.
            result.append(remaining)
            break

    return result
271
272# split_quoted ()
273
274
def execute (func, args, msg=None, verbose=0, dry_run=0):
    """Perform an action that affects the outside world (eg. by writing
    to the filesystem), honouring the 'dry_run' flag.

    'func' is the function to call and 'args' the argument tuple for it;
    together they embody the "external action".  'msg' is an optional
    message to log at info level; when omitted, one is built from the
    function name and its arguments.  The message is logged in any case,
    but 'func' is only called when 'dry_run' is false.  'verbose' is
    accepted but not used.
    """
    if msg is None:
        msg = "%s%r" % (func.__name__, args)
        if msg.endswith(',)'):      # correct for singleton tuple
            msg = msg[:-2] + ')'

    log.info(msg)
    if dry_run:
        return
    func(*args)
292
293
def strtobool (val):
    """Convert a string representation of truth to true (1) or false (0).

    The comparison ignores case.  True values are 'y', 'yes', 't',
    'true', 'on', and '1'; false values are 'n', 'no', 'f', 'false',
    'off', and '0'.  Raises ValueError if 'val' is anything else.
    """
    true_words = ('y', 'yes', 't', 'true', 'on', '1')
    false_words = ('n', 'no', 'f', 'false', 'off', '0')

    lowered = val.lower()
    if lowered in true_words:
        return 1
    if lowered in false_words:
        return 0
    raise ValueError("invalid truth value %r" % (lowered,))
308
309
def byte_compile (py_files,
                  optimize=0, force=0,
                  prefix=None, base_dir=None,
                  verbose=1, dry_run=0,
                  direct=None):
    """Byte-compile a collection of Python source files to .pyc
    files in a __pycache__ subdirectory.  'py_files' is a list
    of files to compile; any files that don't end in ".py" are silently
    skipped.  'optimize' must be one of the following:
      0 - don't optimize
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.  'verbose' is not used here; it is only
    forwarded to the script generated in indirect mode.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out to use direct compilation or not (see
    the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if 'sys.dont_write_bytecode' is set,
    and ValueError if 'prefix' is given and a source filename doesn't
    start with it.
    """

    # Late import to fix a bootstrap issue: _posixsubprocess is built by
    # setup.py, but setup.py uses distutils.
    import subprocess

    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  Otherwise (the interpreter runs
    # with -O or -OO, or the caller requested an optimization level)
    # compile indirectly.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it in a child interpreter.
    if not direct:
        # mkstemp() is always available; the old fallback to the
        # race-prone mktemp() could never trigger and has been dropped.
        from tempfile import mkstemp
        (script_fd, script_name) = mkstemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        try:
            if dry_run:
                # Nothing will be written, so just release the descriptor
                # that mkstemp() opened instead of leaking it.
                os.close(script_fd)
            else:
                # Python reads source files as UTF-8 by default, so the
                # script must be written as UTF-8 (not in the locale
                # encoding) for non-ASCII filenames to survive.  The
                # 'with' block closes the file even if a write fails.
                with os.fdopen(script_fd, "w", encoding="utf-8") as script:
                    script.write("""\
from distutils.util import byte_compile
files = [
""")

                    # XXX would be nice to write absolute filenames, just
                    # for safety's sake (script should be more robust in
                    # the face of chdir'ing before running it).  But this
                    # requires abspath'ing 'prefix' as well, and that
                    # breaks the hack in build_lib's 'byte_compile()'
                    # method that carefully tacks on a trailing slash
                    # (os.sep really) to make sure the prefix here is
                    # "just right".  This whole prefix business is rather
                    # delicate -- the problem is that it's really a
                    # directory, but it is treated as a dumb string, so
                    # trailing slashes and so forth matter.
                    script.write(",\n".join(map(repr, py_files)) + "]\n")
                    script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))

            cmd = [sys.executable]
            cmd.extend(subprocess._optim_args_from_interpreter_flags())
            cmd.append(script_name)
            spawn(cmd, dry_run=dry_run)
        finally:
            # mkstemp() has created the file on disk even in a dry run,
            # so it is always deleted -- also when writing or running the
            # script failed.
            execute(os.remove, (script_name,), "removing %s" % script_name)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        import py_compile

        for file in py_files:
            if not file.endswith(".py"):
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'file' by default)
            if optimize >= 0:
                opt = '' if optimize == 0 else optimize
                cfile = importlib.util.cache_from_source(
                    file, optimization=opt)
                compile_level = optimize
            else:
                cfile = importlib.util.cache_from_source(file)
                # -1 tells py_compile to use this interpreter's own level.
                compile_level = -1

            dfile = file
            if prefix:
                if not file.startswith(prefix):
                    raise ValueError("invalid prefix: filename %r doesn't start with %r"
                           % (file, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(file, cfile):
                log.info("byte-compiling %s to %s", file, cfile_base)
                if not dry_run:
                    # Pass the level on explicitly; otherwise the
                    # bytecode is compiled at whatever level this
                    # interpreter runs with and may not match the
                    # optimization tag in the name of 'cfile'.
                    py_compile.compile(file, cfile, dfile,
                                       optimize=compile_level)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          file, cfile_base)
457
458# byte_compile ()
459
def rfc822_escape (header):
    """Return a version of the string escaped for inclusion in an
    RFC-822 header, by ensuring there are 8 spaces after each newline.
    """
    continuation = '\n' + ' ' * 8
    return header.replace('\n', continuation)
467
468# 2to3 support
469
def run_2to3(files, fixer_names=None, options=None, explicit=None):
    """Invoke 2to3 on a list of Python files.

    The files should all come from the build area, as the
    modification is done in-place. To reduce the build time,
    only files modified since the last invocation of this
    function should be passed in the files argument.

    'fixer_names' lists the fixers to run and defaults to every fixer of
    the 'lib2to3.fixes' package; 'options' and 'explicit' are handed to
    the refactoring tool unchanged.  Nothing happens (and lib2to3 is not
    even imported) when 'files' is empty.
    """

    if not files:
        return

    # Make this class local, to delay import of 2to3
    from lib2to3.refactor import RefactoringTool, get_fixers_from_package
    class DistutilsRefactoringTool(RefactoringTool):
        """RefactoringTool that reports through the distutils log."""

        def log_error(self, msg, *args, **kw):
            log.error(msg, *args)

        def log_message(self, msg, *args):
            log.info(msg, *args)

        def log_debug(self, msg, *args):
            log.debug(msg, *args)

    if fixer_names is None:
        fixer_names = get_fixers_from_package('lib2to3.fixes')
    # 'explicit' used to be accepted but silently dropped, so fixers that
    # have to be requested explicitly could never be enabled.
    r = DistutilsRefactoringTool(fixer_names, options=options,
                                 explicit=explicit)
    r.refactor(files, write=True)
496
def copydir_run_2to3(src, dest, template=None, fixer_names=None,
                     options=None, explicit=None):
    """Recursively copy the directory 'src' to 'dest', only copying new
    and changed files, and run 'run_2to3' over all newly copied Python
    modules afterward.

    If you give a template string, it's parsed like a MANIFEST.in.
    'fixer_names', 'options' and 'explicit' are passed on to 'run_2to3'.
    Returns the list of destination files that were actually copied.
    """
    from distutils.dir_util import mkpath
    from distutils.file_util import copy_file
    from distutils.filelist import FileList

    # Collect the files from inside 'src', then always switch back to
    # the directory we started in.
    filelist = FileList()
    original_cwd = os.getcwd()
    os.chdir(src)
    try:
        filelist.findall()
    finally:
        os.chdir(original_cwd)
    filelist.files[:] = filelist.allfiles

    if template:
        for raw_line in template.splitlines():
            directive = raw_line.strip()
            if directive:
                filelist.process_template_line(directive)

    copied = []
    for filename in filelist.files:
        outname = os.path.join(dest, filename)
        mkpath(os.path.dirname(outname))
        # The second item of copy_file()'s result is the "copied" flag.
        if copy_file(os.path.join(src, filename), outname, update=1)[1]:
            copied.append(outname)

    python_sources = [fn for fn in copied if fn.lower().endswith('.py')]
    run_2to3(python_sources,
             fixer_names=fixer_names, options=options, explicit=explicit)
    return copied
529
class Mixin2to3:
    """Mixin class for commands that run 2to3.

    To configure 2to3, setup scripts may either change
    the class variables, or inherit from individual commands
    to override how 2to3 is invoked.
    """

    # List of fixers to run; None means "use the default", which the
    # module-level run_2to3() resolves to all fixers of lib2to3.fixes.
    fixer_names = None

    # Options dictionary handed to the refactoring tool.
    options = None

    # List of fixers to invoke even though they are marked as explicit.
    explicit = None

    def run_2to3(self, files):
        """Run the module-level 'run_2to3' on 'files' with this class's
        configuration and return its result."""
        return run_2to3(files,
                        fixer_names=self.fixer_names,
                        options=self.options,
                        explicit=self.explicit)
548