1"""distutils.util
2
3Miscellaneous utility functions -- anything that doesn't fit into
4one of the other *util.py modules.
5"""
6
7import os
8import re
9import importlib.util
10import string
11import sys
12from distutils.errors import DistutilsPlatformError
13from distutils.dep_util import newer
14from distutils.spawn import spawn
15from distutils import log
16from distutils.errors import DistutilsByteCompileError
17
def get_host_platform():
    """Return a string identifying the platform this interpreter runs on.

    The result is used mainly to tell apart platform-specific build
    directories and platform-specific built distributions.  On POSIX
    systems it is assembled from the fields reported by 'os.uname()' and
    normally has the form "osname-release-machine"; on Linux the kernel
    release is left out because it isn't particularly important.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u

    On Windows the value is derived from 'sys.version' and is one of:
       win-amd64 (the version string mentions "amd64")
       win-arm32 (the version string mentions "(arm)")
       win-arm64 (the version string mentions "(arm64)")
       'sys.platform' unchanged, for everything else

    Everywhere else, the '_PYTHON_HOST_PLATFORM' environment variable is
    returned verbatim when it is set (cross builds set it explicitly).
    Non-POSIX platforms, and POSIX platforms lacking 'os.uname()', just
    get 'sys.platform'.
    """
    if os.name == 'nt':
        build_info = sys.version.lower()
        # Checked in this order; the first marker found wins.
        windows_tags = (
            ('amd64', 'win-amd64'),
            ('(arm)', 'win-arm32'),
            ('(arm64)', 'win-arm64'),
        )
        for marker, tag in windows_tags:
            if marker in build_info:
                return tag
        return sys.platform

    # Set for cross builds explicitly
    try:
        return os.environ["_PYTHON_HOST_PLATFORM"]
    except KeyError:
        pass

    if not (os.name == "posix" and hasattr(os, 'uname')):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix
    osname, _host, release, _version, machine = os.uname()

    # Normalise the pieces: lowercase OS name without '/' characters, and
    # a machine name free of spaces (for "Power Macintosh") and slashes.
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_').replace('/', '-')

    if osname.startswith("linux"):
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)

    if osname.startswith("aix"):
        from _aix_support import aix_platform
        return aix_platform()

    if osname.startswith("sunos"):
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
            # We can't use "platform.architecture()[0]" because of a
            # bootstrap problem.  A dict lookup is used on purpose so
            # that an unexpected sys.maxsize raises an error.
            bitness = {2147483647: "32bit", 9223372036854775807: "64bit"}
            machine = "%s.%s" % (machine, bitness[sys.maxsize])
        # older SunOS keeps the standard osname-release-machine form
    elif osname.startswith("cygwin"):
        osname = "cygwin"
        # Keep only the leading run of digits and dots of the release.
        version_match = re.match(r'[\d.]+', release, re.ASCII)
        if version_match:
            release = version_match.group()
    elif osname.startswith("darwin"):
        import _osx_support
        import distutils.sysconfig
        osname, release, machine = _osx_support.get_platform_osx(
            distutils.sysconfig.get_config_vars(),
            osname, release, machine)

    return "%s-%s-%s" % (osname, release, machine)
97
def get_platform():
    """Return a string that identifies the platform being built for.

    On Windows the 'VSCMD_ARG_TGT_ARCH' environment variable selects the
    target platform when it holds a known architecture name ('x86',
    'x64', 'arm' or 'arm64'); this allows cross-compiling for a target
    that differs from the host.  If the variable is unset or holds an
    unknown value, and on every other OS, the result of
    'get_host_platform()' is returned.
    """
    if os.name == 'nt':
        TARGET_TO_PLAT = {
            'x86' : 'win32',
            'x64' : 'win-amd64',
            'arm' : 'win-arm32',
            # keep in step with get_host_platform(), which reports
            # 'win-arm64' for ARM64 builds of the interpreter
            'arm64' : 'win-arm64',
        }
        target = os.environ.get('VSCMD_ARG_TGT_ARCH')
        return TARGET_TO_PLAT.get(target) or get_host_platform()
    return get_host_platform()
108
def convert_path (pathname):
    """Translate the Unix-style 'pathname' to the native filesystem form.

    Filenames in the setup script are always written with '/' as the
    separator, so before they are used on the filesystem they are split
    on '/' and re-joined with the local directory separator.  Components
    equal to '.' are dropped; if nothing is left, 'os.curdir' is
    returned.  When the native separator already is '/', or 'pathname'
    is empty, 'pathname' is returned unchanged.

    Raises ValueError on non-Unix-ish systems if 'pathname' either
    starts or ends with a slash.
    """
    # No translation needed (or possible) in these two cases.
    if os.sep == '/' or not pathname:
        return pathname

    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    components = [part for part in pathname.split('/') if part != '.']
    if components:
        return os.path.join(*components)
    return os.curdir
133
134# convert_path ()
135
136
def change_root (new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
    relative, this is equivalent to "os.path.join(new_root,pathname)".
    Otherwise, it requires making 'pathname' relative and then joining the
    two, which is tricky on DOS/Windows and Mac OS.

    On POSIX every leading slash is removed from an absolute 'pathname'
    before joining.  On Windows the drive is discarded and every leading
    '\\' or '/' is removed from the remainder.

    Raises DistutilsPlatformError if 'os.name' is neither 'posix' nor
    'nt'.
    """
    if os.name == 'posix':
        # Strip leading slashes until the path is relative: if any were
        # left (e.g. for "//usr/lib"), os.path.join() would treat the
        # second argument as absolute and silently drop 'new_root'.
        while os.path.isabs(pathname):
            pathname = pathname[1:]
        return os.path.join(new_root, pathname)

    elif os.name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # Slicing instead of indexing keeps an empty path part (e.g. for
        # "C:") from raising IndexError.  Both separator styles are
        # stripped because a leftover leading one would make
        # os.path.join() discard the directory part of 'new_root'.
        while path[:1] in ('\\', '/'):
            path = path[1:]
        return os.path.join(new_root, path)

    else:
        raise DistutilsPlatformError("nothing known about platform '%s'" % os.name)
157
158
# Set to 1 once check_environ() has done its work, so later calls are no-ops.
_environ_checked = 0
def check_environ ():
    """Make sure 'os.environ' holds the variables distutils promises.

    Users may rely on these variables in config files, command-line
    options, etc.  Currently they are:
      HOME - user's home directory (Unix only)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    Variables that are already set are left alone.  The work is done
    only once per process; subsequent calls return immediately.
    """
    global _environ_checked
    if not _environ_checked:
        home_missing = os.name == 'posix' and 'HOME' not in os.environ
        if home_missing:
            try:
                import pwd
                os.environ['HOME'] = pwd.getpwuid(os.getuid()).pw_dir
            except (ImportError, KeyError):
                # bpo-10496: if the current user identifier doesn't exist
                # in the password database, do nothing
                pass

        if 'PLAT' not in os.environ:
            os.environ['PLAT'] = get_platform()

        _environ_checked = 1
185
186
def subst_vars (s, local_vars):
    """Perform shell/Perl-style variable substitution on 's'.  Every
    occurrence of '$' followed by a name is considered a variable, and
    the variable is substituted by the value found in the 'local_vars'
    dictionary (converted with 'str()'), or in 'os.environ' if it's not
    in 'local_vars'.  'os.environ' is first checked/augmented to
    guarantee that it contains certain values: see 'check_environ()'.

    Returns the substituted string.  Raises ValueError for any
    variables not found in either 'local_vars' or 'os.environ'.
    """
    check_environ()

    def _subst (match):
        var_name = match.group(1)
        if var_name in local_vars:
            return str(local_vars[var_name])
        return os.environ[var_name]

    try:
        return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
    except KeyError as var:
        # str() of a KeyError is the repr of the missing key, so
        # formatting the exception itself produced doubled quotes
        # ("'$'FOO''").  Use the raw key instead.
        missing = var.args[0] if var.args else ''
        raise ValueError("invalid variable '$%s'" % missing)
208
209# subst_vars ()
210
211
def grok_environment_error (exc, prefix="error: "):
    """Return 'prefix' followed by the string form of the exception 'exc'.

    Kept only for backward compatibility: this used to try clever
    things with EnvironmentErrors, but nowadays str(exception) already
    produces good messages.
    """
    message = str(exc)
    return prefix + message
217
218
# Patterns needed by 'split_quoted()'; compiled on first use by _init_regex()
_wordchars_re = None
_squote_re = None
_dquote_re = None
def _init_regex():
    """Compile the three patterns used by 'split_quoted()' into the
    module-level '_wordchars_re', '_squote_re' and '_dquote_re'.
    """
    global _wordchars_re, _squote_re, _dquote_re
    patterns = (
        # a run of characters that are not backslash, quote or whitespace
        r'[^\\\'\"%s ]*' % string.whitespace,
        # a single-quoted string, allowing backslash escapes inside
        r"'(?:[^'\\]|\\.)*'",
        # a double-quoted string, allowing backslash escapes inside
        r'"(?:[^"\\]|\\.)*"',
    )
    _wordchars_re, _squote_re, _dquote_re = map(re.compile, patterns)
226
def split_quoted (s):
    """Split 's' into words following Unix shell-like quoting rules.

    Words are delimited by whitespace, unless that whitespace is escaped
    by a backslash or sits inside a quoted string.  Single and double
    quotes are equivalent, and quote characters can be backslash-escaped.
    The backslash is stripped from any two-character escape sequence,
    leaving only the escaped character, and the quote characters are
    stripped from any quoted string.  Returns a list of words.

    Raises ValueError if a quote is opened but never closed.
    """

    # The string is consumed from the left without examining it
    # character by character: 'text' always starts with the word being
    # built, and 'pos' marks how far into 'text' that word has already
    # been scanned (and stripped of quotes/backslashes).
    if _wordchars_re is None:
        _init_regex()

    text = s.strip()
    words = []
    pos = 0

    while text:
        # Skip over ordinary characters; 'end' is the first special one.
        end = _wordchars_re.match(text, pos).end()
        if end == len(text):
            words.append(text[:end])
            break

        special = text[end]
        if special in string.whitespace:
            # unescaped, unquoted whitespace: we definitely have a
            # word delimiter, so start over on the rest of the string
            words.append(text[:end])
            text = text[end:].lstrip()
            pos = 0

        elif special == '\\':
            # drop the backslash and keep whatever it escapes; that
            # character becomes part of the current word
            text = text[:end] + text[end+1:]
            pos = end + 1

        else:
            if special == "'":          # slurp singly-quoted string
                quoted = _squote_re.match(text, end)
            elif special == '"':        # slurp doubly-quoted string
                quoted = _dquote_re.match(text, end)
            else:
                raise RuntimeError("this can't happen (bad char '%c')" % special)

            if quoted is None:
                raise ValueError("bad string (mismatched %s quotes?)" % special)

            # Remove the two quote characters, keep what was between
            # them, and resume scanning right after the quoted text.
            start, stop = quoted.span()
            text = text[:start] + text[start+1:stop-1] + text[stop:]
            pos = stop - 2

        if pos >= len(text):
            words.append(text)
            break

    return words
284
285# split_quoted ()
286
287
def execute (func, args, msg=None, verbose=0, dry_run=0):
    """Run an action that affects the outside world (eg. one that writes
    to the filesystem), honouring the 'dry_run' flag.

    'func' is the function to call and 'args' the argument tuple for it
    (together they embody the "external action").  'msg' is logged at
    info level before the call; when it is None a message is built from
    the function name and its arguments.  If 'dry_run' is true the
    message is still logged but 'func' is not called.  'verbose' is
    accepted but not used here.
    """
    if msg is None:
        msg = "%s%r" % (func.__name__, args)
        if msg.endswith(',)'):      # correct for singleton tuple
            msg = msg[:-2] + ')'

    log.info(msg)
    if dry_run:
        return
    func(*args)
305
306
def strtobool (val):
    """Convert a string representation of truth to true (1) or false (0).

    The comparison is case-insensitive.  True values are 'y', 'yes',
    't', 'true', 'on', and '1'; false values are 'n', 'no', 'f',
    'false', 'off', and '0'.  Raises ValueError if 'val' is anything
    else.
    """
    truthy = ('y', 'yes', 't', 'true', 'on', '1')
    falsy = ('n', 'no', 'f', 'false', 'off', '0')

    word = val.lower()
    if word in truthy:
        return 1
    if word in falsy:
        return 0
    raise ValueError("invalid truth value %r" % (word,))
321
322
def byte_compile (py_files,
                  optimize=0, force=0,
                  prefix=None, base_dir=None,
                  verbose=1, dry_run=0,
                  direct=None):
    """Byte-compile a collection of Python source files to .pyc
    files in a __pycache__ subdirectory.  'py_files' is a list
    of files to compile; any files that don't end in ".py" are silently
    skipped.  'optimize' must be one of the following:
      0 - don't optimize
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out whether to use direct compilation or not
    (see the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if 'sys.dont_write_bytecode' is
    true, and ValueError if 'prefix' is given but one of the filenames
    does not start with it.
    """

    # Late import to fix a bootstrap issue: _posixsubprocess is built by
    # setup.py, but setup.py uses distutils.
    import subprocess

    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        from tempfile import mkstemp
        (script_fd, script_name) = mkstemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        try:
            if dry_run:
                # Nothing will be written, but mkstemp() has already
                # handed us an open descriptor: release it.
                os.close(script_fd)
            else:
                # The child interpreter reads the script as UTF-8 (the
                # default source encoding), so write it as UTF-8 rather
                # than in the locale encoding; otherwise non-ASCII
                # filenames could be corrupted.
                with os.fdopen(script_fd, "w", encoding="utf-8") as script:
                    script.write("""\
from distutils.util import byte_compile
files = [
""")

                    # XXX would be nice to write absolute filenames, just
                    # for safety's sake (script should be more robust in
                    # the face of chdir'ing before running it).  But this
                    # requires abspath'ing 'prefix' as well, and that
                    # breaks the hack in build_lib's 'byte_compile()'
                    # method that carefully tacks on a trailing slash
                    # (os.sep really) to make sure the prefix here is
                    # "just right".  This whole prefix business is rather
                    # delicate -- the problem is that it's really a
                    # directory, but I'm treating it as a dumb string, so
                    # trailing slashes and so forth matter.

                    script.write(",\n".join(map(repr, py_files)) + "]\n")
                    script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))

            cmd = [sys.executable]
            cmd.extend(subprocess._optim_args_from_interpreter_flags())
            cmd.append(script_name)
            spawn(cmd, dry_run=dry_run)
        finally:
            # mkstemp() created the file on disk even for a dry run, so
            # it is always removed for real -- also when writing the
            # script or running it failed.
            execute(os.remove, (script_name,), "removing %s" % script_name)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        import py_compile

        for file in py_files:
            if not file.endswith(".py"):
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'file' by default)
            if optimize >= 0:
                opt = '' if optimize == 0 else optimize
                cfile = importlib.util.cache_from_source(
                    file, optimization=opt)
            else:
                # negative 'optimize': no explicit optimization tag is
                # passed to cache_from_source()
                cfile = importlib.util.cache_from_source(file)
            dfile = file
            if prefix:
                if not file.startswith(prefix):
                    raise ValueError("invalid prefix: filename %r doesn't start with %r"
                           % (file, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(file, cfile):
                log.info("byte-compiling %s to %s", file, cfile_base)
                if not dry_run:
                    py_compile.compile(file, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          file, cfile_base)
469
470# byte_compile ()
471
def rfc822_escape (header):
    """Return 'header' escaped for inclusion in an RFC-822 header, by
    making sure every newline is followed by 8 spaces.
    """
    continuation = '\n' + 8 * ' '
    return header.replace('\n', continuation)
479
480# 2to3 support
481
def run_2to3(files, fixer_names=None, options=None, explicit=None):
    """Invoke 2to3 on a list of Python files.
    The files should all come from the build area, as the
    modification is done in-place. To reduce the build time,
    only files modified since the last invocation of this
    function should be passed in the files argument.

    'fixer_names' lists the fixers to run and defaults to every fixer
    in the 'lib2to3.fixes' package.  'options' and 'explicit' are handed
    to the refactoring tool unchanged; 'explicit' names fixers that
    should run even though they are marked as explicit.  Nothing is done
    (and lib2to3 is not imported) when 'files' is empty."""

    if not files:
        return

    # Make this class local, to delay import of 2to3
    from lib2to3.refactor import RefactoringTool, get_fixers_from_package
    class DistutilsRefactoringTool(RefactoringTool):
        # Route the tool's messages through the distutils logger.
        def log_error(self, msg, *args, **kw):
            log.error(msg, *args)

        def log_message(self, msg, *args):
            log.info(msg, *args)

        def log_debug(self, msg, *args):
            log.debug(msg, *args)

    if fixer_names is None:
        fixer_names = get_fixers_from_package('lib2to3.fixes')
    # 'explicit' used to be accepted but silently dropped; forward it so
    # that explicitly requested fixers are actually enabled.
    r = DistutilsRefactoringTool(fixer_names, options=options,
                                 explicit=explicit)
    r.refactor(files, write=True)
508
def copydir_run_2to3(src, dest, template=None, fixer_names=None,
                     options=None, explicit=None):
    """Recursively copy the directory 'src' to 'dest', copying only new
    and changed files, then run run_2to3 over the Python modules that
    were newly copied.

    If you give a 'template' string, it's parsed like a MANIFEST.in:
    each non-blank line is processed as a template line against the
    list of files found under 'src'.  'fixer_names', 'options' and
    'explicit' are passed on to run_2to3.

    Returns the list of destination filenames that were copied.
    """
    from distutils.dir_util import mkpath
    from distutils.file_util import copy_file
    from distutils.filelist import FileList

    manifest = FileList()
    # findall() is called without arguments, so the scan is done from
    # inside 'src' (presumably it walks the current directory); the
    # previous working directory is always restored afterwards.
    original_cwd = os.getcwd()
    os.chdir(src)
    try:
        manifest.findall()
    finally:
        os.chdir(original_cwd)

    # Start from everything that was found, then let the template (if
    # any) adjust the selection.
    manifest.files[:] = manifest.allfiles
    if template:
        for raw_line in template.splitlines():
            directive = raw_line.strip()
            if directive:
                manifest.process_template_line(directive)

    copied = []
    for relative_name in manifest.files:
        target = os.path.join(dest, relative_name)
        mkpath(os.path.dirname(target))
        outcome = copy_file(os.path.join(src, relative_name), target,
                            update=1)
        # second item of the result is true when the file was copied
        if outcome[1]:
            copied.append(target)

    python_sources = [name for name in copied
                      if name.lower().endswith('.py')]
    run_2to3(python_sources,
             fixer_names=fixer_names, options=options, explicit=explicit)
    return copied
541
class Mixin2to3:
    """Mixin class for commands that run 2to3.

    To configure 2to3, setup scripts may either change the class
    variables below, or inherit from individual commands to override
    how 2to3 is invoked.
    """

    # List of fixers to run; None makes the module-level run_2to3()
    # fall back to every fixer found in the 'lib2to3.fixes' package.
    fixer_names = None

    # Options dictionary, passed through to run_2to3().
    options = None

    # List of fixers to invoke even though they are marked as explicit;
    # passed through to run_2to3().
    explicit = None

    def run_2to3(self, files):
        """Run the module-level run_2to3() on 'files', configured with
        this object's 'fixer_names', 'options' and 'explicit'.
        """
        return run_2to3(files,
                        fixer_names=self.fixer_names,
                        options=self.options,
                        explicit=self.explicit)
560