"""distutils.util

Miscellaneous utility functions -- anything that doesn't fit into
one of the other *util.py modules.
"""

__revision__ = "$Id$"

import os
import re
import string
import sys

from distutils.errors import DistutilsPlatformError
from distutils.dep_util import newer
from distutils.spawn import spawn
from distutils import log
from distutils.errors import DistutilsByteCompileError


def _release_tuple(version):
    """Return the leading numeric components of a dotted version string
    as a tuple of ints, e.g. '10.10.3' -> (10, 10, 3).  Parsing stops at
    the first component that is not purely numeric; an empty or
    non-numeric string yields ().
    """
    parts = []
    for piece in version.split('.'):
        if not piece.isdigit():
            break
        parts.append(int(piece))
    return tuple(parts)


def get_platform():
    """Return a string that identifies the current platform.  This is used
    mainly to distinguish platform-specific build directories and
    platform-specific built distributions.  Typically includes the OS name
    and version and the architecture (as supplied by 'os.uname()'),
    although the exact information included depends on the OS; eg. for IRIX
    the architecture isn't particularly important (IRIX only runs on SGI
    hardware), but for Linux the kernel version isn't particularly
    important.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u
       irix-5.3
       irix64-6.2

    Windows will return one of:
       win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
       win-ia64 (64bit Windows on Itanium)
       win32 (all others - specifically, sys.platform is returned)

    For other non-POSIX platforms, currently just returns 'sys.platform'.

    Raises ValueError on Mac OS X if the '-arch' flags found in CFLAGS
    form a combination that has no known machine name.
    """
    if os.name == 'nt':
        # sniff sys.version for architecture.
        prefix = " bit ("
        i = sys.version.find(prefix)
        if i == -1:
            return sys.platform
        j = sys.version.find(")", i)
        look = sys.version[i+len(prefix):j].lower()
        if look == 'amd64':
            return 'win-amd64'
        if look == 'itanium':
            return 'win-ia64'
        return sys.platform

    if os.name != "posix" or not hasattr(os, 'uname'):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix

    (osname, host, release, version, machine) = os.uname()

    # Convert the OS name to lowercase, remove '/' characters
    # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_')
    machine = machine.replace('/', '-')

    if osname[:5] == "linux":
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)
    elif osname[:5] == "sunos":
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
        # fall through to standard osname-release-machine representation
    elif osname[:4] == "irix":              # could be "irix64"!
        return "%s-%s" % (osname, release)
    elif osname[:3] == "aix":
        return "%s-%s.%s" % (osname, version, release)
    elif osname[:6] == "cygwin":
        osname = "cygwin"
        rel_re = re.compile(r'[\d.]+')
        m = rel_re.match(release)
        if m:
            release = m.group()
    elif osname[:6] == "darwin":
        #
        # For our purposes, we'll assume that the system version from
        # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
        # to. This makes the compatibility story a bit more sane because the
        # machine is going to compile and link as if it were
        # MACOSX_DEPLOYMENT_TARGET.
        from distutils.sysconfig import get_config_vars
        cfgvars = get_config_vars()

        macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')

        # Always calculate the release of the running machine,
        # needed to determine if we can build fat binaries or not.
        macrelease = macver
        # Get the system version. Reading this plist is a documented
        # way to get the system version (see the documentation for
        # the Gestalt Manager)
        try:
            f = open('/System/Library/CoreServices/SystemVersion.plist')
        except IOError:
            # We're on a plain darwin box, fall back to the default
            # behaviour.
            pass
        else:
            try:
                m = re.search(
                        r'<key>ProductUserVisibleVersion</key>\s*' +
                        r'<string>(.*?)</string>', f.read())
                if m is not None:
                    macrelease = '.'.join(m.group(1).split('.')[:2])
                # else: fall back to the default behaviour
            finally:
                f.close()

        if not macver:
            macver = macrelease

        if macver:
            release = macver
            osname = "macosx"
            cflags = cfgvars.get('CFLAGS', '')

            # Compare the release numerically: as strings, '10.10' sorts
            # before '10.4' and every release from 10.10 on would be
            # mistaken for a pre-10.4 system.
            if _release_tuple(macrelease)[:2] >= (10, 4) and \
                    '-arch' in cflags.strip():
                # The universal build will build fat binaries, but not on
                # systems before 10.4
                #
                # Try to detect 4-way universal builds, those have machine-type
                # 'universal' instead of 'fat'.

                machine = 'fat'

                archs = re.findall(r'-arch\s+(\S+)', cflags)
                archs = tuple(sorted(set(archs)))

                if len(archs) == 1:
                    machine = archs[0]
                elif archs == ('i386', 'ppc'):
                    machine = 'fat'
                elif archs == ('i386', 'x86_64'):
                    machine = 'intel'
                elif archs == ('i386', 'ppc', 'x86_64'):
                    machine = 'fat3'
                elif archs == ('ppc64', 'x86_64'):
                    machine = 'fat64'
                elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'):
                    machine = 'universal'
                else:
                    raise ValueError(
                       "Don't know machine value for archs=%r"%(archs,))

            elif machine == 'i386':
                # On OSX the machine type returned by uname is always the
                # 32-bit variant, even if the executable architecture is
                # the 64-bit variant
                if sys.maxsize >= 2**32:
                    machine = 'x86_64'

            elif machine in ('PowerPC', 'Power_Macintosh'):
                # Pick a sane name for the PPC architecture.
                machine = 'ppc'

                # See 'i386' case
                if sys.maxsize >= 2**32:
                    machine = 'ppc64'

    return "%s-%s-%s" % (osname, release, machine)


def convert_path(pathname):
    """Return 'pathname' as a name that will work on the native filesystem,
    i.e. split it on '/' and put it back together again using the current
    directory separator.  Needed because filenames in the setup script are
    always supplied in Unix style, and have to be converted to the local
    convention before we can actually use them in the filesystem.  Raises
    ValueError on non-Unix-ish systems if 'pathname' either starts or
    ends with a slash.
    """
    if os.sep == '/':
        return pathname
    if not pathname:
        return pathname
    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    # Drop every '.' component in a single pass.
    paths = [part for part in pathname.split('/') if part != '.']
    if not paths:
        return os.curdir
    return os.path.join(*paths)


def change_root(new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
    relative, this is equivalent to "os.path.join(new_root,pathname)".
    Otherwise, it requires making 'pathname' relative and then joining the
    two, which is tricky on DOS/Windows and Mac OS.

    Raises DistutilsPlatformError if 'os.name' is not one of 'posix',
    'nt' or 'os2'.
    """
    if os.name == 'posix':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        else:
            # Strip *all* leading slashes: if one were left behind the
            # remainder would still be absolute and os.path.join() would
            # throw 'new_root' away.
            return os.path.join(new_root, pathname.lstrip('/'))

    elif os.name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # Slice rather than index so that an empty 'path' (nothing after
        # the drive) doesn't raise IndexError.
        if path[:1] == '\\':
            path = path[1:]
        return os.path.join(new_root, path)

    elif os.name == 'os2':
        (drive, path) = os.path.splitdrive(pathname)
        if path[:1] == os.sep:
            path = path[1:]
        return os.path.join(new_root, path)

    else:
        raise DistutilsPlatformError(
              "nothing known about platform '%s'" % os.name)


_environ_checked = 0
def check_environ():
    """Ensure that 'os.environ' has all the environment variables we
    guarantee that users can use in config files, command-line options,
    etc.  Currently this includes:
      HOME - user's home directory (Unix only)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    The work is done only once per process; later calls return
    immediately.
    """
    global _environ_checked
    if _environ_checked:
        return

    if os.name == 'posix' and 'HOME' not in os.environ:
        import pwd
        # Field 5 of the passwd entry is the home directory.
        os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]

    if 'PLAT' not in os.environ:
        os.environ['PLAT'] = get_platform()

    _environ_checked = 1


def subst_vars(s, local_vars):
    """Perform shell/Perl-style variable substitution on 's'.  Every
    occurrence of '$' followed by a name is considered a variable, and
    that variable is substituted by the value found in the 'local_vars'
    dictionary, or in 'os.environ' if it's not in 'local_vars'.
    'os.environ' is first checked/augmented to guarantee that it contains
    certain values: see 'check_environ()'.  Raise ValueError for any
    variables not found in either 'local_vars' or 'os.environ'.
    """
    check_environ()

    def _subst(match):
        var_name = match.group(1)
        if var_name in local_vars:
            return str(local_vars[var_name])
        else:
            return os.environ[var_name]

    try:
        return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
    except KeyError as var:
        raise ValueError("invalid variable '$%s'" % var)


def grok_environment_error(exc, prefix="error: "):
    """Generate a useful error message from an EnvironmentError (IOError or
    OSError) exception object.  Handles Python 1.5.1 and 1.5.2 styles, and
    does what it can to deal with exception objects that don't have a
    filename (which happens when the error is due to a two-file operation,
    such as 'rename()' or 'link()').  Returns the error message as a string
    prefixed with 'prefix'.
    """
    # check for Python 1.5.2-style {IO,OS}Error exception objects
    if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
        if exc.filename:
            error = prefix + "%s: %s" % (exc.filename, exc.strerror)
        else:
            # two-argument functions in posix module don't
            # include the filename in the exception object!
            error = prefix + "%s" % exc.strerror
    else:
        # Last positional argument of the exception, i.e. what 'exc[-1]'
        # used to return.
        error = prefix + str(exc.args[-1])

    return error


# Needed by 'split_quoted()'; compiled lazily on first use.
_wordchars_re = _squote_re = _dquote_re = None
def _init_regex():
    """Compile the regular expressions used by 'split_quoted()'."""
    global _wordchars_re, _squote_re, _dquote_re
    _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')

def split_quoted(s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.  In short: words are delimited by spaces, as long as those
    spaces are not escaped by a backslash, or inside a quoted string.
    Single and double quotes are equivalent, and the quote characters can
    be backslash-escaped.  The backslash is stripped from any two-character
    escape sequence, leaving only the escaped character.  The quote
    characters are stripped from any quoted string.  Returns a list of
    words.  Raises ValueError if a quote is not closed.
    """

    # This is a nice algorithm for splitting up a single string, since it
    # doesn't require character-by-character examination.  It was a little
    # bit of a brain-bender to get it working right, though...
    if _wordchars_re is None: _init_regex()

    s = s.strip()
    words = []
    pos = 0

    # Invariant: 's' is the not-yet-emitted remainder, and s[:pos] is the
    # already-processed prefix of the word currently being built.
    while s:
        m = _wordchars_re.match(s, pos)
        end = m.end()
        if end == len(s):
            words.append(s[:end])
            break

        if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
            words.append(s[:end])       # we definitely have a word delimiter
            s = s[end:].lstrip()
            pos = 0

        elif s[end] == '\\':            # preserve whatever is being escaped;
                                        # will become part of the current word
            s = s[:end] + s[end+1:]
            pos = end+1

        else:
            if s[end] == "'":           # slurp singly-quoted string
                m = _squote_re.match(s, end)
            elif s[end] == '"':         # slurp doubly-quoted string
                m = _dquote_re.match(s, end)
            else:
                raise RuntimeError(
                      "this can't happen (bad char '%c')" % s[end])

            if m is None:
                raise ValueError(
                      "bad string (mismatched %s quotes?)" % s[end])

            # Remove the two quote characters but keep what was between
            # them; the word continues right after the closing quote.
            (beg, end) = m.span()
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = m.end() - 2

        if pos >= len(s):
            words.append(s)
            break

    return words


def execute(func, args, msg=None, verbose=0, dry_run=0):
    """Perform some action that affects the outside world (eg.  by
    writing to the filesystem).  Such actions are special because they
    are disabled by the 'dry_run' flag.  This method takes care of all
    that bureaucracy for you; all you have to do is supply the
    function to call and an argument tuple for it (to embody the
    "external action" being performed), and an optional message to
    print.  'verbose' is accepted but not used here.
    """
    if msg is None:
        msg = "%s%r" % (func.__name__, args)
        if msg[-2:] == ',)':        # correct for singleton tuple
            msg = msg[0:-2] + ')'

    log.info(msg)
    if not dry_run:
        func(*args)


def strtobool(val):
    """Convert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
    'val' is anything else.
    """
    val = val.lower()
    if val in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    elif val in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    else:
        raise ValueError("invalid truth value %r" % (val,))


def byte_compile(py_files,
                 optimize=0, force=0,
                 prefix=None, base_dir=None,
                 verbose=1, dry_run=0,
                 direct=None):
    """Byte-compile a collection of Python source files to either .pyc
    or .pyo files in the same directory.  'py_files' is a list of files
    to compile; any files that don't end in ".py" are silently skipped.
    'optimize' must be one of the following:
      0 - don't optimize (generate .pyc)
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out whether to use direct compilation or not
    (see the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if 'sys.dont_write_bytecode' is set,
    and ValueError if 'prefix' is given but a filename doesn't start
    with it.
    """
    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        try:
            from tempfile import mkstemp
            (script_fd, script_name) = mkstemp(".py")
        except ImportError:
            from tempfile import mktemp
            (script_fd, script_name) = None, mktemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        if not dry_run:
            if script_fd is not None:
                script = os.fdopen(script_fd, "w")
            else:
                script = open(script_name, "w")

            # Make sure the script file is closed even if a write fails.
            try:
                script.write("""\
from distutils.util import byte_compile
files = [
""")

                # XXX would be nice to write absolute filenames, just for
                # safety's sake (script should be more robust in the face of
                # chdir'ing before running it).  But this requires abspath'ing
                # 'prefix' as well, and that breaks the hack in build_lib's
                # 'byte_compile()' method that carefully tacks on a trailing
                # slash (os.sep really) to make sure the prefix here is "just
                # right".  This whole prefix business is rather delicate -- the
                # problem is that it's really a directory, but I'm treating it
                # as a dumb string, so trailing slashes and so forth matter.

                #py_files = map(os.path.abspath, py_files)
                #if prefix:
                #    prefix = os.path.abspath(prefix)

                script.write(",\n".join(map(repr, py_files)) + "]\n")
                script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))
            finally:
                script.close()
        elif script_fd is not None:
            # Dry run: mkstemp() has nevertheless created and opened the
            # file.  Release the descriptor and delete the empty
            # placeholder, since the removal below is only logged.
            os.close(script_fd)
            os.remove(script_name)

        cmd = [sys.executable, script_name]
        if optimize == 1:
            cmd.insert(1, "-O")
        elif optimize == 2:
            cmd.insert(1, "-OO")
        # Remove the temporary script even if the child process fails.
        try:
            spawn(cmd, dry_run=dry_run)
        finally:
            execute(os.remove, (script_name,), "removing %s" % script_name,
                    dry_run=dry_run)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        import py_compile

        for py_file in py_files:
            if py_file[-3:] != ".py":
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'py_file'
            #           by default)
            cfile = py_file + (__debug__ and "c" or "o")
            dfile = py_file
            if prefix:
                if py_file[:len(prefix)] != prefix:
                    raise ValueError(
                          "invalid prefix: filename %r doesn't start with %r"
                          % (py_file, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(py_file, cfile):
                log.info("byte-compiling %s to %s", py_file, cfile_base)
                if not dry_run:
                    py_compile.compile(py_file, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          py_file, cfile_base)


def rfc822_escape(header):
    """Return a version of the string escaped for inclusion in an
    RFC-822 header, by ensuring there are 8 spaces after each newline.
    """
    lines = header.split('\n')
    return ('\n' + 8*' ').join(lines)