"""Utility functions for copying and archiving files and directory trees.

XXX The functions here don't copy the resource fork or other metadata on Mac.

"""

import os
import sys
import stat
from os.path import abspath
import fnmatch
import collections
import errno

# Probe for the optional compression modules once at import time; the
# modules themselves are imported lazily by tarfile/zipfile when needed.
try:
    import zlib
    del zlib
    _ZLIB_SUPPORTED = True
except ImportError:
    _ZLIB_SUPPORTED = False

try:
    import bz2
    del bz2
    _BZ2_SUPPORTED = True
except ImportError:
    _BZ2_SUPPORTED = False

# pwd/grp are not available on every platform; fall back to None so the
# uid/gid helpers below can degrade gracefully.
try:
    from pwd import getpwnam
except ImportError:
    getpwnam = None

try:
    from grp import getgrnam
except ImportError:
    getgrnam = None

__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "ignore_patterns"]


class Error(EnvironmentError):
    """Base error raised by the copy/move helpers in this module."""
    pass


class SpecialFileError(EnvironmentError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe)"""


class ExecError(EnvironmentError):
    """Raised when a command could not be executed"""


# WindowsError only exists on Windows; bind it to None elsewhere so that
# copytree() can test for it unconditionally.
try:
    WindowsError
except NameError:
    WindowsError = None


def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy data from file-like object fsrc to file-like object fdst.

    Data is read in chunks of at most 'length' bytes until fsrc.read()
    returns an empty result.
    """
    while True:
        buf = fsrc.read(length)
        if not buf:
            break
        fdst.write(buf)


def _samefile(src, dst):
    """Return True if src and dst refer to the same file.

    Uses os.path.samefile() where available (returning False if it raises
    OSError); otherwise compares the normalized absolute path names.
    """
    # Macintosh, Unix.
    if hasattr(os.path, 'samefile'):
        try:
            return os.path.samefile(src, dst)
        except OSError:
            return False

    # All other platforms: check for same pathname.
    return (os.path.normcase(os.path.abspath(src)) ==
            os.path.normcase(os.path.abspath(dst)))


def copyfile(src, dst):
    """Copy data from src to dst.

    Raises Error if src and dst are the same file, and SpecialFileError
    if either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for fn in [src, dst]:
        try:
            st = os.stat(fn)
        except OSError:
            # File most likely does not exist
            pass
        else:
            # XXX What about other special files? (sockets, devices...)
            if stat.S_ISFIFO(st.st_mode):
                raise SpecialFileError("`%s` is a named pipe" % fn)

    with open(src, 'rb') as fsrc:
        with open(dst, 'wb') as fdst:
            copyfileobj(fsrc, fdst)


def copymode(src, dst):
    """Copy mode bits from src to dst.

    Does nothing on platforms where os.chmod is unavailable.
    """
    if hasattr(os, 'chmod'):
        st = os.stat(src)
        mode = stat.S_IMODE(st.st_mode)
        os.chmod(dst, mode)


def copystat(src, dst):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Each piece is copied only if the platform provides the corresponding
    os function.  A failure of os.chflags() with EOPNOTSUPP/ENOTSUP is
    ignored; any other OSError from it is re-raised.
    """
    st = os.stat(src)
    mode = stat.S_IMODE(st.st_mode)
    if hasattr(os, 'utime'):
        os.utime(dst, (st.st_atime, st.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, mode)
    if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
        try:
            os.chflags(dst, st.st_flags)
        except OSError as why:
            # Ignore "operation not supported" errors; the errno constant
            # name differs between platforms, so probe for both.
            for err in 'EOPNOTSUPP', 'ENOTSUP':
                if hasattr(errno, err) and why.errno == getattr(errno, err):
                    break
            else:
                raise


def copy(src, dst):
    """Copy data and mode bits ("cp src dst").

    The destination may be a directory.

    """
    if os.path.isdir(dst):
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst)
    copymode(src, dst)


def copy2(src, dst):
    """Copy data and all stat info ("cp -p src dst").

    The destination may be a directory.

    """
    if os.path.isdir(dst):
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst)
    copystat(src, dst)


def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files"""
    def _ignore_patterns(path, names):
        ignored_names = []
        for pattern in patterns:
            ignored_names.extend(fnmatch.filter(names, pattern))
        return set(ignored_names)
    return _ignore_patterns


def copytree(src, dst, symlinks=False, ignore=None):
    """Recursively copy a directory tree using copy2().

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    XXX Consider this example code rather than the ultimate tool.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                linkto = os.readlink(srcname)
                os.symlink(linkto, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy2(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)


def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Called from inside an except block: re-raise the exception
            # currently being handled.
            raise
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return
    names = []
    try:
        names = os.listdir(path)
    except os.error:
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            mode = os.lstat(fullname).st_mode
        except os.error:
            # Treat an entry we cannot lstat as a non-directory so that
            # os.remove() below reports the failure through onerror.
            mode = 0
        if stat.S_ISDIR(mode):
            rmtree(fullname, ignore_errors, onerror)
        else:
            try:
                os.remove(fullname)
            except os.error:
                onerror(os.remove, fullname, sys.exc_info())
    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())


def _basename(path):
    # A basename() variant which first strips the trailing slash, if present.
    # Thus we always get the last component of the path, even for directories.
    sep = os.path.sep + (os.path.altsep or '')
    return os.path.basename(path.rstrip(sep))


def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed.
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed; fall back to copy + delete.
        if os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'."
                            % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)


def _destinsrc(src, dst):
    """Return True if the absolute path of dst lies at or under src."""
    src = abspath(src)
    dst = abspath(dst)
    # Compare with trailing separators so that '/a/bc' is not mistaken
    # for a child of '/a/b'.
    if not src.endswith(os.path.sep):
        src += os.path.sep
    if not dst.endswith(os.path.sep):
        dst += os.path.sep
    return dst.startswith(src)


def _get_gid(name):
    """Returns a gid, given a group name."""
    if getgrnam is None or name is None:
        return None
    try:
        result = getgrnam(name)
    except KeyError:
        result = None
    if result is not None:
        return result[2]
    return None


def _get_uid(name):
    """Returns an uid, given a user name."""
    if getpwnam is None or name is None:
        return None
    try:
        result = getpwnam(name)
    except KeyError:
        result = None
    if result is not None:
        return result[2]
    return None


def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", or None.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Raises ValueError if 'compress' is not recognized or the matching
    compression module is unavailable.

    Returns the output filename.
    """
    if compress is None:
        tar_compression = ''
    elif _ZLIB_SUPPORTED and compress == 'gzip':
        tar_compression = 'gz'
    elif _BZ2_SUPPORTED and compress == 'bzip2':
        tar_compression = 'bz2'
    else:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    compress_ext = '.' + tar_compression if compress else ''
    archive_name = base_name + '.tar' + compress_ext
    archive_dir = os.path.dirname(archive_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    import tarfile  # late import so Python build itself doesn't break

    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tarfile filter: override ownership only for the ids that could
        # be resolved from the requested owner/group names.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression)
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name


def _call_external_zip(base_dir, zip_filename, verbose, dry_run, logger):
    """Create 'zip_filename' from 'base_dir' by running the external "zip"
    command.

    Nothing is executed when 'dry_run' is true.  Raises ExecError if the
    command cannot be started or exits with a non-zero status.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    cmd = ["zip", zipoptions, zip_filename, base_dir]
    if logger is not None:
        logger.info(' '.join(cmd))
    if dry_run:
        return
    import subprocess
    try:
        subprocess.check_call(cmd)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers the "zip executable could not be started" case,
        # CalledProcessError the "zip ran but failed" case.
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        raise ExecError(
            ("unable to create zip file '%s': "
             "could neither import the 'zipfile' module nor "
             "find a standalone zip utility") % zip_filename)


def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip".  Uses either the
    "zipfile" Python module (if available) or the InfoZIP "zip" utility
    (if installed and found on the default search path).  If neither tool is
    available, raises ExecError.  Returns the name of the output zip
    file.
    """
    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # If zipfile module is not available, try spawning an external 'zip'
    # command.
    try:
        import zlib
        import zipfile
    except ImportError:
        zipfile = None

    if zipfile is None:
        _call_external_zip(base_dir, zip_filename, verbose, dry_run, logger)
    else:
        if logger is not None:
            logger.info("creating '%s' and adding '%s' to it",
                        zip_filename, base_dir)

        if not dry_run:
            with zipfile.ZipFile(zip_filename, "w",
                                 compression=zipfile.ZIP_DEFLATED) as zf:
                # Add an explicit entry for base_dir itself unless it is
                # the current directory.
                path = os.path.normpath(base_dir)
                if path != os.curdir:
                    zf.write(path, path)
                    if logger is not None:
                        logger.info("adding '%s'", path)
                for dirpath, dirnames, filenames in os.walk(base_dir):
                    for name in sorted(dirnames):
                        path = os.path.normpath(os.path.join(dirpath, name))
                        zf.write(path, path)
                        if logger is not None:
                            logger.info("adding '%s'", path)
                    for name in filenames:
                        path = os.path.normpath(os.path.join(dirpath, name))
                        if os.path.isfile(path):
                            zf.write(path, path)
                            if logger is not None:
                                logger.info("adding '%s'", path)

    return zip_filename


# Registry of archive formats:
#   name -> (function, [(extra_arg_name, value), ...], description)
_ARCHIVE_FORMATS = {
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file")
}

if _ZLIB_SUPPORTED:
    _ARCHIVE_FORMATS['gztar'] = (_make_tarball, [('compress', 'gzip')],
                                 "gzip'ed tar-file")

if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                 "bzip2'ed tar-file")


def get_archive_formats():
    """Returns a list of supported formats for archiving and unarchiving.

    Each element of the returned sequence is a tuple (name, description)
    """
    formats = [(name, registry[2]) for name, registry in
               _ARCHIVE_FORMATS.items()]
    formats.sort()
    return formats


def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable or extra_args is not a
    list/tuple of 2-element lists/tuples.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)


def unregister_archive_format(name):
    """Remove the archive format 'name' from the registry.

    Raises KeyError if no such format is registered.
    """
    del _ARCHIVE_FORMATS[name]


def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    or "bztar".  Or any other registered format.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    Raises ValueError if 'format' is not a registered archive format.
    """
    # Validate the format before touching the working directory, so that
    # an unknown format cannot leave the process chdir'ed into root_dir.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Resolve base_name against the original cwd before we leave it.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename