• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Utility functions for copying and archiving files and directory trees.
2
3XXX The functions here don't copy the resource fork or other metadata on Mac.
4
5"""
6
7import os
8import sys
9import stat
10from os.path import abspath
11import fnmatch
12import collections
13import errno
14
15try:
16    import zlib
17    del zlib
18    _ZLIB_SUPPORTED = True
19except ImportError:
20    _ZLIB_SUPPORTED = False
21
22try:
23    import bz2
24    del bz2
25    _BZ2_SUPPORTED = True
26except ImportError:
27    _BZ2_SUPPORTED = False
28
29try:
30    from pwd import getpwnam
31except ImportError:
32    getpwnam = None
33
34try:
35    from grp import getgrnam
36except ImportError:
37    getgrnam = None
38
39__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
40           "copytree", "move", "rmtree", "Error", "SpecialFileError",
41           "ExecError", "make_archive", "get_archive_formats",
42           "register_archive_format", "unregister_archive_format",
43           "ignore_patterns"]
44
class Error(EnvironmentError):
    """Generic error raised by the copy and move helpers in this module.

    copytree() raises it with a list of (src, dst, reason) tuples as its
    single argument; copyfile() and move() raise it with a message string.
    """
47
class SpecialFileError(EnvironmentError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file (e.g. a named pipe) that does not support it."""
51
class ExecError(EnvironmentError):
    """Raised when an external command could not be executed."""
54
# WindowsError is not defined on every platform.  Where it is missing, bind
# the name to None so that later code can guard its isinstance() checks with
# an ``is not None`` test instead of hitting a NameError.
try:
    WindowsError
except NameError:
    WindowsError = None
59
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    Data is transferred in chunks of at most `length` bytes, read from
    the current position of fsrc until a read returns an empty result.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
67
68def _samefile(src, dst):
69    # Macintosh, Unix.
70    if hasattr(os.path, 'samefile'):
71        try:
72            return os.path.samefile(src, dst)
73        except OSError:
74            return False
75
76    # All other platforms: check for same pathname.
77    return (os.path.normcase(os.path.abspath(src)) ==
78            os.path.normcase(os.path.abspath(dst)))
79
def copyfile(src, dst):
    """Copy the data of the file named src to the file named dst.

    Raises Error if both names refer to the same file, and
    SpecialFileError if either one is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for path in (src, dst):
        try:
            mode = os.stat(path).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        copyfileobj(fsrc, fdst)
99
def copymode(src, dst):
    """Copy the permission (mode) bits from src to dst.

    Does nothing on platforms where os.chmod is unavailable.
    """
    if not hasattr(os, 'chmod'):
        return
    os.chmod(dst, stat.S_IMODE(os.stat(src).st_mode))
106
def copystat(src, dst):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Each piece is copied only if the platform provides the matching os
    function (utime, chmod, chflags).  An OSError from os.chflags() is
    ignored when its errno is EOPNOTSUPP or ENOTSUP; any other OSError
    propagates.
    """
    st = os.stat(src)
    mode = stat.S_IMODE(st.st_mode)
    if hasattr(os, 'utime'):
        os.utime(dst, (st.st_atime, st.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, mode)
    if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
        try:
            os.chflags(dst, st.st_flags)
        except OSError as why:
            # Not every errno module defines both names, so collect only
            # the codes that exist on this platform.
            unsupported = [getattr(errno, name)
                           for name in ('EOPNOTSUPP', 'ENOTSUP')
                           if hasattr(errno, name)]
            if why.errno not in unsupported:
                raise
124
def copy(src, dst):
    """Copy file data and mode bits, like "cp src dst".

    If dst is an existing directory, the file is copied into it under
    the base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copymode(src, target)
135
def copy2(src, dst):
    """Copy file data and all stat info, like "cp -p src dst".

    If dst is an existing directory, the file is copied into it under
    the base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copystat(src, target)
146
def ignore_patterns(*patterns):
    """Build a callable suitable for the copytree() ignore parameter.

    patterns are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any of them."""
    def _ignore_patterns(path, names):
        return set(
            matched
            for pattern in patterns
            for matched in fnmatch.filter(names, pattern))
    return _ignore_patterns
158
def copytree(src, dst, symlinks=False, ignore=None):
    """Recursively copy a directory tree using copy2().

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons;
    each reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    XXX Consider this example code rather than the ultimate tool.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                linkto = os.readlink(srcname)
                os.symlink(linkto, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy2(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            reasons = err.args[0]
            if isinstance(reasons, list):
                errors.extend(reasons)
            else:
                # An Error raised by copyfile() carries a message string,
                # not a list; extending with it would add one entry per
                # character, so record it as a single reason instead.
                errors.append((srcname, dstname, str(reasons)))
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
223
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.path.islink, os.listdir, os.remove,
    or os.rmdir; path is the argument to that function that caused it to
    fail; and exc_info is a tuple returned by sys.exc_info().  If
    ignore_errors is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always called from inside an except block, so a bare raise
            # re-raises the error currently being handled.
            raise
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return
    names = []
    try:
        names = os.listdir(path)
    except os.error:
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            mode = os.lstat(fullname).st_mode
        except os.error:
            # Treat an entry that cannot be stat'ed as a non-directory so
            # the failure is reported through the os.remove() path below.
            mode = 0
        if stat.S_ISDIR(mode):
            rmtree(fullname, ignore_errors, onerror)
        else:
            try:
                os.remove(fullname)
            except os.error:
                onerror(os.remove, fullname, sys.exc_info())
    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
271
272
273def _basename(path):
274    # A basename() variant which first strips the trailing slash, if present.
275    # Thus we always get the last component of the path, even for directories.
276    sep = os.path.sep + (os.path.altsep or '')
277    return os.path.basename(path.rstrip(sep))
278
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed.
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    Raises Error if the target inside an existing destination directory
    already exists, or if a directory would be moved into itself.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed; fall back to copying and then deleting the source.
        if os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'."
                            % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)
318
319def _destinsrc(src, dst):
320    src = abspath(src)
321    dst = abspath(dst)
322    if not src.endswith(os.path.sep):
323        src += os.path.sep
324    if not dst.endswith(os.path.sep):
325        dst += os.path.sep
326    return dst.startswith(src)
327
328def _get_gid(name):
329    """Returns a gid, given a group name."""
330    if getgrnam is None or name is None:
331        return None
332    try:
333        result = getgrnam(name)
334    except KeyError:
335        result = None
336    if result is not None:
337        return result[2]
338    return None
339
340def _get_uid(name):
341    """Returns an uid, given a user name."""
342    if getpwnam is None or name is None:
343        return None
344    try:
345        result = getpwnam(name)
346    except KeyError:
347        result = None
348    if result is not None:
349        return result[2]
350    return None
351
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Build a (possibly compressed) tar archive of everything under
    'base_dir' and return the name of the archive file.

    'compress' selects the compression: "gzip" (the default), "bzip2",
    or None for no compression.  A ValueError is raised for any other
    value, or when support for the requested compression is missing.

    'owner' and 'group' name the user and group recorded for the archive
    members.  If not provided (or not resolvable to an id), the members
    keep the owner and group that tarfile recorded for them.

    The archive is named 'base_name' + ".tar", followed by ".gz" or
    ".bz2" when compressed.  Its parent directory is created if needed.
    With 'dry_run' set, nothing is written to disk.
    """
    if compress is None:
        tar_compression = ''
    elif compress == 'gzip' and _ZLIB_SUPPORTED:
        tar_compression = 'gz'
    elif compress == 'bzip2' and _BZ2_SUPPORTED:
        tar_compression = 'bz2'
    else:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    suffix = '.tar'
    if tar_compression:
        suffix += '.' + tar_compression
    archive_name = base_name + suffix

    archive_dir = os.path.dirname(archive_name)
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    import tarfile  # late import so Python build itself doesn't break

    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _apply_ownership(member):
        # tarfile filter: override ids/names only for the ids we resolved.
        if gid is not None:
            member.gid = gid
            member.gname = group
        if uid is not None:
            member.uid = uid
            member.uname = owner
        return member

    if dry_run:
        return archive_name

    tar = tarfile.open(archive_name, 'w|%s' % tar_compression)
    try:
        tar.add(base_dir, filter=_apply_ownership)
    finally:
        tar.close()

    return archive_name
415
416def _call_external_zip(base_dir, zip_filename, verbose, dry_run, logger):
417    # XXX see if we want to keep an external call here
418    if verbose:
419        zipoptions = "-r"
420    else:
421        zipoptions = "-rq"
422    cmd = ["zip", zipoptions, zip_filename, base_dir]
423    if logger is not None:
424        logger.info(' '.join(cmd))
425    if dry_run:
426        return
427    import subprocess
428    try:
429        subprocess.check_call(cmd)
430    except subprocess.CalledProcessError:
431        # XXX really should distinguish between "couldn't find
432        # external 'zip' command" and "zip failed".
433        raise ExecError, \
434            ("unable to create zip file '%s': "
435            "could neither import the 'zipfile' module nor "
436            "find a standalone zip utility") % zip_filename
437
438def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
439    """Create a zip file from all the files under 'base_dir'.
440
441    The output zip file will be named 'base_name' + ".zip".  Uses either the
442    "zipfile" Python module (if available) or the InfoZIP "zip" utility
443    (if installed and found on the default search path).  If neither tool is
444    available, raises ExecError.  Returns the name of the output zip
445    file.
446    """
447    zip_filename = base_name + ".zip"
448    archive_dir = os.path.dirname(base_name)
449
450    if archive_dir and not os.path.exists(archive_dir):
451        if logger is not None:
452            logger.info("creating %s", archive_dir)
453        if not dry_run:
454            os.makedirs(archive_dir)
455
456    # If zipfile module is not available, try spawning an external 'zip'
457    # command.
458    try:
459        import zlib
460        import zipfile
461    except ImportError:
462        zipfile = None
463
464    if zipfile is None:
465        _call_external_zip(base_dir, zip_filename, verbose, dry_run, logger)
466    else:
467        if logger is not None:
468            logger.info("creating '%s' and adding '%s' to it",
469                        zip_filename, base_dir)
470
471        if not dry_run:
472            with zipfile.ZipFile(zip_filename, "w",
473                                 compression=zipfile.ZIP_DEFLATED) as zf:
474                path = os.path.normpath(base_dir)
475                if path != os.curdir:
476                    zf.write(path, path)
477                    if logger is not None:
478                        logger.info("adding '%s'", path)
479                for dirpath, dirnames, filenames in os.walk(base_dir):
480                    for name in sorted(dirnames):
481                        path = os.path.normpath(os.path.join(dirpath, name))
482                        zf.write(path, path)
483                        if logger is not None:
484                            logger.info("adding '%s'", path)
485                    for name in filenames:
486                        path = os.path.normpath(os.path.join(dirpath, name))
487                        if os.path.isfile(path):
488                            zf.write(path, path)
489                            if logger is not None:
490                                logger.info("adding '%s'", path)
491
492    return zip_filename
493
# Registry of archive formats, keyed by format name.  Each value is a
# (function, extra_args, description) tuple, where extra_args is a list of
# (keyword, value) pairs passed to the function by make_archive().
_ARCHIVE_FORMATS = {}

_ARCHIVE_FORMATS['tar'] = (_make_tarball, [('compress', None)],
                           "uncompressed tar file")
_ARCHIVE_FORMATS['zip'] = (_make_zipfile, [], "ZIP file")

# Compressed tar variants are registered only when the compression module
# could be imported.
if _ZLIB_SUPPORTED:
    _ARCHIVE_FORMATS['gztar'] = (_make_tarball, [('compress', 'gzip')],
                                 "gzip'ed tar-file")

if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                 "bzip2'ed tar-file")
506
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Each element of the list is a (name, description) tuple.
    """
    return sorted((name, entry[2])
                  for name, entry in _ARCHIVE_FORMATS.items())
516
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a 2-item tuple or list.
    An existing registration under the same name is replaced.
    """
    if extra_args is None:
        extra_args = []
    # The callable() builtin avoids depending on collections.Callable,
    # which is not available on every Python version.
    if not callable(function):
        # Wrap in a 1-tuple so a tuple-valued argument is formatted as a
        # single value instead of being unpacked by the % operator.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
537
def unregister_archive_format(name):
    """Remove the archive format called name from the registry.

    Raises KeyError if no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
540
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    or "bztar".  Or any other registered format.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    Raises ValueError for an unknown 'format'; this is checked before the
    working directory is changed.
    """
    # Resolve the format and build the call arguments first, so that a bad
    # format cannot leave the process stranded in 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % (format,))

    func = format_info[0]
    kwargs = {'dry_run': dry_run, 'logger': logger}
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    if base_dir is None:
        base_dir = os.curdir

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Must be made absolute before chdir, or a relative base_name
        # would be resolved against root_dir.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
594