1#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission  is  hereby granted,  free  of charge,  to  any person
10# obtaining a  copy of  this software  and associated documentation
11# files  (the  "Software"),  to   deal  in  the  Software   without
12# restriction,  including  without limitation  the  rights to  use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies  of  the  Software,  and to  permit  persons  to  whom the
15# Software  is  furnished  to  do  so,  subject  to  the  following
16# conditions:
17#
18# The above copyright  notice and this  permission notice shall  be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
22# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
23# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
24# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
25# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
26# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version     = "0.9.0"
37__author__  = "Lars Gust�bel (lars@gustaebel.de)"
38__date__    = "$Date$"
39__cvsid__   = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gust�bel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52import copy
53import re
54import operator
55
56try:
57    import grp, pwd
58except ImportError:
59    grp = pwd = None
60
61# from tarfile import *
62__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
63
64#---------------------------------------------------------
65# tar constants
66#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks (bytes)
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string: "ustar", a NUL
                                # byte, then the two characters "00"

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Type flags: each is the single character stored in a header's type field.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file (NUL used as the type flag)
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Archive format selectors (see the "format" argument of itn() and
# TarInfo.tobuf()).
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT     # used when no format is passed explicitly
99
100#---------------------------------------------------------
101# tarfile constants
102#---------------------------------------------------------
103# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
# ("path" and "linkpath" are exposed by TarInfo as properties that
# alias its "name" and "linkname" attributes.)
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Maps each field name to the callable
# (float or int) associated with that field.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
132
133#---------------------------------------------------------
134# Bits used in the mode field, values in octal.
135#---------------------------------------------------------
# File-type bits; filemode() tests them in exactly this order to pick
# the first character of its result.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (rendered as "t"/"T" by filemode())

# Permission bits for owner (TU*), group (TG*) and other (TO*).
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
156
157#---------------------------------------------------------
158# initialization
159#---------------------------------------------------------
# Default encoding argument of TarInfo.tobuf().  Use the filesystem
# encoding and fall back to the interpreter's default encoding when
# the filesystem encoding is reported as None.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
163
164#---------------------------------------------------------
165# Some useful functions
166#---------------------------------------------------------
167
def stn(s, length):
    """Return s as a field of length characters: longer input is
       truncated, shorter input is padded on the right with NULs.
    """
    # A negative repeat count yields an empty string, so no padding
    # is added when s is already long enough.
    padding = NUL * (length - len(s))
    return s[:length] + padding
172
def nts(s):
    """Return the part of a field that precedes its first null char.
       A field that contains no null char is returned unchanged.
    """
    head, _sep, _rest = s.partition("\0")
    return head
181
182def nti(s):
183    """Convert a number field to a python number.
184    """
185    # There are two possible encodings for a number field, see
186    # itn() below.
187    if s[0] != chr(0200):
188        try:
189            n = int(nts(s) or "0", 8)
190        except ValueError:
191            raise InvalidHeaderError("invalid header")
192    else:
193        n = 0L
194        for i in xrange(len(s) - 1):
195            n <<= 8
196            n += ord(s[i + 1])
197    return n
198
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Encode a python number as a header number field of digits bytes.

       Raises ValueError if the number does not fit into the field
       with the encodings that format permits.
    """
    # POSIX 1003.1-1988 stores numbers as octal digits followed by a
    # null-byte, which covers 0 .. (8**(digits-1))-1.  GNU tar can
    # store larger (or negative) values: a leading 0200 byte followed
    # by digits-1 bytes of big-endian data, which covers values up to
    # (256**(digits-1))-1.
    width = digits - 1

    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if format != GNU_FORMAT or n >= 256 ** width:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the bytes least-significant first, add the marker byte
    # and flip the whole sequence into big-endian order.
    octets = []
    for _ in xrange(width):
        octets.append(chr(n & 0xff))
        n >>= 8
    octets.append(chr(0x80))
    octets.reverse()
    return "".join(octets)
225
def uts(s, encoding, errors):
    """Encode the unicode object s with encoding and return the result.

       errors is passed on to s.encode(), except for the special value
       "utf-8" (see below).
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    # An extra error handler similar to the -o invalid=UTF-8 option
    # in POSIX.1-2001: characters that encoding cannot represent are
    # replaced with their UTF-8 representation.
    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pass

    pieces = []
    for char in s:
        try:
            piece = char.encode(encoding, "strict")
        except UnicodeEncodeError:
            piece = char.encode("utf8")
        pieces.append(piece)
    return "".join(pieces)
245
def calc_chksums(buf):
    """Return the pair (unsigned_chksum, signed_chksum) for the header
       block buf.  All bytes are summed up except for the chksum field
       (bytes 148-155), which is counted as if it was filled with
       spaces.  According to the GNU tar sources, some tars (Sun and
       NeXT) calculate chksum with signed char, which gives a different
       result if there are bytes with the high bit set, so both
       variants are calculated.
    """
    before = buf[:148]
    after = buf[156:512]
    # Contribution of the chksum field itself: 8 spaces = 8 * 32.
    blank_field = 256

    unsigned_chksum = blank_field
    unsigned_chksum += sum(struct.unpack("148B", before))
    unsigned_chksum += sum(struct.unpack("356B", after))

    signed_chksum = blank_field
    signed_chksum += sum(struct.unpack("148b", before))
    signed_chksum += sum(struct.unpack("356b", after))

    return unsigned_chksum, signed_chksum
258
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.
       Raises IOError if src ends before length bytes were copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly count bytes; a short read means src is exhausted.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    full_chunks, tail = divmod(length, BUFSIZE)

    done = 0
    while done < full_chunks:
        transfer(BUFSIZE)
        done += 1

    if tail != 0:
        transfer(tail)
283
# Lookup table for filemode().  Each inner tuple describes one character
# of the result string, in output order: the file type first, then the
# read/write/execute columns for owner, group and other.  Within a tuple
# the (bits, char) pairs are tried in order and the first pair whose bits
# are all set in the mode wins, so combined masks (e.g. TUEXEC|TSUID)
# must come before the masks they contain.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
310
def filemode(mode):
    """Render a file's mode as a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # "-" is shown when none of the candidates for this column match.
        symbol = "-"
        for mask, flag in candidates:
            if mode & mask == mask:
                symbol = flag
                break
        chars.append(symbol)
    return "".join(chars)
325
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
359
360#---------------------------
361# internal stream interface
362#---------------------------
363class _LowLevelFile:
364    """Low-level file object. Supports reading and writing.
365       It is used instead of a regular file object for streaming
366       access.
367    """
368
369    def __init__(self, name, mode):
370        mode = {
371            "r": os.O_RDONLY,
372            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
373        }[mode]
374        if hasattr(os, "O_BINARY"):
375            mode |= os.O_BINARY
376        self.fd = os.open(name, mode, 0666)
377
378    def close(self):
379        os.close(self.fd)
380
381    def read(self, size):
382        return os.read(self.fd, size)
383
384    def write(self, s):
385        os.write(self.fd, s)
386
387class _Stream:
388    """Class that serves as an adapter between TarFile and
389       a stream-like object.  The stream-like object only
390       needs to have a read() or write() method and is accessed
391       blockwise.  Use of gzip or bzip2 compression is possible.
392       A stream-like object could be for example: sys.stdin,
393       sys.stdout, a socket, a tape device etc.
394
395       _Stream is intended to be used only internally.
396    """
397
398    def __init__(self, name, mode, comptype, fileobj, bufsize):
399        """Construct a _Stream object.
400        """
401        self._extfileobj = True
402        if fileobj is None:
403            fileobj = _LowLevelFile(name, mode)
404            self._extfileobj = False
405
406        if comptype == '*':
407            # Enable transparent compression detection for the
408            # stream interface
409            fileobj = _StreamProxy(fileobj)
410            comptype = fileobj.getcomptype()
411
412        self.name     = name or ""
413        self.mode     = mode
414        self.comptype = comptype
415        self.fileobj  = fileobj
416        self.bufsize  = bufsize
417        self.buf      = ""
418        self.pos      = 0L
419        self.closed   = False
420
421        if comptype == "gz":
422            try:
423                import zlib
424            except ImportError:
425                raise CompressionError("zlib module is not available")
426            self.zlib = zlib
427            self.crc = zlib.crc32("") & 0xffffffffL
428            if mode == "r":
429                self._init_read_gz()
430            else:
431                self._init_write_gz()
432
433        if comptype == "bz2":
434            try:
435                import bz2
436            except ImportError:
437                raise CompressionError("bz2 module is not available")
438            if mode == "r":
439                self.dbuf = ""
440                self.cmp = bz2.BZ2Decompressor()
441            else:
442                self.cmp = bz2.BZ2Compressor()
443
444    def __del__(self):
445        if hasattr(self, "closed") and not self.closed:
446            self.close()
447
448    def _init_write_gz(self):
449        """Initialize for writing with gzip compression.
450        """
451        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
452                                            -self.zlib.MAX_WBITS,
453                                            self.zlib.DEF_MEM_LEVEL,
454                                            0)
455        timestamp = struct.pack("<L", long(time.time()))
456        self.__write("\037\213\010\010%s\002\377" % timestamp)
457        if self.name.endswith(".gz"):
458            self.name = self.name[:-3]
459        self.__write(self.name + NUL)
460
461    def write(self, s):
462        """Write string s to the stream.
463        """
464        if self.comptype == "gz":
465            self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
466        self.pos += len(s)
467        if self.comptype != "tar":
468            s = self.cmp.compress(s)
469        self.__write(s)
470
471    def __write(self, s):
472        """Write string s to the stream if a whole new block
473           is ready to be written.
474        """
475        self.buf += s
476        while len(self.buf) > self.bufsize:
477            self.fileobj.write(self.buf[:self.bufsize])
478            self.buf = self.buf[self.bufsize:]
479
480    def close(self):
481        """Close the _Stream object. No operation should be
482           done on it afterwards.
483        """
484        if self.closed:
485            return
486
487        if self.mode == "w" and self.comptype != "tar":
488            self.buf += self.cmp.flush()
489
490        if self.mode == "w" and self.buf:
491            self.fileobj.write(self.buf)
492            self.buf = ""
493            if self.comptype == "gz":
494                # The native zlib crc is an unsigned 32-bit integer, but
495                # the Python wrapper implicitly casts that to a signed C
496                # long.  So, on a 32-bit box self.crc may "look negative",
497                # while the same crc on a 64-bit box may "look positive".
498                # To avoid irksome warnings from the `struct` module, force
499                # it to look positive on all boxes.
500                self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
501                self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
502
503        if not self._extfileobj:
504            self.fileobj.close()
505
506        self.closed = True
507
508    def _init_read_gz(self):
509        """Initialize for reading a gzip compressed fileobj.
510        """
511        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
512        self.dbuf = ""
513
514        # taken from gzip.GzipFile with some alterations
515        if self.__read(2) != "\037\213":
516            raise ReadError("not a gzip file")
517        if self.__read(1) != "\010":
518            raise CompressionError("unsupported compression method")
519
520        flag = ord(self.__read(1))
521        self.__read(6)
522
523        if flag & 4:
524            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
525            self.read(xlen)
526        if flag & 8:
527            while True:
528                s = self.__read(1)
529                if not s or s == NUL:
530                    break
531        if flag & 16:
532            while True:
533                s = self.__read(1)
534                if not s or s == NUL:
535                    break
536        if flag & 2:
537            self.__read(2)
538
539    def tell(self):
540        """Return the stream's file pointer position.
541        """
542        return self.pos
543
544    def seek(self, pos=0):
545        """Set the stream's file pointer to pos. Negative seeking
546           is forbidden.
547        """
548        if pos - self.pos >= 0:
549            blocks, remainder = divmod(pos - self.pos, self.bufsize)
550            for i in xrange(blocks):
551                self.read(self.bufsize)
552            self.read(remainder)
553        else:
554            raise StreamError("seeking backwards is not allowed")
555        return self.pos
556
557    def read(self, size=None):
558        """Return the next size number of bytes from the stream.
559           If size is not defined, return all bytes of the stream
560           up to EOF.
561        """
562        if size is None:
563            t = []
564            while True:
565                buf = self._read(self.bufsize)
566                if not buf:
567                    break
568                t.append(buf)
569            buf = "".join(t)
570        else:
571            buf = self._read(size)
572        self.pos += len(buf)
573        return buf
574
575    def _read(self, size):
576        """Return size bytes from the stream.
577        """
578        if self.comptype == "tar":
579            return self.__read(size)
580
581        c = len(self.dbuf)
582        t = [self.dbuf]
583        while c < size:
584            buf = self.__read(self.bufsize)
585            if not buf:
586                break
587            try:
588                buf = self.cmp.decompress(buf)
589            except IOError:
590                raise ReadError("invalid compressed data")
591            t.append(buf)
592            c += len(buf)
593        t = "".join(t)
594        self.dbuf = t[size:]
595        return t[:size]
596
597    def __read(self, size):
598        """Return size bytes from stream. If internal buffer is empty,
599           read another block from the stream.
600        """
601        c = len(self.buf)
602        t = [self.buf]
603        while c < size:
604            buf = self.fileobj.read(self.bufsize)
605            if not buf:
606                break
607            t.append(buf)
608            c += len(buf)
609        t = "".join(t)
610        self.buf = t[size:]
611        return t[:size]
612# class _Stream
613
614class _StreamProxy(object):
615    """Small proxy class that enables transparent compression
616       detection for the Stream interface (mode 'r|*').
617    """
618
619    def __init__(self, fileobj):
620        self.fileobj = fileobj
621        self.buf = self.fileobj.read(BLOCKSIZE)
622
623    def read(self, size):
624        self.read = self.fileobj.read
625        return self.buf
626
627    def getcomptype(self):
628        if self.buf.startswith("\037\213\010"):
629            return "gz"
630        if self.buf.startswith("BZh91"):
631            return "bz2"
632        return "tar"
633
634    def close(self):
635        self.fileobj.close()
# class _StreamProxy
637
638class _BZ2Proxy(object):
639    """Small proxy class that enables external file object
640       support for "r:bz2" and "w:bz2" modes. This is actually
641       a workaround for a limitation in bz2 module's BZ2File
642       class which (unlike gzip.GzipFile) has no support for
643       a file object argument.
644    """
645
646    blocksize = 16 * 1024
647
648    def __init__(self, fileobj, mode):
649        self.fileobj = fileobj
650        self.mode = mode
651        self.name = getattr(self.fileobj, "name", None)
652        self.init()
653
654    def init(self):
655        import bz2
656        self.pos = 0
657        if self.mode == "r":
658            self.bz2obj = bz2.BZ2Decompressor()
659            self.fileobj.seek(0)
660            self.buf = ""
661        else:
662            self.bz2obj = bz2.BZ2Compressor()
663
664    def read(self, size):
665        b = [self.buf]
666        x = len(self.buf)
667        while x < size:
668            raw = self.fileobj.read(self.blocksize)
669            if not raw:
670                break
671            data = self.bz2obj.decompress(raw)
672            b.append(data)
673            x += len(data)
674        self.buf = "".join(b)
675
676        buf = self.buf[:size]
677        self.buf = self.buf[size:]
678        self.pos += len(buf)
679        return buf
680
681    def seek(self, pos):
682        if pos < self.pos:
683            self.init()
684        self.read(pos - self.pos)
685
686    def tell(self):
687        return self.pos
688
689    def write(self, data):
690        self.pos += len(data)
691        raw = self.bz2obj.compress(data)
692        self.fileobj.write(raw)
693
694    def close(self):
695        if self.mode == "w":
696            raw = self.bz2obj.flush()
697            self.fileobj.write(raw)
698# class _BZ2Proxy
699
700#------------------------
701# Extraction file object
702#------------------------
703class _FileInFile(object):
704    """A thin wrapper around an existing file object that
705       provides a part of its data as an individual file
706       object.
707    """
708
709    def __init__(self, fileobj, offset, size, sparse=None):
710        self.fileobj = fileobj
711        self.offset = offset
712        self.size = size
713        self.sparse = sparse
714        self.position = 0
715
716    def tell(self):
717        """Return the current file position.
718        """
719        return self.position
720
721    def seek(self, position):
722        """Seek to a position in the file.
723        """
724        self.position = position
725
726    def read(self, size=None):
727        """Read data from the file.
728        """
729        if size is None:
730            size = self.size - self.position
731        else:
732            size = min(size, self.size - self.position)
733
734        if self.sparse is None:
735            return self.readnormal(size)
736        else:
737            return self.readsparse(size)
738
739    def readnormal(self, size):
740        """Read operation for regular files.
741        """
742        self.fileobj.seek(self.offset + self.position)
743        self.position += size
744        return self.fileobj.read(size)
745
746    def readsparse(self, size):
747        """Read operation for sparse files.
748        """
749        data = []
750        while size > 0:
751            buf = self.readsparsesection(size)
752            if not buf:
753                break
754            size -= len(buf)
755            data.append(buf)
756        return "".join(data)
757
758    def readsparsesection(self, size):
759        """Read a single section of a sparse file.
760        """
761        section = self.sparse.find(self.position)
762
763        if section is None:
764            return ""
765
766        size = min(size, section.offset + section.size - self.position)
767
768        if isinstance(section, _data):
769            realpos = section.realpos + self.position - section.offset
770            self.fileobj.seek(self.offset + realpos)
771            self.position += size
772            return self.fileobj.read(size)
773        else:
774            self.position += size
775            return NUL * size
776#class _FileInFile
777
778
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes readline() requests per read while it looks
    # for a newline.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a file object for the member tarinfo of the
           archive tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data that readline() has read ahead but not handed out yet.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.  Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # A negative size means "no limit", like for regular file
        # objects.  Using it as a slice bound below would cut bytes
        # off the end of the buffered data instead.
        if size is not None and size < 0:
            size = None

        buf = ""
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.  Raises ValueError
           if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the line.  Any other value
        # (None, -1, -2, ...) must not end up as the slice bound below.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.  The resulting position
           is clamped to the range 0..size.  Raises ValueError if the
           file is closed or whence is invalid.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Read-ahead data is meaningless at the new position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
905#class ExFileObject
906
907#------------------
908# Exported Classes
909#------------------
910class TarInfo(object):
911    """Informational class which holds the details about an
912       archive member given by a tar header block.
913       TarInfo objects are returned by TarFile.getmember(),
914       TarFile.getmembers() and TarFile.gettarinfo() and are
915       usually created internally.
916    """
917
    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.  Every other attribute starts out with a
           neutral default: a regular file with mode 0644, zero size
           and ids, and empty link, user and group names.
        """
        self.name = name        # member name
        self.mode = 0644        # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.pax_headers = {}   # pax header information
940
941    # In pax headers the "name" and "linkname" field are called
942    # "path" and "linkpath".
943    def _getpath(self):
944        return self.name
945    def _setpath(self, name):
946        self.name = name
947    path = property(_getpath, _setpath)
948
949    def _getlinkpath(self):
950        return self.linkname
951    def _setlinkpath(self, linkname):
952        self.linkname = linkname
953    linkpath = property(_getlinkpath, _setlinkpath)
954
955    def __repr__(self):
956        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
957
958    def get_info(self, encoding, errors):
959        """Return the TarInfo's attributes as a dictionary.
960        """
961        info = {
962            "name":     self.name,
963            "mode":     self.mode & 07777,
964            "uid":      self.uid,
965            "gid":      self.gid,
966            "size":     self.size,
967            "mtime":    self.mtime,
968            "chksum":   self.chksum,
969            "type":     self.type,
970            "linkname": self.linkname,
971            "uname":    self.uname,
972            "gname":    self.gname,
973            "devmajor": self.devmajor,
974            "devminor": self.devminor
975        }
976
977        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
978            info["name"] += "/"
979
980        for key in ("name", "linkname", "uname", "gname"):
981            if type(info[key]) is unicode:
982                info[key] = info[key].encode(encoding, errors)
983
984        return info
985
986    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
987        """Return a tar header as a string of 512 byte blocks.
988        """
989        info = self.get_info(encoding, errors)
990
991        if format == USTAR_FORMAT:
992            return self.create_ustar_header(info)
993        elif format == GNU_FORMAT:
994            return self.create_gnu_header(info)
995        elif format == PAX_FORMAT:
996            return self.create_pax_header(info, encoding, errors)
997        else:
998            raise ValueError("invalid format")
999
1000    def create_ustar_header(self, info):
1001        """Return the object as a ustar header block.
1002        """
1003        info["magic"] = POSIX_MAGIC
1004
1005        if len(info["linkname"]) > LENGTH_LINK:
1006            raise ValueError("linkname is too long")
1007
1008        if len(info["name"]) > LENGTH_NAME:
1009            info["prefix"], info["name"] = self._posix_split_name(info["name"])
1010
1011        return self._create_header(info, USTAR_FORMAT)
1012
1013    def create_gnu_header(self, info):
1014        """Return the object as a GNU header block sequence.
1015        """
1016        info["magic"] = GNU_MAGIC
1017
1018        buf = ""
1019        if len(info["linkname"]) > LENGTH_LINK:
1020            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
1021
1022        if len(info["name"]) > LENGTH_NAME:
1023            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
1024
1025        return buf + self._create_header(info, GNU_FORMAT)
1026
1027    def create_pax_header(self, info, encoding, errors):
1028        """Return the object as a ustar header block. If it cannot be
1029           represented this way, prepend a pax extended header sequence
1030           with supplement information.
1031        """
1032        info["magic"] = POSIX_MAGIC
1033        pax_headers = self.pax_headers.copy()
1034
1035        # Test string fields for values that exceed the field length or cannot
1036        # be represented in ASCII encoding.
1037        for name, hname, length in (
1038                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1039                ("uname", "uname", 32), ("gname", "gname", 32)):
1040
1041            if hname in pax_headers:
1042                # The pax header has priority.
1043                continue
1044
1045            val = info[name].decode(encoding, errors)
1046
1047            # Try to encode the string as ASCII.
1048            try:
1049                val.encode("ascii")
1050            except UnicodeEncodeError:
1051                pax_headers[hname] = val
1052                continue
1053
1054            if len(info[name]) > length:
1055                pax_headers[hname] = val
1056
1057        # Test number fields for values that exceed the field limit or values
1058        # that like to be stored as float.
1059        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1060            if name in pax_headers:
1061                # The pax header has priority. Avoid overflow.
1062                info[name] = 0
1063                continue
1064
1065            val = info[name]
1066            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1067                pax_headers[name] = unicode(val)
1068                info[name] = 0
1069
1070        # Create a pax extended header if necessary.
1071        if pax_headers:
1072            buf = self._create_pax_generic_header(pax_headers)
1073        else:
1074            buf = ""
1075
1076        return buf + self._create_header(info, USTAR_FORMAT)
1077
1078    @classmethod
1079    def create_pax_global_header(cls, pax_headers):
1080        """Return the object as a pax global header block sequence.
1081        """
1082        return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
1083
1084    def _posix_split_name(self, name):
1085        """Split a name longer than 100 chars into a prefix
1086           and a name part.
1087        """
1088        prefix = name[:LENGTH_PREFIX + 1]
1089        while prefix and prefix[-1] != "/":
1090            prefix = prefix[:-1]
1091
1092        name = name[len(prefix):]
1093        prefix = prefix[:-1]
1094
1095        if not prefix or len(name) > LENGTH_NAME:
1096            raise ValueError("name is too long")
1097        return prefix, name
1098
1099    @staticmethod
1100    def _create_header(info, format):
1101        """Return a header block. info is a dictionary with file
1102           information, format must be one of the *_FORMAT constants.
1103        """
1104        parts = [
1105            stn(info.get("name", ""), 100),
1106            itn(info.get("mode", 0) & 07777, 8, format),
1107            itn(info.get("uid", 0), 8, format),
1108            itn(info.get("gid", 0), 8, format),
1109            itn(info.get("size", 0), 12, format),
1110            itn(info.get("mtime", 0), 12, format),
1111            "        ", # checksum field
1112            info.get("type", REGTYPE),
1113            stn(info.get("linkname", ""), 100),
1114            stn(info.get("magic", POSIX_MAGIC), 8),
1115            stn(info.get("uname", ""), 32),
1116            stn(info.get("gname", ""), 32),
1117            itn(info.get("devmajor", 0), 8, format),
1118            itn(info.get("devminor", 0), 8, format),
1119            stn(info.get("prefix", ""), 155)
1120        ]
1121
1122        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1123        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1124        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1125        return buf
1126
1127    @staticmethod
1128    def _create_payload(payload):
1129        """Return the string payload filled with zero bytes
1130           up to the next 512 byte border.
1131        """
1132        blocks, remainder = divmod(len(payload), BLOCKSIZE)
1133        if remainder > 0:
1134            payload += (BLOCKSIZE - remainder) * NUL
1135        return payload
1136
1137    @classmethod
1138    def _create_gnu_long_header(cls, name, type):
1139        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1140           for name.
1141        """
1142        name += NUL
1143
1144        info = {}
1145        info["name"] = "././@LongLink"
1146        info["type"] = type
1147        info["size"] = len(name)
1148        info["magic"] = GNU_MAGIC
1149
1150        # create extended header + name blocks.
1151        return cls._create_header(info, USTAR_FORMAT) + \
1152                cls._create_payload(name)
1153
1154    @classmethod
1155    def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1156        """Return a POSIX.1-2001 extended or global header sequence
1157           that contains a list of keyword, value pairs. The values
1158           must be unicode objects.
1159        """
1160        records = []
1161        for keyword, value in pax_headers.iteritems():
1162            keyword = keyword.encode("utf8")
1163            value = value.encode("utf8")
1164            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
1165            n = p = 0
1166            while True:
1167                n = l + len(str(p))
1168                if n == p:
1169                    break
1170                p = n
1171            records.append("%d %s=%s\n" % (p, keyword, value))
1172        records = "".join(records)
1173
1174        # We use a hardcoded "././@PaxHeader" name like star does
1175        # instead of the one that POSIX recommends.
1176        info = {}
1177        info["name"] = "././@PaxHeader"
1178        info["type"] = type
1179        info["size"] = len(records)
1180        info["magic"] = POSIX_MAGIC
1181
1182        # Create pax header + record blocks.
1183        return cls._create_header(info, USTAR_FORMAT) + \
1184                cls._create_payload(records)
1185
1186    @classmethod
1187    def frombuf(cls, buf):
1188        """Construct a TarInfo object from a 512 byte string buffer.
1189        """
1190        if len(buf) == 0:
1191            raise EmptyHeaderError("empty header")
1192        if len(buf) != BLOCKSIZE:
1193            raise TruncatedHeaderError("truncated header")
1194        if buf.count(NUL) == BLOCKSIZE:
1195            raise EOFHeaderError("end of file header")
1196
1197        chksum = nti(buf[148:156])
1198        if chksum not in calc_chksums(buf):
1199            raise InvalidHeaderError("bad checksum")
1200
1201        obj = cls()
1202        obj.buf = buf
1203        obj.name = nts(buf[0:100])
1204        obj.mode = nti(buf[100:108])
1205        obj.uid = nti(buf[108:116])
1206        obj.gid = nti(buf[116:124])
1207        obj.size = nti(buf[124:136])
1208        obj.mtime = nti(buf[136:148])
1209        obj.chksum = chksum
1210        obj.type = buf[156:157]
1211        obj.linkname = nts(buf[157:257])
1212        obj.uname = nts(buf[265:297])
1213        obj.gname = nts(buf[297:329])
1214        obj.devmajor = nti(buf[329:337])
1215        obj.devminor = nti(buf[337:345])
1216        prefix = nts(buf[345:500])
1217
1218        # Old V7 tar format represents a directory as a regular
1219        # file with a trailing slash.
1220        if obj.type == AREGTYPE and obj.name.endswith("/"):
1221            obj.type = DIRTYPE
1222
1223        # Remove redundant slashes from directories.
1224        if obj.isdir():
1225            obj.name = obj.name.rstrip("/")
1226
1227        # Reconstruct a ustar longname.
1228        if prefix and obj.type not in GNU_TYPES:
1229            obj.name = prefix + "/" + obj.name
1230        return obj
1231
1232    @classmethod
1233    def fromtarfile(cls, tarfile):
1234        """Return the next TarInfo object from TarFile object
1235           tarfile.
1236        """
1237        buf = tarfile.fileobj.read(BLOCKSIZE)
1238        obj = cls.frombuf(buf)
1239        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1240        return obj._proc_member(tarfile)
1241
1242    #--------------------------------------------------------------------------
1243    # The following are methods that are called depending on the type of a
1244    # member. The entry point is _proc_member() which can be overridden in a
1245    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1246    # implement the following
1247    # operations:
1248    # 1. Set self.offset_data to the position where the data blocks begin,
1249    #    if there is data that follows.
1250    # 2. Set tarfile.offset to the position where the next member's header will
1251    #    begin.
1252    # 3. Return self or another valid TarInfo object.
1253    def _proc_member(self, tarfile):
1254        """Choose the right processing method depending on
1255           the type and call it.
1256        """
1257        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1258            return self._proc_gnulong(tarfile)
1259        elif self.type == GNUTYPE_SPARSE:
1260            return self._proc_sparse(tarfile)
1261        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1262            return self._proc_pax(tarfile)
1263        else:
1264            return self._proc_builtin(tarfile)
1265
1266    def _proc_builtin(self, tarfile):
1267        """Process a builtin type or an unknown type which
1268           will be treated as a regular file.
1269        """
1270        self.offset_data = tarfile.fileobj.tell()
1271        offset = self.offset_data
1272        if self.isreg() or self.type not in SUPPORTED_TYPES:
1273            # Skip the following data blocks.
1274            offset += self._block(self.size)
1275        tarfile.offset = offset
1276
1277        # Patch the TarInfo object with saved global
1278        # header information.
1279        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1280
1281        return self
1282
1283    def _proc_gnulong(self, tarfile):
1284        """Process the blocks that hold a GNU longname
1285           or longlink member.
1286        """
1287        buf = tarfile.fileobj.read(self._block(self.size))
1288
1289        # Fetch the next header and process it.
1290        try:
1291            next = self.fromtarfile(tarfile)
1292        except HeaderError:
1293            raise SubsequentHeaderError("missing or bad subsequent header")
1294
1295        # Patch the TarInfo object from the next header with
1296        # the longname information.
1297        next.offset = self.offset
1298        if self.type == GNUTYPE_LONGNAME:
1299            next.name = nts(buf)
1300        elif self.type == GNUTYPE_LONGLINK:
1301            next.linkname = nts(buf)
1302
1303        return next
1304
1305    def _proc_sparse(self, tarfile):
1306        """Process a GNU sparse header plus extra headers.
1307        """
1308        buf = self.buf
1309        sp = _ringbuffer()
1310        pos = 386
1311        lastpos = 0L
1312        realpos = 0L
1313        # There are 4 possible sparse structs in the
1314        # first header.
1315        for i in xrange(4):
1316            try:
1317                offset = nti(buf[pos:pos + 12])
1318                numbytes = nti(buf[pos + 12:pos + 24])
1319            except ValueError:
1320                break
1321            if offset > lastpos:
1322                sp.append(_hole(lastpos, offset - lastpos))
1323            sp.append(_data(offset, numbytes, realpos))
1324            realpos += numbytes
1325            lastpos = offset + numbytes
1326            pos += 24
1327
1328        isextended = ord(buf[482])
1329        origsize = nti(buf[483:495])
1330
1331        # If the isextended flag is given,
1332        # there are extra headers to process.
1333        while isextended == 1:
1334            buf = tarfile.fileobj.read(BLOCKSIZE)
1335            pos = 0
1336            for i in xrange(21):
1337                try:
1338                    offset = nti(buf[pos:pos + 12])
1339                    numbytes = nti(buf[pos + 12:pos + 24])
1340                except ValueError:
1341                    break
1342                if offset > lastpos:
1343                    sp.append(_hole(lastpos, offset - lastpos))
1344                sp.append(_data(offset, numbytes, realpos))
1345                realpos += numbytes
1346                lastpos = offset + numbytes
1347                pos += 24
1348            isextended = ord(buf[504])
1349
1350        if lastpos < origsize:
1351            sp.append(_hole(lastpos, origsize - lastpos))
1352
1353        self.sparse = sp
1354
1355        self.offset_data = tarfile.fileobj.tell()
1356        tarfile.offset = self.offset_data + self._block(self.size)
1357        self.size = origsize
1358
1359        return self
1360
1361    def _proc_pax(self, tarfile):
1362        """Process an extended or global header as described in
1363           POSIX.1-2001.
1364        """
1365        # Read the header information.
1366        buf = tarfile.fileobj.read(self._block(self.size))
1367
1368        # A pax header stores supplemental information for either
1369        # the following file (extended) or all following files
1370        # (global).
1371        if self.type == XGLTYPE:
1372            pax_headers = tarfile.pax_headers
1373        else:
1374            pax_headers = tarfile.pax_headers.copy()
1375
1376        # Parse pax header information. A record looks like that:
1377        # "%d %s=%s\n" % (length, keyword, value). length is the size
1378        # of the complete record including the length field itself and
1379        # the newline. keyword and value are both UTF-8 encoded strings.
1380        regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1381        pos = 0
1382        while True:
1383            match = regex.match(buf, pos)
1384            if not match:
1385                break
1386
1387            length, keyword = match.groups()
1388            length = int(length)
1389            value = buf[match.end(2) + 1:match.start(1) + length - 1]
1390
1391            keyword = keyword.decode("utf8")
1392            value = value.decode("utf8")
1393
1394            pax_headers[keyword] = value
1395            pos += length
1396
1397        # Fetch the next header.
1398        try:
1399            next = self.fromtarfile(tarfile)
1400        except HeaderError:
1401            raise SubsequentHeaderError("missing or bad subsequent header")
1402
1403        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1404            # Patch the TarInfo object with the extended header info.
1405            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1406            next.offset = self.offset
1407
1408            if "size" in pax_headers:
1409                # If the extended header replaces the size field,
1410                # we need to recalculate the offset where the next
1411                # header starts.
1412                offset = next.offset_data
1413                if next.isreg() or next.type not in SUPPORTED_TYPES:
1414                    offset += next._block(next.size)
1415                tarfile.offset = offset
1416
1417        return next
1418
1419    def _apply_pax_info(self, pax_headers, encoding, errors):
1420        """Replace fields with supplemental information from a previous
1421           pax extended or global header.
1422        """
1423        for keyword, value in pax_headers.iteritems():
1424            if keyword not in PAX_FIELDS:
1425                continue
1426
1427            if keyword == "path":
1428                value = value.rstrip("/")
1429
1430            if keyword in PAX_NUMBER_FIELDS:
1431                try:
1432                    value = PAX_NUMBER_FIELDS[keyword](value)
1433                except ValueError:
1434                    value = 0
1435            else:
1436                value = uts(value, encoding, errors)
1437
1438            setattr(self, keyword, value)
1439
1440        self.pax_headers = pax_headers.copy()
1441
1442    def _block(self, count):
1443        """Round up a byte count by BLOCKSIZE and return it,
1444           e.g. _block(834) => 1024.
1445        """
1446        blocks, remainder = divmod(count, BLOCKSIZE)
1447        if remainder:
1448            blocks += 1
1449        return blocks * BLOCKSIZE
1450
    def isreg(self):
        """Return True if the member type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Return the result of isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member type is DIRTYPE."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member type is SYMTYPE."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member type is LNKTYPE."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member type is CHRTYPE."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member type is BLKTYPE."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member type is FIFOTYPE."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the member type is GNUTYPE_SPARSE."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the member type is CHRTYPE, BLKTYPE or FIFOTYPE."""
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1471# class TarInfo
1472
1473class TarFile(object):
1474    """The TarFile Class provides an interface to tar archives.
1475    """
1476
    # Class level defaults. Except for `fileobject', each of them is
    # replaced on the instance when __init__() receives a value other
    # than None for the keyword argument of the same name.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.
                                # Without an explicit value __init__() uses
                                # "utf-8" in mode 'r' and "strict" otherwise.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The default ExFileObject class to use.
1498
1499    def __init__(self, name=None, mode="r", fileobj=None, format=None,
1500            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1501            errors=None, pax_headers=None, debug=None, errorlevel=None):
1502        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1503           read from an existing archive, 'a' to append data to an existing
1504           file or 'w' to create a new file overwriting an existing one. `mode'
1505           defaults to 'r'.
1506           If `fileobj' is given, it is used for reading or writing data. If it
1507           can be determined, `mode' is overridden by `fileobj's mode.
1508           `fileobj' is not closed, when TarFile is closed.
1509        """
1510        if len(mode) > 1 or mode not in "raw":
1511            raise ValueError("mode must be 'r', 'a' or 'w'")
1512        self.mode = mode
1513        self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1514
1515        if not fileobj:
1516            if self.mode == "a" and not os.path.exists(name):
1517                # Create nonexistent files in append mode.
1518                self.mode = "w"
1519                self._mode = "wb"
1520            fileobj = bltn_open(name, self._mode)
1521            self._extfileobj = False
1522        else:
1523            if name is None and hasattr(fileobj, "name"):
1524                name = fileobj.name
1525            if hasattr(fileobj, "mode"):
1526                self._mode = fileobj.mode
1527            self._extfileobj = True
1528        self.name = os.path.abspath(name) if name else None
1529        self.fileobj = fileobj
1530
1531        # Init attributes.
1532        if format is not None:
1533            self.format = format
1534        if tarinfo is not None:
1535            self.tarinfo = tarinfo
1536        if dereference is not None:
1537            self.dereference = dereference
1538        if ignore_zeros is not None:
1539            self.ignore_zeros = ignore_zeros
1540        if encoding is not None:
1541            self.encoding = encoding
1542
1543        if errors is not None:
1544            self.errors = errors
1545        elif mode == "r":
1546            self.errors = "utf-8"
1547        else:
1548            self.errors = "strict"
1549
1550        if pax_headers is not None and self.format == PAX_FORMAT:
1551            self.pax_headers = pax_headers
1552        else:
1553            self.pax_headers = {}
1554
1555        if debug is not None:
1556            self.debug = debug
1557        if errorlevel is not None:
1558            self.errorlevel = errorlevel
1559
1560        # Init datastructures.
1561        self.closed = False
1562        self.members = []       # list of members as TarInfo objects
1563        self._loaded = False    # flag if all members have been read
1564        self.offset = self.fileobj.tell()
1565                                # current position in the archive file
1566        self.inodes = {}        # dictionary caching the inodes of
1567                                # archive members already added
1568
1569        try:
1570            if self.mode == "r":
1571                self.firstmember = None
1572                self.firstmember = self.next()
1573
1574            if self.mode == "a":
1575                # Move to the end of the archive,
1576                # before the first empty block.
1577                while True:
1578                    self.fileobj.seek(self.offset)
1579                    try:
1580                        tarinfo = self.tarinfo.fromtarfile(self)
1581                        self.members.append(tarinfo)
1582                    except EOFHeaderError:
1583                        self.fileobj.seek(self.offset)
1584                        break
1585                    except HeaderError, e:
1586                        raise ReadError(str(e))
1587
1588            if self.mode in "aw":
1589                self._loaded = True
1590
1591                if self.pax_headers:
1592                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1593                    self.fileobj.write(buf)
1594                    self.offset += len(buf)
1595        except:
1596            if not self._extfileobj:
1597                self.fileobj.close()
1598            self.closed = True
1599            raise
1600
1601    def _getposix(self):
1602        return self.format == USTAR_FORMAT
1603    def _setposix(self, value):
1604        import warnings
1605        warnings.warn("use the format attribute instead", DeprecationWarning,
1606                      2)
1607        if value:
1608            self.format = USTAR_FORMAT
1609        else:
1610            self.format = GNU_FORMAT
1611    posix = property(_getposix, _setposix)
1612
1613    #--------------------------------------------------------------------------
1614    # Below are the classmethods which act as alternate constructors to the
1615    # TarFile class. The open() method is the only one that is needed for
1616    # public use; it is the "super"-constructor and is able to select an
1617    # adequate "sub"-constructor for a particular compression using the mapping
1618    # from OPEN_METH.
1619    #
1620    # This concept allows one to subclass TarFile without losing the comfort of
1621    # the super-constructor. A sub-constructor is registered and made available
1622    # by adding it to the mapping in OPEN_METH.
1623
1624    @classmethod
1625    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1626        """Open a tar archive for reading, writing or appending. Return
1627           an appropriate TarFile class.
1628
1629           mode:
1630           'r' or 'r:*' open for reading with transparent compression
1631           'r:'         open for reading exclusively uncompressed
1632           'r:gz'       open for reading with gzip compression
1633           'r:bz2'      open for reading with bzip2 compression
1634           'a' or 'a:'  open for appending, creating the file if necessary
1635           'w' or 'w:'  open for writing without compression
1636           'w:gz'       open for writing with gzip compression
1637           'w:bz2'      open for writing with bzip2 compression
1638
1639           'r|*'        open a stream of tar blocks with transparent compression
1640           'r|'         open an uncompressed stream of tar blocks for reading
1641           'r|gz'       open a gzip compressed stream of tar blocks
1642           'r|bz2'      open a bzip2 compressed stream of tar blocks
1643           'w|'         open an uncompressed stream for writing
1644           'w|gz'       open a gzip compressed stream for writing
1645           'w|bz2'      open a bzip2 compressed stream for writing
1646        """
1647
1648        if not name and not fileobj:
1649            raise ValueError("nothing to open")
1650
1651        if mode in ("r", "r:*"):
1652            # Find out which *open() is appropriate for opening the file.
1653            for comptype in cls.OPEN_METH:
1654                func = getattr(cls, cls.OPEN_METH[comptype])
1655                if fileobj is not None:
1656                    saved_pos = fileobj.tell()
1657                try:
1658                    return func(name, "r", fileobj, **kwargs)
1659                except (ReadError, CompressionError), e:
1660                    if fileobj is not None:
1661                        fileobj.seek(saved_pos)
1662                    continue
1663            raise ReadError("file could not be opened successfully")
1664
1665        elif ":" in mode:
1666            filemode, comptype = mode.split(":", 1)
1667            filemode = filemode or "r"
1668            comptype = comptype or "tar"
1669
1670            # Select the *open() function according to
1671            # given compression.
1672            if comptype in cls.OPEN_METH:
1673                func = getattr(cls, cls.OPEN_METH[comptype])
1674            else:
1675                raise CompressionError("unknown compression type %r" % comptype)
1676            return func(name, filemode, fileobj, **kwargs)
1677
1678        elif "|" in mode:
1679            filemode, comptype = mode.split("|", 1)
1680            filemode = filemode or "r"
1681            comptype = comptype or "tar"
1682
1683            if filemode not in "rw":
1684                raise ValueError("mode must be 'r' or 'w'")
1685
1686            t = cls(name, filemode,
1687                    _Stream(name, filemode, comptype, fileobj, bufsize),
1688                    **kwargs)
1689            t._extfileobj = False
1690            return t
1691
1692        elif mode in "aw":
1693            return cls.taropen(name, mode, fileobj, **kwargs)
1694
1695        raise ValueError("undiscernible mode")
1696
1697    @classmethod
1698    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1699        """Open uncompressed tar archive name for reading or writing.
1700        """
1701        if len(mode) > 1 or mode not in "raw":
1702            raise ValueError("mode must be 'r', 'a' or 'w'")
1703        return cls(name, mode, fileobj, **kwargs)
1704
1705    @classmethod
1706    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1707        """Open gzip compressed tar archive name for reading or writing.
1708           Appending is not allowed.
1709        """
1710        if len(mode) > 1 or mode not in "rw":
1711            raise ValueError("mode must be 'r' or 'w'")
1712
1713        try:
1714            import gzip
1715            gzip.GzipFile
1716        except (ImportError, AttributeError):
1717            raise CompressionError("gzip module is not available")
1718
1719        if fileobj is None:
1720            fileobj = bltn_open(name, mode + "b")
1721
1722        try:
1723            t = cls.taropen(name, mode,
1724                gzip.GzipFile(name, mode, compresslevel, fileobj),
1725                **kwargs)
1726        except IOError:
1727            raise ReadError("not a gzip file")
1728        t._extfileobj = False
1729        return t
1730
1731    @classmethod
1732    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1733        """Open bzip2 compressed tar archive name for reading or writing.
1734           Appending is not allowed.
1735        """
1736        if len(mode) > 1 or mode not in "rw":
1737            raise ValueError("mode must be 'r' or 'w'.")
1738
1739        try:
1740            import bz2
1741        except ImportError:
1742            raise CompressionError("bz2 module is not available")
1743
1744        if fileobj is not None:
1745            fileobj = _BZ2Proxy(fileobj, mode)
1746        else:
1747            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
1748
1749        try:
1750            t = cls.taropen(name, mode, fileobj, **kwargs)
1751        except (IOError, EOFError):
1752            raise ReadError("not a bzip2 file")
1753        t._extfileobj = False
1754        return t
1755
    # All *open() methods are registered here.
    # Each key is a compression type name, each value is the name of the
    # classmethod defined above that opens archives of that type.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
1762
1763    #--------------------------------------------------------------------------
1764    # The public methods which TarFile provides:
1765
1766    def close(self):
1767        """Close the TarFile. In write-mode, two finishing zero blocks are
1768           appended to the archive.
1769        """
1770        if self.closed:
1771            return
1772
1773        if self.mode in "aw":
1774            self.fileobj.write(NUL * (BLOCKSIZE * 2))
1775            self.offset += (BLOCKSIZE * 2)
1776            # fill up the end with zero-blocks
1777            # (like option -b20 for tar does)
1778            blocks, remainder = divmod(self.offset, RECORDSIZE)
1779            if remainder > 0:
1780                self.fileobj.write(NUL * (RECORDSIZE - remainder))
1781
1782        if not self._extfileobj:
1783            self.fileobj.close()
1784        self.closed = True
1785
1786    def getmember(self, name):
1787        """Return a TarInfo object for member `name'. If `name' can not be
1788           found in the archive, KeyError is raised. If a member occurs more
1789           than once in the archive, its last occurrence is assumed to be the
1790           most up-to-date version.
1791        """
1792        tarinfo = self._getmember(name)
1793        if tarinfo is None:
1794            raise KeyError("filename %r not found" % name)
1795        return tarinfo
1796
1797    def getmembers(self):
1798        """Return the members of the archive as a list of TarInfo objects. The
1799           list has the same order as the members in the archive.
1800        """
1801        self._check()
1802        if not self._loaded:    # if we want to obtain a list of
1803            self._load()        # all members, we first have to
1804                                # scan the whole archive.
1805        return self.members
1806
1807    def getnames(self):
1808        """Return the members of the archive as a list of their names. It has
1809           the same order as the list returned by getmembers().
1810        """
1811        return [tarinfo.name for tarinfo in self.getmembers()]
1812
1813    def gettarinfo(self, name=None, arcname=None, fileobj=None):
1814        """Create a TarInfo object for either the file `name' or the file
1815           object `fileobj' (using os.fstat on its file descriptor). You can
1816           modify some of the TarInfo's attributes before you add it using
1817           addfile(). If given, `arcname' specifies an alternative name for the
1818           file in the archive.
1819        """
1820        self._check("aw")
1821
1822        # When fileobj is given, replace name by
1823        # fileobj's real name.
1824        if fileobj is not None:
1825            name = fileobj.name
1826
1827        # Building the name of the member in the archive.
1828        # Backward slashes are converted to forward slashes,
1829        # Absolute paths are turned to relative paths.
1830        if arcname is None:
1831            arcname = name
1832        drv, arcname = os.path.splitdrive(arcname)
1833        arcname = arcname.replace(os.sep, "/")
1834        arcname = arcname.lstrip("/")
1835
1836        # Now, fill the TarInfo object with
1837        # information specific for the file.
1838        tarinfo = self.tarinfo()
1839        tarinfo.tarfile = self
1840
1841        # Use os.stat or os.lstat, depending on platform
1842        # and if symlinks shall be resolved.
1843        if fileobj is None:
1844            if hasattr(os, "lstat") and not self.dereference:
1845                statres = os.lstat(name)
1846            else:
1847                statres = os.stat(name)
1848        else:
1849            statres = os.fstat(fileobj.fileno())
1850        linkname = ""
1851
1852        stmd = statres.st_mode
1853        if stat.S_ISREG(stmd):
1854            inode = (statres.st_ino, statres.st_dev)
1855            if not self.dereference and statres.st_nlink > 1 and \
1856                    inode in self.inodes and arcname != self.inodes[inode]:
1857                # Is it a hardlink to an already
1858                # archived file?
1859                type = LNKTYPE
1860                linkname = self.inodes[inode]
1861            else:
1862                # The inode is added only if its valid.
1863                # For win32 it is always 0.
1864                type = REGTYPE
1865                if inode[0]:
1866                    self.inodes[inode] = arcname
1867        elif stat.S_ISDIR(stmd):
1868            type = DIRTYPE
1869        elif stat.S_ISFIFO(stmd):
1870            type = FIFOTYPE
1871        elif stat.S_ISLNK(stmd):
1872            type = SYMTYPE
1873            linkname = os.readlink(name)
1874        elif stat.S_ISCHR(stmd):
1875            type = CHRTYPE
1876        elif stat.S_ISBLK(stmd):
1877            type = BLKTYPE
1878        else:
1879            return None
1880
1881        # Fill the TarInfo object with all
1882        # information we can get.
1883        tarinfo.name = arcname
1884        tarinfo.mode = stmd
1885        tarinfo.uid = statres.st_uid
1886        tarinfo.gid = statres.st_gid
1887        if type == REGTYPE:
1888            tarinfo.size = statres.st_size
1889        else:
1890            tarinfo.size = 0L
1891        tarinfo.mtime = statres.st_mtime
1892        tarinfo.type = type
1893        tarinfo.linkname = linkname
1894        if pwd:
1895            try:
1896                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1897            except KeyError:
1898                pass
1899        if grp:
1900            try:
1901                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1902            except KeyError:
1903                pass
1904
1905        if type in (CHRTYPE, BLKTYPE):
1906            if hasattr(os, "major") and hasattr(os, "minor"):
1907                tarinfo.devmajor = os.major(statres.st_rdev)
1908                tarinfo.devminor = os.minor(statres.st_rdev)
1909        return tarinfo
1910
    def list(self, verbose=True):
        """Print a table of contents to sys.stdout. If `verbose' is False, only
           the names of the members are printed. If it is True, an `ls -l'-like
           output is produced.

           One line is printed per member. IOError is raised by _check() if
           the TarFile is closed.
        """
        self._check()

        for tarinfo in self:
            # Every print statement below ends with a comma, so all fields
            # of one member end up on the same line, separated by a space.
            if verbose:
                print filemode(tarinfo.mode),
                # Owner and group: the symbolic name if there is one,
                # otherwise the numeric id.
                print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                                 tarinfo.gname or tarinfo.gid),
                if tarinfo.ischr() or tarinfo.isblk():
                    # Device nodes show "major,minor" in place of a size.
                    print "%10s" % ("%d,%d" \
                                    % (tarinfo.devmajor, tarinfo.devminor)),
                else:
                    print "%10d" % tarinfo.size,
                # Modification time in local time, down to the second.
                print "%d-%02d-%02d %02d:%02d:%02d" \
                      % time.localtime(tarinfo.mtime)[:6],

            # Directories are marked with a trailing slash.
            print tarinfo.name + ("/" if tarinfo.isdir() else ""),

            if verbose:
                if tarinfo.issym():
                    print "->", tarinfo.linkname,
                if tarinfo.islnk():
                    print "link to", tarinfo.linkname,
            # Finish the line of this member.
            print
1939
1940    def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
1941        """Add the file `name' to the archive. `name' may be any type of file
1942           (directory, fifo, symbolic link, etc.). If given, `arcname'
1943           specifies an alternative name for the file in the archive.
1944           Directories are added recursively by default. This can be avoided by
1945           setting `recursive' to False. `exclude' is a function that should
1946           return True for each filename to be excluded. `filter' is a function
1947           that expects a TarInfo object argument and returns the changed
1948           TarInfo object, if it returns None the TarInfo object will be
1949           excluded from the archive.
1950        """
1951        self._check("aw")
1952
1953        if arcname is None:
1954            arcname = name
1955
1956        # Exclude pathnames.
1957        if exclude is not None:
1958            import warnings
1959            warnings.warn("use the filter argument instead",
1960                    DeprecationWarning, 2)
1961            if exclude(name):
1962                self._dbg(2, "tarfile: Excluded %r" % name)
1963                return
1964
1965        # Skip if somebody tries to archive the archive...
1966        if self.name is not None and os.path.abspath(name) == self.name:
1967            self._dbg(2, "tarfile: Skipped %r" % name)
1968            return
1969
1970        self._dbg(1, name)
1971
1972        # Create a TarInfo object from the file.
1973        tarinfo = self.gettarinfo(name, arcname)
1974
1975        if tarinfo is None:
1976            self._dbg(1, "tarfile: Unsupported type %r" % name)
1977            return
1978
1979        # Change or exclude the TarInfo object.
1980        if filter is not None:
1981            tarinfo = filter(tarinfo)
1982            if tarinfo is None:
1983                self._dbg(2, "tarfile: Excluded %r" % name)
1984                return
1985
1986        # Append the tar header and data to the archive.
1987        if tarinfo.isreg():
1988            f = bltn_open(name, "rb")
1989            self.addfile(tarinfo, f)
1990            f.close()
1991
1992        elif tarinfo.isdir():
1993            self.addfile(tarinfo)
1994            if recursive:
1995                for f in os.listdir(name):
1996                    self.add(os.path.join(name, f), os.path.join(arcname, f),
1997                            recursive, exclude, filter)
1998
1999        else:
2000            self.addfile(tarinfo)
2001
2002    def addfile(self, tarinfo, fileobj=None):
2003        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2004           given, tarinfo.size bytes are read from it and added to the archive.
2005           You can create TarInfo objects using gettarinfo().
2006           On Windows platforms, `fileobj' should always be opened with mode
2007           'rb' to avoid irritation about the file size.
2008        """
2009        self._check("aw")
2010
2011        tarinfo = copy.copy(tarinfo)
2012
2013        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2014        self.fileobj.write(buf)
2015        self.offset += len(buf)
2016
2017        # If there's data to follow, append it.
2018        if fileobj is not None:
2019            copyfileobj(fileobj, self.fileobj, tarinfo.size)
2020            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2021            if remainder > 0:
2022                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2023                blocks += 1
2024            self.offset += blocks * BLOCKSIZE
2025
2026        self.members.append(tarinfo)
2027
2028    def extractall(self, path=".", members=None):
2029        """Extract all members from the archive to the current working
2030           directory and set owner, modification time and permissions on
2031           directories afterwards. `path' specifies a different directory
2032           to extract to. `members' is optional and must be a subset of the
2033           list returned by getmembers().
2034        """
2035        directories = []
2036
2037        if members is None:
2038            members = self
2039
2040        for tarinfo in members:
2041            if tarinfo.isdir():
2042                # Extract directories with a safe mode.
2043                directories.append(tarinfo)
2044                tarinfo = copy.copy(tarinfo)
2045                tarinfo.mode = 0700
2046            self.extract(tarinfo, path)
2047
2048        # Reverse sort directories.
2049        directories.sort(key=operator.attrgetter('name'))
2050        directories.reverse()
2051
2052        # Set correct owner, mtime and filemode on directories.
2053        for tarinfo in directories:
2054            dirpath = os.path.join(path, tarinfo.name)
2055            try:
2056                self.chown(tarinfo, dirpath)
2057                self.utime(tarinfo, dirpath)
2058                self.chmod(tarinfo, dirpath)
2059            except ExtractError, e:
2060                if self.errorlevel > 1:
2061                    raise
2062                else:
2063                    self._dbg(1, "tarfile: %s" % e)
2064
2065    def extract(self, member, path=""):
2066        """Extract a member from the archive to the current working directory,
2067           using its full name. Its file information is extracted as accurately
2068           as possible. `member' may be a filename or a TarInfo object. You can
2069           specify a different directory using `path'.
2070        """
2071        self._check("r")
2072
2073        if isinstance(member, basestring):
2074            tarinfo = self.getmember(member)
2075        else:
2076            tarinfo = member
2077
2078        # Prepare the link target for makelink().
2079        if tarinfo.islnk():
2080            tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2081
2082        try:
2083            self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2084        except EnvironmentError, e:
2085            if self.errorlevel > 0:
2086                raise
2087            else:
2088                if e.filename is None:
2089                    self._dbg(1, "tarfile: %s" % e.strerror)
2090                else:
2091                    self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2092        except ExtractError, e:
2093            if self.errorlevel > 1:
2094                raise
2095            else:
2096                self._dbg(1, "tarfile: %s" % e)
2097
2098    def extractfile(self, member):
2099        """Extract a member from the archive as a file object. `member' may be
2100           a filename or a TarInfo object. If `member' is a regular file, a
2101           file-like object is returned. If `member' is a link, a file-like
2102           object is constructed from the link's target. If `member' is none of
2103           the above, None is returned.
2104           The file-like object is read-only and provides the following
2105           methods: read(), readline(), readlines(), seek() and tell()
2106        """
2107        self._check("r")
2108
2109        if isinstance(member, basestring):
2110            tarinfo = self.getmember(member)
2111        else:
2112            tarinfo = member
2113
2114        if tarinfo.isreg():
2115            return self.fileobject(self, tarinfo)
2116
2117        elif tarinfo.type not in SUPPORTED_TYPES:
2118            # If a member's type is unknown, it is treated as a
2119            # regular file.
2120            return self.fileobject(self, tarinfo)
2121
2122        elif tarinfo.islnk() or tarinfo.issym():
2123            if isinstance(self.fileobj, _Stream):
2124                # A small but ugly workaround for the case that someone tries
2125                # to extract a (sym)link as a file-object from a non-seekable
2126                # stream of tar blocks.
2127                raise StreamError("cannot extract (sym)link as file object")
2128            else:
2129                # A (sym)link's file object is its target's file object.
2130                return self.extractfile(self._find_link_target(tarinfo))
2131        else:
2132            # If there's no data associated with the member (directory, chrdev,
2133            # blkdev, etc.), return None instead of a file object.
2134            return None
2135
2136    def _extract_member(self, tarinfo, targetpath):
2137        """Extract the TarInfo object tarinfo to a physical
2138           file called targetpath.
2139        """
2140        # Fetch the TarInfo object for the given name
2141        # and build the destination pathname, replacing
2142        # forward slashes to platform specific separators.
2143        targetpath = targetpath.rstrip("/")
2144        targetpath = targetpath.replace("/", os.sep)
2145
2146        # Create all upper directories.
2147        upperdirs = os.path.dirname(targetpath)
2148        if upperdirs and not os.path.exists(upperdirs):
2149            # Create directories that are not part of the archive with
2150            # default permissions.
2151            os.makedirs(upperdirs)
2152
2153        if tarinfo.islnk() or tarinfo.issym():
2154            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2155        else:
2156            self._dbg(1, tarinfo.name)
2157
2158        if tarinfo.isreg():
2159            self.makefile(tarinfo, targetpath)
2160        elif tarinfo.isdir():
2161            self.makedir(tarinfo, targetpath)
2162        elif tarinfo.isfifo():
2163            self.makefifo(tarinfo, targetpath)
2164        elif tarinfo.ischr() or tarinfo.isblk():
2165            self.makedev(tarinfo, targetpath)
2166        elif tarinfo.islnk() or tarinfo.issym():
2167            self.makelink(tarinfo, targetpath)
2168        elif tarinfo.type not in SUPPORTED_TYPES:
2169            self.makeunknown(tarinfo, targetpath)
2170        else:
2171            self.makefile(tarinfo, targetpath)
2172
2173        self.chown(tarinfo, targetpath)
2174        if not tarinfo.issym():
2175            self.chmod(tarinfo, targetpath)
2176            self.utime(tarinfo, targetpath)
2177
2178    #--------------------------------------------------------------------------
2179    # Below are the different file methods. They are called via
2180    # _extract_member() when extract() is called. They can be replaced in a
2181    # subclass to implement other functionality.
2182
2183    def makedir(self, tarinfo, targetpath):
2184        """Make a directory called targetpath.
2185        """
2186        try:
2187            # Use a safe mode for the directory, the real mode is set
2188            # later in _extract_member().
2189            os.mkdir(targetpath, 0700)
2190        except EnvironmentError, e:
2191            if e.errno != errno.EEXIST:
2192                raise
2193
2194    def makefile(self, tarinfo, targetpath):
2195        """Make a file called targetpath.
2196        """
2197        source = self.extractfile(tarinfo)
2198        target = bltn_open(targetpath, "wb")
2199        copyfileobj(source, target)
2200        source.close()
2201        target.close()
2202
2203    def makeunknown(self, tarinfo, targetpath):
2204        """Make a file from a TarInfo object with an unknown type
2205           at targetpath.
2206        """
2207        self.makefile(tarinfo, targetpath)
2208        self._dbg(1, "tarfile: Unknown file type %r, " \
2209                     "extracted as regular file." % tarinfo.type)
2210
2211    def makefifo(self, tarinfo, targetpath):
2212        """Make a fifo called targetpath.
2213        """
2214        if hasattr(os, "mkfifo"):
2215            os.mkfifo(targetpath)
2216        else:
2217            raise ExtractError("fifo not supported by system")
2218
2219    def makedev(self, tarinfo, targetpath):
2220        """Make a character or block device called targetpath.
2221        """
2222        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2223            raise ExtractError("special devices not supported by system")
2224
2225        mode = tarinfo.mode
2226        if tarinfo.isblk():
2227            mode |= stat.S_IFBLK
2228        else:
2229            mode |= stat.S_IFCHR
2230
2231        os.mknod(targetpath, mode,
2232                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2233
2234    def makelink(self, tarinfo, targetpath):
2235        """Make a (symbolic) link called targetpath. If it cannot be created
2236          (platform limitation), we try to make a copy of the referenced file
2237          instead of a link.
2238        """
2239        if hasattr(os, "symlink") and hasattr(os, "link"):
2240            # For systems that support symbolic and hard links.
2241            if tarinfo.issym():
2242                if os.path.lexists(targetpath):
2243                    os.unlink(targetpath)
2244                os.symlink(tarinfo.linkname, targetpath)
2245            else:
2246                # See extract().
2247                if os.path.exists(tarinfo._link_target):
2248                    if os.path.lexists(targetpath):
2249                        os.unlink(targetpath)
2250                    os.link(tarinfo._link_target, targetpath)
2251                else:
2252                    self._extract_member(self._find_link_target(tarinfo), targetpath)
2253        else:
2254            try:
2255                self._extract_member(self._find_link_target(tarinfo), targetpath)
2256            except KeyError:
2257                raise ExtractError("unable to resolve link inside archive")
2258
2259    def chown(self, tarinfo, targetpath):
2260        """Set owner of targetpath according to tarinfo.
2261        """
2262        if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2263            # We have to be root to do so.
2264            try:
2265                g = grp.getgrnam(tarinfo.gname)[2]
2266            except KeyError:
2267                try:
2268                    g = grp.getgrgid(tarinfo.gid)[2]
2269                except KeyError:
2270                    g = os.getgid()
2271            try:
2272                u = pwd.getpwnam(tarinfo.uname)[2]
2273            except KeyError:
2274                try:
2275                    u = pwd.getpwuid(tarinfo.uid)[2]
2276                except KeyError:
2277                    u = os.getuid()
2278            try:
2279                if tarinfo.issym() and hasattr(os, "lchown"):
2280                    os.lchown(targetpath, u, g)
2281                else:
2282                    if sys.platform != "os2emx":
2283                        os.chown(targetpath, u, g)
2284            except EnvironmentError, e:
2285                raise ExtractError("could not change owner")
2286
2287    def chmod(self, tarinfo, targetpath):
2288        """Set file permissions of targetpath according to tarinfo.
2289        """
2290        if hasattr(os, 'chmod'):
2291            try:
2292                os.chmod(targetpath, tarinfo.mode)
2293            except EnvironmentError, e:
2294                raise ExtractError("could not change mode")
2295
2296    def utime(self, tarinfo, targetpath):
2297        """Set modification time of targetpath according to tarinfo.
2298        """
2299        if not hasattr(os, 'utime'):
2300            return
2301        try:
2302            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2303        except EnvironmentError, e:
2304            raise ExtractError("could not change modification time")
2305
2306    #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.

           ReadError is raised for a bad header at offset 0 and for every
           SubsequentHeaderError. Each member returned is also appended to
           self.members.
        """
        self._check("ra")
        # A member that was stored in self.firstmember is handed out once,
        # before anything is read from the file.
        if self.firstmember is not None:
            m = self.firstmember
            self.firstmember = None
            return m

        # Read the next block.
        self.fileobj.seek(self.offset)
        tarinfo = None
        # Only the `continue' statements below repeat the loop. Every other
        # path reaches the `break' at the end, either with a TarInfo object
        # or with tarinfo still None, which means end of archive.
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
            except EOFHeaderError, e:
                # With ignore_zeros set, skip one block and try again.
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
            except InvalidHeaderError, e:
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                elif self.offset == 0:
                    # An invalid header right at the start is an error.
                    raise ReadError(str(e))
            except EmptyHeaderError:
                if self.offset == 0:
                    raise ReadError("empty file")
            except TruncatedHeaderError, e:
                if self.offset == 0:
                    raise ReadError(str(e))
            except SubsequentHeaderError, e:
                # This one is an error regardless of the offset.
                raise ReadError(str(e))
            break

        if tarinfo is not None:
            self.members.append(tarinfo)
        else:
            # Nothing more to read, the member list is complete.
            self._loaded = True

        return tarinfo
2352
2353    #--------------------------------------------------------------------------
2354    # Little helper methods:
2355
2356    def _getmember(self, name, tarinfo=None, normalize=False):
2357        """Find an archive member by name from bottom to top.
2358           If tarinfo is given, it is used as the starting point.
2359        """
2360        # Ensure that all members have been loaded.
2361        members = self.getmembers()
2362
2363        # Limit the member search list up to tarinfo.
2364        if tarinfo is not None:
2365            members = members[:members.index(tarinfo)]
2366
2367        if normalize:
2368            name = os.path.normpath(name)
2369
2370        for member in reversed(members):
2371            if normalize:
2372                member_name = os.path.normpath(member.name)
2373            else:
2374                member_name = member.name
2375
2376            if name == member_name:
2377                return member
2378
2379    def _load(self):
2380        """Read through the entire archive file and look for readable
2381           members.
2382        """
2383        while True:
2384            tarinfo = self.next()
2385            if tarinfo is None:
2386                break
2387        self._loaded = True
2388
2389    def _check(self, mode=None):
2390        """Check if TarFile is still open, and if the operation's mode
2391           corresponds to TarFile's mode.
2392        """
2393        if self.closed:
2394            raise IOError("%s is closed" % self.__class__.__name__)
2395        if mode is not None and self.mode not in mode:
2396            raise IOError("bad operation for mode %r" % self.mode)
2397
2398    def _find_link_target(self, tarinfo):
2399        """Find the target member of a symlink or hardlink member in the
2400           archive.
2401        """
2402        if tarinfo.issym():
2403            # Always search the entire archive.
2404            linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname
2405            limit = None
2406        else:
2407            # Search the archive before the link, because a hard link is
2408            # just a reference to an already archived file.
2409            linkname = tarinfo.linkname
2410            limit = tarinfo
2411
2412        member = self._getmember(linkname, tarinfo=limit, normalize=True)
2413        if member is None:
2414            raise KeyError("linkname %r not found" % linkname)
2415        return member
2416
2417    def __iter__(self):
2418        """Provide an iterator object.
2419        """
2420        if self._loaded:
2421            return iter(self.members)
2422        else:
2423            return TarIter(self)
2424
    def _dbg(self, level, msg):
        """Write debugging output to sys.stderr.

           `msg' is printed only if `level' does not exceed self.debug.
        """
        if level <= self.debug:
            print >> sys.stderr, msg
2430
2431    def __enter__(self):
2432        self._check()
2433        return self
2434
2435    def __exit__(self, type, value, traceback):
2436        if type is None:
2437            self.close()
2438        else:
2439            # An exception occurred. We must not call close() because
2440            # it would try to write end-of-archive blocks and padding.
2441            if not self._extfileobj:
2442                self.fileobj.close()
2443            self.closed = True
2444# class TarFile
2445
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Start iterating over `tarfile' at its first member.
        """
        self.index = 0
        self.tarfile = tarfile
    def __iter__(self):
        """An iterator is its own iterator.
        """
        return self
    def next(self):
        """Return the next member. Members are fetched with the TarFile's
           next() method until the archive is exhausted, at which point
           the TarFile is marked as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. That is why a
        # TarFile that is loaded already is served from its member list.
        if archive._loaded:
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2481
2482# Helper classes for sparse file support
2483class _section:
2484    """Base class for _data and _hole.
2485    """
2486    def __init__(self, offset, size):
2487        self.offset = offset
2488        self.size = size
2489    def __contains__(self, offset):
2490        return self.offset <= offset < self.offset + self.size
2491
class _data(_section):
    """A section of a sparse file that holds data. `realpos' is stored
       next to the offset and size of the section.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2498
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing to
       _section and exists to tell holes and data apart.
    """
2503
2504class _ringbuffer(list):
2505    """Ringbuffer class which increases performance
2506       over a regular list.
2507    """
2508    def __init__(self):
2509        self.idx = 0
2510    def find(self, offset):
2511        idx = self.idx
2512        while True:
2513            item = self[idx]
2514            if offset in item:
2515                break
2516            idx += 1
2517            if idx == len(self):
2518                idx = 0
2519            if idx == self.idx:
2520                # End of File
2521                return None
2522        self.idx = idx
2523        return item
2524
2525#---------------------------------------------
2526# zipfile compatible TarFile class
2527#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the ZipFile
       class from the standard module zipfile.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                stacklevel=2)
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[:1] == "r":
            # Give every member the attribute names of a ZipInfo object.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]
    def namelist(self):
        return [member.name for member in self.infolist()]
    def infolist(self):
        # Only members of a regular type are reported.
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]
    def printdir(self):
        self.tarfile.list()
    def testzip(self):
        return
    def getinfo(self, name):
        return self.tarfile.getmember(name)
    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()
    def write(self, filename, arcname=None, compress_type=None):
        # compress_type is accepted but not used.
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        tinfo = TarInfo(zinfo.filename)
        tinfo.size = len(bytes)
        tinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(tinfo, StringIO(bytes))
    def close(self):
        self.tarfile.close()
2577#class TarFileCompat
2578
2579#--------------------
2580# exported functions
2581#--------------------
def is_tarfile(name):
    """Report whether name can be opened as a tar archive.

       Return True when open(name) succeeds and False when it raises
       TarError; any other exception propagates to the caller.
    """
    # `open` is looked up at call time, so this uses the module-level
    # name that the file rebinds to TarFile.open after this definition.
    try:
        open(name).close()
    except TarError:
        return False
    return True
2592
# Save the current `open` (the builtin, judging by the name bltn_open)
# before the module-level name `open` is rebound to TarFile.open.
bltn_open = open
open = TarFile.open
2595