1"""
2Read and write ZIP files.
3"""
4import struct, os, time, sys, shutil
5import binascii, cStringIO, stat
6import io
7import re
8import string
9
10try:
11    import zlib # We may need its compression method
12    crc32 = zlib.crc32
13except ImportError:
14    zlib = None
15    crc32 = binascii.crc32
16
17__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
18           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
19
class BadZipfile(Exception):
    """Raised by this module when a file cannot be read as a ZIP archive."""
    pass
22
23
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that would require ZIP64 extensions
    while those extensions are disabled.
    """
29
error = BadZipfile      # The exception raised by this module

# Sizes above this value (2**31 - 1) make ZipInfo.FileHeader() switch to,
# or demand, the ZIP64 extension.
ZIP64_LIMIT = (1 << 31) - 1
# 2**16 - 1: the largest value a 16-bit entry-count field can hold
# (presumably checked when the central directory is written -- not
# visible in this part of the file).
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
# 2**16 - 1: the largest value the 16-bit archive-comment length field
# can hold.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
40
41# Below are some formats and associated data for reading/writing headers using
42# the struct module.  The names and structures of headers/records are those used
43# in the PKWARE description of the ZIP file format:
44#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
45# (URL valid as of January 2008)
46
47# The "end of central directory" structure, magic number, size, and indices
48# (section V.I in the format document)
# struct layout (little-endian): 4-byte signature, four 16-bit fields
# (disk number, disk where the directory starts, entries on this disk,
# total entries), two 32-bit fields (directory size, directory offset)
# and a 16-bit comment length.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Indexes into the list produced by unpacking structEndArchive
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
65
66# The "central directory" structure, magic number, size, and indices
67# of entries in the structure (section V.F in the format document)
# struct layout (little-endian): 4-byte signature, four single bytes
# (create version/system, extract version/system), four 16-bit fields
# (flags, compression type, time, date), 32-bit CRC, two 32-bit sizes,
# five 16-bit fields (filename/extra/comment lengths, starting disk,
# internal attributes) and two 32-bit fields (external attributes,
# local header offset).
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
92
93# The "local file header" structure, magic number, size, and indices
94# (section V.A in the format document)
# struct layout (little-endian): 4-byte signature, two single bytes
# (extract version, extract system), four 16-bit fields (flags,
# compression method, time, date), 32-bit CRC, two 32-bit sizes and two
# 16-bit lengths (filename, extra field).
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Indexes of entries in the unpacked local file header
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
111
112# The "Zip64 end of central directory locator" structure, magic number, and size
# struct layout (little-endian): 4-byte signature, 32-bit disk number,
# 64-bit relative offset, 32-bit total number of disks.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# struct layout (little-endian): 4-byte signature, 64-bit record size, two
# 16-bit versions (create, extract), two 32-bit disk numbers and four
# 64-bit fields (entries on this disk, total entries, directory size,
# directory offset).
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Indexes of entries in the unpacked ZIP64 end-of-central-directory record
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
133
def _check_zipfile(fp):
    """Return True when an end-of-central-directory record is found in *fp*.

    An IOError raised while probing the file counts as "not a ZIP file"
    and yields False.
    """
    try:
        end_record = _EndRecData(fp)
    except IOError:
        return False
    if end_record:
        return True         # file has correct magic number
    return False
141
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* is either a path or an object with a ``read`` attribute
    (a file or file-like object).  Paths are opened in binary mode and
    closed again before returning.  Any IOError is reported as False.
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves so it is closed afterwards.
            with open(filename, "rb") as handle:
                is_zip = _check_zipfile(handle)
        else:
            is_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return is_zip
157
def _EndRecData64(fpin, offset, endrec):
    """Update *endrec* from the ZIP64 end-of-archive records, if present.

    fpin   -- seekable file object for the archive.
    offset -- position of the classic "end of central directory" record,
              given relative to the end of the file (callers pass a
              negative value).
    endrec -- list built by _EndRecData(); it is modified in place.

    Returns *endrec*.  The list is returned unchanged when no ZIP64
    locator, or no ZIP64 end-of-central-directory record, sits directly
    in front of the classic record.  Raises BadZipfile for archives that
    span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    # Only a total-disk count above one means the archive is really
    # spanned; a recorded count of 0 is tolerated for single-file archives.
    if diskno != 0 or disks > 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
199
200
def _EndRecData(fpin):
    """Locate and decode the "End of Central Directory" record.

    Returns None when no record can be found.  Otherwise returns a list
    holding the unpacked record fields, then the archive comment, then
    the file offset at which the record starts; the list is passed
    through _EndRecData64() so ZIP64 values override the classic ones.
    """
    # Measure the file by seeking to its end.
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the record itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    tail = fpin.read()
    record_at_end = (len(tail) == sizeEndCentDir and
                     tail[0:4] == stringEndArchive and
                     tail[-2:] == b"\000\000")
    if record_at_end:
        # Signature matches and the comment length is zero.
        endrec = list(struct.unpack(structEndArchive, tail))
        # Blank comment, then the offset where the record starts.
        endrec.extend(["", filesize - sizeEndCentDir])
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Slow path: either not a ZIP file, or a comment (up to 64K long)
    # follows the record.  Scan the tail of the file for the last
    # occurrence of the signature; the signature is assumed not to
    # appear inside the comment itself.
    search_from = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_from, 0)
    window = fpin.read()
    sig_pos = window.rfind(stringEndArchive)
    if sig_pos < 0:
        # Unable to find a valid end of central directory structure
        return None

    record_end = sig_pos + sizeEndCentDir
    raw_record = window[sig_pos:record_end]
    if len(raw_record) != sizeEndCentDir:
        # Zip file is corrupted.
        return None

    endrec = list(struct.unpack(structEndArchive, raw_record))
    claimed_len = endrec[_ECD_COMMENT_SIZE]     # as claimed by the zip file
    endrec.append(window[record_end:record_end + claimed_len])
    endrec.append(search_from + sig_pos)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_from + sig_pos - filesize, endrec)
260
261
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description with default attribute values.

        filename  -- member name; it is cut at the first null byte and
                     os.sep is replaced by "/".
        date_time -- (year, month, day, hour, min, sec) tuple.

        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a string.

        zip64 -- None: use ZIP64 only when a size exceeds ZIP64_LIMIT;
                 true: always append a ZIP64 extra record with the sizes;
                 false: raise LargeZipFile if a size exceeds ZIP64_LIMIT.

        When a size exceeds ZIP64_LIMIT the 32-bit size fields are set to
        0xffffffff and extract_version / create_version are raised to at
        least 45 on this instance.
        """
        dt = self.date_time
        # Pack the timestamp into the MS-DOS date and time words.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value so that an already
            # higher create_version is never lowered.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode name is encoded as ASCII when possible; otherwise it is
        encoded as UTF-8 and flag bit 0x800 is set in the returned flags.
        A byte-string name is returned unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record found in self.extra, if any.

        The extra field is walked as a sequence of (type, length, data)
        records.  For a type-1 (ZIP64) record, each of file_size,
        compress_size and header_offset that currently holds the 32-bit
        placeholder is replaced, in that order, by the next 64-bit value
        of the record.  Raises RuntimeError for a ZIP64 record with an
        unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            # Skip this record's 4-byte header and its payload.
            extra = extra[ln+4:]
418
419
420class _ZipDecrypter:
421    """Class to handle decryption of files stored within a ZIP archive.
422
423    ZIP supports a password-based form of encryption. Even though known
424    plaintext attacks have been found against it, it is still useful
425    to be able to get data out of such a file.
426
427    Usage:
428        zd = _ZipDecrypter(mypwd)
429        plain_char = zd(cypher_char)
430        plain_text = map(zd, cypher_text)
431    """
432
433    def _GenerateCRCTable():
434        """Generate a CRC-32 table.
435
436        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
437        internal keys. We noticed that a direct implementation is faster than
438        relying on binascii.crc32().
439        """
440        poly = 0xedb88320
441        table = [0] * 256
442        for i in range(256):
443            crc = i
444            for j in range(8):
445                if crc & 1:
446                    crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
447                else:
448                    crc = ((crc >> 1) & 0x7FFFFFFF)
449            table[i] = crc
450        return table
451    crctable = _GenerateCRCTable()
452
453    def _crc32(self, ch, crc):
454        """Compute the CRC32 primitive on one byte."""
455        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
456
457    def __init__(self, pwd):
458        self.key0 = 305419896
459        self.key1 = 591751049
460        self.key2 = 878082192
461        for p in pwd:
462            self._UpdateKeys(p)
463
464    def _UpdateKeys(self, c):
465        self.key0 = self._crc32(c, self.key0)
466        self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
467        self.key1 = (self.key1 * 134775813 + 1) & 4294967295
468        self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
469
470    def __call__(self, c):
471        """Decrypt a single character."""
472        c = ord(c)
473        k = self.key2 | 2
474        c = c ^ (((k * (k^1)) >> 8) & 255)
475        c = chr(c)
476        self._UpdateKeys(c)
477        return c
478
479
# Maps ZIP compression-method codes to short names.  ZipExtFile uses it to
# name the method in the NotImplementedError raised for unsupported types.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
499
500
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter:
    # "1 << 31 - 1" would evaluate as 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
            close_fileobj=False):
        """Prepare to read one member from *fileobj*.

        fileobj       -- file object that supplies the member's raw data.
        mode          -- mode string; a 'U' in it enables universal
                         newline handling in readline().
        zipinfo       -- ZipInfo describing the member.
        decrypter     -- optional callable applied to every raw character.
        close_fileobj -- if true, close() also closes *fileobj*.

        Raises NotImplementedError for a compression type other than
        ZIP_STORED and ZIP_DEFLATED.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            # Negative window bits: raw deflate data without zlib header.
            self._decompressor = zlib.decompressobj(-15)
        elif self._compress_type != ZIP_STORED:
            descr = compressor_names.get(self._compress_type)
            if descr:
                raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
            else:
                raise NotImplementedError("compression type %d" % (self._compress_type,))
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        In universal-newline mode every line ending is returned as '\\n'
        and the endings seen are recorded in self.newlines.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Not enough buffered: read more, then push it back so the
            # read position is unchanged.
            chunk = self.read(n)
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: the object always supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC and verify it at *eof*.

        Raises BadZipfile when the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call.

        A negative n or None means "as much as possible" (MAX_N bytes).
        """

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close the stream, and the underlying file if it is owned."""
        try :
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super(ZipExtFile, self).close()
707
708
709class ZipFile(object):
710    """ Class with methods to open, read, write, close, list zip files.
711
712    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
713
714    file: Either the path to the file, or a file-like object.
715          If it is a path, the file will be opened and closed by ZipFile.
716    mode: The mode can be either read "r", write "w" or append "a".
717    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
718    allowZip64: if True ZipFile will create files with ZIP64 extensions when
719                needed, otherwise it will raise an exception when this would
720                be necessary.
721
722    """
723
724    fp = None                   # Set here since __del__ checks it
725
726    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
727        """Open the ZIP file with mode read "r", write "w" or append "a"."""
728        if mode not in ("r", "w", "a"):
729            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
730
731        if compression == ZIP_STORED:
732            pass
733        elif compression == ZIP_DEFLATED:
734            if not zlib:
735                raise RuntimeError,\
736                      "Compression requires the (missing) zlib module"
737        else:
738            raise RuntimeError, "That compression method is not supported"
739
740        self._allowZip64 = allowZip64
741        self._didModify = False
742        self.debug = 0  # Level of printing: 0 through 3
743        self.NameToInfo = {}    # Find file info given name
744        self.filelist = []      # List of ZipInfo instances for archive
745        self.compression = compression  # Method of compression
746        self.mode = key = mode.replace('b', '')[0]
747        self.pwd = None
748        self._comment = ''
749
750        # Check if we were passed a file-like object
751        if isinstance(file, basestring):
752            self._filePassed = 0
753            self.filename = file
754            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
755            try:
756                self.fp = open(file, modeDict[mode])
757            except IOError:
758                if mode == 'a':
759                    mode = key = 'w'
760                    self.fp = open(file, modeDict[mode])
761                else:
762                    raise
763        else:
764            self._filePassed = 1
765            self.fp = file
766            self.filename = getattr(file, 'name', None)
767
768        try:
769            if key == 'r':
770                self._RealGetContents()
771            elif key == 'w':
772                # set the modified flag so central directory gets written
773                # even if no files are added to the archive
774                self._didModify = True
775            elif key == 'a':
776                try:
777                    # See if file is a zip file
778                    self._RealGetContents()
779                    # seek to start of directory and overwrite
780                    self.fp.seek(self.start_dir, 0)
781                except BadZipfile:
782                    # file is not a zip file, just append
783                    self.fp.seek(0, 2)
784
785                    # set the modified flag so central directory gets written
786                    # even if no files are added to the archive
787                    self._didModify = True
788            else:
789                raise RuntimeError('Mode must be "r", "w" or "a"')
790        except:
791            fp = self.fp
792            self.fp = None
793            if not self._filePassed:
794                fp.close()
795            raise
796
    def __enter__(self):
        """Enter a ``with`` block; the ZipFile itself is the bound value."""
        return self
799
    def __exit__(self, type, value, traceback):
        """Leave a ``with`` block by calling close().

        The exception arguments are ignored and nothing is returned, so an
        exception raised inside the block is not suppressed.
        """
        self.close()
802
803    def _RealGetContents(self):
804        """Read in the table of contents for the ZIP file."""
805        fp = self.fp
806        try:
807            endrec = _EndRecData(fp)
808        except IOError:
809            raise BadZipfile("File is not a zip file")
810        if not endrec:
811            raise BadZipfile, "File is not a zip file"
812        if self.debug > 1:
813            print endrec
814        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
815        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
816        self._comment = endrec[_ECD_COMMENT]    # archive comment
817
818        # "concat" is zero, unless zip was concatenated to another file
819        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
820        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
821            # If Zip64 extension structures are present, account for them
822            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
823
824        if self.debug > 2:
825            inferred = concat + offset_cd
826            print "given, inferred, offset", offset_cd, inferred, concat
827        # self.start_dir:  Position of start of central directory
828        self.start_dir = offset_cd + concat
829        fp.seek(self.start_dir, 0)
830        data = fp.read(size_cd)
831        fp = cStringIO.StringIO(data)
832        total = 0
833        while total < size_cd:
834            centdir = fp.read(sizeCentralDir)
835            if len(centdir) != sizeCentralDir:
836                raise BadZipfile("Truncated central directory")
837            centdir = struct.unpack(structCentralDir, centdir)
838            if centdir[_CD_SIGNATURE] != stringCentralDir:
839                raise BadZipfile("Bad magic number for central directory")
840            if self.debug > 2:
841                print centdir
842            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
843            # Create ZipInfo instance to store file information
844            x = ZipInfo(filename)
845            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
846            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
847            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
848            (x.create_version, x.create_system, x.extract_version, x.reserved,
849                x.flag_bits, x.compress_type, t, d,
850                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
851            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
852            # Convert date/time code to (year, month, day, hour, min, sec)
853            x._raw_time = t
854            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
855                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
856
857            x._decodeExtra()
858            x.header_offset = x.header_offset + concat
859            x.filename = x._decodeFilename()
860            self.filelist.append(x)
861            self.NameToInfo[x.filename] = x
862
863            # update total bytes read from central directory
864            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
865                     + centdir[_CD_EXTRA_FIELD_LENGTH]
866                     + centdir[_CD_COMMENT_LENGTH])
867
868            if self.debug > 2:
869                print "total", total
870
871
872    def namelist(self):
873        """Return a list of file names in the archive."""
874        l = []
875        for data in self.filelist:
876            l.append(data.filename)
877        return l
878
879    def infolist(self):
880        """Return a list of class ZipInfo instances for files in the
881        archive."""
882        return self.filelist
883
884    def printdir(self):
885        """Print a table of contents for the zip file."""
886        print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
887        for zinfo in self.filelist:
888            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
889            print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
890
891    def testzip(self):
892        """Read all the files and check the CRC."""
893        chunk_size = 2 ** 20
894        for zinfo in self.filelist:
895            try:
896                # Read by chunks, to avoid an OverflowError or a
897                # MemoryError with very large embedded files.
898                with self.open(zinfo.filename, "r") as f:
899                    while f.read(chunk_size):     # Check CRC-32
900                        pass
901            except BadZipfile:
902                return zinfo.filename
903
904    def getinfo(self, name):
905        """Return the instance of ZipInfo given 'name'."""
906        info = self.NameToInfo.get(name)
907        if info is None:
908            raise KeyError(
909                'There is no item named %r in the archive' % name)
910
911        return info
912
913    def setpassword(self, pwd):
914        """Set default password for encrypted files."""
915        self.pwd = pwd
916
917    @property
918    def comment(self):
919        """The comment text associated with the ZIP file."""
920        return self._comment
921
922    @comment.setter
923    def comment(self, comment):
924        # check for valid comment length
925        if len(comment) > ZIP_MAX_COMMENT:
926            import warnings
927            warnings.warn('Archive comment is too long; truncating to %d bytes'
928                          % ZIP_MAX_COMMENT, stacklevel=2)
929            comment = comment[:ZIP_MAX_COMMENT]
930        self._comment = comment
931        self._didModify = True
932
933    def read(self, name, pwd=None):
934        """Return file bytes (as a string) for name."""
935        return self.open(name, "r", pwd).read()
936
937    def open(self, name, mode="r", pwd=None):
938        """Return file-like object for 'name'."""
939        if mode not in ("r", "U", "rU"):
940            raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
941        if not self.fp:
942            raise RuntimeError, \
943                  "Attempt to read ZIP archive that was already closed"
944
945        # Only open a new file for instances where we were not
946        # given a file object in the constructor
947        if self._filePassed:
948            zef_file = self.fp
949            should_close = False
950        else:
951            zef_file = open(self.filename, 'rb')
952            should_close = True
953
954        try:
955            # Make sure we have an info object
956            if isinstance(name, ZipInfo):
957                # 'name' is already an info object
958                zinfo = name
959            else:
960                # Get info object for name
961                zinfo = self.getinfo(name)
962
963            zef_file.seek(zinfo.header_offset, 0)
964
965            # Skip the file header:
966            fheader = zef_file.read(sizeFileHeader)
967            if len(fheader) != sizeFileHeader:
968                raise BadZipfile("Truncated file header")
969            fheader = struct.unpack(structFileHeader, fheader)
970            if fheader[_FH_SIGNATURE] != stringFileHeader:
971                raise BadZipfile("Bad magic number for file header")
972
973            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
974            if fheader[_FH_EXTRA_FIELD_LENGTH]:
975                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
976
977            if fname != zinfo.orig_filename:
978                raise BadZipfile, \
979                        'File name in directory "%s" and header "%s" differ.' % (
980                            zinfo.orig_filename, fname)
981
982            # check for encrypted flag & handle password
983            is_encrypted = zinfo.flag_bits & 0x1
984            zd = None
985            if is_encrypted:
986                if not pwd:
987                    pwd = self.pwd
988                if not pwd:
989                    raise RuntimeError, "File %s is encrypted, " \
990                        "password required for extraction" % name
991
992                zd = _ZipDecrypter(pwd)
993                # The first 12 bytes in the cypher stream is an encryption header
994                #  used to strengthen the algorithm. The first 11 bytes are
995                #  completely random, while the 12th contains the MSB of the CRC,
996                #  or the MSB of the file time depending on the header type
997                #  and is used to check the correctness of the password.
998                bytes = zef_file.read(12)
999                h = map(zd, bytes[0:12])
1000                if zinfo.flag_bits & 0x8:
1001                    # compare against the file type from extended local headers
1002                    check_byte = (zinfo._raw_time >> 8) & 0xff
1003                else:
1004                    # compare against the CRC otherwise
1005                    check_byte = (zinfo.CRC >> 24) & 0xff
1006                if ord(h[11]) != check_byte:
1007                    raise RuntimeError("Bad password for file", name)
1008
1009            return ZipExtFile(zef_file, mode, zinfo, zd,
1010                    close_fileobj=should_close)
1011        except:
1012            if should_close:
1013                zef_file.close()
1014            raise
1015
1016    def extract(self, member, path=None, pwd=None):
1017        """Extract a member from the archive to the current working directory,
1018           using its full name. Its file information is extracted as accurately
1019           as possible. `member' may be a filename or a ZipInfo object. You can
1020           specify a different directory using `path'.
1021        """
1022        if not isinstance(member, ZipInfo):
1023            member = self.getinfo(member)
1024
1025        if path is None:
1026            path = os.getcwd()
1027
1028        return self._extract_member(member, path, pwd)
1029
1030    def extractall(self, path=None, members=None, pwd=None):
1031        """Extract all members from the archive to the current working
1032           directory. `path' specifies a different directory to extract to.
1033           `members' is optional and must be a subset of the list returned
1034           by namelist().
1035        """
1036        if members is None:
1037            members = self.namelist()
1038
1039        for zipinfo in members:
1040            self.extract(zipinfo, path, pwd)
1041
1042    def _extract_member(self, member, targetpath, pwd):
1043        """Extract the ZipInfo object 'member' to a physical
1044           file on the path targetpath.
1045        """
1046        # build the destination pathname, replacing
1047        # forward slashes to platform specific separators.
1048        arcname = member.filename.replace('/', os.path.sep)
1049
1050        if os.path.altsep:
1051            arcname = arcname.replace(os.path.altsep, os.path.sep)
1052        # interpret absolute pathname as relative, remove drive letter or
1053        # UNC path, redundant separators, "." and ".." components.
1054        arcname = os.path.splitdrive(arcname)[1]
1055        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1056                    if x not in ('', os.path.curdir, os.path.pardir))
1057        if os.path.sep == '\\':
1058            # filter illegal characters on Windows
1059            illegal = ':<>|"?*'
1060            if isinstance(arcname, unicode):
1061                table = {ord(c): ord('_') for c in illegal}
1062            else:
1063                table = string.maketrans(illegal, '_' * len(illegal))
1064            arcname = arcname.translate(table)
1065            # remove trailing dots
1066            arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
1067            arcname = os.path.sep.join(x for x in arcname if x)
1068
1069        targetpath = os.path.join(targetpath, arcname)
1070        targetpath = os.path.normpath(targetpath)
1071
1072        # Create all upper directories if necessary.
1073        upperdirs = os.path.dirname(targetpath)
1074        if upperdirs and not os.path.exists(upperdirs):
1075            os.makedirs(upperdirs)
1076
1077        if member.filename[-1] == '/':
1078            if not os.path.isdir(targetpath):
1079                os.mkdir(targetpath)
1080            return targetpath
1081
1082        with self.open(member, pwd=pwd) as source, \
1083             file(targetpath, "wb") as target:
1084            shutil.copyfileobj(source, target)
1085
1086        return targetpath
1087
1088    def _writecheck(self, zinfo):
1089        """Check for errors before writing a file to the archive."""
1090        if zinfo.filename in self.NameToInfo:
1091            import warnings
1092            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1093        if self.mode not in ("w", "a"):
1094            raise RuntimeError, 'write() requires mode "w" or "a"'
1095        if not self.fp:
1096            raise RuntimeError, \
1097                  "Attempt to write ZIP archive that was already closed"
1098        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1099            raise RuntimeError, \
1100                  "Compression requires the (missing) zlib module"
1101        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1102            raise RuntimeError, \
1103                  "That compression method is not supported"
1104        if not self._allowZip64:
1105            requires_zip64 = None
1106            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1107                requires_zip64 = "Files count"
1108            elif zinfo.file_size > ZIP64_LIMIT:
1109                requires_zip64 = "Filesize"
1110            elif zinfo.header_offset > ZIP64_LIMIT:
1111                requires_zip64 = "Zipfile size"
1112            if requires_zip64:
1113                raise LargeZipFile(requires_zip64 +
1114                                   " would require ZIP64 extensions")
1115
    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        'filename' is the path of an existing file or directory on disk.
        'arcname' defaults to 'filename'; its drive prefix and leading
        path separators are removed, and '/' is appended for a directory.
        'compress_type' overrides self.compression for this member; a
        directory is always stored uncompressed.

        Raises RuntimeError when the archive is already closed or when
        the sizes found while writing exceed what the header written up
        front can hold, plus anything self._writecheck() raises.
        """
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write to ZIP archive that was already closed")

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        # Strip leading separators so the stored name is relative.
        # NOTE(review): arcname[0] raises IndexError if stripping leaves
        # an empty string (e.g. arcname "/") -- confirm whether callers
        # can pass that.
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        # st[0] is the file mode; it goes in the upper 16 bits.
        zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
        if isdir:
            zinfo.compress_type = ZIP_STORED
        elif compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            # A directory entry has a header only, no data.
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader(False))
            return

        with open(filename, "rb") as fp:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            # Compressed size can be larger than uncompressed size, so
            # the ZIP64 decision leaves a 5% margin on the stat() size.
            zip64 = self._allowZip64 and \
                    zinfo.file_size * 1.05 > ZIP64_LIMIT
            # Placeholder header; it is rewritten below once the real
            # CRC and sizes are known.
            self.fp.write(zinfo.FileHeader(zip64))
            if zinfo.compress_type == ZIP_DEFLATED:
                # Negative wbits: presumably selects a raw deflate stream
                # without zlib header/trailer -- see the zlib docs.
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            file_size = 0
            # Stream the file in 8 KiB pieces, updating CRC and sizes.
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        if cmpr:
            # Emit whatever the compressor is still holding back.
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # The header was written without ZIP64 although ZIP64 is allowed:
        # the actual sizes must still fit the non-ZIP64 limit.
        if not zip64 and self._allowZip64:
            if file_size > ZIP64_LIMIT:
                raise RuntimeError('File size has increased during compressing')
            if compress_size > ZIP64_LIMIT:
                raise RuntimeError('Compressed size larger than uncompressed size')
        # Seek backwards and write file header (which will now include
        # correct CRC and file sizes)
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset, 0)
        self.fp.write(zinfo.FileHeader(zip64))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
1207
1208    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
1209        """Write a file into the archive.  The contents is the string
1210        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
1211        the name of the file in the archive."""
1212        if not isinstance(zinfo_or_arcname, ZipInfo):
1213            zinfo = ZipInfo(filename=zinfo_or_arcname,
1214                            date_time=time.localtime(time.time())[:6])
1215
1216            zinfo.compress_type = self.compression
1217            if zinfo.filename[-1] == '/':
1218                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1219                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1220            else:
1221                zinfo.external_attr = 0o600 << 16     # ?rw-------
1222        else:
1223            zinfo = zinfo_or_arcname
1224
1225        if not self.fp:
1226            raise RuntimeError(
1227                  "Attempt to write to ZIP archive that was already closed")
1228
1229        if compress_type is not None:
1230            zinfo.compress_type = compress_type
1231
1232        zinfo.file_size = len(bytes)            # Uncompressed size
1233        zinfo.header_offset = self.fp.tell()    # Start of header bytes
1234        self._writecheck(zinfo)
1235        self._didModify = True
1236        zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
1237        if zinfo.compress_type == ZIP_DEFLATED:
1238            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1239                 zlib.DEFLATED, -15)
1240            bytes = co.compress(bytes) + co.flush()
1241            zinfo.compress_size = len(bytes)    # Compressed size
1242        else:
1243            zinfo.compress_size = zinfo.file_size
1244        zip64 = zinfo.file_size > ZIP64_LIMIT or \
1245                zinfo.compress_size > ZIP64_LIMIT
1246        if zip64 and not self._allowZip64:
1247            raise LargeZipFile("Filesize would require ZIP64 extensions")
1248        self.fp.write(zinfo.FileHeader(zip64))
1249        self.fp.write(bytes)
1250        if zinfo.flag_bits & 0x08:
1251            # Write CRC and file sizes after the file data
1252            fmt = '<LQQ' if zip64 else '<LLL'
1253            self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
1254                  zinfo.file_size))
1255        self.fp.flush()
1256        self.filelist.append(zinfo)
1257        self.NameToInfo[zinfo.filename] = zinfo
1258
1259    def __del__(self):
1260        """Call the "close()" method in case the user forgot."""
1261        self.close()
1262
    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Does nothing when the archive is already closed.  When the
        archive was opened for writing and has been modified, the
        central directory is written, followed if necessary by the ZIP64
        end records, and finally the end-of-archive record and comment.

        Raises LargeZipFile when ZIP64 end records are needed but
        self._allowZip64 is false.  In every case self.fp is reset to
        None, and the underlying file is closed unless the file object
        was supplied by the caller.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                pos1 = self.fp.tell()               # start of central directory
                for zinfo in self.filelist:         # write central directory
                    # Pack date_time into the 16-bit date and time fields.
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # 'extra' collects the values that exceed ZIP64_LIMIT;
                    # their fixed-width fields get 0xffffffff instead.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffffL
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    if extra:
                        # Put a ZIP64 field (id 1, one 8-byte value per
                        # collected item) in front of the existing extra
                        # data.
                        extra_data = struct.pack(
                                '<HH' + 'Q'*len(extra),
                                1, 8*len(extra), *extra) + extra_data

                        # Entries with a ZIP64 field advertise at least
                        # version 45.
                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    except DeprecationWarning:
                        # Debugging aid: dump the values handed to
                        # struct.pack() before re-raising.
                        # NOTE(review): only reachable if warnings are
                        # raised as exceptions -- confirm still wanted.
                        print >>sys.stderr, (structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                        raise
                    # Fixed-size record, then its variable-length parts.
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()               # end of central directory
                # Write end-of-zip-archive record
                centDirCount = len(self.filelist)
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                # Decide whether any value is too large for the classic
                # end record.
                requires_zip64 = None
                if centDirCount > ZIP_FILECOUNT_LIMIT:
                    requires_zip64 = "Files count"
                elif centDirOffset > ZIP64_LIMIT:
                    requires_zip64 = "Central directory offset"
                elif centDirSize > ZIP64_LIMIT:
                    requires_zip64 = "Central directory size"
                if requires_zip64:
                    # Need to write the ZIP64 end-of-archive records
                    if not self._allowZip64:
                        raise LargeZipFile(requires_zip64 +
                                           " would require ZIP64 extensions")
                    # NOTE(review): 44/45/45 are presumably the record
                    # size and the "made by"/"needed" versions per the
                    # format spec -- confirm against structEndArchive64.
                    zip64endrec = struct.pack(
                            structEndArchive64, stringEndArchive64,
                            44, 45, 45, 0, 0, centDirCount, centDirCount,
                            centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    # The locator carries pos2, the position at which the
                    # ZIP64 end record above was written.
                    zip64locrec = struct.pack(
                            structEndArchive64Locator,
                            stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # Clamp to what the fixed-width fields of the classic
                    # end record can hold.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                    0, 0, centDirCount, centDirCount,
                                    centDirSize, centDirOffset, len(self._comment))
                self.fp.write(endrec)
                self.fp.write(self._comment)
                self.fp.flush()
        finally:
            # Mark the archive closed even if writing the records failed;
            # a caller-supplied file object is left open.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
1373
1374
1375class PyZipFile(ZipFile):
1376    """Class to create ZIP archives with Python library files and packages."""
1377
1378    def writepy(self, pathname, basename = ""):
1379        """Add all files from "pathname" to the ZIP archive.
1380
1381        If pathname is a package directory, search the directory and
1382        all package subdirectories recursively for all *.py and enter
1383        the modules into the archive.  If pathname is a plain
1384        directory, listdir *.py and enter all modules.  Else, pathname
1385        must be a Python *.py file and the module will be put into the
1386        archive.  Added modules are always module.pyo or module.pyc.
1387        This method will compile the module.py into module.pyc if
1388        necessary.
1389        """
1390        dir, name = os.path.split(pathname)
1391        if os.path.isdir(pathname):
1392            initname = os.path.join(pathname, "__init__.py")
1393            if os.path.isfile(initname):
1394                # This is a package directory, add it
1395                if basename:
1396                    basename = "%s/%s" % (basename, name)
1397                else:
1398                    basename = name
1399                if self.debug:
1400                    print "Adding package in", pathname, "as", basename
1401                fname, arcname = self._get_codename(initname[0:-3], basename)
1402                if self.debug:
1403                    print "Adding", arcname
1404                self.write(fname, arcname)
1405                dirlist = os.listdir(pathname)
1406                dirlist.remove("__init__.py")
1407                # Add all *.py files and package subdirectories
1408                for filename in dirlist:
1409                    path = os.path.join(pathname, filename)
1410                    root, ext = os.path.splitext(filename)
1411                    if os.path.isdir(path):
1412                        if os.path.isfile(os.path.join(path, "__init__.py")):
1413                            # This is a package directory, add it
1414                            self.writepy(path, basename)  # Recursive call
1415                    elif ext == ".py":
1416                        fname, arcname = self._get_codename(path[0:-3],
1417                                         basename)
1418                        if self.debug:
1419                            print "Adding", arcname
1420                        self.write(fname, arcname)
1421            else:
1422                # This is NOT a package directory, add its files at top level
1423                if self.debug:
1424                    print "Adding files from directory", pathname
1425                for filename in os.listdir(pathname):
1426                    path = os.path.join(pathname, filename)
1427                    root, ext = os.path.splitext(filename)
1428                    if ext == ".py":
1429                        fname, arcname = self._get_codename(path[0:-3],
1430                                         basename)
1431                        if self.debug:
1432                            print "Adding", arcname
1433                        self.write(fname, arcname)
1434        else:
1435            if pathname[-3:] != ".py":
1436                raise RuntimeError, \
1437                      'Files added with writepy() must end with ".py"'
1438            fname, arcname = self._get_codename(pathname[0:-3], basename)
1439            if self.debug:
1440                print "Adding file", arcname
1441            self.write(fname, arcname)
1442
1443    def _get_codename(self, pathname, basename):
1444        """Return (filename, archivename) for the path.
1445
1446        Given a module name path, return the correct file path and
1447        archive name, compiling if necessary.  For example, given
1448        /python/lib/string, return (/python/lib/string.pyc, string).
1449        """
1450        file_py  = pathname + ".py"
1451        file_pyc = pathname + ".pyc"
1452        file_pyo = pathname + ".pyo"
1453        if os.path.isfile(file_pyo) and \
1454                            os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
1455            fname = file_pyo    # Use .pyo file
1456        elif not os.path.isfile(file_pyc) or \
1457             os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
1458            import py_compile
1459            if self.debug:
1460                print "Compiling", file_py
1461            try:
1462                py_compile.compile(file_py, file_pyc, None, True)
1463            except py_compile.PyCompileError,err:
1464                print err.msg
1465            fname = file_pyc
1466        else:
1467            fname = file_pyc
1468        archivename = os.path.split(fname)[1]
1469        if basename:
1470            archivename = "%s/%s" % (basename, archivename)
1471        return (fname, archivename)
1472
1473
def main(args = None):
    """Command-line interface: list, test, extract or create a zip file.

    'args' is the argument list without the program name and defaults to
    sys.argv[1:].  An unknown option or a wrong number of arguments
    prints the usage text and exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_exit():
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    command = args[0]

    if command == '-l':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as archive:
            archive.printdir()

    elif command == '-t':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as archive:
            badfile = archive.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif command == '-e':
        if len(args) != 3:
            usage_exit()

        with ZipFile(args[1], 'r') as archive:
            archive.extractall(args[2])

    elif command == '-c':
        if len(args) < 3:
            usage_exit()

        def add_tree(archive, path, zippath):
            # Files are deflated; a directory gets its own entry (unless
            # it maps to the archive root) and is then walked
            # recursively.  Anything else is ignored.
            if os.path.isfile(path):
                archive.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    archive.write(path, zippath)
                for child in os.listdir(path):
                    add_tree(archive,
                             os.path.join(path, child),
                             os.path.join(zippath, child))

        with ZipFile(args[1], 'w', allowZip64=True) as archive:
            for path in args[2:]:
                # Name each source after its last path component; a
                # trailing separator leaves that empty, so fall back to
                # the directory part.
                zippath = os.path.basename(path)
                if not zippath:
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_tree(archive, path, zippath)
1539
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()
1542